Linux Security Modules development
 help / color / mirror / Atom feed
* [PATCH][V2] security: keys: trusted: Fix memory leaks on allocated blob
From: Colin King @ 2021-07-26 11:44 UTC (permalink / raw)
  To: James Bottomley, Jarkko Sakkinen, Mimi Zohar, David Howells,
	James Morris, Serge E . Hallyn, linux-integrity, keyrings,
	linux-security-module
  Cc: kernel-janitors, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

There are several error return paths that don't kfree the allocated
blob, leading to memory leaks. Ensure blob is initialized to null as
some of the error return paths in function tpm2_key_decode do not
change blob. Add an error return path to kfree blob and use this on
the current leaky returns.

Addresses-Coverity: ("Resource leak")
Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs")
Acked-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Colin Ian King <colin.king@canonical.com>

---

V2: Add a couple more leaky return path fixes as noted by Sumit Garg
    Add the if (blob != payload->blob) check on the kfree as
    noted by Dan Carpenter

---
 security/keys/trusted-keys/trusted_tpm2.c | 39 ++++++++++++++++-------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
index 0165da386289..a2cfdfdf17fa 100644
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -366,7 +366,7 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
 	unsigned int private_len;
 	unsigned int public_len;
 	unsigned int blob_len;
-	u8 *blob, *pub;
+	u8 *blob = NULL, *pub;
 	int rc;
 	u32 attrs;
 
@@ -378,22 +378,30 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
 	}
 
 	/* new format carries keyhandle but old format doesn't */
-	if (!options->keyhandle)
-		return -EINVAL;
+	if (!options->keyhandle) {
+		rc = -EINVAL;
+		goto err;
+	}
 
 	/* must be big enough for at least the two be16 size counts */
-	if (payload->blob_len < 4)
-		return -EINVAL;
+	if (payload->blob_len < 4) {
+		rc = -EINVAL;
+		goto err;
+	}
 
 	private_len = get_unaligned_be16(blob);
 
 	/* must be big enough for following public_len */
-	if (private_len + 2 + 2 > (payload->blob_len))
-		return -E2BIG;
+	if (private_len + 2 + 2 > (payload->blob_len)) {
+		rc = -E2BIG;
+		goto err;
+	}
 
 	public_len = get_unaligned_be16(blob + 2 + private_len);
-	if (private_len + 2 + public_len + 2 > payload->blob_len)
-		return -E2BIG;
+	if (private_len + 2 + public_len + 2 > payload->blob_len) {
+		rc = -E2BIG;
+		goto err;
+	}
 
 	pub = blob + 2 + private_len + 2;
 	/* key attributes are always at offset 4 */
@@ -406,12 +414,14 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
 		payload->migratable = 1;
 
 	blob_len = private_len + public_len + 4;
-	if (blob_len > payload->blob_len)
-		return -E2BIG;
+	if (blob_len > payload->blob_len) {
+		rc = -E2BIG;
+		goto err;
+	}
 
 	rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_LOAD);
 	if (rc)
-		return rc;
+		goto err;
 
 	tpm_buf_append_u32(&buf, options->keyhandle);
 	tpm2_buf_append_auth(&buf, TPM2_RS_PW,
@@ -441,6 +451,11 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
 		rc = -EPERM;
 
 	return rc;
+
+err:
+	if (blob != payload->blob)
+		kfree(blob);
+	return rc;
 }
 
 /**
-- 
2.31.1


^ permalink raw reply related

* Re: [PATCH 1/2] net: cipso: fix warnings in netlbl_cipsov4_add_std
From: Pavel Skripkin @ 2021-07-26 11:11 UTC (permalink / raw)
  To: paul, davem, yoshfuji, dsahern, kuba
  Cc: netdev, linux-security-module, linux-kernel,
	syzbot+cdd51ee2e6b0b2e18c0d
In-Reply-To: <53de0ccd1aa3fffa6bce2a2ae7a5ca07e0af6d3a.1625900431.git.paskripkin@gmail.com>

On Sat, 10 Jul 2021 10:03:13 +0300
Pavel Skripkin <paskripkin@gmail.com> wrote:

> Syzbot reported warning in netlbl_cipsov4_add(). The
> problem was in too big doi_def->map.std->lvl.local_size
> passed to kcalloc(). Since this value comes from userpace there is
> no need to warn if value is not correct.
> 
> The same problem may occur with other kcalloc() calls in
> this function, so, I've added __GFP_NOWARN flag to all
> kcalloc() calls there.
> 
> Reported-and-tested-by:
> syzbot+cdd51ee2e6b0b2e18c0d@syzkaller.appspotmail.com Fixes:
> 96cb8e3313c7 ("[NetLabel]: CIPSOv4 and Unlabeled packet integration")
> Signed-off-by: Pavel Skripkin <paskripkin@gmail.com> ---
>  net/netlabel/netlabel_cipso_v4.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/net/netlabel/netlabel_cipso_v4.c
> b/net/netlabel/netlabel_cipso_v4.c index 4f50a64315cf..50f40943c815
> 100644 --- a/net/netlabel/netlabel_cipso_v4.c
> +++ b/net/netlabel/netlabel_cipso_v4.c
> @@ -187,14 +187,14 @@ static int netlbl_cipsov4_add_std(struct
> genl_info *info, }
>  	doi_def->map.std->lvl.local =
> kcalloc(doi_def->map.std->lvl.local_size, sizeof(u32),
> -					      GFP_KERNEL);
> +					      GFP_KERNEL |
> __GFP_NOWARN); if (doi_def->map.std->lvl.local == NULL) {
>  		ret_val = -ENOMEM;
>  		goto add_std_failure;
>  	}
>  	doi_def->map.std->lvl.cipso =
> kcalloc(doi_def->map.std->lvl.cipso_size, sizeof(u32),
> -					      GFP_KERNEL);
> +					      GFP_KERNEL |
> __GFP_NOWARN); if (doi_def->map.std->lvl.cipso == NULL) {
>  		ret_val = -ENOMEM;
>  		goto add_std_failure;
> @@ -263,7 +263,7 @@ static int netlbl_cipsov4_add_std(struct
> genl_info *info, doi_def->map.std->cat.local = kcalloc(
>  					      doi_def->map.std->cat.local_size,
>  					      sizeof(u32),
> -					      GFP_KERNEL);
> +					      GFP_KERNEL |
> __GFP_NOWARN); if (doi_def->map.std->cat.local == NULL) {
>  			ret_val = -ENOMEM;
>  			goto add_std_failure;
> @@ -271,7 +271,7 @@ static int netlbl_cipsov4_add_std(struct
> genl_info *info, doi_def->map.std->cat.cipso = kcalloc(
>  					      doi_def->map.std->cat.cipso_size,
>  					      sizeof(u32),
> -					      GFP_KERNEL);
> +					      GFP_KERNEL |
> __GFP_NOWARN); if (doi_def->map.std->cat.cipso == NULL) {
>  			ret_val = -ENOMEM;
>  			goto add_std_failure;


Hi, net developers!

Is this patch merged somewhere? I've checked net tree and Paul Moore
tree on https://git.kernel.org/, but didn't find it. Did I miss it
somewhere? If not, it's just a gentle ping :)

Btw: maybe I should send it as separete patch, since 2/2 in this
series is invalid as already in-tree?


 
With regards,
Pavel Skripkin


^ permalink raw reply

* Re: [PATCH] security: keys: trusted: Fix memory leaks on allocated blob
From: Colin Ian King @ 2021-07-26 10:56 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: James Bottomley, Jarkko Sakkinen, Mimi Zohar, David Howells,
	James Morris, Serge E . Hallyn, linux-integrity, keyrings,
	linux-security-module, kernel-janitors, linux-kernel
In-Reply-To: <20210726085051.GG1931@kadam>

On 26/07/2021 09:50, Dan Carpenter wrote:
> On Fri, Jul 23, 2021 at 06:21:21PM +0100, Colin King wrote:
>> @@ -441,6 +449,10 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
>>  		rc = -EPERM;
>>  
>>  	return rc;
>> +
>> +err:
>> +	kfree(blob);
> 
> This needs to be:
> 
> 	if (blob != payload->blob)
> 		kfree(blob);

Good spot! Thanks Dan.

> 
> Otherwise it leads to a use after free.
> 
>> +	return rc;
>>  }
> 
> How this is allocated is pretty scary looking!

it is kinda mind bending.

Colin

> 
> security/keys/trusted-keys/trusted_tpm2.c
>     96  static int tpm2_key_decode(struct trusted_key_payload *payload,
>     97                             struct trusted_key_options *options,
>     98                             u8 **buf)
>     99  {
>    100          int ret;
>    101          struct tpm2_key_context ctx;
>    102          u8 *blob;
>    103  
>    104          memset(&ctx, 0, sizeof(ctx));
>    105  
>    106          ret = asn1_ber_decoder(&tpm2key_decoder, &ctx, payload->blob,
>    107                                 payload->blob_len);
>    108          if (ret < 0)
>    109                  return ret;
> 
> Old form?
> 
>    110  
>    111          if (ctx.priv_len + ctx.pub_len > MAX_BLOB_SIZE)
>    112                  return -EINVAL;
> 
> It's really scary to me that if the lengths are too large for kmalloc()
> then we just use "payload->blob".
> 
>    113  
>    114          blob = kmalloc(ctx.priv_len + ctx.pub_len + 4, GFP_KERNEL);
> 
> blob is allocated here.
> 
>    115          if (!blob)
>    116                  return -ENOMEM;
>    117  
>    118          *buf = blob;
>    119          options->keyhandle = ctx.parent;
>    120  
>    121          memcpy(blob, ctx.priv, ctx.priv_len);
>    122          blob += ctx.priv_len;
>    123  
>    124          memcpy(blob, ctx.pub, ctx.pub_len);
>    125  
>    126          return 0;
>    127  }
> 
> [ snip ]
> 
>    371          u32 attrs;
>    372  
>    373          rc = tpm2_key_decode(payload, options, &blob);
>    374          if (rc) {
>    375                  /* old form */
>    376                  blob = payload->blob;
>                         ^^^^^^^^^^^^^^^^^^^^
> 
>    377                  payload->old_format = 1;
>    378          }
>    379  
> 
> regards,
> dan carpenter
> 


^ permalink raw reply

* Re: [PATCH] security: keys: trusted: Fix memory leaks on allocated blob
From: Dan Carpenter @ 2021-07-26  8:50 UTC (permalink / raw)
  To: Colin King
  Cc: James Bottomley, Jarkko Sakkinen, Mimi Zohar, David Howells,
	James Morris, Serge E . Hallyn, linux-integrity, keyrings,
	linux-security-module, kernel-janitors, linux-kernel
In-Reply-To: <20210723172121.156687-1-colin.king@canonical.com>

On Fri, Jul 23, 2021 at 06:21:21PM +0100, Colin King wrote:
> @@ -441,6 +449,10 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
>  		rc = -EPERM;
>  
>  	return rc;
> +
> +err:
> +	kfree(blob);

This needs to be:

	if (blob != payload->blob)
		kfree(blob);

Otherwise it leads to a use after free.

> +	return rc;
>  }

How this is allocated is pretty scary looking!

security/keys/trusted-keys/trusted_tpm2.c
    96  static int tpm2_key_decode(struct trusted_key_payload *payload,
    97                             struct trusted_key_options *options,
    98                             u8 **buf)
    99  {
   100          int ret;
   101          struct tpm2_key_context ctx;
   102          u8 *blob;
   103  
   104          memset(&ctx, 0, sizeof(ctx));
   105  
   106          ret = asn1_ber_decoder(&tpm2key_decoder, &ctx, payload->blob,
   107                                 payload->blob_len);
   108          if (ret < 0)
   109                  return ret;

Old form?

   110  
   111          if (ctx.priv_len + ctx.pub_len > MAX_BLOB_SIZE)
   112                  return -EINVAL;

It's really scary to me that if the lengths are too large for kmalloc()
then we just use "payload->blob".

   113  
   114          blob = kmalloc(ctx.priv_len + ctx.pub_len + 4, GFP_KERNEL);

blob is allocated here.

   115          if (!blob)
   116                  return -ENOMEM;
   117  
   118          *buf = blob;
   119          options->keyhandle = ctx.parent;
   120  
   121          memcpy(blob, ctx.priv, ctx.priv_len);
   122          blob += ctx.priv_len;
   123  
   124          memcpy(blob, ctx.pub, ctx.pub_len);
   125  
   126          return 0;
   127  }

[ snip ]

   371          u32 attrs;
   372  
   373          rc = tpm2_key_decode(payload, options, &blob);
   374          if (rc) {
   375                  /* old form */
   376                  blob = payload->blob;
                        ^^^^^^^^^^^^^^^^^^^^

   377                  payload->old_format = 1;
   378          }
   379  

regards,
dan carpenter


^ permalink raw reply

* Re: [PATCH] security: keys: trusted: Fix memory leaks on allocated blob
From: Sumit Garg @ 2021-07-26  5:33 UTC (permalink / raw)
  To: Colin King
  Cc: James Bottomley, Jarkko Sakkinen, Mimi Zohar, David Howells,
	James Morris, Serge E . Hallyn, linux-integrity,
	open list:ASYMMETRIC KEYS, open list:SECURITY SUBSYSTEM,
	kernel-janitors, Linux Kernel Mailing List
In-Reply-To: <20210723172121.156687-1-colin.king@canonical.com>

Hi Colin,

On Fri, 23 Jul 2021 at 22:51, Colin King <colin.king@canonical.com> wrote:
>
> From: Colin Ian King <colin.king@canonical.com>
>
> There are several error return paths that don't kfree the allocated
> blob, leading to memory leaks. Ensure blob is initialized to null as
> some of the error return paths in function tpm2_key_decode do not
> change blob. Add an error return path to kfree blob and use this on
> the current leaky returns.
>

It looks like there are still leaky return paths left such as
tpm_buf_init() failure etc. which needs to be fixed as well.

With that addressed, feel free to add:

Acked-by: Sumit Garg <sumit.garg@linaro.org>

-Sumit

> Addresses-Coverity: ("Resource leak")
> Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs")
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>  security/keys/trusted-keys/trusted_tpm2.c | 30 ++++++++++++++++-------
>  1 file changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
> index 0165da386289..930c67f98611 100644
> --- a/security/keys/trusted-keys/trusted_tpm2.c
> +++ b/security/keys/trusted-keys/trusted_tpm2.c
> @@ -366,7 +366,7 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
>         unsigned int private_len;
>         unsigned int public_len;
>         unsigned int blob_len;
> -       u8 *blob, *pub;
> +       u8 *blob = NULL, *pub;
>         int rc;
>         u32 attrs;
>
> @@ -378,22 +378,30 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
>         }
>
>         /* new format carries keyhandle but old format doesn't */
> -       if (!options->keyhandle)
> -               return -EINVAL;
> +       if (!options->keyhandle) {
> +               rc = -EINVAL;
> +               goto err;
> +       }
>
>         /* must be big enough for at least the two be16 size counts */
> -       if (payload->blob_len < 4)
> -               return -EINVAL;
> +       if (payload->blob_len < 4) {
> +               rc = -EINVAL;
> +               goto err;
> +       }
>
>         private_len = get_unaligned_be16(blob);
>
>         /* must be big enough for following public_len */
> -       if (private_len + 2 + 2 > (payload->blob_len))
> -               return -E2BIG;
> +       if (private_len + 2 + 2 > (payload->blob_len)) {
> +               rc = -E2BIG;
> +               goto err;
> +       }
>
>         public_len = get_unaligned_be16(blob + 2 + private_len);
> -       if (private_len + 2 + public_len + 2 > payload->blob_len)
> -               return -E2BIG;
> +       if (private_len + 2 + public_len + 2 > payload->blob_len) {
> +               rc = -E2BIG;
> +               goto err;
> +       }
>
>         pub = blob + 2 + private_len + 2;
>         /* key attributes are always at offset 4 */
> @@ -441,6 +449,10 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
>                 rc = -EPERM;
>
>         return rc;
> +
> +err:
> +       kfree(blob);
> +       return rc;
>  }
>
>  /**
> --
> 2.31.1
>

^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Florian Westphal @ 2021-07-25 22:52 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: Florian Westphal, Paul Moore, Paolo Abeni, netdev,
	David S. Miller, Jakub Kicinski, Eric Dumazet,
	linux-security-module, selinux
In-Reply-To: <75982e4e-f6b1-ade2-311f-1532254e2764@schaufler-ca.com>

Casey Schaufler <casey@schaufler-ca.com> wrote:
> RedHat and android use SELinux and will want this. Ubuntu doesn't
> yet, but netfilter in in the AppArmor task list. Tizen definitely
> uses it with Smack. The notion that security modules are only used
> in fringe cases is antiquated. 

I was not talking about LSM in general, I was referring to the
extended info that Paul mentioned.

If thats indeed going to be used on every distro then skb extensions
are not suitable for this, it would result in extr akmalloc for every
skb.

> > It certainly makes more sense to me than doing lookups
> > in a hashtable based on a ID
> 
> Agreed. The data burden required to support a hash scheme
> for the security module stacking case is staggering.

It depends on the type of data (and its lifetime).

I suspect you have something that is more like skb->dev/dst,
i.e. reference to object that persists after the skb is free'd.

^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Casey Schaufler @ 2021-07-25 21:53 UTC (permalink / raw)
  To: Florian Westphal, Paul Moore
  Cc: Paolo Abeni, netdev, David S. Miller, Jakub Kicinski,
	Eric Dumazet, linux-security-module, selinux, Casey Schaufler
In-Reply-To: <20210725162528.GK9904@breakpoint.cc>

On 7/25/2021 9:25 AM, Florian Westphal wrote:
> Paul Moore <paul@paul-moore.com> wrote:
>>> There is the skb extension infra, does that work for you?
>> I was hopeful that when the skb_ext capability was introduced we might
>> be able to use it for the LSM(s), but when I asked netdev if they
>> would be willing to accept patches to leverage the skb_ext
>> infrastructure I was told "no".
> I found
>
> https://lore.kernel.org/netdev/CAHC9VhSz1_KA1tCJtNjwK26BOkGhKGbPT7v1O82mWPduvWwd4A@mail.gmail.com/#r
>
> and from what I gather from your comments and that of Casey
> I think skb extensions is the correct thing for this (i.e., needs
> netlabel/secid config/enablement so typically won't be active on
> a distro kernel by default).

RedHat and android use SELinux and will want this. Ubuntu doesn't
yet, but netfilter in in the AppArmor task list. Tizen definitely
uses it with Smack. The notion that security modules are only used
in fringe cases is antiquated. 

> It certainly makes more sense to me than doing lookups
> in a hashtable based on a ID

Agreed. The data burden required to support a hash scheme
for the security module stacking case is staggering.

>  (I tried to do that to get rid of skb->nf_bridge
> pointer years ago and it I could not figure out how to invalidate an entry
> without adding a new skb destructor callback).


^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Florian Westphal @ 2021-07-25 16:25 UTC (permalink / raw)
  To: Paul Moore
  Cc: Florian Westphal, Paolo Abeni, Casey Schaufler, netdev,
	David S. Miller, Jakub Kicinski, Eric Dumazet,
	linux-security-module, selinux
In-Reply-To: <CAHC9VhSsNWSus4xr7erxQs_4GyfJYb7_6a8juisWue6Xc4fVkQ@mail.gmail.com>

Paul Moore <paul@paul-moore.com> wrote:
> > There is the skb extension infra, does that work for you?
> 
> I was hopeful that when the skb_ext capability was introduced we might
> be able to use it for the LSM(s), but when I asked netdev if they
> would be willing to accept patches to leverage the skb_ext
> infrastructure I was told "no".

I found

https://lore.kernel.org/netdev/CAHC9VhSz1_KA1tCJtNjwK26BOkGhKGbPT7v1O82mWPduvWwd4A@mail.gmail.com/#r

and from what I gather from your comments and that of Casey
I think skb extensions is the correct thing for this (i.e., needs
netlabel/secid config/enablement so typically won't be active on
a distro kernel by default).

It certainly makes more sense to me than doing lookups
in a hashtable based on a ID (I tried to do that to get rid of skb->nf_bridge
pointer years ago and it I could not figure out how to invalidate an entry
without adding a new skb destructor callback).

^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Paul Moore @ 2021-07-25 14:57 UTC (permalink / raw)
  To: Florian Westphal
  Cc: Paolo Abeni, Casey Schaufler, netdev, David S. Miller,
	Jakub Kicinski, Eric Dumazet, linux-security-module, selinux
In-Reply-To: <20210724185141.GJ9904@breakpoint.cc>

On Sat, Jul 24, 2021 at 2:51 PM Florian Westphal <fw@strlen.de> wrote:
> Paul Moore <paul@paul-moore.com> wrote:
>  > Tow main drivers on my side:
> > > - there are use cases/deployments that do not use them.
> > > - moving them around was doable in term of required changes.
> > >
> > > There are no "slow-path" implications on my side. For example, vlan_*
> > > fields are very critical performance wise, if the traffic is tagged.
> > > But surely there are busy servers not using tagget traffic which will
> > > enjoy the reduced cachelines footprint, and this changeset will not
> > > impact negatively the first case.
> > >
> > > WRT to the vlan example, secmark and nfct require an extra conditional
> > > to fetch the data. My understanding is that such additional conditional
> > > is not measurable performance-wise when benchmarking the security
> > > modules (or conntrack) because they have to do much more intersting
> > > things after fetching a few bytes from an already hot cacheline.
> > >
> > > Not sure if the above somehow clarify my statements.
> > >
> > > As for expanding secmark to 64 bits, I guess that could be an
> > > interesting follow-up discussion :)
> >
> > The intersection between netdev and the LSM has a long and somewhat
> > tortured past with each party making sacrifices along the way to get
> > where we are at today.  It is far from perfect, at least from a LSM
> > perspective, but it is what we've got and since performance is usually
> > used as a club to beat back any changes proposed by the LSM side, I
> > would like to object to these changes that negatively impact the LSM
> > performance without some concession in return.  It has been a while
> > since Casey and I have spoken about this, but I think the prefered
> > option would be to exchange the current __u32 "sk_buff.secmark" field
> > with a void* "sk_buff.security" field, like so many other kernel level
> > objects.  Previous objections have eventually boiled down to the
> > additional space in the sk_buff for the extra bits (there is some
> > additional editorializing that could be done here, but I'll refrain),
> > but based on the comments thus far in this thread it sounds like
> > perhaps we can now make a deal here: move the LSM field down to a
> > "colder" cacheline in exchange for converting the LSM field to a
> > proper pointer.
> >
> > Thoughts?
>
> Is there a summary disucssion somewhere wrt. what exactly LSMs need?

My network access is limited for the next week so I don't have the
ability to dig through the list archives, but if you look through the
netdev/LSM/lists over the past decade (maybe go back ~15 years?) you
will see multiple instances where we/I've brought up different
solutions with the netdev folks only to hit a brick wall.  The LSM ask
for sk_buff is really the same as any other kernel object that we want
to control with LSM access controls, e.g. inodes; we basically want a
void* blob with the necessary hooks so that the opaque blob can be
managed through the skb's lifetime.

> There is the skb extension infra, does that work for you?

I was hopeful that when the skb_ext capability was introduced we might
be able to use it for the LSM(s), but when I asked netdev if they
would be willing to accept patches to leverage the skb_ext
infrastructure I was told "no".

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Florian Westphal @ 2021-07-24 18:51 UTC (permalink / raw)
  To: Paul Moore
  Cc: Paolo Abeni, Casey Schaufler, netdev, David S. Miller,
	Jakub Kicinski, Florian Westphal, Eric Dumazet,
	linux-security-module, selinux
In-Reply-To: <CAHC9VhT0uuBdmmT1HhMjjQswiJxWuy3cZdRQZ4Zzf-H8n5arLQ@mail.gmail.com>

Paul Moore <paul@paul-moore.com> wrote:
 > Tow main drivers on my side:
> > - there are use cases/deployments that do not use them.
> > - moving them around was doable in term of required changes.
> >
> > There are no "slow-path" implications on my side. For example, vlan_*
> > fields are very critical performance wise, if the traffic is tagged.
> > But surely there are busy servers not using tagget traffic which will
> > enjoy the reduced cachelines footprint, and this changeset will not
> > impact negatively the first case.
> >
> > WRT to the vlan example, secmark and nfct require an extra conditional
> > to fetch the data. My understanding is that such additional conditional
> > is not measurable performance-wise when benchmarking the security
> > modules (or conntrack) because they have to do much more intersting
> > things after fetching a few bytes from an already hot cacheline.
> >
> > Not sure if the above somehow clarify my statements.
> >
> > As for expanding secmark to 64 bits, I guess that could be an
> > interesting follow-up discussion :)
> 
> The intersection between netdev and the LSM has a long and somewhat
> tortured past with each party making sacrifices along the way to get
> where we are at today.  It is far from perfect, at least from a LSM
> perspective, but it is what we've got and since performance is usually
> used as a club to beat back any changes proposed by the LSM side, I
> would like to object to these changes that negatively impact the LSM
> performance without some concession in return.  It has been a while
> since Casey and I have spoken about this, but I think the prefered
> option would be to exchange the current __u32 "sk_buff.secmark" field
> with a void* "sk_buff.security" field, like so many other kernel level
> objects.  Previous objections have eventually boiled down to the
> additional space in the sk_buff for the extra bits (there is some
> additional editorializing that could be done here, but I'll refrain),
> but based on the comments thus far in this thread it sounds like
> perhaps we can now make a deal here: move the LSM field down to a
> "colder" cacheline in exchange for converting the LSM field to a
> proper pointer.
> 
> Thoughts?

Is there a summary disucssion somewhere wrt. what exactly LSMs need?

There is the skb extension infra, does that work for you?

^ permalink raw reply

* Re: [PATCH v28 12/25] LSM: Use lsmblob in security_cred_getsecid
From: kernel test robot @ 2021-07-23 23:56 UTC (permalink / raw)
  To: Casey Schaufler, casey.schaufler, jmorris, linux-security-module,
	selinux
  Cc: kbuild-all, casey, linux-audit, keescook, john.johansen,
	penguin-kernel, paul
In-Reply-To: <20210722004758.12371-13-casey@schaufler-ca.com>

[-- Attachment #1: Type: text/plain, Size: 2691 bytes --]

Hi Casey,

I love your patch! Perhaps something to improve:

[auto build test WARNING on pcmoore-audit/next]
[also build test WARNING on nf/master linus/master v5.14-rc2 next-20210723]
[cannot apply to nf-next/master security/next-testing]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Casey-Schaufler/LSM-Infrastructure-management-of-the-sock-security/20210722-094735
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git next
config: i386-randconfig-s002-20210722 (attached as .config)
compiler: gcc-10 (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/7a5105a372e90078ee746d774c21979cfb9fcaf6
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Casey-Schaufler/LSM-Infrastructure-management-of-the-sock-security/20210722-094735
        git checkout 7a5105a372e90078ee746d774c21979cfb9fcaf6
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> kernel/audit.c:128:25: sparse: sparse: symbol 'audit_sig_lsm' was not declared. Should it be static?
   kernel/audit.c:2181:9: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected struct spinlock [usertype] *lock @@     got struct spinlock [noderef] __rcu * @@
   kernel/audit.c:2181:9: sparse:     expected struct spinlock [usertype] *lock
   kernel/audit.c:2181:9: sparse:     got struct spinlock [noderef] __rcu *
   kernel/audit.c:2184:40: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected struct spinlock [usertype] *lock @@     got struct spinlock [noderef] __rcu * @@
   kernel/audit.c:2184:40: sparse:     expected struct spinlock [usertype] *lock
   kernel/audit.c:2184:40: sparse:     got struct spinlock [noderef] __rcu *

vim +/audit_sig_lsm +128 kernel/audit.c

   124	
   125	/* The identity of the user shutting down the audit system. */
   126	static kuid_t		audit_sig_uid = INVALID_UID;
   127	static pid_t		audit_sig_pid = -1;
 > 128	struct lsmblob		audit_sig_lsm;
   129	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36851 bytes --]

^ permalink raw reply

* [PATCH] security: keys: trusted: Fix memory leaks on allocated blob
From: Colin King @ 2021-07-23 17:21 UTC (permalink / raw)
  To: James Bottomley, Jarkko Sakkinen, Mimi Zohar, David Howells,
	James Morris, Serge E . Hallyn, linux-integrity, keyrings,
	linux-security-module
  Cc: kernel-janitors, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

There are several error return paths that don't kfree the allocated
blob, leading to memory leaks. Ensure blob is initialized to null as
some of the error return paths in function tpm2_key_decode do not
change blob. Add an error return path to kfree blob and use this on
the current leaky returns.

Addresses-Coverity: ("Resource leak")
Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 security/keys/trusted-keys/trusted_tpm2.c | 30 ++++++++++++++++-------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
index 0165da386289..930c67f98611 100644
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -366,7 +366,7 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
 	unsigned int private_len;
 	unsigned int public_len;
 	unsigned int blob_len;
-	u8 *blob, *pub;
+	u8 *blob = NULL, *pub;
 	int rc;
 	u32 attrs;
 
@@ -378,22 +378,30 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
 	}
 
 	/* new format carries keyhandle but old format doesn't */
-	if (!options->keyhandle)
-		return -EINVAL;
+	if (!options->keyhandle) {
+		rc = -EINVAL;
+		goto err;
+	}
 
 	/* must be big enough for at least the two be16 size counts */
-	if (payload->blob_len < 4)
-		return -EINVAL;
+	if (payload->blob_len < 4) {
+		rc = -EINVAL;
+		goto err;
+	}
 
 	private_len = get_unaligned_be16(blob);
 
 	/* must be big enough for following public_len */
-	if (private_len + 2 + 2 > (payload->blob_len))
-		return -E2BIG;
+	if (private_len + 2 + 2 > (payload->blob_len)) {
+		rc = -E2BIG;
+		goto err;
+	}
 
 	public_len = get_unaligned_be16(blob + 2 + private_len);
-	if (private_len + 2 + public_len + 2 > payload->blob_len)
-		return -E2BIG;
+	if (private_len + 2 + public_len + 2 > payload->blob_len) {
+		rc = -E2BIG;
+		goto err;
+	}
 
 	pub = blob + 2 + private_len + 2;
 	/* key attributes are always at offset 4 */
@@ -441,6 +449,10 @@ static int tpm2_load_cmd(struct tpm_chip *chip,
 		rc = -EPERM;
 
 	return rc;
+
+err:
+	kfree(blob);
+	return rc;
 }
 
 /**
-- 
2.31.1


^ permalink raw reply related

* RE: [PATCH v4 1/3] ima: Introduce ima_get_current_hash_algo()
From: Roberto Sassu @ 2021-07-23 12:51 UTC (permalink / raw)
  To: Mimi Zohar, paul@paul-moore.com
  Cc: stephen.smalley.work@gmail.com, prsriva02@gmail.com,
	tusharsu@linux.microsoft.com, nramas@linux.microsoft.com,
	linux-integrity@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org, selinux@vger.kernel.org
In-Reply-To: <676e9af54eb252c26410788e6105c149c57b2c15.camel@linux.ibm.com>

> From: Mimi Zohar [mailto:zohar@linux.ibm.com]
> Sent: Friday, July 23, 2021 2:49 PM
> On Fri, 2021-07-23 at 10:53 +0200, Roberto Sassu wrote:
> > Buffer measurements, unlike file measurements, are not accessible after the
> > measurement is done, as buffers are not suitable for use with the
> > integrity_iint_cache structure (there is no index, for files it is the
> > inode number). In the subsequent patches, the measurement (digest) will be
> > returned directly by the functions that perform the buffer measurement,
> > ima_measure_critical_data() and process_buffer_measurement().
> >
> > A caller of those functions also needs to know the algorithm used to
> > calculate the digest. Instead of adding the algorithm as a new parameter to
> > the functions, this patch provides it separately with the new function
> > ima_get_current_hash_algo().
> >
> > Since the hash algorithm does not change after the IMA setup phase, there
> > is no risk of races (obtaining a digest calculated with a different
> > algorithm than the one returned).
> 
> Perfect explaination for annotating ima_hash_algo like:
> 
> int __ro_after_init ima_hash_algo = HASH_ALGO_SHA1;
> 
> Assuming you don't object, I'll include this change in this patch.

Sure, thanks.

Roberto

HUAWEI TECHNOLOGIES Duesseldorf GmbH, HRB 56063
Managing Director: Li Peng, Li Jian, Shi Yanli

> thanks,
> 
> Mimi
> 
> >
> > Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
> > Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>


^ permalink raw reply

* Re: [PATCH v4 1/3] ima: Introduce ima_get_current_hash_algo()
From: Mimi Zohar @ 2021-07-23 12:49 UTC (permalink / raw)
  To: Roberto Sassu, paul
  Cc: stephen.smalley.work, prsriva02, tusharsu, nramas,
	linux-integrity, linux-security-module, linux-kernel, selinux
In-Reply-To: <20210723085304.1760138-2-roberto.sassu@huawei.com>

On Fri, 2021-07-23 at 10:53 +0200, Roberto Sassu wrote:
> Buffer measurements, unlike file measurements, are not accessible after the
> measurement is done, as buffers are not suitable for use with the
> integrity_iint_cache structure (there is no index, for files it is the
> inode number). In the subsequent patches, the measurement (digest) will be
> returned directly by the functions that perform the buffer measurement,
> ima_measure_critical_data() and process_buffer_measurement().
> 
> A caller of those functions also needs to know the algorithm used to
> calculate the digest. Instead of adding the algorithm as a new parameter to
> the functions, this patch provides it separately with the new function
> ima_get_current_hash_algo().
> 
> Since the hash algorithm does not change after the IMA setup phase, there
> is no risk of races (obtaining a digest calculated with a different
> algorithm than the one returned).

Perfect explaination for annotating ima_hash_algo like:

int __ro_after_init ima_hash_algo = HASH_ALGO_SHA1;

Assuming you don't object, I'll include this change in this patch.

thanks,

Mimi

> 
> Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
> Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>


^ permalink raw reply

* [PATCH v4 2/3] ima: Return int in the functions to measure a buffer
From: Roberto Sassu @ 2021-07-23  8:53 UTC (permalink / raw)
  To: zohar, paul
  Cc: stephen.smalley.work, prsriva02, tusharsu, nramas,
	linux-integrity, linux-security-module, linux-kernel, selinux,
	Roberto Sassu
In-Reply-To: <20210723085304.1760138-1-roberto.sassu@huawei.com>

ima_measure_critical_data() and process_buffer_measurement() currently
don't return a result as, unlike appraisal-related functions, the result is
not used by callers to deny an operation. Measurement-related functions
instead rely on the audit subsystem to notify the system administrator when
an error occurs.

However, ima_measure_critical_data() and process_buffer_measurement() are a
special case, as these are the only functions that can return a buffer
measurement (for files, there is ima_file_hash()). In a subsequent patch,
they will be modified to return the calculated digest.

In preparation to return the result of the digest calculation, this patch
modifies the return type from void to int, and returns 0 if the buffer has
been successfully measured, a negative value otherwise.

Given that the result of the measurement is still not necessary, this patch
does not modify the behavior of existing callers by processing the returned
value. For those, the return value is ignored.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Acked-by: Paul Moore <paul@paul-moore.com> (for the SELinux bits)
---
 include/linux/ima.h               | 15 +++++++-----
 security/integrity/ima/ima.h      | 10 ++++----
 security/integrity/ima/ima_main.c | 40 ++++++++++++++++++-------------
 3 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 81e830d01ced..60492263aa64 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -35,10 +35,10 @@ extern void ima_post_path_mknod(struct user_namespace *mnt_userns,
 extern int ima_file_hash(struct file *file, char *buf, size_t buf_size);
 extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size);
 extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size);
-extern void ima_measure_critical_data(const char *event_label,
-				      const char *event_name,
-				      const void *buf, size_t buf_len,
-				      bool hash);
+extern int ima_measure_critical_data(const char *event_label,
+				     const char *event_name,
+				     const void *buf, size_t buf_len,
+				     bool hash);
 
 #ifdef CONFIG_IMA_APPRAISE_BOOTPARAM
 extern void ima_appraise_parse_cmdline(void);
@@ -144,10 +144,13 @@ static inline int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size
 
 static inline void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) {}
 
-static inline void ima_measure_critical_data(const char *event_label,
+static inline int ima_measure_critical_data(const char *event_label,
 					     const char *event_name,
 					     const void *buf, size_t buf_len,
-					     bool hash) {}
+					     bool hash)
+{
+	return -ENOENT;
+}
 
 #endif /* CONFIG_IMA */
 
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index f0e448ed1f9f..03db221324c3 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -264,11 +264,11 @@ void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
 			   struct evm_ima_xattr_data *xattr_value,
 			   int xattr_len, const struct modsig *modsig, int pcr,
 			   struct ima_template_desc *template_desc);
-void process_buffer_measurement(struct user_namespace *mnt_userns,
-				struct inode *inode, const void *buf, int size,
-				const char *eventname, enum ima_hooks func,
-				int pcr, const char *func_data,
-				bool buf_hash);
+int process_buffer_measurement(struct user_namespace *mnt_userns,
+			       struct inode *inode, const void *buf, int size,
+			       const char *eventname, enum ima_hooks func,
+			       int pcr, const char *func_data,
+			       bool buf_hash);
 void ima_audit_measurement(struct integrity_iint_cache *iint,
 			   const unsigned char *filename);
 int ima_alloc_init_template(struct ima_event_data *event_data,
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 8ef1fa357e0c..b512c06d8ee1 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -827,7 +827,7 @@ int ima_post_load_data(char *buf, loff_t size,
 	return 0;
 }
 
-/*
+/**
  * process_buffer_measurement - Measure the buffer or the buffer data hash
  * @mnt_userns:	user namespace of the mount the inode was found from
  * @inode: inode associated with the object being measured (NULL for KEY_CHECK)
@@ -840,12 +840,15 @@ int ima_post_load_data(char *buf, loff_t size,
  * @buf_hash: measure buffer data hash
  *
  * Based on policy, either the buffer data or buffer data hash is measured
+ *
+ * Return: 0 if the buffer has been successfully measured, a negative value
+ * otherwise.
  */
-void process_buffer_measurement(struct user_namespace *mnt_userns,
-				struct inode *inode, const void *buf, int size,
-				const char *eventname, enum ima_hooks func,
-				int pcr, const char *func_data,
-				bool buf_hash)
+int process_buffer_measurement(struct user_namespace *mnt_userns,
+			       struct inode *inode, const void *buf, int size,
+			       const char *eventname, enum ima_hooks func,
+			       int pcr, const char *func_data,
+			       bool buf_hash)
 {
 	int ret = 0;
 	const char *audit_cause = "ENOMEM";
@@ -867,7 +870,7 @@ void process_buffer_measurement(struct user_namespace *mnt_userns,
 	u32 secid;
 
 	if (!ima_policy_flag)
-		return;
+		return -ENOENT;
 
 	template = ima_template_desc_buf();
 	if (!template) {
@@ -889,7 +892,7 @@ void process_buffer_measurement(struct user_namespace *mnt_userns,
 					secid, 0, func, &pcr, &template,
 					func_data);
 		if (!(action & IMA_MEASURE))
-			return;
+			return -ENOENT;
 	}
 
 	if (!pcr)
@@ -937,7 +940,7 @@ void process_buffer_measurement(struct user_namespace *mnt_userns,
 					func_measure_str(func),
 					audit_cause, ret, 0, ret);
 
-	return;
+	return ret;
 }
 
 /**
@@ -977,18 +980,21 @@ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size)
  * and extend the pcr.  Examples of critical data could be various data
  * structures, policies, and states stored in kernel memory that can
  * impact the integrity of the system.
+ *
+ * Return: 0 if the buffer has been successfully measured, a negative value
+ * otherwise.
  */
-void ima_measure_critical_data(const char *event_label,
-			       const char *event_name,
-			       const void *buf, size_t buf_len,
-			       bool hash)
+int ima_measure_critical_data(const char *event_label,
+			      const char *event_name,
+			      const void *buf, size_t buf_len,
+			      bool hash)
 {
 	if (!event_name || !event_label || !buf || !buf_len)
-		return;
+		return -ENOPARAM;
 
-	process_buffer_measurement(&init_user_ns, NULL, buf, buf_len, event_name,
-				   CRITICAL_DATA, 0, event_label,
-				   hash);
+	return process_buffer_measurement(&init_user_ns, NULL, buf, buf_len,
+					  event_name, CRITICAL_DATA, 0,
+					  event_label, hash);
 }
 
 static int __init init_ima(void)
-- 
2.25.1


^ permalink raw reply related

* [PATCH v4 3/3] ima: Add digest and digest_len params to the functions to measure a buffer
From: Roberto Sassu @ 2021-07-23  8:53 UTC (permalink / raw)
  To: zohar, paul
  Cc: stephen.smalley.work, prsriva02, tusharsu, nramas,
	linux-integrity, linux-security-module, linux-kernel, selinux,
	Roberto Sassu
In-Reply-To: <20210723085304.1760138-1-roberto.sassu@huawei.com>

This patch performs the final modification necessary to pass the buffer
measurement to callers, so that they provide a functionality similar to
ima_file_hash(). It adds the 'digest' and 'digest_len' parameters to
ima_measure_critical_data() and process_buffer_measurement().

These functions calculate the digest even if there is no suitable rule in
the IMA policy and, in this case, they simply return 1 before generating a
new measurement entry.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
---
 include/linux/ima.h                          |  5 +--
 security/integrity/ima/ima.h                 |  2 +-
 security/integrity/ima/ima_appraise.c        |  2 +-
 security/integrity/ima/ima_asymmetric_keys.c |  2 +-
 security/integrity/ima/ima_init.c            |  3 +-
 security/integrity/ima/ima_main.c            | 36 ++++++++++++++------
 security/integrity/ima/ima_queue_keys.c      |  2 +-
 security/selinux/ima.c                       |  6 ++--
 8 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 60492263aa64..b6ab66a546ae 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -38,7 +38,7 @@ extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size);
 extern int ima_measure_critical_data(const char *event_label,
 				     const char *event_name,
 				     const void *buf, size_t buf_len,
-				     bool hash);
+				     bool hash, u8 *digest, size_t digest_len);
 
 #ifdef CONFIG_IMA_APPRAISE_BOOTPARAM
 extern void ima_appraise_parse_cmdline(void);
@@ -147,7 +147,8 @@ static inline void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) {
 static inline int ima_measure_critical_data(const char *event_label,
 					     const char *event_name,
 					     const void *buf, size_t buf_len,
-					     bool hash)
+					     bool hash, u8 *digest,
+					     size_t digest_len)
 {
 	return -ENOENT;
 }
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 03db221324c3..2f4c20b16ad7 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -268,7 +268,7 @@ int process_buffer_measurement(struct user_namespace *mnt_userns,
 			       struct inode *inode, const void *buf, int size,
 			       const char *eventname, enum ima_hooks func,
 			       int pcr, const char *func_data,
-			       bool buf_hash);
+			       bool buf_hash, u8 *digest, size_t digest_len);
 void ima_audit_measurement(struct integrity_iint_cache *iint,
 			   const unsigned char *filename);
 int ima_alloc_init_template(struct ima_event_data *event_data,
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index ef9dcfce45d4..63bec42c353f 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -357,7 +357,7 @@ int ima_check_blacklist(struct integrity_iint_cache *iint,
 		if ((rc == -EPERM) && (iint->flags & IMA_MEASURE))
 			process_buffer_measurement(&init_user_ns, NULL, digest, digestsize,
 						   "blacklisted-hash", NONE,
-						   pcr, NULL, false);
+						   pcr, NULL, false, NULL, 0);
 	}
 
 	return rc;
diff --git a/security/integrity/ima/ima_asymmetric_keys.c b/security/integrity/ima/ima_asymmetric_keys.c
index c985418698a4..f6aa0b47a772 100644
--- a/security/integrity/ima/ima_asymmetric_keys.c
+++ b/security/integrity/ima/ima_asymmetric_keys.c
@@ -62,5 +62,5 @@ void ima_post_key_create_or_update(struct key *keyring, struct key *key,
 	 */
 	process_buffer_measurement(&init_user_ns, NULL, payload, payload_len,
 				   keyring->description, KEY_CHECK, 0,
-				   keyring->description, false);
+				   keyring->description, false, NULL, 0);
 }
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 5076a7d9d23e..b26fa67476b4 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -154,7 +154,8 @@ int __init ima_init(void)
 	ima_init_key_queue();
 
 	ima_measure_critical_data("kernel_info", "kernel_version",
-				  UTS_RELEASE, strlen(UTS_RELEASE), false);
+				  UTS_RELEASE, strlen(UTS_RELEASE), false,
+				  NULL, 0);
 
 	return rc;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index b512c06d8ee1..360266da5a10 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -838,17 +838,20 @@ int ima_post_load_data(char *buf, loff_t size,
  * @pcr: pcr to extend the measurement
  * @func_data: func specific data, may be NULL
  * @buf_hash: measure buffer data hash
+ * @digest: buffer digest will be written to
+ * @digest_len: buffer length
  *
  * Based on policy, either the buffer data or buffer data hash is measured
  *
- * Return: 0 if the buffer has been successfully measured, a negative value
- * otherwise.
+ * Return: 0 if the buffer has been successfully measured, 1 if the digest
+ * has been written to the passed location but not added to a measurement entry,
+ * a negative value otherwise.
  */
 int process_buffer_measurement(struct user_namespace *mnt_userns,
 			       struct inode *inode, const void *buf, int size,
 			       const char *eventname, enum ima_hooks func,
 			       int pcr, const char *func_data,
-			       bool buf_hash)
+			       bool buf_hash, u8 *digest, size_t digest_len)
 {
 	int ret = 0;
 	const char *audit_cause = "ENOMEM";
@@ -869,7 +872,10 @@ int process_buffer_measurement(struct user_namespace *mnt_userns,
 	int action = 0;
 	u32 secid;
 
-	if (!ima_policy_flag)
+	if (digest && digest_len < digest_hash_len)
+		return -EINVAL;
+
+	if (!ima_policy_flag && !digest)
 		return -ENOENT;
 
 	template = ima_template_desc_buf();
@@ -891,7 +897,7 @@ int process_buffer_measurement(struct user_namespace *mnt_userns,
 		action = ima_get_action(mnt_userns, inode, current_cred(),
 					secid, 0, func, &pcr, &template,
 					func_data);
-		if (!(action & IMA_MEASURE))
+		if (!(action & IMA_MEASURE) && !digest)
 			return -ENOENT;
 	}
 
@@ -922,6 +928,12 @@ int process_buffer_measurement(struct user_namespace *mnt_userns,
 		event_data.buf_len = digest_hash_len;
 	}
 
+	if (digest)
+		memcpy(digest, iint.ima_hash->digest, digest_hash_len);
+
+	if (!ima_policy_flag || (func && !(action & IMA_MEASURE)))
+		return 1;
+
 	ret = ima_alloc_init_template(&event_data, &entry, template);
 	if (ret < 0) {
 		audit_cause = "alloc_entry";
@@ -964,7 +976,7 @@ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size)
 
 	process_buffer_measurement(file_mnt_user_ns(f.file), file_inode(f.file),
 				   buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0,
-				   NULL, false);
+				   NULL, false, NULL, 0);
 	fdput(f);
 }
 
@@ -975,26 +987,30 @@ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size)
  * @buf: pointer to buffer data
  * @buf_len: length of buffer data (in bytes)
  * @hash: measure buffer data hash
+ * @digest: buffer digest will be written to
+ * @digest_len: buffer length
  *
  * Measure data critical to the integrity of the kernel into the IMA log
  * and extend the pcr.  Examples of critical data could be various data
  * structures, policies, and states stored in kernel memory that can
  * impact the integrity of the system.
  *
- * Return: 0 if the buffer has been successfully measured, a negative value
- * otherwise.
+ * Return: 0 if the buffer has been successfully measured, 1 if the digest
+ * has been written to the passed location but not added to a measurement entry,
+ * a negative value otherwise.
  */
 int ima_measure_critical_data(const char *event_label,
 			      const char *event_name,
 			      const void *buf, size_t buf_len,
-			      bool hash)
+			      bool hash, u8 *digest, size_t digest_len)
 {
 	if (!event_name || !event_label || !buf || !buf_len)
 		return -ENOPARAM;
 
 	return process_buffer_measurement(&init_user_ns, NULL, buf, buf_len,
 					  event_name, CRITICAL_DATA, 0,
-					  event_label, hash);
+					  event_label, hash, digest,
+					  digest_len);
 }
 
 static int __init init_ima(void)
diff --git a/security/integrity/ima/ima_queue_keys.c b/security/integrity/ima/ima_queue_keys.c
index 979ef6c71f3d..93056c03bf5a 100644
--- a/security/integrity/ima/ima_queue_keys.c
+++ b/security/integrity/ima/ima_queue_keys.c
@@ -165,7 +165,7 @@ void ima_process_queued_keys(void)
 						   entry->keyring_name,
 						   KEY_CHECK, 0,
 						   entry->keyring_name,
-						   false);
+						   false, NULL, 0);
 		list_del(&entry->list);
 		ima_free_key_entry(entry);
 	}
diff --git a/security/selinux/ima.c b/security/selinux/ima.c
index 34d421861bfc..727c4e43219d 100644
--- a/security/selinux/ima.c
+++ b/security/selinux/ima.c
@@ -86,7 +86,8 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
 	}
 
 	ima_measure_critical_data("selinux", "selinux-state",
-				  state_str, strlen(state_str), false);
+				  state_str, strlen(state_str), false,
+				  NULL, 0);
 
 	kfree(state_str);
 
@@ -103,7 +104,8 @@ void selinux_ima_measure_state_locked(struct selinux_state *state)
 	}
 
 	ima_measure_critical_data("selinux", "selinux-policy-hash",
-				  policy, policy_len, true);
+				  policy, policy_len, true,
+				  NULL, 0);
 
 	vfree(policy);
 }
-- 
2.25.1


^ permalink raw reply related

* [PATCH v4 1/3] ima: Introduce ima_get_current_hash_algo()
From: Roberto Sassu @ 2021-07-23  8:53 UTC (permalink / raw)
  To: zohar, paul
  Cc: stephen.smalley.work, prsriva02, tusharsu, nramas,
	linux-integrity, linux-security-module, linux-kernel, selinux,
	Roberto Sassu
In-Reply-To: <20210723085304.1760138-1-roberto.sassu@huawei.com>

Buffer measurements, unlike file measurements, are not accessible after the
measurement is done, as buffers are not suitable for use with the
integrity_iint_cache structure (there is no index, for files it is the
inode number). In the subsequent patches, the measurement (digest) will be
returned directly by the functions that perform the buffer measurement,
ima_measure_critical_data() and process_buffer_measurement().

A caller of those functions also needs to know the algorithm used to
calculate the digest. Instead of adding the algorithm as a new parameter to
the functions, this patch provides it separately with the new function
ima_get_current_hash_algo().

Since the hash algorithm does not change after the IMA setup phase, there
is no risk of races (obtaining a digest calculated with a different
algorithm than the one returned).

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
---
 include/linux/ima.h               | 7 +++++++
 security/integrity/ima/ima_main.c | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 61d5723ec303..81e830d01ced 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,9 +11,11 @@
 #include <linux/fs.h>
 #include <linux/security.h>
 #include <linux/kexec.h>
+#include <crypto/hash_info.h>
 struct linux_binprm;
 
 #ifdef CONFIG_IMA
+extern enum hash_algo ima_get_current_hash_algo(void);
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
 extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
@@ -64,6 +66,11 @@ static inline const char * const *arch_get_ima_policy(void)
 #endif
 
 #else
+static inline enum hash_algo ima_get_current_hash_algo(void)
+{
+	return HASH_ALGO__LAST;
+}
+
 static inline int ima_bprm_check(struct linux_binprm *bprm)
 {
 	return 0;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 287b90509006..8ef1fa357e0c 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -76,6 +76,11 @@ static int __init hash_setup(char *str)
 }
 __setup("ima_hash=", hash_setup);
 
+enum hash_algo ima_get_current_hash_algo(void)
+{
+	return ima_hash_algo;
+}
+
 /* Prevent mmap'ing a file execute that is already mmap'ed write */
 static int mmap_violation_check(enum ima_hooks func, struct file *file,
 				char **pathbuf, const char **pathname,
-- 
2.25.1


^ permalink raw reply related

* [PATCH v4 0/3] ima: Provide more info about buffer measurement
From: Roberto Sassu @ 2021-07-23  8:53 UTC (permalink / raw)
  To: zohar, paul
  Cc: stephen.smalley.work, prsriva02, tusharsu, nramas,
	linux-integrity, linux-security-module, linux-kernel, selinux,
	Roberto Sassu

This patch set provides more information about buffer measurement. It
requires the modification of the existing functions
ima_measure_critical_data() and process_buffer_measurement() as, unlike
for files, there is no integrity_iint_cache structure that can be used to
store and retrieve at a later time the buffer measurement (there is no
index, for files it is the inode number).

First, this patch set introduces the new function
ima_get_current_hash_algo(), to obtain the algorithm used to calculate the
buffer digest (patch 1).

Second, it changes the type of return value of ima_measure_critical_data()
and process_buffer_measurement() from void to int, to signal to the callers
whether or not the buffer has been measured, or just the digest has been
calculated and written to the supplied location (patch 2).

Lastly, it adds two new parameters to the functions above ('digest' and
'digest_len'), so that those functions can write the buffer digest to the
location supplied by the callers (patch 3).

This patch set replaces the patch 'ima: Add digest, algo, measured
parameters to ima_measure_critical_data()' in:

https://lore.kernel.org/linux-integrity/20210625165614.2284243-1-roberto.sassu@huawei.com/

Changelog

v3:
- explain better the motivation for the patches (suggested by Mimi)

v2:
- remove assignments of ima_measure_critical_data() and
  process_buffer_measurement() return values (suggested by Lakshmi)

v1:
- add digest_len parameter to ima_measure_critical_data() and
  process_buffer_measurement() (suggested by Lakshmi)
- fix doc formatting issues

Huawei Digest Lists patch set:
- introduce ima_get_current_hash_algo() (suggested by Mimi)
- remove algo and measured parameters from ima_measure_critical_data() and
  process_buffer_measurement() (suggested by Mimi)
- return an integer from ima_measure_critical_data() and
  process_buffer_measurement() (suggested by Mimi)
- correctly check when process_buffer_measurement() should return earlier

Roberto Sassu (3):
  ima: Introduce ima_get_current_hash_algo()
  ima: Return int in the functions to measure a buffer
  ima: Add digest and digest_len params to the functions to measure a
    buffer

 include/linux/ima.h                          | 23 +++++--
 security/integrity/ima/ima.h                 | 10 +--
 security/integrity/ima/ima_appraise.c        |  2 +-
 security/integrity/ima/ima_asymmetric_keys.c |  2 +-
 security/integrity/ima/ima_init.c            |  3 +-
 security/integrity/ima/ima_main.c            | 67 ++++++++++++++------
 security/integrity/ima/ima_queue_keys.c      |  2 +-
 security/selinux/ima.c                       |  6 +-
 8 files changed, 78 insertions(+), 37 deletions(-)

-- 
2.25.1


^ permalink raw reply

* Re: [RFC PATCH] userfaultfd: open userfaultfds with O_RDONLY
From: Ondrej Mosnacek @ 2021-07-23  8:39 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrea Arcangeli, Lokesh Gidra, Linux FS Devel,
	Linux Security Module list, SElinux list,
	Linux kernel mailing list, Robert O'Callahan
In-Reply-To: <20210624152515.1844133-1-omosnace@redhat.com>

On Thu, Jun 24, 2021 at 5:25 PM Ondrej Mosnacek <omosnace@redhat.com> wrote:
>
> Since userfaultfd doesn't implement a write operation, it is more
> appropriate to open it read-only.
>
> When userfaultfds are opened read-write like it is now, and such fd is
> passed from one process to another, SELinux will check both read and
> write permissions for the target process, even though it can't actually
> do any write operation on the fd later.
>
> Inspired by the following bug report, which has hit the SELinux scenario
> described above:
> https://bugzilla.redhat.com/show_bug.cgi?id=1974559
>
> Reported-by: Robert O'Callahan <roc@ocallahan.org>
> Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
> Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
> ---
>
> I marked this as RFC, because I'm not sure if this has any unwanted side
> effects. I only ran this patch through selinux-testsuite, which has a
> simple userfaultfd subtest, and a reproducer from the Bugzilla report.
>
> Please tell me whether this makes sense and/or if it passes any
> userfaultfd tests you guys might have.
>
>  fs/userfaultfd.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 14f92285d04f..24e14c36068f 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -986,7 +986,7 @@ static int resolve_userfault_fork(struct userfaultfd_ctx *new,
>         int fd;
>
>         fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, new,
> -                       O_RDWR | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
> +                       O_RDONLY | (new->flags & UFFD_SHARED_FCNTL_FLAGS), inode);
>         if (fd < 0)
>                 return fd;
>
> @@ -2088,7 +2088,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
>         mmgrab(ctx->mm);
>
>         fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
> -                       O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
> +                       O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
>         if (fd < 0) {
>                 mmdrop(ctx->mm);
>                 kmem_cache_free(userfaultfd_ctx_cachep, ctx);
> --
> 2.31.1

Ping? Any comments on this patch?

-- 
Ondrej Mosnacek
Software Engineer, Linux Security - SELinux kernel
Red Hat, Inc.


^ permalink raw reply

* Re: AVC denied for docker while trying to set labels for tmpfs mounts
From: Sujithra P @ 2021-07-23  6:06 UTC (permalink / raw)
  To: dwalsh; +Cc: Paul Moore, linux-security-module, selinux
In-Reply-To: <c22dcebb-e766-d1e2-1e15-15a85e2124bb@redhat.com>

Thanks Daniel.

I'm not seeing anything suspicious in the audit logs, the. following
are the MAC_POLICY_LOAD events

type=SYSCALL msg=audit(1622991228.547:183): arch=c000003e syscall=1
success=yes exit=8577048 a0=4 a1=7fd682c61000 a2=82e018 a3=0 items=0
ppid=62178 pid=62186 auid=1004 uid=0 gid=0 euid=0 suid=0 fsuid=0
egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="load_policy"
exe="/usr/sbin/load_policy"
subj=unconfined_u:unconfined_r:load_policy_t:s0-s0:c0.c1023 key=(null)

type=MAC_POLICY_LOAD msg=audit(1627002776.825:6075): auid=4294967295
ses=4294967295 lsm=selinux res=1
----
type=MAC_POLICY_LOAD msg=audit(1627008064.852:7615): auid=4294967295
ses=4294967295 lsm=selinux res=1
----
type=MAC_POLICY_LOAD msg=audit(1627008273.029:7617): auid=4294967295
ses=4294967295 lsm=selinux res=1
----
type=MAC_POLICY_LOAD msg=audit(1627009159.383:8392): auid=4294967295
ses=4294967295 lsm=selinux res=1
----

On Thu, Jul 22, 2021 at 2:38 AM Daniel Walsh <dwalsh@redhat.com> wrote:
>
> On 7/21/21 18:17, Sujithra P wrote:
> > Thanks Paul!
> >
> > Is there any specific centos/RH mailing list that I can ask? Not sure
> > whether it is a problem with kernel/docker/kubelet.
> > semodule -R seems to fix the problem, but not sure what is causing the
> > loaded policy to get corrupt.
> > Any insight on how to figure this out would be very much appreciated.
> >
> > Thanks
> > Sujithra.
> I am guessing that one of the containers is loading policy.  You should
> be able to see something in the auditlog, about a policy load.
> > On Wed, Jul 21, 2021 at 2:01 PM Paul Moore <paul@paul-moore.com> wrote:
> >> On Wed, Jul 21, 2021 at 2:46 PM Sujithra P <sujithrap@gmail.com> wrote:
> >>> Hi SELinux Experts,
> >>>
> >>> The following issue is described in the below post as well.
> >>> https://github.com/containers/container-selinux/issues/141
> >>>
> >>> Occasionally running into the following selinux denials for docker
> >>>
> >>> type=AVC msg=audit(1626732057.636:4583): avc:  denied  { associate }
> >>> for  pid=57450 comm="dockerd" name="/" dev="tmpfs" ino=150014
> >>> scontext=system_u:object_r:container_file_t:s0:c263,c914
> >>> tcontext=system_u:object_r:lib_t:s0 tclass=filesystem permissive=0
> >>>
> >>> type=AVC msg=audit(1626812823.170:9434): avc:  denied  { associate }
> >>> for  pid=20027 comm="dockerd" name="/" dev="tmpfs" ino=198147
> >>> scontext=system_u:object_r:container_file_t:s0:c578,c672
> >>> tcontext=system_u:object_r:locale_t:s0 tclass=filesystem permissive=0
> >>>
> >>>
> >>>   level=error msg="Handler for POST
> >>> /v1.40/containers/a3a875e7896384e3bff53b8317e91ed4301a13957f42187eb227f28e09bd877c/start
> >>> returned error: error setting label on mount source
> >>> '/var/lib/kubelet/pods/f7cee5b2-bcd9-4aa1-9d67-c75b677ba2a1/volumes/kubernetes.io~secret/secret':
> >>> failed to set file label on
> >>> /var/lib/kubelet/pods/f7cee5b2-bcd9-4aa1-9d67-c75b677ba2a1/volumes/kubernetes.io~secret/secret:
> >>> permission denied"
> >>>
> >>>
> >>> Docker is not able to set labels for these tmpfs mounts because they
> >>> end up having wrong labels when they are created (sometimes
> >>> "locale_t", sometimes "lib_t" which of course is not the
> >>> default/correct context for tmpfs fs).
> >>> Apparently semodule -R and deleting these tmps files or reboot of the
> >>> node fixes the problem.
> >>> Not sure what is causing the tmpfs mounts to get wrong labels in the
> >>> first place.
> >>>
> >>> Everything seems to be fine to begin with, but as the system keeps
> >>> scheduling pods on the node, this behavior is observed sometimes (not
> >>> consistent always).
> >>>
> >>>
> >>> OS Details:
> >>>
> >>> NAME="CentOS Linux"
> >>> VERSION="8 (Core)"
> >>> ID="centos"
> >>> ID_LIKE="rhel fedora"
> >>> VERSION_ID="8"
> >>> PLATFORM_ID="platform:el8"
> >>> PRETTY_NAME="CentOS Linux 8 (Core)"
> >>>
> >>> Docker Version:
> >>> Client: Docker Engine - Community
> >>> Version: 19.03.13
> >>> API version: 1.40
> >>> Go version: go1.13.15
> >>> Git commit: 4484c46d9d
> >>> Built: Wed Sep 16 17:02:36 2020
> >>> OS/Arch: linux/amd64
> >>> Experimental: false
> >>>
> >>> Kubernetes Version*
> >>> v1.20.8-gke.1500
> >>>
> >>>
> >>> Any help on how to debug this issue  would be greatly appreciated.
> >> This sounds like it might be a problem with CentOS and/or your Docker
> >> install, have you tried talking with the RH/CentOS folks about this
> >> problem?  We focus mostly on upstream issues here and it isn't clear
> >> to me at this moment that this is an upstream issue.
> >>
> >> --
> >> paul moore
> >> www.paul-moore.com
>
>

^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Paul Moore @ 2021-07-22 18:41 UTC (permalink / raw)
  To: Paolo Abeni, Casey Schaufler
  Cc: netdev, David S. Miller, Jakub Kicinski, Florian Westphal,
	Eric Dumazet, linux-security-module, selinux
In-Reply-To: <d3fe6ae85b8fad9090288c553f8d248603758506.camel@redhat.com>

On Thu, Jul 22, 2021 at 12:59 PM Paolo Abeni <pabeni@redhat.com> wrote:
> On Thu, 2021-07-22 at 09:04 -0700, Casey Schaufler wrote:
> > On 7/22/2021 12:10 AM, Paolo Abeni wrote:
> > > On Wed, 2021-07-21 at 11:15 -0700, Casey Schaufler wrote:
> > > > On 7/21/2021 9:44 AM, Paolo Abeni wrote:
> > > > > This is a very early draft - in a different world would be
> > > > > replaced by hallway discussion at in-person conference - aimed at
> > > > > outlining some ideas and collect feedback on the overall outlook.
> > > > > There are still bugs to be fixed, more test and benchmark need, etc.
> > > > >
> > > > > There are 3 main goals:
> > > > > - [try to] avoid the overhead for uncommon conditions at GRO time
> > > > >   (patches 1-4)
> > > > > - enable backpressure for the veth GRO path (patches 5-6)
> > > > > - reduce the number of cacheline used by the sk_buff lifecycle
> > > > >   from 4 to 3, at least in some common scenarios (patches 1,7-9).
> > > > >   The idea here is avoid the initialization of some fields and
> > > > >   control their validity with a bitmask, as presented by at least
> > > > >   Florian and Jesper in the past.
> > > > If I understand correctly, you're creating an optimized case
> > > > which excludes ct, secmark, vlan and UDP tunnel. Is this correct,
> > > > and if so, why those particular fields? What impact will this have
> > > > in the non-optimal (with any of the excluded fields) case?
> > > Thank you for the feedback.
> >
> > You're most welcome. You did request comments.
> >
> > > There are 2 different relevant points:
> > >
> > > - the GRO stage.
> > >   packets carring any of CT, dst, sk or skb_ext will do 2 additional
> > > conditionals per gro_receive WRT the current code. My understanding is
> > > that having any of such field set at GRO receive time is quite
> > > exceptional for real nic. All others packet will do 4 or 5 less
> > > conditionals, and will traverse a little less code.
> > >
> > > - sk_buff lifecycle
> > >   * packets carrying vlan and UDP will not see any differences: sk_buff
> > > lifecycle will stil use 4 cachelines, as currently does, and no
> > > additional conditional is introduced.
> > >   * packets carring nfct or secmark will see an additional conditional
> > > every time such field is accessed. The number of cacheline used will
> > > still be 4, as in the current code. My understanding is that when such
> > > access happens, there is already a relevant amount of "additional" code
> > > to be executed, the conditional overhead should not be measurable.
> >
> > I'm responsible for some of that "additonal" code. If the secmark
> > is considered to be outside the performance critical data there are
> > changes I would like to make that will substantially improve the
> > performance of that "additional" code that would include a u64
> > secmark. If use of a secmark is considered indicative of a "slow"
> > path, the rationale for restricting it to u32, that it might impact
> > the "usual" case performance, seems specious. I can't say that I
> > understand all the nuances and implications involved. It does
> > appear that the changes you've suggested could negate the classic
> > argument that requires the u32 secmark.
>
> I see now I did not reply to one of you questions - why I picked-up
>  vlan, tunnel secmark fields to move them at sk_buff tail.
>
> Tow main drivers on my side:
> - there are use cases/deployments that do not use them.
> - moving them around was doable in term of required changes.
>
> There are no "slow-path" implications on my side. For example, vlan_*
> fields are very critical performance wise, if the traffic is tagged.
> But surely there are busy servers not using tagget traffic which will
> enjoy the reduced cachelines footprint, and this changeset will not
> impact negatively the first case.
>
> WRT to the vlan example, secmark and nfct require an extra conditional
> to fetch the data. My understanding is that such additional conditional
> is not measurable performance-wise when benchmarking the security
> modules (or conntrack) because they have to do much more intersting
> things after fetching a few bytes from an already hot cacheline.
>
> Not sure if the above somehow clarify my statements.
>
> As for expanding secmark to 64 bits, I guess that could be an
> interesting follow-up discussion :)

The intersection between netdev and the LSM has a long and somewhat
tortured past with each party making sacrifices along the way to get
where we are at today.  It is far from perfect, at least from a LSM
perspective, but it is what we've got and since performance is usually
used as a club to beat back any changes proposed by the LSM side, I
would like to object to these changes that negatively impact the LSM
performance without some concession in return.  It has been a while
since Casey and I have spoken about this, but I think the prefered
option would be to exchange the current __u32 "sk_buff.secmark" field
with a void* "sk_buff.security" field, like so many other kernel level
objects.  Previous objections have eventually boiled down to the
additional space in the sk_buff for the extra bits (there is some
additional editorializing that could be done here, but I'll refrain),
but based on the comments thus far in this thread it sounds like
perhaps we can now make a deal here: move the LSM field down to a
"colder" cacheline in exchange for converting the LSM field to a
proper pointer.

Thoughts?

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [PATCH v28 21/25] audit: support non-syscall auxiliary records
From: kernel test robot @ 2021-07-22 17:02 UTC (permalink / raw)
  To: Casey Schaufler, casey.schaufler, jmorris, linux-security-module,
	selinux
  Cc: kbuild-all, casey, linux-audit, keescook, john.johansen,
	penguin-kernel, paul
In-Reply-To: <20210722004758.12371-22-casey@schaufler-ca.com>

[-- Attachment #1: Type: text/plain, Size: 10113 bytes --]

Hi Casey,

I love your patch! Yet something to improve:

[auto build test ERROR on pcmoore-audit/next]
[also build test ERROR on nf/master linus/master v5.14-rc2]
[cannot apply to nf-next/master next-20210722]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Casey-Schaufler/LSM-Infrastructure-management-of-the-sock-security/20210722-094735
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git next
config: openrisc-randconfig-r025-20210722 (attached as .config)
compiler: or1k-linux-gcc (GCC) 10.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/63ea5078624b9ff368f945d654ace5f79160fb6a
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Casey-Schaufler/LSM-Infrastructure-management-of-the-sock-security/20210722-094735
        git checkout 63ea5078624b9ff368f945d654ace5f79160fb6a
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-10.3.0 make.cross O=build_dir ARCH=openrisc SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from init/init_task.c:12:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
--
   In file included from arch/openrisc/kernel/ptrace.c:23:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
   arch/openrisc/kernel/ptrace.c:157:17: warning: no previous prototype for 'do_syscall_trace_enter' [-Wmissing-prototypes]
     157 | asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
         |                 ^~~~~~~~~~~~~~~~~~~~~~
   arch/openrisc/kernel/ptrace.c:176:17: warning: no previous prototype for 'do_syscall_trace_leave' [-Wmissing-prototypes]
     176 | asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
         |                 ^~~~~~~~~~~~~~~~~~~~~~
--
   In file included from kernel/fork.c:63:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
   kernel/fork.c:162:13: warning: no previous prototype for 'arch_release_task_struct' [-Wmissing-prototypes]
     162 | void __weak arch_release_task_struct(struct task_struct *tsk)
         |             ^~~~~~~~~~~~~~~~~~~~~~~~
   kernel/fork.c:752:20: warning: no previous prototype for 'arch_task_cache_init' [-Wmissing-prototypes]
     752 | void __init __weak arch_task_cache_init(void) { }
         |                    ^~~~~~~~~~~~~~~~~~~~
   kernel/fork.c:847:12: warning: no previous prototype for 'arch_dup_task_struct' [-Wmissing-prototypes]
     847 | int __weak arch_dup_task_struct(struct task_struct *dst,
         |            ^~~~~~~~~~~~~~~~~~~~
--
   In file included from kernel/exit.c:49:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
   kernel/exit.c:1810:13: warning: no previous prototype for 'abort' [-Wmissing-prototypes]
    1810 | __weak void abort(void)
         |             ^~~~~
--
   In file included from kernel/audit.c:51:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
   kernel/audit.c:1781:14: warning: no previous prototype for 'audit_serial' [-Wmissing-prototypes]
    1781 | unsigned int audit_serial(void)
         |              ^~~~~~~~~~~~
   kernel/audit.c: In function 'audit_log_vformat':
   kernel/audit.c:1929:2: warning: function 'audit_log_vformat' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
    1929 |  len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args);
         |  ^~~
   kernel/audit.c:1938:3: warning: function 'audit_log_vformat' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
    1938 |   len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2);
         |   ^~~
--
   In file included from fs/pipe.c:23:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
   fs/pipe.c:741:15: warning: no previous prototype for 'account_pipe_buffers' [-Wmissing-prototypes]
     741 | unsigned long account_pipe_buffers(struct user_struct *user,
         |               ^~~~~~~~~~~~~~~~~~~~
   fs/pipe.c:747:6: warning: no previous prototype for 'too_many_pipe_buffers_soft' [-Wmissing-prototypes]
     747 | bool too_many_pipe_buffers_soft(unsigned long user_bufs)
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~
   fs/pipe.c:754:6: warning: no previous prototype for 'too_many_pipe_buffers_hard' [-Wmissing-prototypes]
     754 | bool too_many_pipe_buffers_hard(unsigned long user_bufs)
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~~
   fs/pipe.c:761:6: warning: no previous prototype for 'pipe_is_unprivileged_user' [-Wmissing-prototypes]
     761 | bool pipe_is_unprivileged_user(void)
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~
   fs/pipe.c:1231:5: warning: no previous prototype for 'pipe_resize_ring' [-Wmissing-prototypes]
    1231 | int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
         |     ^~~~~~~~~~~~~~~~
--
   In file included from include/linux/fsnotify.h:16,
                    from kernel/trace/trace.c:49:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
   kernel/trace/trace.c: In function 'trace_check_vprintf':
   kernel/trace/trace.c:3815:3: warning: function 'trace_check_vprintf' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
    3815 |   trace_seq_vprintf(&iter->seq, iter->fmt, ap);
         |   ^~~~~~~~~~~~~~~~~
   kernel/trace/trace.c:3870:3: warning: function 'trace_check_vprintf' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
    3870 |   trace_seq_vprintf(&iter->seq, p, ap);
         |   ^~~~~~~~~~~~~~~~~
   At top level:
   kernel/trace/trace.c:1692:37: warning: 'tracing_max_lat_fops' defined but not used [-Wunused-const-variable=]
    1692 | static const struct file_operations tracing_max_lat_fops;
         |                                     ^~~~~~~~~~~~~~~~~~~~
--
   In file included from net/socket.c:81:
>> include/linux/audit.h:557:1: error: expected identifier or '(' before '+' token
     557 | +static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
         | ^
   net/socket.c: In function '__sys_getsockopt':
   net/socket.c:2178:6: warning: variable 'max_optlen' set but not used [-Wunused-but-set-variable]
    2178 |  int max_optlen;
         |      ^~~~~~~~~~


vim +557 include/linux/audit.h

   553	
   554	extern int audit_n_rules;
   555	extern int audit_signals;
   556	#else /* CONFIG_AUDITSYSCALL */
 > 557	+static inline struct audit_context *audit_alloc_local(gfp_t gfpflags)
   558	{
   559		return NULL;
   560	}
   561	static inline void audit_free_context(struct audit_context *context)
   562	{ }
   563	static inline int audit_alloc(struct task_struct *task)
   564	{
   565		return 0;
   566	}
   567	static inline void audit_free(struct task_struct *task)
   568	{ }
   569	static inline void audit_syscall_entry(int major, unsigned long a0,
   570					       unsigned long a1, unsigned long a2,
   571					       unsigned long a3)
   572	{ }
   573	static inline void audit_syscall_exit(void *pt_regs)
   574	{ }
   575	static inline bool audit_dummy_context(void)
   576	{
   577		return true;
   578	}
   579	static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx)
   580	{ }
   581	static inline struct audit_context *audit_context(void)
   582	{
   583		return NULL;
   584	}
   585	static inline struct filename *audit_reusename(const __user char *name)
   586	{
   587		return NULL;
   588	}
   589	static inline void audit_getname(struct filename *name)
   590	{ }
   591	static inline void audit_inode(struct filename *name,
   592					const struct dentry *dentry,
   593					unsigned int aflags)
   594	{ }
   595	static inline void audit_file(struct file *file)
   596	{
   597	}
   598	static inline void audit_inode_parent_hidden(struct filename *name,
   599					const struct dentry *dentry)
   600	{ }
   601	static inline void audit_inode_child(struct inode *parent,
   602					     const struct dentry *dentry,
   603					     const unsigned char type)
   604	{ }
   605	static inline void audit_core_dumps(long signr)
   606	{ }
   607	static inline void audit_seccomp(unsigned long syscall, long signr, int code)
   608	{ }
   609	static inline void audit_seccomp_actions_logged(const char *names,
   610							const char *old_names, int res)
   611	{ }
   612	static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
   613	{ }
   614	static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
   615						gid_t gid, umode_t mode)
   616	{ }
   617	static inline void audit_bprm(struct linux_binprm *bprm)
   618	{ }
   619	static inline int audit_socketcall(int nargs, unsigned long *args)
   620	{
   621		return 0;
   622	}
   623	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 34024 bytes --]

^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Paolo Abeni @ 2021-07-22 16:57 UTC (permalink / raw)
  To: Casey Schaufler, netdev
  Cc: David S. Miller, Jakub Kicinski, Florian Westphal, Eric Dumazet,
	linux-security-module, selinux
In-Reply-To: <2e9e57f0-98f9-b64d-fd82-aecef84835c5@schaufler-ca.com>

On Thu, 2021-07-22 at 09:04 -0700, Casey Schaufler wrote:
> On 7/22/2021 12:10 AM, Paolo Abeni wrote:
> > On Wed, 2021-07-21 at 11:15 -0700, Casey Schaufler wrote:
> > > On 7/21/2021 9:44 AM, Paolo Abeni wrote:
> > > > This is a very early draft - in a different world would be
> > > > replaced by hallway discussion at in-person conference - aimed at
> > > > outlining some ideas and collect feedback on the overall outlook.
> > > > There are still bugs to be fixed, more test and benchmark need, etc.
> > > > 
> > > > There are 3 main goals:
> > > > - [try to] avoid the overhead for uncommon conditions at GRO time
> > > >   (patches 1-4)
> > > > - enable backpressure for the veth GRO path (patches 5-6)
> > > > - reduce the number of cacheline used by the sk_buff lifecycle
> > > >   from 4 to 3, at least in some common scenarios (patches 1,7-9).
> > > >   The idea here is avoid the initialization of some fields and
> > > >   control their validity with a bitmask, as presented by at least
> > > >   Florian and Jesper in the past.
> > > If I understand correctly, you're creating an optimized case
> > > which excludes ct, secmark, vlan and UDP tunnel. Is this correct,
> > > and if so, why those particular fields? What impact will this have
> > > in the non-optimal (with any of the excluded fields) case?
> > Thank you for the feedback.
> 
> You're most welcome. You did request comments.
> 
> > There are 2 different relevant points:
> > 
> > - the GRO stage.
> >   packets carring any of CT, dst, sk or skb_ext will do 2 additional
> > conditionals per gro_receive WRT the current code. My understanding is
> > that having any of such field set at GRO receive time is quite
> > exceptional for real nic. All others packet will do 4 or 5 less
> > conditionals, and will traverse a little less code.
> > 
> > - sk_buff lifecycle
> >   * packets carrying vlan and UDP will not see any differences: sk_buff
> > lifecycle will stil use 4 cachelines, as currently does, and no
> > additional conditional is introduced.
> >   * packets carring nfct or secmark will see an additional conditional
> > every time such field is accessed. The number of cacheline used will
> > still be 4, as in the current code. My understanding is that when such
> > access happens, there is already a relevant amount of "additional" code
> > to be executed, the conditional overhead should not be measurable.
> 
> I'm responsible for some of that "additonal" code. If the secmark
> is considered to be outside the performance critical data there are
> changes I would like to make that will substantially improve the
> performance of that "additional" code that would include a u64
> secmark. If use of a secmark is considered indicative of a "slow"
> path, the rationale for restricting it to u32, that it might impact
> the "usual" case performance, seems specious. I can't say that I
> understand all the nuances and implications involved. It does
> appear that the changes you've suggested could negate the classic
> argument that requires the u32 secmark.

I see now I did not reply to one of you questions - why I picked-up
 vlan, tunnel secmark fields to move them at sk_buff tail. 

Tow main drivers on my side:
- there are use cases/deployments that do not use them.
- moving them around was doable in term of required changes.

There are no "slow-path" implications on my side. For example, vlan_*
fields are very critical performance wise, if the traffic is tagged.
But surely there are busy servers not using tagget traffic which will
enjoy the reduced cachelines footprint, and this changeset will not
impact negatively the first case.

WRT to the vlan example, secmark and nfct require an extra conditional
to fetch the data. My understanding is that such additional conditional
is not measurable performance-wise when benchmarking the security
modules (or conntrack) because they have to do much more intersting
things after fetching a few bytes from an already hot cacheline.

Not sure if the above somehow clarify my statements.

As for expanding secmark to 64 bits, I guess that could be an
interesting follow-up discussion :)

Cheers,

Paolo


^ permalink raw reply

* Re: [PATCH RFC 0/9] sk_buff: optimize layout for GRO
From: Casey Schaufler @ 2021-07-22 16:04 UTC (permalink / raw)
  To: Paolo Abeni, netdev
  Cc: David S. Miller, Jakub Kicinski, Florian Westphal, Eric Dumazet,
	linux-security-module, selinux, Casey Schaufler
In-Reply-To: <e6200ddd38510216f9f32051ce1acff21fc9c6d0.camel@redhat.com>

On 7/22/2021 12:10 AM, Paolo Abeni wrote:
> Hello,
>
> On Wed, 2021-07-21 at 11:15 -0700, Casey Schaufler wrote:
>> On 7/21/2021 9:44 AM, Paolo Abeni wrote:
>>> This is a very early draft - in a different world would be
>>> replaced by hallway discussion at in-person conference - aimed at
>>> outlining some ideas and collect feedback on the overall outlook.
>>> There are still bugs to be fixed, more test and benchmark need, etc.
>>>
>>> There are 3 main goals:
>>> - [try to] avoid the overhead for uncommon conditions at GRO time
>>>   (patches 1-4)
>>> - enable backpressure for the veth GRO path (patches 5-6)
>>> - reduce the number of cacheline used by the sk_buff lifecycle
>>>   from 4 to 3, at least in some common scenarios (patches 1,7-9).
>>>   The idea here is avoid the initialization of some fields and
>>>   control their validity with a bitmask, as presented by at least
>>>   Florian and Jesper in the past.
>> If I understand correctly, you're creating an optimized case
>> which excludes ct, secmark, vlan and UDP tunnel. Is this correct,
>> and if so, why those particular fields? What impact will this have
>> in the non-optimal (with any of the excluded fields) case?
> Thank you for the feedback.

You're most welcome. You did request comments.

>
> There are 2 different relevant points:
>
> - the GRO stage.
>   packets carring any of CT, dst, sk or skb_ext will do 2 additional
> conditionals per gro_receive WRT the current code. My understanding is
> that having any of such field set at GRO receive time is quite
> exceptional for real nic. All others packet will do 4 or 5 less
> conditionals, and will traverse a little less code.
>
> - sk_buff lifecycle
>   * packets carrying vlan and UDP will not see any differences: sk_buff
> lifecycle will stil use 4 cachelines, as currently does, and no
> additional conditional is introduced.
>   * packets carring nfct or secmark will see an additional conditional
> every time such field is accessed. The number of cacheline used will
> still be 4, as in the current code. My understanding is that when such
> access happens, there is already a relevant amount of "additional" code
> to be executed, the conditional overhead should not be measurable.

I'm responsible for some of that "additonal" code. If the secmark
is considered to be outside the performance critical data there are
changes I would like to make that will substantially improve the
performance of that "additional" code that would include a u64
secmark. If use of a secmark is considered indicative of a "slow"
path, the rationale for restricting it to u32, that it might impact
the "usual" case performance, seems specious. I can't say that I
understand all the nuances and implications involved. It does
appear that the changes you've suggested could negate the classic
argument that requires the u32 secmark.

>
> Cheers,
>
> Paolo
>


^ permalink raw reply

* Re: AVC denied for docker while trying to set labels for tmpfs mounts
From: Daniel Walsh @ 2021-07-22  9:38 UTC (permalink / raw)
  To: Sujithra P, Paul Moore; +Cc: linux-security-module, selinux
In-Reply-To: <CAP198X-7NZ+1QfYK3cUUkMMNoaJTwNzBN8wr27egWT1kVh=g3Q@mail.gmail.com>

On 7/21/21 18:17, Sujithra P wrote:
> Thanks Paul!
>
> Is there any specific centos/RH mailing list that I can ask? Not sure
> whether it is a problem with kernel/docker/kubelet.
> semodule -R seems to fix the problem, but not sure what is causing the
> loaded policy to get corrupt.
> Any insight on how to figure this out would be very much appreciated.
>
> Thanks
> Sujithra.
I am guessing that one of the containers is loading policy.  You should 
be able to see something in the auditlog, about a policy load.
> On Wed, Jul 21, 2021 at 2:01 PM Paul Moore <paul@paul-moore.com> wrote:
>> On Wed, Jul 21, 2021 at 2:46 PM Sujithra P <sujithrap@gmail.com> wrote:
>>> Hi SELinux Experts,
>>>
>>> The following issue is described in the below post as well.
>>> https://github.com/containers/container-selinux/issues/141
>>>
>>> Occasionally running into the following selinux denials for docker
>>>
>>> type=AVC msg=audit(1626732057.636:4583): avc:  denied  { associate }
>>> for  pid=57450 comm="dockerd" name="/" dev="tmpfs" ino=150014
>>> scontext=system_u:object_r:container_file_t:s0:c263,c914
>>> tcontext=system_u:object_r:lib_t:s0 tclass=filesystem permissive=0
>>>
>>> type=AVC msg=audit(1626812823.170:9434): avc:  denied  { associate }
>>> for  pid=20027 comm="dockerd" name="/" dev="tmpfs" ino=198147
>>> scontext=system_u:object_r:container_file_t:s0:c578,c672
>>> tcontext=system_u:object_r:locale_t:s0 tclass=filesystem permissive=0
>>>
>>>
>>>   level=error msg="Handler for POST
>>> /v1.40/containers/a3a875e7896384e3bff53b8317e91ed4301a13957f42187eb227f28e09bd877c/start
>>> returned error: error setting label on mount source
>>> '/var/lib/kubelet/pods/f7cee5b2-bcd9-4aa1-9d67-c75b677ba2a1/volumes/kubernetes.io~secret/secret':
>>> failed to set file label on
>>> /var/lib/kubelet/pods/f7cee5b2-bcd9-4aa1-9d67-c75b677ba2a1/volumes/kubernetes.io~secret/secret:
>>> permission denied"
>>>
>>>
>>> Docker is not able to set labels for these tmpfs mounts because they
>>> end up having wrong labels when they are created (sometimes
>>> "locale_t", sometimes "lib_t" which of course is not the
>>> default/correct context for tmpfs fs).
>>> Apparently semodule -R and deleting these tmps files or reboot of the
>>> node fixes the problem.
>>> Not sure what is causing the tmpfs mounts to get wrong labels in the
>>> first place.
>>>
>>> Everything seems to be fine to begin with, but as the system keeps
>>> scheduling pods on the node, this behavior is observed sometimes (not
>>> consistent always).
>>>
>>>
>>> OS Details:
>>>
>>> NAME="CentOS Linux"
>>> VERSION="8 (Core)"
>>> ID="centos"
>>> ID_LIKE="rhel fedora"
>>> VERSION_ID="8"
>>> PLATFORM_ID="platform:el8"
>>> PRETTY_NAME="CentOS Linux 8 (Core)"
>>>
>>> Docker Version:
>>> Client: Docker Engine - Community
>>> Version: 19.03.13
>>> API version: 1.40
>>> Go version: go1.13.15
>>> Git commit: 4484c46d9d
>>> Built: Wed Sep 16 17:02:36 2020
>>> OS/Arch: linux/amd64
>>> Experimental: false
>>>
>>> Kubernetes Version*
>>> v1.20.8-gke.1500
>>>
>>>
>>> Any help on how to debug this issue  would be greatly appreciated.
>> This sounds like it might be a problem with CentOS and/or your Docker
>> install, have you tried talking with the RH/CentOS folks about this
>> problem?  We focus mostly on upstream issues here and it isn't clear
>> to me at this moment that this is an upstream issue.
>>
>> --
>> paul moore
>> www.paul-moore.com



^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox