netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
       [not found]     ` <1695034.0lrQgQPOMT@sifl>
@ 2012-08-09 14:50       ` Eric Dumazet
  2012-08-09 15:07         ` Paul Moore
  2012-08-09 20:06         ` Eric Paris
  0 siblings, 2 replies; 16+ messages in thread
From: Eric Dumazet @ 2012-08-09 14:50 UTC (permalink / raw)
  To: Paul Moore, David Miller
  Cc: Casey Schaufler, Eric Paris, John Stultz, Serge E. Hallyn, lkml,
	James Morris, selinux, john.johansen, LSM, netdev

From: Eric Dumazet <edumazet@google.com>

commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
selinux regression, reported and bisected by John Stultz

selinux_ip_postroute_compat() expect to find a valid sk->sk_security
pointer, but this field is NULL for unicast_sock

Fix this by adding a new 'kernel' parameter to security_sk_alloc(),
set to true if socket might already have a valid sk->sk_security
pointer. ip_send_unicast_reply() uses a percpu fake socket, so the first
call to security_sk_alloc() will populate sk->sk_security pointer,
subsequent ones will reuse existing context.

Reported-by: John Stultz <johnstul@us.ibm.com>
Bisected-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Eric Paris <eparis@parisplace.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
---
 include/linux/security.h   |    6 +++---
 net/core/sock.c            |    2 +-
 net/ipv4/ip_output.c       |    4 +++-
 security/security.c        |    4 ++--
 security/selinux/hooks.c   |    5 ++++-
 security/smack/smack_lsm.c |   10 ++++++++--
 6 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 4e5a73c..4d8e454 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1601,7 +1601,7 @@ struct security_operations {
 	int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb);
 	int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
 	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
-	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
+	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority, bool kernel);
 	void (*sk_free_security) (struct sock *sk);
 	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
 	void (*sk_getsecid) (struct sock *sk, u32 *secid);
@@ -2539,7 +2539,7 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
 int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
 				      int __user *optlen, unsigned len);
 int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
-int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
+int security_sk_alloc(struct sock *sk, int family, gfp_t priority, bool kernel);
 void security_sk_free(struct sock *sk);
 void security_sk_clone(const struct sock *sk, struct sock *newsk);
 void security_sk_classify_flow(struct sock *sk, struct flowi *fl);
@@ -2667,7 +2667,7 @@ static inline int security_socket_getpeersec_dgram(struct socket *sock, struct s
 	return -ENOPROTOOPT;
 }
 
-static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority, bool kernel)
 {
 	return 0;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f67ced..e00cadf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1186,7 +1186,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 	if (sk != NULL) {
 		kmemcheck_annotate_bitfield(sk, flags);
 
-		if (security_sk_alloc(sk, family, priority))
+		if (security_sk_alloc(sk, family, priority, false))
 			goto out_free;
 
 		if (!try_module_get(prot->owner))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 76dde25..b233d6e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1524,6 +1524,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 	sk->sk_priority = skb->priority;
 	sk->sk_protocol = ip_hdr(skb)->protocol;
 	sk->sk_bound_dev_if = arg->bound_dev_if;
+	if (security_sk_alloc(sk, PF_INET, GFP_ATOMIC, true))
+		goto out;
 	sock_net_set(sk, net);
 	__skb_queue_head_init(&sk->sk_write_queue);
 	sk->sk_sndbuf = sysctl_wmem_default;
@@ -1539,7 +1541,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
 		ip_push_pending_frames(sk, &fl4);
 	}
-
+out:
 	put_cpu_var(unicast_sock);
 
 	ip_rt_put(rt);
diff --git a/security/security.c b/security/security.c
index 860aeb3..23cf297 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1146,9 +1146,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u
 }
 EXPORT_SYMBOL(security_socket_getpeersec_dgram);
 
-int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+int security_sk_alloc(struct sock *sk, int family, gfp_t priority, bool kernel)
 {
-	return security_ops->sk_alloc_security(sk, family, priority);
+	return security_ops->sk_alloc_security(sk, family, priority, kernel);
 }
 
 void security_sk_free(struct sock *sk)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6c77f63..ccd4374 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4289,10 +4289,13 @@ out:
 	return 0;
 }
 
-static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
+static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority, bool kernel)
 {
 	struct sk_security_struct *sksec;
 
+	if (kernel && sk->sk_security)
+		return 0;
+
 	sksec = kzalloc(sizeof(*sksec), priority);
 	if (!sksec)
 		return -ENOMEM;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8221514..0b066d0 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1749,20 +1749,26 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
  * @sk: the socket
  * @family: unused
  * @gfp_flags: memory allocation flags
+ * @kernel: true if we should check sk_security being already set
  *
  * Assign Smack pointers to current
  *
  * Returns 0 on success, -ENOMEM is there's no memory
  */
-static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
+static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags, bool kernel)
 {
-	char *csp = smk_of_current();
+	char *csp;
 	struct socket_smack *ssp;
 
+	if (kernel && sk->sk_security)
+		return 0;
+
 	ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
 	if (ssp == NULL)
 		return -ENOMEM;
 
+	csp = kernel ? smack_net_ambient : smk_of_current();
+
 	ssp->smk_in = csp;
 	ssp->smk_out = csp;
 	ssp->smk_packet = NULL;

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 14:50       ` [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock Eric Dumazet
@ 2012-08-09 15:07         ` Paul Moore
  2012-08-09 15:36           ` Eric Dumazet
  2012-08-09 20:06         ` Eric Paris
  1 sibling, 1 reply; 16+ messages in thread
From: Paul Moore @ 2012-08-09 15:07 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Casey Schaufler, Eric Paris, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thursday, August 09, 2012 04:50:33 PM Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
> selinux regression, reported and bisected by John Stultz
> 
> selinux_ip_postroute_compat() expect to find a valid sk->sk_security
> pointer, but this field is NULL for unicast_sock
> 
> Fix this by adding a new 'kernel' parameter to security_sk_alloc(),
> set to true if socket might already have a valid sk->sk_security
> pointer. ip_send_unicast_reply() uses a percpu fake socket, so the first
> call to security_sk_alloc() will populate sk->sk_security pointer,
> subsequent ones will reuse existing context.
> 
> Reported-by: John Stultz <johnstul@us.ibm.com>
> Bisected-by: John Stultz <johnstul@us.ibm.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Paul Moore <paul@paul-moore.com>
> Cc: Eric Paris <eparis@parisplace.org>
> Cc: "Serge E. Hallyn" <serge@hallyn.com>

...

> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 76dde25..b233d6e 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -1524,6 +1524,8 @@ void ip_send_unicast_reply(struct net *net, struct
> sk_buff *skb, __be32 daddr, sk->sk_priority = skb->priority;
>  	sk->sk_protocol = ip_hdr(skb)->protocol;
>  	sk->sk_bound_dev_if = arg->bound_dev_if;
> +	if (security_sk_alloc(sk, PF_INET, GFP_ATOMIC, true))
> +		goto out;
>  	sock_net_set(sk, net);
>  	__skb_queue_head_init(&sk->sk_write_queue);
>  	sk->sk_sndbuf = sysctl_wmem_default;

Is is possible to do the call to security_sk_alloc() in the ip_init() function 
or does the per-cpu nature of the socket make this a pain?

-- 
paul moore
www.paul-moore.com


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 15:07         ` Paul Moore
@ 2012-08-09 15:36           ` Eric Dumazet
  2012-08-09 15:59             ` Paul Moore
  2012-08-09 16:05             ` Eric Paris
  0 siblings, 2 replies; 16+ messages in thread
From: Eric Dumazet @ 2012-08-09 15:36 UTC (permalink / raw)
  To: Paul Moore
  Cc: David Miller, Casey Schaufler, Eric Paris, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thu, 2012-08-09 at 11:07 -0400, Paul Moore wrote:

> Is is possible to do the call to security_sk_alloc() in the ip_init() function 
> or does the per-cpu nature of the socket make this a pain?
> 

Its a pain, if we want NUMA affinity.

Here, each cpu should get memory from its closest node.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 15:36           ` Eric Dumazet
@ 2012-08-09 15:59             ` Paul Moore
  2012-08-09 16:05             ` Eric Paris
  1 sibling, 0 replies; 16+ messages in thread
From: Paul Moore @ 2012-08-09 15:59 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Casey Schaufler, Eric Paris, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thu, Aug 9, 2012 at 11:36 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2012-08-09 at 11:07 -0400, Paul Moore wrote:
>
>> Is is possible to do the call to security_sk_alloc() in the ip_init() function
>> or does the per-cpu nature of the socket make this a pain?
>>
>
> Its a pain, if we want NUMA affinity.
>
> Here, each cpu should get memory from its closest node.

Okay, makes sense.

Acked-by: Paul Moore <paul@paul-moore.com>

-- 
paul moore
www.paul-moore.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 15:36           ` Eric Dumazet
  2012-08-09 15:59             ` Paul Moore
@ 2012-08-09 16:05             ` Eric Paris
  2012-08-09 16:09               ` Paul Moore
  2012-08-09 17:46               ` Eric Dumazet
  1 sibling, 2 replies; 16+ messages in thread
From: Eric Paris @ 2012-08-09 16:05 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Paul Moore, David Miller, Casey Schaufler, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thu, Aug 9, 2012 at 11:36 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2012-08-09 at 11:07 -0400, Paul Moore wrote:
>
>> Is is possible to do the call to security_sk_alloc() in the ip_init() function
>> or does the per-cpu nature of the socket make this a pain?
>>
>
> Its a pain, if we want NUMA affinity.
>
> Here, each cpu should get memory from its closest node.

I really really don't like it.  I won't say NAK, but it is the first
and only place in the kernel where I believe we allocate an object and
don't allocate the security blob until some random later point in
time.  If it is such a performance issue to have the security blob in
the same numa node, isn't adding a number of branches and putting this
function call on every output at least as bad?  Aren't we discouraged
from GFP_ATOMIC?  In __init we can use GFP_KERNEL.

This still doesn't fix these sockets entirely.  We now have the
security blob allocated, but it was never set to something useful.
Paul, are you looking into this?  This is a bandaide, not a fix....

-Eric

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 16:05             ` Eric Paris
@ 2012-08-09 16:09               ` Paul Moore
  2012-08-09 17:46               ` Eric Dumazet
  1 sibling, 0 replies; 16+ messages in thread
From: Paul Moore @ 2012-08-09 16:09 UTC (permalink / raw)
  To: Eric Paris
  Cc: Eric Dumazet, David Miller, Casey Schaufler, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thu, Aug 9, 2012 at 12:05 PM, Eric Paris <eparis@parisplace.org> wrote:
> Paul, are you looking into this?  This is a bandaide, not a fix....

Yep, I mentioned this a few times in the other thread.  The problem is
there is not going to be an easy fix for the labeling so I'd rather we
see this patch, or something like it, go in now to resolve the kernel
panic, and fix the labeling later.

-- 
paul moore
www.paul-moore.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 16:05             ` Eric Paris
  2012-08-09 16:09               ` Paul Moore
@ 2012-08-09 17:46               ` Eric Dumazet
  1 sibling, 0 replies; 16+ messages in thread
From: Eric Dumazet @ 2012-08-09 17:46 UTC (permalink / raw)
  To: Eric Paris
  Cc: Paul Moore, David Miller, Casey Schaufler, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thu, 2012-08-09 at 12:05 -0400, Eric Paris wrote:
> On Thu, Aug 9, 2012 at 11:36 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Thu, 2012-08-09 at 11:07 -0400, Paul Moore wrote:
> >
> >> Is is possible to do the call to security_sk_alloc() in the ip_init() function
> >> or does the per-cpu nature of the socket make this a pain?
> >>
> >
> > Its a pain, if we want NUMA affinity.
> >
> > Here, each cpu should get memory from its closest node.
> 
> I really really don't like it.  I won't say NAK, but it is the first
> and only place in the kernel where I believe we allocate an object and
> don't allocate the security blob until some random later point in
> time.

...

>   If it is such a performance issue to have the security blob in
> the same numa node, isn't adding a number of branches and putting this
> function call on every output at least as bad?  Aren't we discouraged
> from GFP_ATOMIC?  In __init we can use GFP_KERNEL.

What a big deal. Its done _once_ time per cpu, and this is so small blob
of memory you'll have to show us one single failure out of one million
boots.

If the security_sk_alloc() fails, we dont care. We are about sending a
RESET or ACK packet. They can be lost by the network, or even skb
allocation can fail. Nobody ever noticed and complained.

Every time we accept() a new socket (and call security_sk_alloc()), its
done under soft irq, thus GFP_ATOMIC, and you didn't complain yet, while
a socket needs about 2 Kbytes of memory...

> 
> This still doesn't fix these sockets entirely.  We now have the
> security blob allocated, but it was never set to something useful.
> Paul, are you looking into this?  This is a bandaide, not a fix....
> 

Please do so, on a followup patch, dont pretend I must fix all this
stuff.



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 14:50       ` [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock Eric Dumazet
  2012-08-09 15:07         ` Paul Moore
@ 2012-08-09 20:06         ` Eric Paris
       [not found]           ` <CACLa4ptkvKj2GT4ZL+msMuWOHW885Hugk8nz3hvptOoY9-totw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2012-08-09 21:29           ` Eric Dumazet
  1 sibling, 2 replies; 16+ messages in thread
From: Eric Paris @ 2012-08-09 20:06 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Paul Moore, David Miller, Casey Schaufler, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

[-- Attachment #1: Type: text/plain, Size: 8961 bytes --]

NAK.

I personally think commit be9f4a44e7d41cee should be reverted until it
is fixed.  Let me explain what all I believe it broke and how.

Old callchain of the creation of the 'equivalent' socket previous to
the patch in question just for reference:

    inet_ctl_sock_create
      sock_create_kern
        __sock_create
          pf->create (inet_create)
            sk_alloc
              sk_prot_alloc
                security_sk_alloc()


This WAS working properly.  All of it.  The equivalent struct sock was
being created and allocated in inet_create(), which called to
sk_alloc->sk_prot_alloc->security_sk_alloc().  We all agree that
failing to call security_sk_alloc() is the first regression
introduced.

The second regression was the labeling issue.  There was a call to
security_socket_post_create (from __sock_create) which was properly
setting the labels on both the socket and sock.  This new patch broke
that as well.  We don't expose an equivalent
security_sock_post_create() interface in the LSM currently, and until
we do, this can't be fixed.  It's why I say it should be reverted.

I have a patch I'm testing right now which takes care of the first
part the way I like (and yes, I'm doing the allocation on the correct
number node).  It basically looks like so:

+       for_each_possible_cpu(cpu) {
+               sock = &per_cpu(unicast_sock, cpu);
+               rc = security_sk_alloc(&sock->sk, PF_INET, GFP_KERNEL,
cpu_to_node(cpu));
+               if (rc)
+                       return rc;
+       }

I'm going to work right now on exposing the equivalent struct sock LSM
interface so we can call that as well.  But it's going to take me a
bit.  Attached is the patch just to (hopefully untested) shut up the
panic.

-Eric

On Thu, Aug 9, 2012 at 10:50 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
> selinux regression, reported and bisected by John Stultz
>
> selinux_ip_postroute_compat() expect to find a valid sk->sk_security
> pointer, but this field is NULL for unicast_sock
>
> Fix this by adding a new 'kernel' parameter to security_sk_alloc(),
> set to true if socket might already have a valid sk->sk_security
> pointer. ip_send_unicast_reply() uses a percpu fake socket, so the first
> call to security_sk_alloc() will populate sk->sk_security pointer,
> subsequent ones will reuse existing context.
>
> Reported-by: John Stultz <johnstul@us.ibm.com>
> Bisected-by: John Stultz <johnstul@us.ibm.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Paul Moore <paul@paul-moore.com>
> Cc: Eric Paris <eparis@parisplace.org>
> Cc: "Serge E. Hallyn" <serge@hallyn.com>
> ---
>  include/linux/security.h   |    6 +++---
>  net/core/sock.c            |    2 +-
>  net/ipv4/ip_output.c       |    4 +++-
>  security/security.c        |    4 ++--
>  security/selinux/hooks.c   |    5 ++++-
>  security/smack/smack_lsm.c |   10 ++++++++--
>  6 files changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 4e5a73c..4d8e454 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -1601,7 +1601,7 @@ struct security_operations {
>         int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb);
>         int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
>         int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
> -       int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
> +       int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority, bool kernel);
>         void (*sk_free_security) (struct sock *sk);
>         void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
>         void (*sk_getsecid) (struct sock *sk, u32 *secid);
> @@ -2539,7 +2539,7 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
>  int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
>                                       int __user *optlen, unsigned len);
>  int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
> -int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
> +int security_sk_alloc(struct sock *sk, int family, gfp_t priority, bool kernel);
>  void security_sk_free(struct sock *sk);
>  void security_sk_clone(const struct sock *sk, struct sock *newsk);
>  void security_sk_classify_flow(struct sock *sk, struct flowi *fl);
> @@ -2667,7 +2667,7 @@ static inline int security_socket_getpeersec_dgram(struct socket *sock, struct s
>         return -ENOPROTOOPT;
>  }
>
> -static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
> +static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority, bool kernel)
>  {
>         return 0;
>  }
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 8f67ced..e00cadf 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1186,7 +1186,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
>         if (sk != NULL) {
>                 kmemcheck_annotate_bitfield(sk, flags);
>
> -               if (security_sk_alloc(sk, family, priority))
> +               if (security_sk_alloc(sk, family, priority, false))
>                         goto out_free;
>
>                 if (!try_module_get(prot->owner))
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 76dde25..b233d6e 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -1524,6 +1524,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
>         sk->sk_priority = skb->priority;
>         sk->sk_protocol = ip_hdr(skb)->protocol;
>         sk->sk_bound_dev_if = arg->bound_dev_if;
> +       if (security_sk_alloc(sk, PF_INET, GFP_ATOMIC, true))
> +               goto out;
>         sock_net_set(sk, net);
>         __skb_queue_head_init(&sk->sk_write_queue);
>         sk->sk_sndbuf = sysctl_wmem_default;
> @@ -1539,7 +1541,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
>                 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
>                 ip_push_pending_frames(sk, &fl4);
>         }
> -
> +out:
>         put_cpu_var(unicast_sock);
>
>         ip_rt_put(rt);
> diff --git a/security/security.c b/security/security.c
> index 860aeb3..23cf297 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -1146,9 +1146,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u
>  }
>  EXPORT_SYMBOL(security_socket_getpeersec_dgram);
>
> -int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
> +int security_sk_alloc(struct sock *sk, int family, gfp_t priority, bool kernel)
>  {
> -       return security_ops->sk_alloc_security(sk, family, priority);
> +       return security_ops->sk_alloc_security(sk, family, priority, kernel);
>  }
>
>  void security_sk_free(struct sock *sk)
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 6c77f63..ccd4374 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -4289,10 +4289,13 @@ out:
>         return 0;
>  }
>
> -static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
> +static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority, bool kernel)
>  {
>         struct sk_security_struct *sksec;
>
> +       if (kernel && sk->sk_security)
> +               return 0;
> +
>         sksec = kzalloc(sizeof(*sksec), priority);
>         if (!sksec)
>                 return -ENOMEM;
> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index 8221514..0b066d0 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -1749,20 +1749,26 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
>   * @sk: the socket
>   * @family: unused
>   * @gfp_flags: memory allocation flags
> + * @kernel: true if we should check sk_security being already set
>   *
>   * Assign Smack pointers to current
>   *
>   * Returns 0 on success, -ENOMEM is there's no memory
>   */
> -static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
> +static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags, bool kernel)
>  {
> -       char *csp = smk_of_current();
> +       char *csp;
>         struct socket_smack *ssp;
>
> +       if (kernel && sk->sk_security)
> +               return 0;
> +
>         ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
>         if (ssp == NULL)
>                 return -ENOMEM;
>
> +       csp = kernel ? smack_net_ambient : smk_of_current();
> +
>         ssp->smk_in = csp;
>         ssp->smk_out = csp;
>         ssp->smk_packet = NULL;
>
>

[-- Attachment #2: tmp.patch --]
[-- Type: application/octet-stream, Size: 12667 bytes --]

commit feaf4fe8a8e4509540286899d02cd88f09c0d343
Author: Eric Paris <eparis@redhat.com>
Date:   Thu Aug 9 14:08:12 2012 -0400

    Network/Security: allocate security data when we allocate unicast sockets
    
    commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
    regression because it did not properly initialize the new per cpu sockets.
    This was reported and bisected by John Stultz:
    
    [   69.272927] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
    [   69.273374] IP: [<ffffffff8132e7c4>] selinux_ip_postroute_compat+0xa4/0xe0
    [   69.273374] PGD 3a85b067 PUD 3f50b067 PMD 0
    [   69.273374] Oops: 0000 [#1] PREEMPT SMP
    [   69.273374] CPU 3
    [   69.273374] Pid: 2392, comm: hwclock Not tainted 3.6.0-rc1john+ #106 Bochs Bochs
    [   69.273374] RIP: 0010:[<ffffffff8132e7c4>]  [<ffffffff8132e7c4>] selinux_ip_postroute_compat+0xa4/0xe0
    [   69.273374] RSP: 0018:ffff88003f003720  EFLAGS: 00010246
    [   69.273374] RAX: 0000000000000000 RBX: ffff88003f5fa9d8 RCX: 0000000000000006
    [   69.273374] RDX: ffff88003f003740 RSI: ffff88003c6b256c RDI: ffff88003f5fa9d8
                                                                             [ OK ]
    [   69.273374] RBP: ffff88003f0037a0 R08: 0000000000000000 R09: ffff88003f1d0cc0
    [   69.273374] R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000000
    [   69.273374] R13: 0000000000000002 R14: ffff88003f0037c0 R15: 0000000000000004
    [   69.273374] FS:  00007fa398211700(0000) GS:ffff88003f000000(0000) knlGS:0000000000000000
    [   69.273374] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [   69.273374] CR2: 0000000000000010 CR3: 000000003b52a000 CR4: 00000000000006e0
    [   69.273374] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    [   69.273374] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
    [   69.273374] Process hwclock (pid: 2392, threadinfo ffff88003a0ee000, task ffff88003fa82b80)
    [   69.273374] Stack:
    [   69.273374]  ffff88003c6b2558 0000000000000006 0000000000000000 0000160067d70002
    [   69.273374]  0f02000a0202000a 0000000000000000 0000000000000000 0000000000000000
    [   69.273374]  ffff88003f003802 ffff88003f003728 ffff88003f1d42d0 ffff88003d6c3560
    [   69.273374] Call Trace:
    [   69.273374]  <IRQ>
    [   69.273374]  [<ffffffff8132eaab>] selinux_ip_postroute+0x2ab/0x3e0
    [   69.273374]  [<ffffffff8132ec1c>] selinux_ipv4_postroute+0x1c/0x20
    [   69.273374]  [<ffffffff8198265c>] nf_iterate+0xac/0x140
    [   69.273374]  [<ffffffff819827a5>] nf_hook_slow+0xb5/0x210
    [   69.273374]  [<ffffffff8199cbba>] ip_output+0xaa/0x150
    [   69.273374]  [<ffffffff8199a9af>] ip_local_out+0x7f/0x110
    [   69.273374]  [<ffffffff8199d82e>] ip_send_skb+0xe/0x40
    [   69.273374]  [<ffffffff8199d88b>] ip_push_pending_frames+0x2b/0x30
    [   69.273374]  [<ffffffff8199dc97>] ip_send_unicast_reply+0x2c7/0x3c0
    [   69.273374]  [<ffffffff819bb215>] tcp_v4_send_reset+0x1f5/0x3f0
    [   69.273374]  [<ffffffff819bf04b>] tcp_v4_rcv+0x2bb/0x1080
    [   69.273374]  [<ffffffff81994d73>] ip_local_deliver_finish+0x133/0x4d0
    [   69.273374]  [<ffffffff819953e0>] ip_local_deliver+0x90/0xa0
    [   69.273374]  [<ffffffff819945b2>] ip_rcv_finish+0x262/0x8f0
    [   69.273374]  [<ffffffff81995742>] ip_rcv+0x352/0x3a0
    [   69.323844]  [<ffffffff81925244>] __netif_receive_skb+0xcb4/0x10e0
    [   69.323844]  [<ffffffff8192ba5d>] netif_receive_skb+0x18d/0x230
    [   69.323844]  [<ffffffff81746abc>] virtnet_poll+0x58c/0x7b0
    [   69.323844]  [<ffffffff8192cf59>] net_rx_action+0x289/0x550
    [   69.323844]  [<ffffffff8105846a>] __do_softirq+0x1da/0x560
    [   69.323844]  [<ffffffff81b5c2bc>] call_softirq+0x1c/0x30
    [   69.323844]  [<ffffffff81004d75>] do_softirq+0x105/0x1e0
    [   69.323844]  [<ffffffff81058bbe>] irq_exit+0x9e/0x100
    [   69.323844]  [<ffffffff81b5c9d3>] do_IRQ+0x63/0xd0
    [   69.323844]  [<ffffffff81b5a56f>] common_interrupt+0x6f/0x6f
    [   69.323844]  <EOI>
    [   69.323844]  [<ffffffff810993ad>] __might_sleep+0x1cd/0x280
    [   69.323844]  [<ffffffff81160e74>] might_fault+0x34/0xb0
    [   69.323844]  [<ffffffff8105657e>] sys_gettimeofday+0xbe/0xf0
    [   69.323844]  [<ffffffff81b5afe9>] system_call_fastpath+0x16/0x1b
    [   69.323844] Code: c0 45 31 c9 b1 01 ba 2a 00 00 00 e8 a7 89 ff ff 85 c0 b9 00 00 6f 00 74 0e 48 83 c4 70 89 c8 5b 41 5c 5d c3 0f 1f 00 0f b6 4d ef <41> 8b 7c 24 10 48 8d 55 c0 48 89 de e8 ab 6d 01 00 83 f8 01 19
    [   69.323844] RIP  [<ffffffff8132e7c4>] selinux_ip_postroute_compat+0xa4/0xe0
    [   69.323844]  RSP <ffff88003f003720>
    [   69.323844] CR2: 0000000000000010
    [   69.357489] ---[ end trace 0cd3e1a60dee6096 ]---
    [   69.358353] Kernel panic - not syncing: Fatal exception in interrupt
    
    The reason for the regresion is because of how the new sock is created.  The
    old socket was created using inet_ctl_sock_create() which uses all generic
    functions to establish the struct socket, struct sock, and do all of the
    allocation and initialization of the socket and its appropriate security data.
    
    aka:
    
    inet_ctl_sock_create
      sock_create_kern
        __sock_create
          pf->create (inet_create)
            sk_alloc
              sk_prot_alloc
                sec_sk_alloc()
    
    These new per_cpu skip all of that initialization and instead try to do it by
    hand.  Doing it by hand causes a second regression.  The __sock_create()
    function calls security_socket_post_create() which initializes the securty
    state on both the socket and the sock.  However here we don't set up the
    security structure.  I'd like to use security_socket_post_create() but it needs
    the socket and in this case we skipped straight to the struct sock.  Looks like
    this custom hackary is going to require a second patch which exposes the inards
    of security_socket_post_create() as security_sock_post_create() so we can do
    the labeling of this created this way.  But at least this one won't panic the
    kernel.
    
    Reported-by: John Stultz <johnstul@us.ibm.com>
    Bisected-by: John Stultz <johnstul@us.ibm.com>
    Signed-off-by: Eric Paris <eparis@redhat.com>
    Cc: Eric Dumazet <edumazet@google.com>
    Cc: Paul Moore <paul@paul-moore.com>
    Cc: "Serge E. Hallyn" <serge@hallyn.com>

diff --git a/include/linux/security.h b/include/linux/security.h
index 4e5a73c..1e0c5a7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1601,7 +1601,7 @@ struct security_operations {
 	int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb);
 	int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
 	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
-	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
+	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority, int numa_node);
 	void (*sk_free_security) (struct sock *sk);
 	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
 	void (*sk_getsecid) (struct sock *sk, u32 *secid);
@@ -2539,7 +2539,7 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
 int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
 				      int __user *optlen, unsigned len);
 int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
-int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
+int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node);
 void security_sk_free(struct sock *sk);
 void security_sk_clone(const struct sock *sk, struct sock *newsk);
 void security_sk_classify_flow(struct sock *sk, struct flowi *fl);
@@ -2667,7 +2667,7 @@ static inline int security_socket_getpeersec_dgram(struct socket *sock, struct s
 	return -ENOPROTOOPT;
 }
 
-static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node)
 {
 	return 0;
 }
diff --git a/include/net/ip.h b/include/net/ip.h
index bd5e444..340905d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -105,7 +105,7 @@ extern void		ip_send_check(struct iphdr *ip);
 extern int		__ip_local_out(struct sk_buff *skb);
 extern int		ip_local_out(struct sk_buff *skb);
 extern int		ip_queue_xmit(struct sk_buff *skb, struct flowi *fl);
-extern void		ip_init(void);
+extern int		ip_init(void);
 extern int		ip_append_data(struct sock *sk, struct flowi4 *fl4,
 				       int getfrag(void *from, char *to, int offset, int len,
 						   int odd, struct sk_buff *skb),
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f67ced8..2cab455 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1186,7 +1186,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 	if (sk != NULL) {
 		kmemcheck_annotate_bitfield(sk, flags);
 
-		if (security_sk_alloc(sk, family, priority))
+		if (security_sk_alloc(sk, family, priority, numa_node_id()))
 			goto out_free;
 
 		if (!try_module_get(prot->owner))
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6681ccf..3f79f37 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1720,7 +1720,8 @@ static int __init inet_init(void)
 	 *	Set the IP module up
 	 */
 
-	ip_init();
+	if (ip_init() < 0)
+		panic("Failed to initialize ip.\n");
 
 	tcp_v4_init();
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 76dde25..4a775b3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1545,12 +1545,23 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 	ip_rt_put(rt);
 }
 
-void __init ip_init(void)
+int __init ip_init(void)
 {
+	struct inet_sock *sock;
+	int rc, cpu;
+
 	ip_rt_init();
 	inet_initpeers();
 
 #if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
 	igmp_mc_proc_init();
 #endif
+
+	for_each_possible_cpu(cpu) {
+		sock = &per_cpu(unicast_sock, cpu);
+		rc = security_sk_alloc(&sock->sk, PF_INET, GFP_KERNEL, cpu_to_node(cpu));
+		if (rc)
+			return rc;
+	}
+	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index 61095df..0525d28 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -650,7 +650,7 @@ static int cap_socket_getpeersec_dgram(struct socket *sock,
 	return -ENOPROTOOPT;
 }
 
-static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
+static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority, int numa_node)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 860aeb3..02a7f76 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1146,9 +1146,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u
 }
 EXPORT_SYMBOL(security_socket_getpeersec_dgram);
 
-int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node)
 {
-	return security_ops->sk_alloc_security(sk, family, priority);
+	return security_ops->sk_alloc_security(sk, family, priority, numa_node);
 }
 
 void security_sk_free(struct sock *sk)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6c77f63..bdcfd0c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4289,11 +4289,12 @@ out:
 	return 0;
 }
 
-static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
+static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority,
+				     int numa_node)
 {
 	struct sk_security_struct *sksec;
 
-	sksec = kzalloc(sizeof(*sksec), priority);
+	sksec = kmalloc_node(sizeof(*sksec), priority | __GFP_ZERO, numa_node);
 	if (!sksec)
 		return -ENOMEM;
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8221514..b43ae5d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1754,12 +1754,14 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
  *
  * Returns 0 on success, -ENOMEM is there's no memory
  */
-static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
+static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags,
+				   int numa_node)
 {
 	char *csp = smk_of_current();
 	struct socket_smack *ssp;
 
-	ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
+	ssp = kmalloc_node(sizeof(struct socket_smack), gfp_flags | __GFP_ZERO,
+			   numa_node);
 	if (ssp == NULL)
 		return -ENOMEM;
 

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
       [not found]           ` <CACLa4ptkvKj2GT4ZL+msMuWOHW885Hugk8nz3hvptOoY9-totw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2012-08-09 20:19             ` Paul Moore
  0 siblings, 0 replies; 16+ messages in thread
From: Paul Moore @ 2012-08-09 20:19 UTC (permalink / raw)
  To: Eric Paris
  Cc: Eric Dumazet, David Miller, Casey Schaufler, John Stultz,
	Serge E. Hallyn, lkml, James Morris,
	selinux-+05T5uksL2qpZYMLLGbcSA,
	john.johansen-Z7WLFzj8eWMS+FvcfC7Uqw, LSM, netdev

On Thu, Aug 9, 2012 at 4:06 PM, Eric Paris <eparis-FjpueFixGhCM4zKIHC2jIg@public.gmane.org> wrote:
> I'm going to work right now on exposing the equivalent struct sock LSM
> interface so we can call that as well.  But it's going to take me a
> bit.

Before you go too far down this path, can you elaborate on what
exactly you mean by the above?

I'm asking because I'm not convinced the labeling, either the old way
or the new way, was 100% correct and I think we're going to need to
change things regardless.  I'm just not sure what the right solution
is just yet.

-- 
paul moore
www.paul-moore.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 20:06         ` Eric Paris
       [not found]           ` <CACLa4ptkvKj2GT4ZL+msMuWOHW885Hugk8nz3hvptOoY9-totw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2012-08-09 21:29           ` Eric Dumazet
  2012-08-09 21:53             ` Casey Schaufler
  2012-08-09 23:38             ` David Miller
  1 sibling, 2 replies; 16+ messages in thread
From: Eric Dumazet @ 2012-08-09 21:29 UTC (permalink / raw)
  To: Eric Paris
  Cc: Paul Moore, David Miller, Casey Schaufler, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thu, 2012-08-09 at 16:06 -0400, Eric Paris wrote:
> NAK.
> 
> I personally think commit be9f4a44e7d41cee should be reverted until it
> is fixed.  Let me explain what all I believe it broke and how.
> 

Suggesting to revert this commit while we have known working fixes is a
bit of strange reaction.

I understand you are upset, but I believe we tried to fix it.

> Old callchain of the creation of the 'equivalent' socket previous to
> the patch in question just for reference:
> 
>     inet_ctl_sock_create
>       sock_create_kern
>         __sock_create
>           pf->create (inet_create)
>             sk_alloc
>               sk_prot_alloc
>                 security_sk_alloc()
> 
> 
> This WAS working properly.  All of it. 

Nobody denies it. But acknowledge my patch uncovered a fundamental
issue.

What kind of 'security module' can decide to let RST packets being sent
or not, on a global scale ? (one socket for the whole machine)

smack_sk_alloc_security() uses smk_of_current() : What can be the
meaning of smk_of_current() in the context of 'kernel' sockets...

Your patch tries to maintain this status quo.

In fact I suggest the following one liner patch, unless you can really
demonstrate what can be the meaning of providing a fake socket for these
packets.

This mess only happened because ip_append_data()/ip_push_pending_frames()
are so complex and use an underlying socket.

But this socket should not be ever used outside of its scope.

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 76dde25..ec410e0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1536,6 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 			  arg->csumoffset) = csum_fold(csum_add(nskb->csum,
 								arg->csum));
 		nskb->ip_summed = CHECKSUM_NONE;
+		skb_orphan(nskb);
 		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
 		ip_push_pending_frames(sk, &fl4);
 	}

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 21:29           ` Eric Dumazet
@ 2012-08-09 21:53             ` Casey Schaufler
  2012-08-09 22:05               ` Eric Dumazet
  2012-08-09 23:38             ` David Miller
  1 sibling, 1 reply; 16+ messages in thread
From: Casey Schaufler @ 2012-08-09 21:53 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Eric Paris, Paul Moore, David Miller, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev, Casey Schaufler

On 8/9/2012 2:29 PM, Eric Dumazet wrote:
> On Thu, 2012-08-09 at 16:06 -0400, Eric Paris wrote:
>> NAK.
>>
>> I personally think commit be9f4a44e7d41cee should be reverted until it
>> is fixed.  Let me explain what all I believe it broke and how.
>>
> Suggesting to revert this commit while we have known working fixes is a
> bit of strange reaction.

A couple of potential short term workarounds have been identified,
but no one is happy with them for the long term. That does not
qualify as a "working fix" in engineering terms.

> I understand you are upset, but I believe we tried to fix it.
>
>> Old callchain of the creation of the 'equivalent' socket previous to
>> the patch in question just for reference:
>>
>>     inet_ctl_sock_create
>>       sock_create_kern
>>         __sock_create
>>           pf->create (inet_create)
>>             sk_alloc
>>               sk_prot_alloc
>>                 security_sk_alloc()
>>
>>
>> This WAS working properly.  All of it. 
> Nobody denies it. But acknowledge my patch uncovered a fundamental
> issue.
>
> What kind of 'security module' can decide to let RST packets being sent
> or not, on a global scale ? (one socket for the whole machine)

The short answer is "any security module that wants to".

And before we go any further, I'm a little surprised that
SELinux doesn't do this already.

>
> smack_sk_alloc_security() uses smk_of_current() : What can be the
> meaning of smk_of_current() in the context of 'kernel' sockets...

Yes, and all of it's callers - to date - have had an appropriate
value of current. It is using the API in the way it is supposed to.
It is assuming a properly formed socket. You want to give it a
cobbled together partial socket structure without task context.
Your predecessor did not have this problem.

>
> Your patch tries to maintain this status quo.
>
> In fact I suggest the following one liner patch, unless you can really
> demonstrate what can be the meaning of providing a fake socket for these
> packets.
>
> This mess only happened because ip_append_data()/ip_push_pending_frames()
> are so complex and use an underlying socket.
>
> But this socket should not be ever used outside of its scope.
>
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 76dde25..ec410e0 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -1536,6 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
>  			  arg->csumoffset) = csum_fold(csum_add(nskb->csum,
>  								arg->csum));
>  		nskb->ip_summed = CHECKSUM_NONE;
> +		skb_orphan(nskb);
>  		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
>  		ip_push_pending_frames(sk, &fl4);
>  	}
>
>
>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 21:53             ` Casey Schaufler
@ 2012-08-09 22:05               ` Eric Dumazet
  2012-08-09 22:26                 ` Casey Schaufler
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2012-08-09 22:05 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: Eric Paris, Paul Moore, David Miller, John Stultz,
	Serge E. Hallyn, lkml, James Morris, selinux, john.johansen, LSM,
	netdev

On Thu, 2012-08-09 at 14:53 -0700, Casey Schaufler wrote:
> On 8/9/2012 2:29 PM, Eric Dumazet wrote:

> > smack_sk_alloc_security() uses smk_of_current() : What can be the
> > meaning of smk_of_current() in the context of 'kernel' sockets...
> 
> Yes, and all of it's callers - to date - have had an appropriate
> value of current. It is using the API in the way it is supposed to.
> It is assuming a properly formed socket. You want to give it a
> cobbled together partial socket structure without task context.
> Your predecessor did not have this problem.

My predecessor ? You mean before the patch ?

tcp socket was preallocated by at kernel boot time.

What is the 'user' owning this socket ?

You guys focus on an implementation detail of TCP stack.
You should never use this fake socket.

I repeat : There are no true socket for these control packets.

If you want them, then you'll have to add fields in timewait socket.

I can decide to rewrite the whole thing just building a TCP packet on
its own, and send it without any fake socket.

Some guy 15 years ago tried to reuse some high level functions, able to
build super packets and use sophisticated tricks, while we only want so
send a 40 or 60 bytes packet.




^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 22:05               ` Eric Dumazet
@ 2012-08-09 22:26                 ` Casey Schaufler
  0 siblings, 0 replies; 16+ messages in thread
From: Casey Schaufler @ 2012-08-09 22:26 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Eric Paris, Paul Moore, David Miller, John Stultz,
	Serge E. Hallyn, lkml, James Morris,
	selinux-+05T5uksL2qpZYMLLGbcSA,
	john.johansen-Z7WLFzj8eWMS+FvcfC7Uqw, LSM, netdev,
	Casey Schaufler

On 8/9/2012 3:05 PM, Eric Dumazet wrote:
> On Thu, 2012-08-09 at 14:53 -0700, Casey Schaufler wrote:
>> On 8/9/2012 2:29 PM, Eric Dumazet wrote:
>>> smack_sk_alloc_security() uses smk_of_current(): What can be the
> I repeat: There are no true socket for these control packets.

OK, fine. You have an optimization. I'm good with that. Just don't
expect that the entire software stack you are taking advantage of
is going to change to accommodate your special case.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock
  2012-08-09 21:29           ` Eric Dumazet
  2012-08-09 21:53             ` Casey Schaufler
@ 2012-08-09 23:38             ` David Miller
  2012-08-09 23:56               ` [PATCH] ipv4: tcp: unicast_sock should not land outside of TCP stack Eric Dumazet
  1 sibling, 1 reply; 16+ messages in thread
From: David Miller @ 2012-08-09 23:38 UTC (permalink / raw)
  To: eric.dumazet
  Cc: eparis, paul, casey, johnstul, serge, linux-kernel,
	james.l.morris, selinux, john.johansen, linux-security-module,
	netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 09 Aug 2012 23:29:03 +0200

> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 76dde25..ec410e0 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -1536,6 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
>  			  arg->csumoffset) = csum_fold(csum_add(nskb->csum,
>  								arg->csum));
>  		nskb->ip_summed = CHECKSUM_NONE;
> +		skb_orphan(nskb);
>  		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
>  		ip_push_pending_frames(sk, &fl4);
>  	}
> 

This is definitely the best fix, please submit this formally.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] ipv4: tcp: unicast_sock should not land outside of TCP stack
  2012-08-09 23:38             ` David Miller
@ 2012-08-09 23:56               ` Eric Dumazet
  2012-08-10  4:05                 ` David Miller
  0 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2012-08-09 23:56 UTC (permalink / raw)
  To: David Miller
  Cc: eparis, paul, casey, johnstul, serge, linux-kernel,
	james.l.morris, selinux, john.johansen, linux-security-module,
	netdev

From: Eric Dumazet <edumazet@google.com>

commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
selinux regression, reported and bisected by John Stultz

selinux_ip_postroute_compat() expect to find a valid sk->sk_security
pointer, but this field is NULL for unicast_sock

It turns out that unicast_sock are really temporary stuff to be able
to reuse  part of IP stack (ip_append_data()/ip_push_pending_frames())

Fact is that frames sent by ip_send_unicast_reply() should be orphaned
to not fool LSM.

Note IPv6 never had this problem, as tcp_v6_send_response() doesnt use a
fake socket at all. I'll probably implement tcp_v4_send_response() to
remove these unicast_sock in linux-3.7

Reported-by: John Stultz <johnstul@us.ibm.com>
Bisected-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Eric Paris <eparis@parisplace.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
---
 net/ipv4/ip_output.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 76dde25..ec410e0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1536,6 +1536,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 			  arg->csumoffset) = csum_fold(csum_add(nskb->csum,
 								arg->csum));
 		nskb->ip_summed = CHECKSUM_NONE;
+		skb_orphan(nskb);
 		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
 		ip_push_pending_frames(sk, &fl4);
 	}



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] ipv4: tcp: unicast_sock should not land outside of TCP stack
  2012-08-09 23:56               ` [PATCH] ipv4: tcp: unicast_sock should not land outside of TCP stack Eric Dumazet
@ 2012-08-10  4:05                 ` David Miller
  0 siblings, 0 replies; 16+ messages in thread
From: David Miller @ 2012-08-10  4:05 UTC (permalink / raw)
  To: eric.dumazet
  Cc: eparis, paul, casey, johnstul, serge, linux-kernel,
	james.l.morris, selinux, john.johansen, linux-security-module,
	netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 10 Aug 2012 01:56:06 +0200

> From: Eric Dumazet <edumazet@google.com>
> 
> commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
> selinux regression, reported and bisected by John Stultz
> 
> selinux_ip_postroute_compat() expect to find a valid sk->sk_security
> pointer, but this field is NULL for unicast_sock
> 
> It turns out that unicast_sock are really temporary stuff to be able
> to reuse  part of IP stack (ip_append_data()/ip_push_pending_frames())
> 
> Fact is that frames sent by ip_send_unicast_reply() should be orphaned
> to not fool LSM.
> 
> Note IPv6 never had this problem, as tcp_v6_send_response() doesnt use a
> fake socket at all. I'll probably implement tcp_v4_send_response() to
> remove these unicast_sock in linux-3.7
> 
> Reported-by: John Stultz <johnstul@us.ibm.com>
> Bisected-by: John Stultz <johnstul@us.ibm.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2012-08-10  4:05 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <50215A7E.8000701@linaro.org>
     [not found] ` <1344462889.28967.328.camel@edumazet-glaptop>
     [not found]   ` <5022FD9A.4020603@schaufler-ca.com>
     [not found]     ` <1695034.0lrQgQPOMT@sifl>
2012-08-09 14:50       ` [PATCH] ipv4: tcp: security_sk_alloc() needed for unicast_sock Eric Dumazet
2012-08-09 15:07         ` Paul Moore
2012-08-09 15:36           ` Eric Dumazet
2012-08-09 15:59             ` Paul Moore
2012-08-09 16:05             ` Eric Paris
2012-08-09 16:09               ` Paul Moore
2012-08-09 17:46               ` Eric Dumazet
2012-08-09 20:06         ` Eric Paris
     [not found]           ` <CACLa4ptkvKj2GT4ZL+msMuWOHW885Hugk8nz3hvptOoY9-totw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2012-08-09 20:19             ` Paul Moore
2012-08-09 21:29           ` Eric Dumazet
2012-08-09 21:53             ` Casey Schaufler
2012-08-09 22:05               ` Eric Dumazet
2012-08-09 22:26                 ` Casey Schaufler
2012-08-09 23:38             ` David Miller
2012-08-09 23:56               ` [PATCH] ipv4: tcp: unicast_sock should not land outside of TCP stack Eric Dumazet
2012-08-10  4:05                 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).