Netdev List
 help / color / mirror / Atom feed
* [PATCH v5] igb: Convert kmap() to kmap_local_page()
From: Alaa Mohamed @ 2022-04-19 23:43 UTC (permalink / raw)
  To: outreachy
  Cc: jesse.brandeburg, anthony.l.nguyen, davem, kuba, pabeni,
	intel-wired-lan, netdev, linux-kernel, ira.weiny,
	eng.alaamohamedsoliman.am

kmap() is being deprecated and these usages are all local to the thread
so there is no reason kmap_local_page() can't be used.

Replace kmap() calls with kmap_local_page().

Signed-off-by: Alaa Mohamed <eng.alaamohamedsoliman.am@gmail.com>
---
changes in V2:
	fix kunmap_local path value to take address of the mapped page.
---
changes in V3:
	edit commit message to be clearer
---
changes in V4:
	edit the commit message
---
changes in V5:
	-edit commit subject
	-edit commit message
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 2a5782063f4c..c14fc871dd41 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1798,14 +1798,14 @@ static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
 
 	frame_size >>= 1;
 
-	data = kmap(rx_buffer->page);
+	data = kmap_local_page(rx_buffer->page);
 
 	if (data[3] != 0xFF ||
 	    data[frame_size + 10] != 0xBE ||
 	    data[frame_size + 12] != 0xAF)
 		match = false;
 
-	kunmap(rx_buffer->page);
+	kunmap_local(data);
 
 	return match;
 }
-- 
2.35.2


^ permalink raw reply related

* [PATCH bpf-next] bpf: use bpf_prog_run_array_cg_flags everywhere
From: Stanislav Fomichev @ 2022-04-19 22:22 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev

Rename bpf_prog_run_array_cg_flags to bpf_prog_run_array_cg and
use it everywhere. check_return_code already enforces sane
return ranges for all cgroup types. (only egress and bind hooks have
uncanonical return ranges, the rest is using [0, 1])

No functional changes.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup.h |  8 ++---
 kernel/bpf/cgroup.c        | 70 ++++++++++++--------------------------
 2 files changed, 24 insertions(+), 54 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 88a51b242adc..669d96d074ad 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -225,24 +225,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
 
 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype)				       \
 ({									       \
-	u32 __unused_flags;						       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(atype))					       \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
-							  NULL,		       \
-							  &__unused_flags);    \
+							  NULL, NULL);	       \
 	__ret;								       \
 })
 
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx)		       \
 ({									       \
-	u32 __unused_flags;						       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(atype))	{				       \
 		lock_sock(sk);						       \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
-							  t_ctx,	       \
-							  &__unused_flags);    \
+							  t_ctx, NULL);	       \
 		release_sock(sk);					       \
 	}								       \
 	__ret;								       \
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 0cb6211fcb58..f61eca32c747 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -25,50 +25,18 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 /* __always_inline is necessary to prevent indirect call through run_prog
  * function pointer.
  */
-static __always_inline int
-bpf_prog_run_array_cg_flags(const struct cgroup_bpf *cgrp,
-			    enum cgroup_bpf_attach_type atype,
-			    const void *ctx, bpf_prog_run_fn run_prog,
-			    int retval, u32 *ret_flags)
-{
-	const struct bpf_prog_array_item *item;
-	const struct bpf_prog *prog;
-	const struct bpf_prog_array *array;
-	struct bpf_run_ctx *old_run_ctx;
-	struct bpf_cg_run_ctx run_ctx;
-	u32 func_ret;
-
-	run_ctx.retval = retval;
-	migrate_disable();
-	rcu_read_lock();
-	array = rcu_dereference(cgrp->effective[atype]);
-	item = &array->items[0];
-	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
-	while ((prog = READ_ONCE(item->prog))) {
-		run_ctx.prog_item = item;
-		func_ret = run_prog(prog, ctx);
-		if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
-			run_ctx.retval = -EPERM;
-		*(ret_flags) |= (func_ret >> 1);
-		item++;
-	}
-	bpf_reset_run_ctx(old_run_ctx);
-	rcu_read_unlock();
-	migrate_enable();
-	return run_ctx.retval;
-}
-
 static __always_inline int
 bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
 		      enum cgroup_bpf_attach_type atype,
 		      const void *ctx, bpf_prog_run_fn run_prog,
-		      int retval)
+		      int retval, u32 *ret_flags)
 {
 	const struct bpf_prog_array_item *item;
 	const struct bpf_prog *prog;
 	const struct bpf_prog_array *array;
 	struct bpf_run_ctx *old_run_ctx;
 	struct bpf_cg_run_ctx run_ctx;
+	u32 func_ret;
 
 	run_ctx.retval = retval;
 	migrate_disable();
@@ -78,8 +46,11 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 	while ((prog = READ_ONCE(item->prog))) {
 		run_ctx.prog_item = item;
-		if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+		func_ret = run_prog(prog, ctx);
+		if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
 			run_ctx.retval = -EPERM;
+		if (ret_flags)
+			*(ret_flags) |= (func_ret >> 1);
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
@@ -1144,9 +1115,8 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 		u32 flags = 0;
 		bool cn;
 
-		ret = bpf_prog_run_array_cg_flags(
-			&cgrp->bpf, atype,
-			skb, __bpf_prog_run_save_cb, 0, &flags);
+		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
+					    __bpf_prog_run_save_cb, 0, &flags);
 
 		/* Return values of CGROUP EGRESS BPF programs are:
 		 *   0: drop packet
@@ -1172,7 +1142,8 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 			ret = (cn ? NET_XMIT_DROP : ret);
 	} else {
 		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
-					    skb, __bpf_prog_run_save_cb, 0);
+					    skb, __bpf_prog_run_save_cb, 0,
+					    NULL);
 		if (ret && !IS_ERR_VALUE((long)ret))
 			ret = -EFAULT;
 	}
@@ -1202,7 +1173,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
-	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0);
+	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
+				     NULL);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1247,8 +1219,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	}
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	return bpf_prog_run_array_cg_flags(&cgrp->bpf, atype,
-					   &ctx, bpf_prog_run, 0, flags);
+	return bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
+				     0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1275,7 +1247,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
 	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
-				     0);
+				     0, NULL);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1292,7 +1264,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
+				    NULL);
 	rcu_read_unlock();
 
 	return ret;
@@ -1457,7 +1430,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
+	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
+				    NULL);
 	rcu_read_unlock();
 
 	kfree(ctx.cur_val);
@@ -1550,7 +1524,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 
 	lock_sock(sk);
 	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
-				    &ctx, bpf_prog_run, 0);
+				    &ctx, bpf_prog_run, 0, NULL);
 	release_sock(sk);
 
 	if (ret)
@@ -1650,7 +1624,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 
 	lock_sock(sk);
 	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
-				    &ctx, bpf_prog_run, retval);
+				    &ctx, bpf_prog_run, retval, NULL);
 	release_sock(sk);
 
 	if (ret < 0)
@@ -1699,7 +1673,7 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
 	 */
 
 	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
-				    &ctx, bpf_prog_run, retval);
+				    &ctx, bpf_prog_run, retval, NULL);
 	if (ret < 0)
 		return ret;
 
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* Re: [PATCH RFC 08/15] SUNRPC: Add RPC_TASK_CORK flag
From: Trond Myklebust @ 2022-04-19 22:08 UTC (permalink / raw)
  To: chuck.lever@oracle.com
  Cc: linux-cifs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvme@lists.infradead.org, netdev@vger.kernel.org,
	simo@redhat.com, ak@tempesta-tech.com, borisp@nvidia.com,
	linux-nfs@vger.kernel.org
In-Reply-To: <B7355D85-1CCF-4836-9B85-E6C9E019CD9E@oracle.com>

On Tue, 2022-04-19 at 19:40 +0000, Chuck Lever III wrote:
> 
> 
> > On Apr 19, 2022, at 3:04 PM, Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > 
> > On Tue, 2022-04-19 at 18:16 +0000, Chuck Lever III wrote:
> > > 
> > > 
> > > > On Apr 18, 2022, at 10:57 PM, Trond Myklebust
> > > > <trondmy@hammerspace.com> wrote:
> > > > 
> > > > On Mon, 2022-04-18 at 12:52 -0400, Chuck Lever wrote:
> > > > > Introduce a mechanism to cause xprt_transmit() to break out
> > > > > of
> > > > > its
> > > > > sending loop at a specific rpc_rqst, rather than draining the
> > > > > whole
> > > > > transmit queue.
> > > > > 
> > > > > This enables the client to send just an RPC TLS probe and
> > > > > then
> > > > > wait
> > > > > for the response before proceeding with the rest of the
> > > > > queue.
> > > > > 
> > > > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > > > ---
> > > > >  include/linux/sunrpc/sched.h  |    2 ++
> > > > >  include/trace/events/sunrpc.h |    1 +
> > > > >  net/sunrpc/xprt.c             |    2 ++
> > > > >  3 files changed, 5 insertions(+)
> > > > > 
> > > > > diff --git a/include/linux/sunrpc/sched.h
> > > > > b/include/linux/sunrpc/sched.h
> > > > > index 599133fb3c63..f8c09638fa69 100644
> > > > > --- a/include/linux/sunrpc/sched.h
> > > > > +++ b/include/linux/sunrpc/sched.h
> > > > > @@ -125,6 +125,7 @@ struct rpc_task_setup {
> > > > >  #define RPC_TASK_TLSCRED               0x00000008      /*
> > > > > Use
> > > > > AUTH_TLS credential */
> > > > >  #define RPC_TASK_NULLCREDS             0x00000010      /*
> > > > > Use
> > > > > AUTH_NULL credential */
> > > > >  #define RPC_CALL_MAJORSEEN             0x00000020      /*
> > > > > major
> > > > > timeout seen */
> > > > > +#define RPC_TASK_CORK                  0x00000040      /*
> > > > > cork
> > > > > the
> > > > > xmit queue */
> > > > >  #define RPC_TASK_DYNAMIC               0x00000080      /*
> > > > > task
> > > > > was
> > > > > kmalloc'ed */
> > > > >  #define        RPC_TASK_NO_ROUND_ROBIN        
> > > > > 0x00000100     
> > > > > /*
> > > > > send requests on "main" xprt */
> > > > >  #define RPC_TASK_SOFT                  0x00000200      /*
> > > > > Use
> > > > > soft
> > > > > timeouts */
> > > > > @@ -137,6 +138,7 @@ struct rpc_task_setup {
> > > > >  
> > > > >  #define RPC_IS_ASYNC(t)                ((t)->tk_flags &
> > > > > RPC_TASK_ASYNC)
> > > > >  #define RPC_IS_SWAPPER(t)      ((t)->tk_flags &
> > > > > RPC_TASK_SWAPPER)
> > > > > +#define RPC_IS_CORK(t)         ((t)->tk_flags &
> > > > > RPC_TASK_CORK)
> > > > >  #define RPC_IS_SOFT(t)         ((t)->tk_flags &
> > > > > (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
> > > > >  #define RPC_IS_SOFTCONN(t)     ((t)->tk_flags &
> > > > > RPC_TASK_SOFTCONN)
> > > > >  #define RPC_WAS_SENT(t)                ((t)->tk_flags &
> > > > > RPC_TASK_SENT)
> > > > > diff --git a/include/trace/events/sunrpc.h
> > > > > b/include/trace/events/sunrpc.h
> > > > > index 811187c47ebb..e8d6adff1a50 100644
> > > > > --- a/include/trace/events/sunrpc.h
> > > > > +++ b/include/trace/events/sunrpc.h
> > > > > @@ -312,6 +312,7 @@ TRACE_EVENT(rpc_request,
> > > > >                 { RPC_TASK_TLSCRED, "TLSCRED"
> > > > > },                        \
> > > > >                 { RPC_TASK_NULLCREDS, "NULLCREDS"
> > > > > },                    \
> > > > >                 { RPC_CALL_MAJORSEEN, "MAJORSEEN"
> > > > > },                    \
> > > > > +               { RPC_TASK_CORK, "CORK"
> > > > > },                              \
> > > > >                 { RPC_TASK_DYNAMIC, "DYNAMIC"
> > > > > },                        \
> > > > >                 { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN"
> > > > > },          \
> > > > >                 { RPC_TASK_SOFT, "SOFT"
> > > > > },                              \
> > > > > diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> > > > > index 86d62cffba0d..4b303b945b51 100644
> > > > > --- a/net/sunrpc/xprt.c
> > > > > +++ b/net/sunrpc/xprt.c
> > > > > @@ -1622,6 +1622,8 @@ xprt_transmit(struct rpc_task *task)
> > > > >                 if (xprt_request_data_received(task) &&
> > > > >                     !test_bit(RPC_TASK_NEED_XMIT, &task-
> > > > > > tk_runstate))
> > > > >                         break;
> > > > > +               if (RPC_IS_CORK(task))
> > > > > +                       break;
> > > > >                 cond_resched_lock(&xprt->queue_lock);
> > > > >         }
> > > > >         spin_unlock(&xprt->queue_lock);
> > > > > 
> > > > > 
> > > > 
> > > > This is entirely the wrong place for this kind of control
> > > > mechanism.
> > > 
> > > I'm not sure I entirely understand your concern, so bear with
> > > me while I try to clarify.
> > > 
> > > 
> > > > TLS vs not-TLS needs to be decided up front when we initialise
> > > > the
> > > > transport (i.e. at mount time or whenever the pNFS channels are
> > > > set
> > > > up). Otherwise, we're vulnerable to downgrade attacks.
> > > 
> > > Downgrade attacks are prevented by using "xprtsec=tls" because
> > > in that case, transport creation fails if either the AUTH_TLS
> > > fails or the handshake fails.
> > > 
> > > The TCP connection has to be established first, though. Then the
> > > client can send the RPC_AUTH_TLS probe, which is the same as the
> > > NULL ping that it already sends. That mechanism is independent
> > > of the lower layer transport (TCP in this case).
> > > 
> > > Therefore, RPC traffic must be stoppered while the client:
> > > 
> > > 1. waits for the AUTH_TLS probe's reply, and
> > > 
> > > 2. waits for the handshake to complete
> > > 
> > > Because an RPC message is involved in this interaction, I didn't
> > > see a way to implement it completely within xprtsock's TCP
> > > connection logic. IMO, driving the handshake has to be done by
> > > the generic RPC client.
> > > 
> > > So, do you mean that I need to replace RPC_TASK_CORK with a
> > > special return code from xs_tcp_send_request() ?
> > 
> > 
> > I mean the right mechanism for controlling whether or not the
> > transport
> > is ready to serve RPC requests is through the XPRT_CONNECTED flag.
> > All
> > the existing generic RPC error handling, congestion handling, etc
> > depends on that flag being set correctly.
> > 
> > Until the TLS socket has completed its handshake protocol and is
> > ready
> > to transmit data, it should not be declared connected. The
> > distinction
> > between the two states 'TCP is unconnected' and 'TLS handshake is
> > incomplete' is a socket/transport setup detail as far as the RPC
> > xprt
> > layer is concerned: just another set of intermediate states between
> > SYN_SENT and ESTABLISHED.
> 
> First, TLS is technically an upper layer protocol. It's not
> part of the transport protocol. This is exactly how it's
> implemented in the Linux kernel. And, TLS works on transports
> other than TCP, so that makes it a reasonable candidate for
> treatment in the generic client rather than in a particular
> transport mechanism.

Sorry, but no! As far as the RPC layer is concerned, there is no
difference between a TLS socket and a TCP socket. The xprt layer should
not have to know or care about the existence of TLS other that as a
transport option to be configured at connection time.

> 
> Second, the "intermediate states" would be /outside/ of SYN_SENT
> and ESTABLISHED. A TCP transport has to be in the ESTABLISHED
> state (ie, the transport's connection handshake has to be
> complete) before any TLS traffic can go over it.
> 

My point is we don't give a damn about the intermediate states in the
RPC layer.

> Most importantly, the client has to send an RPC message first
> before it can start a TLS handshake. The RPC-with-TLS protocol
> specification requires that the handshake be preceded with the
> NULL AUTH_TLS request, which is an RPC. Otherwise, there's no
> way for the server end to know when to expect a handshake.
> 

Sure, but those are 2 non-overlapping states. The socket is first in a
state where it needs to do a NULL ping using regular RPC/TCP. Then it
needs to do the TLS handshake. Then it transitions into the state where
it can act like any other transport.

> In today's RPC client, the underlying connection has to be in
> the XPRT_CONNECTED state before the RPC client can exchange any
> RPC transaction, including AUTH_TLS NULL.
> 
> To make it work the way you've suggested, we would have to build
> a mechanism that could send the AUTH_TLS NULL and receive and
> parse its reply /before/ the client has put the transport into
> the XPRT_CONNECTED state, and that NULL request would have to
> be driven from inside the transport instance (not via the FSM
> where all other RPC traffic originates).
> 
> Do you have any suggestions about how to make this last point
> less painful?

This isn't too different from what we already do with the rpcbind call
for performing port discovery. The only difference is that the NULL
ping needs to happen on the same transport as the one being constructed
and that it needs to happen after the TCP connection is complete.

So I'd suggest that TLS/TCP needs to be a different xprt_class than the
base TCP, then doing the whole "do-NULL-ping-and-TLS-handshake" in the
connect() callback for that new class.

The connect() callback can set up a private rpc client and do the NULL
call asynchronously just like we do in rpcb_getport_async(). When the
RPC call completes, we steal the resulting socket from that private rpc
client and kick off the TLS handshake on it. All that can be done in
the rpc_call_done callback (i.e. the equivalent of rpcb_getport_done).

Once the TLS handshake is done, you can set the XPRT_CONNECTED state
and call xprt_wake_pending_tasks().

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply

* Re: [PATCH net-next] PCI: add Corigine vendor ID into pci_ids.h
From: Andrew Lunn @ 2022-04-19 22:00 UTC (permalink / raw)
  To: Yinjun Zhang; +Cc: davem, kuba, netdev, Bjorn Helgaas, linux-pci, Simon Horman
In-Reply-To: <1650362568-11119-1-git-send-email-yinjun.zhang@corigine.com>

On Tue, Apr 19, 2022 at 06:02:48PM +0800, Yinjun Zhang wrote:
> Cc: Bjorn Helgaas <bhelgaas@google.com>
> Cc: linux-pci@vger.kernel.org
> Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
> Signed-off-by: Simon Horman <simon.horman@corigine.com>
> ---
>  include/linux/pci_ids.h | 2 ++

The very top of this file says:

 *      Do not add new entries to this file unless the definitions
 *      are shared between multiple drivers.

Please add to the commit messages the two or more drivers which share
this definition.

Thanks

     Andrew

^ permalink raw reply

* Re: [PATCH] net: ethernet: mtk_eth_soc: fix error check return value of debugfs_create_dir()
From: Andrew Lunn @ 2022-04-19 21:55 UTC (permalink / raw)
  To: cgel.zte
  Cc: nbd, john, sean.wang, Mark-MC.Lee, davem, kuba, pabeni,
	matthias.bgg, netdev, linux-arm-kernel, linux-mediatek,
	linux-kernel, Lv Ruyi, Zeal Robot
In-Reply-To: <20220419015832.2562366-1-lv.ruyi@zte.com.cn>

On Tue, Apr 19, 2022 at 01:58:32AM +0000, cgel.zte@gmail.com wrote:
> From: Lv Ruyi <lv.ruyi@zte.com.cn>
> 
> If an error occurs, debugfs_create_file() will return ERR_PTR(-ERROR),
> so use IS_ERR() to check it.

Please take a look at for example:

https://lkml.iu.edu/hypermail/linux/kernel/1901.2/06005.html
https://lkml.iu.edu/hypermail/linux/kernel/1901.2/06006.html
https://lkml.iu.edu/hypermail/linux/kernel/1901.2/05993.html

This is the author of debugfs remove exactly the sort of code you are
adding.

Please teach the Zeal Bot that such code is wrong, and you should be
submitting patches to actually remove testing the return values for
anything which starts with debugfs_

	Andrew

^ permalink raw reply

* Re: BUG: Kernel NULL pointer dereference on write at 0x00000000 (rtmsg_ifinfo_build_skb)
From: Nathan Chancellor @ 2022-04-19 21:34 UTC (permalink / raw)
  To: Paul Menzel
  Cc: Zhouyi Zhou, Paul E. McKenney, Josh Triplett, rcu, LKML,
	David S. Miller, Jakub Kicinski, netdev, Nick Desaulniers, llvm,
	Fangrui Song
In-Reply-To: <YhOv6m3K7rsPjTA8@archlinux-ax161>

On Mon, Feb 21, 2022 at 08:29:46AM -0700, Nathan Chancellor wrote:
> Hi Paul,
> 
> On Mon, Feb 21, 2022 at 12:17:40PM +0100, Paul Menzel wrote:
> > Am 17.02.22 um 02:16 schrieb Nathan Chancellor:
> > 
> > > On Wed, Feb 16, 2022 at 02:19:51PM +0100, Paul Menzel wrote:
> > > > [Cc: +LLVM/clang build support folks]
> > 
> > […]
> > 
> > > > To recap: On a ppc64le machine, building Linux in Ubuntu 21.10 with *llvm*
> > > > and *clang* 1:13.0-53~exp1
> > > > 
> > > >      $ clang --version
> > > >      Ubuntu clang version 13.0.0-2
> > > >      Target: powerpc64le-unknown-linux-gnu
> > > >      Thread model: posix
> > > >      InstalledDir: /usr/bin
> > > > 
> > > > results in a segmentation fault, while it works when building with GCC.
> > > > 
> > > >      $ gcc --version
> > > >      gcc (Ubuntu 11.2.0-7ubuntu2) 11.2.0
> > > 
> > > Thank you for keying us in. I am going to have a bit of a brain dump
> > > here based on the information I have uncovered after a couple of hours
> > > of debugging.
> > > 
> > > TL;DR: It seems like something is broken with __read_mostly + ld.lld
> > > before 14.0.0.
> > > 
> > > My initial reproduction steps (boot-qemu.sh comes from
> > > https://github.com/ClangBuiltLinux/boot-utils):
> > > 
> > > $ clang --version
> > > clang version 13.0.1 (Fedora 13.0.1-1.fc37)
> > > Target: x86_64-redhat-linux-gnu
> > > Thread model: posix
> > > InstalledDir: /usr/bin
> > > 
> > > $ powerpc64le-linux-gnu-as --version
> > > GNU assembler version 2.37-2.fc36
> > > Copyright (C) 2021 Free Software Foundation, Inc.
> > > This program is free software; you may redistribute it under the terms of
> > > the GNU General Public License version 3 or later.
> > > This program has absolutely no warranty.
> > > This assembler was configured for a target of `powerpc64le-linux-gnu'.
> > > 
> > > $ curl -LSso .config https://lore.kernel.org/all/f41550c7-26c0-cf81-7de9-aa924434a565@molgen.mpg.de/3-linux-5.17-rc4-rcu-dev-config.txt
> > > 
> > > $ scripts/config --set-val INITRAMFS_SOURCE '""'
> > > 
> > > $ make -skj"$(nproc)" ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- LLVM=1 LLVM_IAS=0 all
> > > 
> > > $ boot-qemu.sh -a ppc64le -k . -t 45s
> > > QEMU location: /usr/bin
> > > 
> > > QEMU version: QEMU emulator version 6.2.0 (qemu-6.2.0-5.fc37)
> > > 
> > > + timeout --foreground 45s stdbuf -oL -eL qemu-system-ppc64 -initrd \
> > > /home/nathan/cbl/github/boot-utils-ro/images/ppc64le/rootfs.cpio -device \
> > > ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10 -L \
> > > /home/nathan/cbl/github/boot-utils-ro/images/ppc64le/ -bios skiboot.lid \
> > > -machine powernv8 -display none -kernel \
> > > /home/nathan/cbl/src/linux/arch/powerpc/boot/zImage.epapr -m 2G \
> > > -nodefaults -serial mon:stdio
> > > ...
> > > [    1.478028][    T1] BUG: Kernel NULL pointer dereference on read at 0x00000000
> > > [    1.478630][    T1] Faulting instruction address: 0xc00000000090bee0
> > > [    1.479521][    T1] Oops: Kernel access of bad area, sig: 11 [#1]
> > > [    1.480036][    T1] LE PAGE_SIZE=64K MMU=Hash PREEMPT SMP NR_CPUS=16 NUMA PowerNV
> > > [    1.480853][    T1] Modules linked in:
> > > [    1.481265][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.17.0-rc4-00001-gfa15c7cb550f #1
> > > [    1.481967][    T1] NIP:  c00000000090bee0 LR: c000000000d96b60 CTR: c0000000000d5b4c
> > > [    1.482596][    T1] REGS: c000000007443330 TRAP: 0380   Not tainted  (5.17.0-rc4-00001-gfa15c7cb550f)
> > > [    1.483305][    T1] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 22800a87  XER: 00000000
> > > [    1.484277][    T1] CFAR: c000000000d96b5c IRQMASK: 0
> > > [    1.484277][    T1] GPR00: c000000000d96b54 c0000000074435d0 c0000000028bc600 0000000000000000
> > > [    1.484277][    T1] GPR04: ffffffffffffffff ffffffffff1ea558 ffffffffff1ebfe4 c00000000261ae88
> > > [    1.484277][    T1] GPR08: 0000000000000003 0000000000000004 c00000000261ae88 0000000000000000
> > > [    1.484277][    T1] GPR12: 0000000000800000 c000000002a60000 c000000000012518 0000000000000000
> > > [    1.484277][    T1] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
> > > [    1.484277][    T1] GPR20: 0000000000000000 c0000000027bff80 0000000000000cc0 0000000000000000
> > > [    1.484277][    T1] GPR24: 0000000000000010 0000000000000000 0000000000000000 0000000000000000
> > > [    1.484277][    T1] GPR28: c0000000028fcfd8 c000000007b83000 0000000000000000 c0000000074435d0
> > > [    1.490325][    T1] NIP [c00000000090bee0] strlen+0x10/0x30
> > > [    1.490788][    T1] LR [c000000000d96b60] if_nlmsg_size+0x2b0/0x390
> > > [    1.491319][    T1] Call Trace:
> > > [    1.491573][    T1] [c0000000074435d0] [c000000000d96b54] if_nlmsg_size+0x2a4/0x390 (unreliable)
> > > [    1.492291][    T1] [c000000007443680] [c000000000d96790] rtmsg_ifinfo_build_skb+0x80/0x1a0
> > > [    1.492958][    T1] [c000000007443740] [c000000000d97590] rtmsg_ifinfo+0x70/0xd0
> > > [    1.493559][    T1] [c000000007443790] [c000000000d7d528] register_netdevice+0x5d8/0x670
> > > [    1.494205][    T1] [c000000007443820] [c000000000d7d94c] register_netdev+0x4c/0x80
> > > [    1.494823][    T1] [c000000007443850] [c000000000f826d8] sit_init_net+0x1b8/0x200
> > > [    1.495426][    T1] [c0000000074438d0] [c000000000d63b5c] ops_init+0x14c/0x1c0
> > > [    1.496014][    T1] [c000000007443930] [c000000000d6314c] register_pernet_operations+0xec/0x1e0
> > > [    1.496716][    T1] [c000000007443990] [c000000000d633d0] register_pernet_device+0x60/0xd0
> > > [    1.497372][    T1] [c0000000074439e0] [c000000002085194] sit_init+0x54/0x160
> > > [    1.497950][    T1] [c000000007443a70] [c000000000011c58] do_one_initcall+0x108/0x3e0
> > > [    1.498573][    T1] [c000000007443c70] [c000000002006190] do_initcall_level+0xe4/0x1c4
> > > [    1.499219][    T1] [c000000007443cc0] [c00000000200604c] do_initcalls+0x84/0xe4
> > > [    1.499799][    T1] [c000000007443d40] [c000000002005da8] kernel_init_freeable+0x160/0x1ec
> > > [    1.500444][    T1] [c000000007443da0] [c00000000001254c] kernel_init+0x3c/0x270
> > > [    1.501042][    T1] [c000000007443e10] [c00000000000cd64] ret_from_kernel_thread+0x5c/0x64
> > > [    1.501721][    T1] Instruction dump:
> > > [    1.502202][    T1] eb81ffe0 7c0803a6 4e800020 00000000 00000000 00000000 60000000 60000000
> > > [    1.502934][    T1] 3883ffff 60000000 60000000 60000000 <8ca40001> 28050000 4082fff8 7c632050
> > > [    1.504028][    T1] ---[ end trace 0000000000000000 ]---
> > > ...
> > > 
> > > First thing was figuring out where the NULL pointer dereference happens,
> > > which appears to the "strlen(ops->kind)" in rtnl_link_get_size():
> > > 
> > > 515 static size_t rtnl_link_get_size(const struct net_device *dev)
> > > 516 {
> > > 517 	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
> > > 518 	size_t size;
> > > 519
> > > 520 	if (!ops)
> > > 521 		return 0;
> > > 522
> > > 523 	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
> > > 524 	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
> > > 
> > > which I confirmed some really rudimentary printk debugging:
> > > 
> > > [    1.476862][    T1] nathan: rtnl_link_get_size(): name: sit0, ops: c0000000028fcfd8, ops->kind: (null)
> > > 
> > > diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> > > index 710da8a36729..c8d928e83aec 100644
> > > --- a/net/core/rtnetlink.c
> > > +++ b/net/core/rtnetlink.c
> > > @@ -520,6 +520,9 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
> > >   	if (!ops)
> > >   		return 0;
> > > +	pr_err("nathan: %s(): name: %s, ops: %px, ops->kind: %s\n", __func__,
> > > +	       dev->name, ops, ops->kind);
> > > +
> > >   	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
> > >   	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
> > > 
> > > Okay... how did sit0 end up with a NULL kind...? It is very clearly
> > > defined as "sit":
> > > 
> > > 1830 static struct rtnl_link_ops sit_link_ops __read_mostly = {
> > > 1831 	.kind		= "sit",
> > > 
> > > Adding some more debug prints to net/ipv6/sit.c:
> > > 
> > > diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> > > index c0b138c20992..7b9edbed2fcd 100644
> > > --- a/net/ipv6/sit.c
> > > +++ b/net/ipv6/sit.c
> > > @@ -1920,6 +1920,12 @@ static int __net_init sit_init_net(struct net *net)
> > >   	 */
> > >   	sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
> > > +	pr_err("nathan: %s(): &sit_link_ops: %px\n", __func__, &sit_link_ops);
> > > +	pr_err("nathan: %s(): sit_link_ops.kind: %s\n", __func__, sit_link_ops.kind);
> > > +	pr_err("nathan: %s(): sit_link_ops.maxtype: %u\n", __func__, sit_link_ops.maxtype);
> > > +	pr_err("nathan: %s(): sitn->fb_tunnel_dev->rtnl_link_ops: %px\n", __func__, sitn->fb_tunnel_dev->rtnl_link_ops);
> > > +	pr_err("nathan: %s(): sitn->fb_tunnel_dev->rtnl_link_ops->kind: %s\n", __func__, sitn->fb_tunnel_dev->rtnl_link_ops->kind);
> > > +
> > >   	err = register_netdev(sitn->fb_tunnel_dev);
> > >   	if (err)
> > >   		goto err_reg_dev;
> > > 
> > > reveals:
> > > 
> > > [    1.471920][    T1] sit: nathan: sit_init_net(): &sit_link_ops: c0000000028fcfd8
> > > [    1.472534][    T1] sit: nathan: sit_init_net(): sit_link_ops.kind: (null)
> > > [    1.473088][    T1] sit: nathan: sit_init_net(): sit_link_ops.maxtype: 20
> > > [    1.473639][    T1] sit: nathan: sit_init_net(): sitn->fb_tunnel_dev->rtnl_link_ops: c0000000028fcfd8
> > > [    1.474370][    T1] sit: nathan: sit_init_net(): sitn->fb_tunnel_dev->rtnl_link_ops->kind: (null)
> > > 
> > > This is super bizarre, as the maxtype member appears to have the correct
> > > value, but how is kind's initial getting dropped on the floor?
> > > 
> > > Removing the __read_mostly annotation "fixes" it:
> > > 
> > > [    1.481708][    T1] sit: nathan: sit_init_net(): &sit_link_ops: c0000000027d3f60
> > > [    1.482319][    T1] sit: nathan: sit_init_net(): sit_link_ops.kind: sit
> > > [    1.482878][    T1] sit: nathan: sit_init_net(): sit_link_ops.maxtype: 20
> > > [    1.483429][    T1] sit: nathan: sit_init_net(): sitn->fb_tunnel_dev->rtnl_link_ops: c0000000027d3f60
> > > [    1.484174][    T1] sit: nathan: sit_init_net(): sitn->fb_tunnel_dev->rtnl_link_ops->kind: sit
> > > ...
> > > Linux version 5.17.0-rc4-00001-g956f02ad5c31-dirty (nathan@dev-fedora.archlinux-ax161) (clang version 13.0.1 (Fedora 13.0.1-1.fc37), LLD 13.0.1) #2 SMP PREEMPT Wed Feb 16 13:29:49 MST 2022
> > > ...
> > > 
> > > diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
> > > index 7b9edbed2fcd..f109c7a0233b 100644
> > > --- a/net/ipv6/sit.c
> > > +++ b/net/ipv6/sit.c
> > > @@ -70,7 +70,7 @@ static void ipip6_tunnel_setup(struct net_device *dev);
> > >   static void ipip6_dev_free(struct net_device *dev);
> > >   static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
> > >   		      __be32 *v4dst);
> > > -static struct rtnl_link_ops sit_link_ops __read_mostly;
> > > +static struct rtnl_link_ops sit_link_ops;
> > >   static unsigned int sit_net_id __read_mostly;
> > >   struct sit_net {
> > > @@ -1827,7 +1827,7 @@ static void ipip6_dellink(struct net_device *dev, struct list_head *head)
> > >   		unregister_netdevice_queue(dev, head);
> > >   }
> > > -static struct rtnl_link_ops sit_link_ops __read_mostly = {
> > > +static struct rtnl_link_ops sit_link_ops = {
> > >   	.kind		= "sit",
> > >   	.maxtype	= IFLA_IPTUN_MAX,
> > >   	.policy		= ipip6_policy,
> > > 
> > > Switching to ld.bfd also resolves it:
> > > 
> > > [    1.470405][    T1] sit: nathan: sit_init_net(): &sit_link_ops: c0000000028acfd8
> > > [    1.471016][    T1] sit: nathan: sit_init_net(): sit_link_ops.kind: sit
> > > [    1.471534][    T1] sit: nathan: sit_init_net(): sit_link_ops.maxtype: 20
> > > [    1.472062][    T1] sit: nathan: sit_init_net(): sitn->fb_tunnel_dev->rtnl_link_ops: c0000000028acfd8
> > > [    1.472790][    T1] sit: nathan: sit_init_net(): sitn->fb_tunnel_dev->rtnl_link_ops->kind: sit
> > > ...
> > > Linux version 5.17.0-rc4-00001-g956f02ad5c31 (nathan@dev-fedora.archlinux-ax161) (clang version 13.0.1 (Fedora 13.0.1-1.fc37), GNU ld version 2.37-2.fc36) #3 SMP PREEMPT Wed Feb 16 13:33:42 MST 2022
> > > ...
> > > 
> > > I tested with ToT LLVM (or at least, close to it, since there is an
> > > unrelated ld.lld regression there) and I could not reproduce it there,
> > > so I did a reverse bisect to see what commit fixes this issue in LLVM 14
> > > and I landed on:
> > > 
> > > commit 55c14d6dbfd8e7b86c15d2613fea3490078e2ae4
> > > Author: Fangrui Song <i@maskray.me>
> > > Date:   Thu Nov 25 14:12:34 2021 -0800
> > > 
> > >      [ELF] Simplify DynamicSection content computation. NFC
> > > 
> > >      The new code computes the content twice, but avoides the tricky
> > >      std::function<uint64_t()>. Removed 13KiB code in a Release build.
> > > 
> > >   lld/ELF/SyntheticSections.cpp | 117 ++++++++++++++++--------------------------
> > >   lld/ELF/SyntheticSections.h   |  12 +----
> > >   2 files changed, 44 insertions(+), 85 deletions(-)
> > > 
> > > That's... interesting, given that commit title says No Functional
> > > Change, even though there clearly is one. That commit has a couple
> > > mentions of PowerPC synthetic sections, so it is possible that the
> > > new content calculation lines up with ld.bfd?
> > > 
> > > I am not really sure where to go from here, as I don't fully understand
> > > what the problem was before that LLD change. I'll see if I can do some
> > > more investigation tomorrow (unless someone wants to beat me to it ;)
> > 
> > Thank you for looking into this, and sharing your analysis.
> > 
> > I built LLVM/clang from the master branch, rebuilt, but can still reproduce
> > this.
> > 
> >     $ git clone --depth=1 https://github.com/llvm/llvm-project.git
> >     $ cd llvm-project/
> >     $ git log --oneline
> >     41cb504b7 [mlir][linalg][bufferize][NFC] Move interface impl to Linalg
> > Transforms
> >     $ mkdir build
> >     $ cd build
> >     $ cmake -DLLVM_ENABLE_PROJECTS=clang -G "Unix Makefiles"
> 
> Since this is something related to ld.lld, not clang, this should be:
> 
> ... -DLLVM_ENABLE_PROJECTS="clang;lld" ...
> 
> > -DCMAKE_BUILD_TYPE=Release -DLLVM_INSTALL_UTILS=ON
> > -DCMAKE_INSTALL_PREFIX=/scratch/local2/llvm ../llvm
> >     $ make -j20
> >     $ make -j20 clang-check
> 
> You can also do 'check-lld' if you want.
> 
> >     $ make install
> >     $ /scratch/local2/llvm/bin/clang --version
> >     clang version 15.0.0 (https://github.com/llvm/llvm-project.git
> > 41cb504b7c4b18ac15830107431a0c1eec73a6b2)
> >     Target: powerpc64le-unknown-linux-gnu
> >     Thread model: posix
> >     InstalledDir: /scratch/local2/llvm/bin
> > 
> > Then build Linux after `make clean` with `/scratch/local2/llvm/bin` in the
> > path.
> > 
> >     $ LLVM=1 LLVM_IAS=0 eatmydata make -j20
> > 
> >     $ qemu-system-ppc64 -enable-kvm -nographic -smp cores=1,threads=1 -net
> > none -enable-kvm -M pseries -nodefaults -device spapr-vscsi -serial stdio -m
> > 512 -kernel /dev/shm/linux/vmlinux -append "debug_boot_weak_hash panic=-1
> > console=ttyS0 rcupdate.rcu_cpu_stall_suppress_at_boot=1
> > torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000
> > rcupdate.rcu_self_test=1 rcutorture.onoff_interval=1000
> > rcutorture.onoff_holdoff=30 rcutorture.n_barrier_cbs=4
> > rcutorture.stat_interval=15 rcutorture.shutdown_secs=420
> > rcutorture.test_no_idle_hz=1 rcutorture.verbose=1"
> >     […]
> >     Preparing to boot Linux version 5.17.0-rc5-00178-ga4b9a8fb20e7
> > (pmenzel@flughafenberlinbrandenburgwillybrandt.molgen.mpg.de) (clang version
> > 15.0.0 (https://github.com/llvm/llvm-project.git
> > 41cb504b7c4b18ac15830107431a0c1eec73a6b2), LLD 13.0.0) #29 SMP PREEMPT Mon
> 
>                                              ^ still using ld.lld 13.0.0.
> 
> If you want to test the master branch, I would checkout LLVM at
> 460830a9c664e8cce959c660648faa7747ad8bdc, as the next commit introduces
> a boot regression unrelated to this issue:
> 
> https://github.com/ClangBuiltLinux/linux/issues/1581
> 
> That should at least confirm this is resolved in a newer release.
> 
> > Feb 21 10:58:54 CET 2022
> >     […]
> >     [    0.465889][    T1] BUG: Kernel NULL pointer dereference on read at
> > 0x00000000
> >     [    0.466749][    T1] Faulting instruction address: 0xc0000000008fc300
> >     [    0.467507][    T1] Oops: Kernel access of bad area, sig: 11 [#1]
> >     […]
> 
> I do intend to do further analysis at some point over the next few days
> to see if I can figure out exactly why that commit that I mentioned
> above fixes the issue then we can look into what we should do about it
> in the kernel sources.

Sorry for taking so long to get back to this. For me, commit
d79976918852 ("powerpc/64: Add UADDR64 relocation support") resolves
this for ld.lld 13.x. I have started a separate thread about whether or
not this commit is suitable for stable, specifically 5.17 and 5.15:

https://lore.kernel.org/Yl8pNxSGUgeHZ1FT@dev-arch.thelio-3990X/

Cheers,
Nathan

^ permalink raw reply

* Re: [PATCH RFC 00/15] Prototype implementation of RPC-with-TLS
From: Rick Macklem @ 2022-04-19 20:49 UTC (permalink / raw)
  To: Chuck Lever III, Trond Myklebust
  Cc: linux-cifs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvme@lists.infradead.org, netdev@vger.kernel.org,
	simo@redhat.com, ak@tempesta-tech.com, Linux NFS Mailing List,
	borisp@nvidia.com
In-Reply-To: <E7B440A0-FD0A-4F67-8238-CAE9A6882F10@oracle.com>

> Chuck Lever III <chuck.lever@oracle.com> wrote:
> > On Apr 19, 2022, at 2:48 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> >
> > On Tue, 2022-04-19 at 16:00 +0000, Chuck Lever III wrote:
> >> Hi Trond-
> >>
> >> Thanks for the early review!
>>
[good stuff snipped]
> >>
> >> However, one of Rick's preferences is that "auto" not use
> >> transport-layer security unless the server requires it via
> >> a SECINFO/MNT pseudoflavor, which only the kernel would be
> >> privy to. I'll have to think about whether we want to make
> >> that happen.
Just fyi, the above was not exactly what I thought.

My concern with "xprtsec=auto" was that the client (user/admin doing the
mount) would not know if on the wire encryption was happening or not.
As such, this case in not implemented in the FreeBSD client at this time.
(I may do so in order to ne Linux compatible, but I doubt it will be the
 default. Of course, it is really up to the "FreeBSD collective" and not
 just me.)

For the "xprtsec=auto" case, I am fine with the client attempting the
NULL AUTH_TLS RPC probe as soon as a connection is established,
followed by a TLS handshake, if the NULL AUTH_TLS RPC probe succeeds.

At this time, the FreeBSD client does not use indications from the server,
such as SECINFO to decide to do the NULL AUTH_TLS RPC. The current
implementation does it optionally (just called "tls", which is the
equivalent of "xprtsec=tls"), as soon as a connection is established.

> >
> > That sounds like a terrible protocol hack. TLS is not an authentication
> > flavour but a transport level protection.
Not sure if I lost the "context" w.r.t. this comment, but I argued that this
should not be more "sec=XXX" options, since it was related to the transport
and not user authentication.

> Fair enough. We've been discussing this on nfsv4@ietf.org, and
> it's certainly not written in stone yet.
Yes. I cannot guarantee FreeBSD will become Linux compatible, but what
Linux chooses is certainly up to the Linux community. Since Linux is the
"big player", I do attempt to keep FreeBSD's mount options compatible,
whenever practical.

> I invite you to join the conversation and share your concerns
> (and possibly any alternative solutions you might have).
>
>
> > That said, I don't see how this invalidates my argument. When told to
> > use TLS, the kernel client can still return a mount time error if the
> > server fails to advertise support through this pseudoflavour and leave
> > it up to userspace to decide how to deal with that.
>
> Sure. I'm just saying I haven't thought it through yet. I don't
> think it will be a problem to move more (or all) of the transport
> security policy to mount.nfs.
It happens to be implemented in the kernel for FreeBSD, but that
was just what was convenient for FreeBSD. (New TCP connections
for RPCs, including reconnects, are done in the krpc for FreeBSD,
so that is where it needed to know whether or not to do the
NULL AUTH_TLS RPC probe.)

rick

--
Chuck Lever





^ permalink raw reply

* Re: [PATCH] driver: usb: nullify dangling pointer in cdc_ncm_free
From: Bjørn Mork @ 2022-04-19 20:25 UTC (permalink / raw)
  To: Oliver Neukum
  Cc: Andy Shevchenko, Johan Hovold, Oleksij Rempel, Dongliang Mu,
	Oliver Neukum, David S. Miller, Jakub Kicinski, Paolo Abeni,
	Dongliang Mu, syzbot+eabbf2aaa999cc507108, USB, netdev,
	Linux Kernel Mailing List
In-Reply-To: <d851497f-7960-b606-2f87-eb9bff89c8ac@suse.com>

Oliver Neukum <oneukum@suse.com> writes:

> It seems to me that fundamentally the order of actions to handle
> a hotunplug must mirror the order in a hotplug. We can add more hooks
> if that turns out to be necessary for some drivers, but the basic
> reverse mirrored order must be supported and I very much favor
> restoring it as default.

FWIW, I agree 100% with this.  Please go ahead with the revert of commit
2c9d6c2b871d ("usbnet: run unbind() before unregister_netdev()").

AFAICS, your proposed new hook should solve the original problem just
fine.


Bjørn






^ permalink raw reply

* Re: [PATCH nf v2 2/2] netfilter: Use l3mdev flow key when re-routing mangled packets
From: David Ahern @ 2022-04-19 20:06 UTC (permalink / raw)
  To: Martin Willi, Pablo Neira Ayuso, Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <20220419134701.153090-3-martin@strongswan.org>

On 4/19/22 7:47 AM, Martin Willi wrote:
> Commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif
> reset for port devices") introduces a flow key specific for layer 3
> domains, such as a VRF master device. This allows for explicit VRF domain
> selection instead of abusing the oif flow key.
> 
> Update ip[6]_route_me_harder() to make use of that new key when re-routing
> mangled packets within VRFs instead of setting the flow oif, making it
> consistent with other users.
> 
> Signed-off-by: Martin Willi <martin@strongswan.org>
> ---
>  net/ipv4/netfilter.c | 3 +--
>  net/ipv6/netfilter.c | 3 +--
>  2 files changed, 2 insertions(+), 4 deletions(-)
> 
>

This one will go to -next

Reviewed-by: David Ahern <dsahern@kernel.org>



^ permalink raw reply

* Re: [PATCH nf v2 1/2] netfilter: Update ip6_route_me_harder to consider L3 domain
From: David Ahern @ 2022-04-19 20:05 UTC (permalink / raw)
  To: Martin Willi, Pablo Neira Ayuso, Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <20220419134701.153090-2-martin@strongswan.org>

On 4/19/22 7:47 AM, Martin Willi wrote:
> The commit referenced below fixed packet re-routing if Netfilter mangles
> a routing key property of a packet and the packet is routed in a VRF L3
> domain. The fix, however, addressed IPv4 re-routing, only.
> 
> This commit applies the same behavior for IPv6. While at it, untangle
> the nested ternary operator to make the code more readable.
> 
> Fixes: 6d8b49c3a3a3 ("netfilter: Update ip_route_me_harder to consider L3 domain")
> Cc: stable@vger.kernel.org
> Signed-off-by: Martin Willi <martin@strongswan.org>
> ---
>  net/ipv6/netfilter.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>



^ permalink raw reply

* Re: [PATCH net-next] net/ipv6: Enforce limits for accept_unsolicited_na sysctl
From: David Ahern @ 2022-04-19 20:04 UTC (permalink / raw)
  To: Arun Ajith S, netdev; +Cc: davem, linux-kernel, yoshfuji, kuba, pabeni
In-Reply-To: <20220419105910.686-1-aajith@arista.com>

On 4/19/22 4:59 AM, Arun Ajith S wrote:
> Fix mistake in the original patch where limits were specified but the
> handler didn't take care of the limits.
> 
> Signed-off-by: Arun Ajith S <aajith@arista.com>
> ---
>  net/ipv6/addrconf.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 


Reviewed-by: David Ahern <dsahern@kernel.org>


^ permalink raw reply

* Re: [PATCH RFC 08/15] SUNRPC: Add RPC_TASK_CORK flag
From: Chuck Lever III @ 2022-04-19 19:40 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-cifs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvme@lists.infradead.org, netdev@vger.kernel.org,
	simo@redhat.com, ak@tempesta-tech.com, Linux NFS Mailing List,
	borisp@nvidia.com
In-Reply-To: <36618d90e44961aed7b40c4640952fd574fce60c.camel@hammerspace.com>



> On Apr 19, 2022, at 3:04 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Tue, 2022-04-19 at 18:16 +0000, Chuck Lever III wrote:
>> 
>> 
>>> On Apr 18, 2022, at 10:57 PM, Trond Myklebust
>>> <trondmy@hammerspace.com> wrote:
>>> 
>>> On Mon, 2022-04-18 at 12:52 -0400, Chuck Lever wrote:
>>>> Introduce a mechanism to cause xprt_transmit() to break out of
>>>> its
>>>> sending loop at a specific rpc_rqst, rather than draining the
>>>> whole
>>>> transmit queue.
>>>> 
>>>> This enables the client to send just an RPC TLS probe and then
>>>> wait
>>>> for the response before proceeding with the rest of the queue.
>>>> 
>>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>>>> ---
>>>>  include/linux/sunrpc/sched.h  |    2 ++
>>>>  include/trace/events/sunrpc.h |    1 +
>>>>  net/sunrpc/xprt.c             |    2 ++
>>>>  3 files changed, 5 insertions(+)
>>>> 
>>>> diff --git a/include/linux/sunrpc/sched.h
>>>> b/include/linux/sunrpc/sched.h
>>>> index 599133fb3c63..f8c09638fa69 100644
>>>> --- a/include/linux/sunrpc/sched.h
>>>> +++ b/include/linux/sunrpc/sched.h
>>>> @@ -125,6 +125,7 @@ struct rpc_task_setup {
>>>>  #define RPC_TASK_TLSCRED               0x00000008      /* Use
>>>> AUTH_TLS credential */
>>>>  #define RPC_TASK_NULLCREDS             0x00000010      /* Use
>>>> AUTH_NULL credential */
>>>>  #define RPC_CALL_MAJORSEEN             0x00000020      /* major
>>>> timeout seen */
>>>> +#define RPC_TASK_CORK                  0x00000040      /* cork
>>>> the
>>>> xmit queue */
>>>>  #define RPC_TASK_DYNAMIC               0x00000080      /* task
>>>> was
>>>> kmalloc'ed */
>>>>  #define        RPC_TASK_NO_ROUND_ROBIN         0x00000100     
>>>> /*
>>>> send requests on "main" xprt */
>>>>  #define RPC_TASK_SOFT                  0x00000200      /* Use
>>>> soft
>>>> timeouts */
>>>> @@ -137,6 +138,7 @@ struct rpc_task_setup {
>>>>  
>>>>  #define RPC_IS_ASYNC(t)                ((t)->tk_flags &
>>>> RPC_TASK_ASYNC)
>>>>  #define RPC_IS_SWAPPER(t)      ((t)->tk_flags &
>>>> RPC_TASK_SWAPPER)
>>>> +#define RPC_IS_CORK(t)         ((t)->tk_flags & RPC_TASK_CORK)
>>>>  #define RPC_IS_SOFT(t)         ((t)->tk_flags &
>>>> (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
>>>>  #define RPC_IS_SOFTCONN(t)     ((t)->tk_flags &
>>>> RPC_TASK_SOFTCONN)
>>>>  #define RPC_WAS_SENT(t)                ((t)->tk_flags &
>>>> RPC_TASK_SENT)
>>>> diff --git a/include/trace/events/sunrpc.h
>>>> b/include/trace/events/sunrpc.h
>>>> index 811187c47ebb..e8d6adff1a50 100644
>>>> --- a/include/trace/events/sunrpc.h
>>>> +++ b/include/trace/events/sunrpc.h
>>>> @@ -312,6 +312,7 @@ TRACE_EVENT(rpc_request,
>>>>                 { RPC_TASK_TLSCRED, "TLSCRED"
>>>> },                        \
>>>>                 { RPC_TASK_NULLCREDS, "NULLCREDS"
>>>> },                    \
>>>>                 { RPC_CALL_MAJORSEEN, "MAJORSEEN"
>>>> },                    \
>>>> +               { RPC_TASK_CORK, "CORK"
>>>> },                              \
>>>>                 { RPC_TASK_DYNAMIC, "DYNAMIC"
>>>> },                        \
>>>>                 { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN"
>>>> },          \
>>>>                 { RPC_TASK_SOFT, "SOFT"
>>>> },                              \
>>>> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
>>>> index 86d62cffba0d..4b303b945b51 100644
>>>> --- a/net/sunrpc/xprt.c
>>>> +++ b/net/sunrpc/xprt.c
>>>> @@ -1622,6 +1622,8 @@ xprt_transmit(struct rpc_task *task)
>>>>                 if (xprt_request_data_received(task) &&
>>>>                     !test_bit(RPC_TASK_NEED_XMIT, &task-
>>>>> tk_runstate))
>>>>                         break;
>>>> +               if (RPC_IS_CORK(task))
>>>> +                       break;
>>>>                 cond_resched_lock(&xprt->queue_lock);
>>>>         }
>>>>         spin_unlock(&xprt->queue_lock);
>>>> 
>>>> 
>>> 
>>> This is entirely the wrong place for this kind of control
>>> mechanism.
>> 
>> I'm not sure I entirely understand your concern, so bear with
>> me while I try to clarify.
>> 
>> 
>>> TLS vs not-TLS needs to be decided up front when we initialise the
>>> transport (i.e. at mount time or whenever the pNFS channels are set
>>> up). Otherwise, we're vulnerable to downgrade attacks.
>> 
>> Downgrade attacks are prevented by using "xprtsec=tls" because
>> in that case, transport creation fails if either the AUTH_TLS
>> fails or the handshake fails.
>> 
>> The TCP connection has to be established first, though. Then the
>> client can send the RPC_AUTH_TLS probe, which is the same as the
>> NULL ping that it already sends. That mechanism is independent
>> of the lower layer transport (TCP in this case).
>> 
>> Therefore, RPC traffic must be stoppered while the client:
>> 
>> 1. waits for the AUTH_TLS probe's reply, and
>> 
>> 2. waits for the handshake to complete
>> 
>> Because an RPC message is involved in this interaction, I didn't
>> see a way to implement it completely within xprtsock's TCP
>> connection logic. IMO, driving the handshake has to be done by
>> the generic RPC client.
>> 
>> So, do you mean that I need to replace RPC_TASK_CORK with a
>> special return code from xs_tcp_send_request() ?
> 
> 
> I mean the right mechanism for controlling whether or not the transport
> is ready to serve RPC requests is through the XPRT_CONNECTED flag. All
> the existing generic RPC error handling, congestion handling, etc
> depends on that flag being set correctly.
> 
> Until the TLS socket has completed its handshake protocol and is ready
> to transmit data, it should not be declared connected. The distinction
> between the two states 'TCP is unconnected' and 'TLS handshake is
> incomplete' is a socket/transport setup detail as far as the RPC xprt
> layer is concerned: just another set of intermediate states between
> SYN_SENT and ESTABLISHED.

First, TLS is technically an upper layer protocol. It's not
part of the transport protocol. This is exactly how it's
implemented in the Linux kernel. And, TLS works on transports
other than TCP, so that makes it a reasonable candidate for
treatment in the generic client rather than in a particular
transport mechanism.

Second, the "intermediate states" would be /outside/ of SYN_SENT
and ESTABLISHED. A TCP transport has to be in the ESTABLISHED
state (ie, the transport's connection handshake has to be
complete) before any TLS traffic can go over it.

Most importantly, the client has to send an RPC message first
before it can start a TLS handshake. The RPC-with-TLS protocol
specification requires that the handshake be preceded with the
NULL AUTH_TLS request, which is an RPC. Otherwise, there's no
way for the server end to know when to expect a handshake.

In today's RPC client, the underlying connection has to be in
the XPRT_CONNECTED state before the RPC client can exchange any
RPC transaction, including AUTH_TLS NULL.

To make it work the way you've suggested, we would have to build
a mechanism that could send the AUTH_TLS NULL and receive and
parse its reply /before/ the client has put the transport into
the XPRT_CONNECTED state, and that NULL request would have to
be driven from inside the transport instance (not via the FSM
where all other RPC traffic originates).

Do you have any suggestions about how to make this last point
less painful?


--
Chuck Lever




^ permalink raw reply

* Re: [RFC v7 net-next 10/13] mfd: ocelot: add support for the vsc7512 chip via spi
From: Colin Foster @ 2022-04-20  2:13 UTC (permalink / raw)
  To: Lee Jones
  Cc: linux-arm-kernel, linux-gpio, linux-phy, netdev, linux-kernel,
	Rafael J. Wysocki, Marc Zyngier, Hector Martin, Angela Czubak,
	Steen Hegelund, Lars Povlsen, Linus Walleij, Vinod Koul,
	Kishon Vijay Abraham I, Russell King, Heiner Kallweit,
	Jakub Kicinski, David S. Miller, Florian Fainelli, Vivien Didelot,
	Andrew Lunn, UNGLinuxDriver, Alexandre Belloni, Claudiu Manoil,
	Vladimir Oltean, katie.morris
In-Reply-To: <Yl57uP+rsl/bsI4i@google.com>

Thanks for the feedback Lee,

I'll do some final cleanup (hopefully this month...?) and prepare
another patch set.


Something I plan to do, lest anyone object, is send the next patch set
that explicitly states that ports 4-10 currently aren't supported, and 
inform a user as such. One main reason for this is that the additional
ports rely on the drivers/phy/mscc/phy-ocelot-serdes driver, which
utilizes syscon_node_to_regmap. The same issue comes up, where syscon of
course only supports mmio. I can foresee that being worthy of its own
rounds of reviews, and will change the .

But I'm quite new to this process - so if that isn't an acceptable path
forward I understand.


On Tue, Apr 19, 2022 at 10:07:04AM +0100, Lee Jones wrote:
> [Adding everyone/lists back on Cc]

Oops... I'm not sure how I did that. Thanks!

> 
> On Thu, 14 Apr 2022, Colin Foster wrote:
> 
> > Hi Lee,
> > 
> > Thanks for the feedback. I agree with (and have made) your suggestions.
> > Additional comments below.
> 
> I'm swamped right now, so I cannot do a full re-review, but please see
> in-line for a couple of (most likely flippant i.e. not fully
> thought out comments).
> 
> Please submit the changes you end up making off the back of this
> review and I'll conduct another on the next version you send.
> 
> > On Wed, Apr 13, 2022 at 09:32:22AM +0100, Lee Jones wrote:
> > > On Sun, 06 Mar 2022, Colin Foster wrote:
> > > 
> > [...]
> > > > +
> > > > +int ocelot_core_init(struct ocelot_core *core)
> > > > +{
> > > > +	struct device *dev = core->dev;
> > > > +	int ret;
> > > > +
> > > > +	dev_set_drvdata(dev, core);
> > > > +
> > > > +	core->gcb_regmap = ocelot_devm_regmap_init(core, dev,
> > > > +						   &vsc7512_gcb_resource);
> > > 
> > > This just ends up calling ocelot_spi_devm_get_regmap() right?
> > > 
> > > Why not call that from inside ocelot-spi.c where 'core' was allocated?
> > 
> > core->gcb_regmap doesn't handle anything more than chip reset. This will
> > have to happen regardless of the interface.
> > 
> > The "spi" part uses the core->cpuorg_regmap, which is needed for
> > configuring the SPI bus. In the case of I2C, this cpu_org regmap
> > (likely?) wouldn't be necessary, but the gcb_regmap absolutely would.
> > That's why gcb is allocated in core and cpuorg is allocated in SPI.
> > 
> > The previous RFC had cpuorg_regmap hidden inside a private struct to
> > emphasize this separation. As you pointed out, there was a lot of
> > bouncing between "core" structs and "spi" structs that got ugly.
> > 
> > (Looking at this more now... the value of cpuorg_regmap should have been
> > in the CONFIG_MFD_OCELOT_SPI ifdef, which might have made this
> > distinction more clear)
> 
> The TL;DR of my review point would be to make this as simple as
> possible.  If you can call a single function, instead of needlessly
> sending the thread of execution through three, please do.
> 
> > > > +	if (IS_ERR(core->gcb_regmap))
> > > > +		return -ENOMEM;
> > > > +
> > > > +	ret = ocelot_reset(core);
> > > > +	if (ret) {
> > > > +		dev_err(dev, "Failed to reset device: %d\n", ret);
> > > > +		return ret;
> > > > +	}
> > > > +
> > > > +	/*
> > > > +	 * A chip reset will clear the SPI configuration, so it needs to be done
> > > > +	 * again before we can access any registers
> > > > +	 */
> > > > +	ret = ocelot_spi_initialize(core);
> > > 
> > > Not a fan of calling back into the file which called us.
> > > 
> > > And what happens if SPI isn't controlling us?
> > > 
> > > Doesn't the documentation above say this device can also be
> > > communicated with via I2C and PCIe?
> > 
> > During the last RFC this was done through a callback. You had requested
> > I not use callbacks.
> > 
> > From that exchange:
> > """"
> > > > > +	ret = core->config->init_bus(core->config);
> > > >
> > > > You're not writing a bus.  I doubt you need ops call-backs.
> > >
> > > In the case of SPI, the chip needs to be configured both before and
> > > after reset. It sets up the bus for endianness, padding bytes, etc. This
> > > is currently achieved by running "ocelot_spi_init_bus" once during SPI
> > > probe, and once immediately after chip reset.
> > >
> > > For other control mediums I doubt this is necessary. Perhaps "init_bus"
> > > is a misnomer in this scenario...
> > 
> > Please find a clearer way to do this without function pointers.
> > """"
> 
> Yes, I remember.
> 
> This is an improvement on that, but it doesn't mean it's ideal.
> 
> > The idea is that we set up the SPI bus so we can read registers. The
> > protocol changes based on bus speed, so this is necessary.
> > 
> > This initial setup is done in ocelot-spi.c, before ocelot_core_init is
> > called.
> > 
> > We then reset the chip by writing some registers. This chip reset also
> > clears the SPI configuration, so we need to reconfigure the SPI bus
> > before we can read any additional registers.
> > 
> > Short of using function pointers, I imagine this will have to be
> > something akin to:
> > 
> > if (core->is_spi) {
> >     ocelot_spi_initalize();
> > }
> 
> What about if, instead of calling from SPI into Core, which calls back
> into SPI again, you do this from SPI instead:
> 
> [flippant - I haven't fully assessed the viability of this suggestion]
> 
> foo_type spi_probe(bar_type baz)
> {
>   setup_spi();
> 
>   core_init();
> 
>   more_spi_stuff();
> }
> 
> > I feel if the additional buses are added, they'll have to implement this
> > type of change. But as I don't (and don't plan to) have hardware to
> > build those interfaces out, right now ocelot_core assumes the bus is
> > SPI.
> 
> What are the chances of someone else coming along and implementing the
> other interfaces?  You might very well be over-complicating this
> implementation for support that may never be required.

I had one person email me about this already, though I understand they
went another direction.

But I could see this back-and-forth going away. I'll take another look
at it and try to clean it up a little more.

> 
> > > > +	if (ret) {
> > > > +		dev_err(dev, "Failed to initialize SPI interface: %d\n", ret);
> > > > +		return ret;
> > > > +	}
> > > > +
> > > > +	ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, vsc7512_devs,
> > > > +				   ARRAY_SIZE(vsc7512_devs), NULL, 0, NULL);
> > > > +	if (ret) {
> > > > +		dev_err(dev, "Failed to add sub-devices: %d\n", ret);
> > > > +		return ret;
> > > > +	}
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +EXPORT_SYMBOL(ocelot_core_init);
> > > > +
> > > > +MODULE_DESCRIPTION("Externally Controlled Ocelot Chip Driver");
> > > > +MODULE_AUTHOR("Colin Foster <colin.foster@in-advantage.com>");
> > > > +MODULE_LICENSE("GPL v2");
> > > > diff --git a/drivers/mfd/ocelot-spi.c b/drivers/mfd/ocelot-spi.c
> > > > new file mode 100644
> > > > index 000000000000..c788e239c9a7
> > > > --- /dev/null
> > > > +++ b/drivers/mfd/ocelot-spi.c
> > > > @@ -0,0 +1,313 @@
> > > > +// SPDX-License-Identifier: (GPL-2.0 OR MIT)
> > > > +/*
> > > > + * SPI core driver for the Ocelot chip family.
> > > > + *
> > > > + * This driver will handle everything necessary to allow for communication over
> > > > + * SPI to the VSC7511, VSC7512, VSC7513 and VSC7514 chips. The main functions
> > > > + * are to prepare the chip's SPI interface for a specific bus speed, and a host
> > > > + * processor's endianness. This will create and distribute regmaps for any MFD
> > > 
> > > As above, please drop references to MFD.
> > > 
> > > > + * children.
> > > > + *
> > > > + * Copyright 2021 Innovative Advantage Inc.
> > > > + *
> > > > + * Author: Colin Foster <colin.foster@in-advantage.com>
> > > > + */
> > > > +
> > > > +#include <linux/iopoll.h>
> > > > +#include <linux/kconfig.h>
> > > > +#include <linux/module.h>
> > > > +#include <linux/of.h>
> > > > +#include <linux/regmap.h>
> > > > +#include <linux/spi/spi.h>
> > > > +
> > > > +#include <asm/byteorder.h>
> > > > +
> > > > +#include "ocelot.h"
> > > > +
> > > > +#define DEV_CPUORG_IF_CTRL	0x0000
> > > > +#define DEV_CPUORG_IF_CFGSTAT	0x0004
> > > > +
> > > > +#define CFGSTAT_IF_NUM_VCORE	(0 << 24)
> > > > +#define CFGSTAT_IF_NUM_VRAP	(1 << 24)
> > > > +#define CFGSTAT_IF_NUM_SI	(2 << 24)
> > > > +#define CFGSTAT_IF_NUM_MIIM	(3 << 24)
> > > > +
> > > > +
> > > > +static const struct resource vsc7512_dev_cpuorg_resource = {
> > > > +	.start	= 0x71000000,
> > > > +	.end	= 0x710002ff,
> > > 
> > > No magic numbers.  Please define these addresses.
> > 
> > I missed these. Thanks.
> > 
> > > 
> > > > +	.name	= "devcpu_org",
> > > > +};
> > > > +
> > > > +#define VSC7512_BYTE_ORDER_LE 0x00000000
> > > > +#define VSC7512_BYTE_ORDER_BE 0x81818181
> > > > +#define VSC7512_BIT_ORDER_MSB 0x00000000
> > > > +#define VSC7512_BIT_ORDER_LSB 0x42424242
> > > > +
> > > > +int ocelot_spi_initialize(struct ocelot_core *core)
> > > > +{
> > > > +	u32 val, check;
> > > > +	int err;
> > > > +
> > > > +#ifdef __LITTLE_ENDIAN
> > > > +	val = VSC7512_BYTE_ORDER_LE;
> > > > +#else
> > > > +	val = VSC7512_BYTE_ORDER_BE;
> > > > +#endif
> > > 
> > > Not a fan of ifdefery in the middle of C files.
> > > 
> > > Please create a macro or define somewhere.
> > 
> > I'll clear this up in comments and move things around. This macro
> > specifically tends to lend itself to this type of ifdef dropping:
> > 
> > https://elixir.bootlin.com/linux/v5.18-rc2/C/ident/__LITTLE_ENDIAN
> 
> I see that the majority of implementations exist in header files as I
> would expect.  With respect to the others, past acceptance and what is
> acceptable in other subsystems has little bearing on what will be
> accepted here and now.
> 
> > The comment I'm adding is:
> >         /*
> >          * The SPI address must be big-endian, but we want the payload to match
> >          * our CPU. These are two bits (0 and 1) but they're repeated such that
> >          * the write from any configuration will be valid. The four
> >          * configurations are:
> >          *
> >          * 0b00: little-endian, MSB first
> >          * |            111111   | 22221111 | 33222222 |
> >          * | 76543210 | 54321098 | 32109876 | 10987654 |
> >          *
> >          * 0b01: big-endian, MSB first
> >          * | 33222222 | 22221111 | 111111   |          |
> >          * | 10987654 | 32109876 | 54321098 | 76543210 |
> >          *
> >          * 0b10: little-endian, LSB first
> >          * |              111111 | 11112222 | 22222233 |
> >          * | 01234567 | 89012345 | 67890123 | 45678901 |
> >          *
> >          * 0b11: big-endian, LSB first
> >          * | 22222233 | 11112222 |   111111 |          |
> >          * | 45678901 | 67890123 | 89012345 | 01234567 |
> >          */
> > 
> > With this info, would you recommend:
> > 1. A file-scope static const at the top of this file
> > 2. A macro assigned to one of those sequences
> > 3. A function to "detect" which architecture we're running
> 
> I do not have a strong opinion.
> 
> Just tuck the #iferry away somewhere in a header file.

Will do

> 
> > > > +	err = regmap_write(core->cpuorg_regmap, DEV_CPUORG_IF_CTRL, val);
> > > > +	if (err)
> > > > +		return err;
> > > 
> > > Comment.
> > > 
> > > > +	val = core->spi_padding_bytes;
> > > > +	err = regmap_write(core->cpuorg_regmap, DEV_CPUORG_IF_CFGSTAT, val);
> > > > +	if (err)
> > > > +		return err;
> > > 
> > > Comment.
> > 
> > Adding:
> > 
> > /*
> >  * Apply the number of padding bytes between a read request and the data
> >  * payload. Some registers have access times of up to 1us, so if the
> >  * first payload bit is shifted out too quickly, the read will fail.
> >  */
> > 
> > > 
> > > > +	/*
> > > > +	 * After we write the interface configuration, read it back here. This
> > > > +	 * will verify several different things. The first is that the number of
> > > > +	 * padding bytes actually got written correctly. These are found in bits
> > > > +	 * 0:3.
> > > > +	 *
> > > > +	 * The second is that bit 16 is cleared. Bit 16 is IF_CFGSTAT:IF_STAT,
> > > > +	 * and will be set if the register access is too fast. This would be in
> > > > +	 * the condition that the number of padding bytes is insufficient for
> > > > +	 * the SPI bus frequency.
> > > > +	 *
> > > > +	 * The last check is for bits 31:24, which define the interface by which
> > > > +	 * the registers are being accessed. Since we're accessing them via the
> > > > +	 * serial interface, it must return IF_NUM_SI.
> > > > +	 */
> > > > +	check = val | CFGSTAT_IF_NUM_SI;
> > > > +
> > > > +	err = regmap_read(core->cpuorg_regmap, DEV_CPUORG_IF_CFGSTAT, &val);
> > > > +	if (err)
> > > > +		return err;
> > > > +
> > > > +	if (check != val)
> > > > +		return -ENODEV;
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +EXPORT_SYMBOL(ocelot_spi_initialize);
> > > > +
> > > > +/*
> > > > + * The SPI protocol for interfacing with the ocelot chips uses 24 bits, while
> > > > + * the register locations are defined as 32-bit. The least significant two bits
> > > > + * get shifted out, as register accesses must always be word-aligned, leaving
> > > > + * bits 21:0 as the 22-bit address. It must always be big-endian, whereas the
> > > > + * payload can be optimized for bit / byte order to match whatever architecture
> > > > + * the controlling CPU has.
> > > > + */
> > > > +static unsigned int ocelot_spi_translate_address(unsigned int reg)
> > > > +{
> > > > +	return cpu_to_be32((reg & 0xffffff) >> 2);
> > > > +}
> > > > +
> > > > +struct ocelot_spi_regmap_context {
> > > > +	u32 base;
> > > > +	struct ocelot_core *core;
> > > > +};
> > > > +
> > > > +static int ocelot_spi_reg_read(void *context, unsigned int reg,
> > > > +			       unsigned int *val)
> > > > +{
> > > > +	struct ocelot_spi_regmap_context *regmap_context = context;
> > > > +	struct ocelot_core *core = regmap_context->core;
> > > > +	struct spi_transfer tx, padding, rx;
> > > > +	struct spi_message msg;
> > > 
> > > How big are all of these?
> > > 
> > > We will receive warnings if they occupy too much stack space.
> > 
> > Looking at the structs they're on the order of 10s of bytes. Maybe 70
> > bytes per instance (back of napkin calculation)
> > 
> > But it seems very common to stack-allocate spi_transfers:
> > 
> > https://elixir.bootlin.com/linux/v5.18-rc2/source/drivers/spi/spi.c#L4097
> > https://elixir.bootlin.com/linux/v5.18-rc2/source/include/linux/spi/spi.h#L1244
> > 
> > Do you have a feel for at what point that becomes a concern?
> 
> That's fine.  I just want you to bear this in mind.
> 
> I wish to prevent adding yet more W=1 level warnings into the kernel.
> 
> > > > +	struct spi_device *spi;
> > > > +	unsigned int addr;
> > > > +	u8 *tx_buf;
> > > > +
> > > > +	spi = core->spi;
> > > > +
> > > > +	addr = ocelot_spi_translate_address(reg + regmap_context->base);
> > > > +	tx_buf = (u8 *)&addr;
> > > > +
> > > > +	spi_message_init(&msg);
> > > > +
> > > > +	memset(&tx, 0, sizeof(tx));
> > > > +
> > > > +	/* Ignore the first byte for the 24-bit address */
> > > > +	tx.tx_buf = &tx_buf[1];
> > > > +	tx.len = 3;
> > > > +
> > > > +	spi_message_add_tail(&tx, &msg);
> > > > +
> > > > +	if (core->spi_padding_bytes > 0) {
> > > > +		u8 dummy_buf[16] = {0};
> > > > +
> > > > +		memset(&padding, 0, sizeof(padding));
> > > > +
> > > > +		/* Just toggle the clock for padding bytes */
> > > > +		padding.len = core->spi_padding_bytes;
> > > > +		padding.tx_buf = dummy_buf;
> > > > +		padding.dummy_data = 1;
> > > > +
> > > > +		spi_message_add_tail(&padding, &msg);
> > > > +	}
> > > > +
> > > > +	memset(&rx, 0, sizeof(rx));
> > > > +	rx.rx_buf = val;
> > > > +	rx.len = 4;
> > > > +
> > > > +	spi_message_add_tail(&rx, &msg);
> > > > +
> > > > +	return spi_sync(spi, &msg);
> > > > +}
> > > > +
> > > > +static int ocelot_spi_reg_write(void *context, unsigned int reg,
> > > > +				unsigned int val)
> > > > +{
> > > > +	struct ocelot_spi_regmap_context *regmap_context = context;
> > > > +	struct ocelot_core *core = regmap_context->core;
> > > > +	struct spi_transfer tx[2] = {0};
> > > > +	struct spi_message msg;
> > > > +	struct spi_device *spi;
> > > > +	unsigned int addr;
> > > > +	u8 *tx_buf;
> > > > +
> > > > +	spi = core->spi;
> > > > +
> > > > +	addr = ocelot_spi_translate_address(reg + regmap_context->base);
> > > > +	tx_buf = (u8 *)&addr;
> > > > +
> > > > +	spi_message_init(&msg);
> > > > +
> > > > +	/* Ignore the first byte for the 24-bit address and set the write bit */
> > > > +	tx_buf[1] |= BIT(7);
> > > > +	tx[0].tx_buf = &tx_buf[1];
> > > > +	tx[0].len = 3;
> > > > +
> > > > +	spi_message_add_tail(&tx[0], &msg);
> > > > +
> > > > +	memset(&tx[1], 0, sizeof(struct spi_transfer));
> > > > +	tx[1].tx_buf = &val;
> > > > +	tx[1].len = 4;
> > > > +
> > > > +	spi_message_add_tail(&tx[1], &msg);
> > > > +
> > > > +	return spi_sync(spi, &msg);
> > > > +}
> > > > +
> > > > +static const struct regmap_config ocelot_spi_regmap_config = {
> > > > +	.reg_bits = 24,
> > > > +	.reg_stride = 4,
> > > > +	.val_bits = 32,
> > > > +
> > > > +	.reg_read = ocelot_spi_reg_read,
> > > > +	.reg_write = ocelot_spi_reg_write,
> > > > +
> > > > +	.max_register = 0xffffffff,
> > > > +	.use_single_write = true,
> > > > +	.use_single_read = true,
> > > > +	.can_multi_write = false,
> > > > +
> > > > +	.reg_format_endian = REGMAP_ENDIAN_BIG,
> > > > +	.val_format_endian = REGMAP_ENDIAN_NATIVE,
> > > > +};
> > > > +
> > > > +struct regmap *
> > > > +ocelot_spi_devm_get_regmap(struct ocelot_core *core, struct device *child,
> > > > +			   const struct resource *res)
> > > 
> > > This seems to always initialise a new Regmap.
> > > 
> > > To me 'get' implies that it could fetch an already existing one.
> > > 
> > > ... and *perhaps* init a new one if none exists..
> > 
> > That's exactly what my intention was when I started.
> > 
> > But it seems like *if* that is something that is required, it should be
> > done through a syscon / device tree implementation and not be snuck into
> > this regmap getter. I was trying to do too much.
> > 
> > I'm renaming to "init"
> > 
> > > 
> > > > +{
> > > > +	struct ocelot_spi_regmap_context *context;
> > > > +	struct regmap_config regmap_config;
> > > > +
> > > > +	context = devm_kzalloc(child, sizeof(*context), GFP_KERNEL);
> > > > +	if (IS_ERR(context))
> > > > +		return ERR_CAST(context);
> > > > +
> > > > +	context->base = res->start;
> > > > +	context->core = core;
> > > > +
> > > > +	memcpy(&regmap_config, &ocelot_spi_regmap_config,
> > > > +	       sizeof(ocelot_spi_regmap_config));
> > > > +
> > > > +	regmap_config.name = res->name;
> > > > +	regmap_config.max_register = res->end - res->start;
> > > > +
> > > > +	return devm_regmap_init(child, NULL, context, &regmap_config);
> > > > +}
> > > > +
> > > > +static int ocelot_spi_probe(struct spi_device *spi)
> > > > +{
> > > > +	struct device *dev = &spi->dev;
> > > > +	struct ocelot_core *core;
> > > 
> > > This would be more in keeping with current drivers if you dropped the
> > > '_core' part of the struct name and called the variable ddata.
> > 
> > There's already a "struct ocelot" defined in include/soc/mscc/ocelot.h.
> > I suppose it could be renamed to align with what it actually is: the
> > "switch" component of the ocelot chip.
> > 
> > Vladimir, Alexandre, Horaitu, others:
> > Any opinions about this becoming "struct ocelot" and the current struct
> > being "struct ocelot_switch"?
> > 
> > Or maybe a technical / philosophical question: is "ocelot" the switch
> > core that can be implemented in other hardware? Or is it the chip family
> > entirely, (pinctrl, sgpio, etc.) who's switch core was brought into
> > other products?
> > 
> > The existing struct change would hit about 30 files.
> > https://elixir.bootlin.com/linux/v5.18-rc2/C/ident/ocelot
> 
> That's not ideal.
> 
> Please consider using 'ocelot_ddata' for now and consider a larger
> overhaul at a later date, if it makes sense to do so.
> 
> [...]
> 
> -- 
> Lee Jones [李琼斯]
> Principal Technical Lead - Developer Services
> Linaro.org │ Open source software for Arm SoCs
> Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply

* Re: [PATCH RFC 08/15] SUNRPC: Add RPC_TASK_CORK flag
From: Trond Myklebust @ 2022-04-19 19:04 UTC (permalink / raw)
  To: chuck.lever@oracle.com
  Cc: linux-cifs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvme@lists.infradead.org, netdev@vger.kernel.org,
	simo@redhat.com, ak@tempesta-tech.com, linux-nfs@vger.kernel.org,
	borisp@nvidia.com
In-Reply-To: <AE1190F4-EDE4-4C2D-94C9-02A5EDAAFBC6@oracle.com>

On Tue, 2022-04-19 at 18:16 +0000, Chuck Lever III wrote:
> 
> 
> > On Apr 18, 2022, at 10:57 PM, Trond Myklebust
> > <trondmy@hammerspace.com> wrote:
> > 
> > On Mon, 2022-04-18 at 12:52 -0400, Chuck Lever wrote:
> > > Introduce a mechanism to cause xprt_transmit() to break out of
> > > its
> > > sending loop at a specific rpc_rqst, rather than draining the
> > > whole
> > > transmit queue.
> > > 
> > > This enables the client to send just an RPC TLS probe and then
> > > wait
> > > for the response before proceeding with the rest of the queue.
> > > 
> > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > > ---
> > >  include/linux/sunrpc/sched.h  |    2 ++
> > >  include/trace/events/sunrpc.h |    1 +
> > >  net/sunrpc/xprt.c             |    2 ++
> > >  3 files changed, 5 insertions(+)
> > > 
> > > diff --git a/include/linux/sunrpc/sched.h
> > > b/include/linux/sunrpc/sched.h
> > > index 599133fb3c63..f8c09638fa69 100644
> > > --- a/include/linux/sunrpc/sched.h
> > > +++ b/include/linux/sunrpc/sched.h
> > > @@ -125,6 +125,7 @@ struct rpc_task_setup {
> > >  #define RPC_TASK_TLSCRED               0x00000008      /* Use
> > > AUTH_TLS credential */
> > >  #define RPC_TASK_NULLCREDS             0x00000010      /* Use
> > > AUTH_NULL credential */
> > >  #define RPC_CALL_MAJORSEEN             0x00000020      /* major
> > > timeout seen */
> > > +#define RPC_TASK_CORK                  0x00000040      /* cork
> > > the
> > > xmit queue */
> > >  #define RPC_TASK_DYNAMIC               0x00000080      /* task
> > > was
> > > kmalloc'ed */
> > >  #define        RPC_TASK_NO_ROUND_ROBIN         0x00000100     
> > > /*
> > > send requests on "main" xprt */
> > >  #define RPC_TASK_SOFT                  0x00000200      /* Use
> > > soft
> > > timeouts */
> > > @@ -137,6 +138,7 @@ struct rpc_task_setup {
> > >  
> > >  #define RPC_IS_ASYNC(t)                ((t)->tk_flags &
> > > RPC_TASK_ASYNC)
> > >  #define RPC_IS_SWAPPER(t)      ((t)->tk_flags &
> > > RPC_TASK_SWAPPER)
> > > +#define RPC_IS_CORK(t)         ((t)->tk_flags & RPC_TASK_CORK)
> > >  #define RPC_IS_SOFT(t)         ((t)->tk_flags &
> > > (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
> > >  #define RPC_IS_SOFTCONN(t)     ((t)->tk_flags &
> > > RPC_TASK_SOFTCONN)
> > >  #define RPC_WAS_SENT(t)                ((t)->tk_flags &
> > > RPC_TASK_SENT)
> > > diff --git a/include/trace/events/sunrpc.h
> > > b/include/trace/events/sunrpc.h
> > > index 811187c47ebb..e8d6adff1a50 100644
> > > --- a/include/trace/events/sunrpc.h
> > > +++ b/include/trace/events/sunrpc.h
> > > @@ -312,6 +312,7 @@ TRACE_EVENT(rpc_request,
> > >                 { RPC_TASK_TLSCRED, "TLSCRED"
> > > },                        \
> > >                 { RPC_TASK_NULLCREDS, "NULLCREDS"
> > > },                    \
> > >                 { RPC_CALL_MAJORSEEN, "MAJORSEEN"
> > > },                    \
> > > +               { RPC_TASK_CORK, "CORK"
> > > },                              \
> > >                 { RPC_TASK_DYNAMIC, "DYNAMIC"
> > > },                        \
> > >                 { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN"
> > > },          \
> > >                 { RPC_TASK_SOFT, "SOFT"
> > > },                              \
> > > diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> > > index 86d62cffba0d..4b303b945b51 100644
> > > --- a/net/sunrpc/xprt.c
> > > +++ b/net/sunrpc/xprt.c
> > > @@ -1622,6 +1622,8 @@ xprt_transmit(struct rpc_task *task)
> > >                 if (xprt_request_data_received(task) &&
> > >                     !test_bit(RPC_TASK_NEED_XMIT, &task-
> > > > tk_runstate))
> > >                         break;
> > > +               if (RPC_IS_CORK(task))
> > > +                       break;
> > >                 cond_resched_lock(&xprt->queue_lock);
> > >         }
> > >         spin_unlock(&xprt->queue_lock);
> > > 
> > > 
> > 
> > This is entirely the wrong place for this kind of control
> > mechanism.
> 
> I'm not sure I entirely understand your concern, so bear with
> me while I try to clarify.
> 
> 
> > TLS vs not-TLS needs to be decided up front when we initialise the
> > transport (i.e. at mount time or whenever the pNFS channels are set
> > up). Otherwise, we're vulnerable to downgrade attacks.
> 
> Downgrade attacks are prevented by using "xprtsec=tls" because
> in that case, transport creation fails if either the AUTH_TLS
> fails or the handshake fails.
> 
> The TCP connection has to be established first, though. Then the
> client can send the RPC_AUTH_TLS probe, which is the same as the
> NULL ping that it already sends. That mechanism is independent
> of the lower layer transport (TCP in this case).
> 
> Therefore, RPC traffic must be stoppered while the client:
> 
> 1. waits for the AUTH_TLS probe's reply, and
> 
> 2. waits for the handshake to complete
> 
> Because an RPC message is involved in this interaction, I didn't
> see a way to implement it completely within xprtsock's TCP
> connection logic. IMO, driving the handshake has to be done by
> the generic RPC client.
> 
> So, do you mean that I need to replace RPC_TASK_CORK with a
> special return code from xs_tcp_send_request() ?


I mean the right mechanism for controlling whether or not the transport
is ready to serve RPC requests is through the XPRT_CONNECTED flag. All
the existing generic RPC error handling, congestion handling, etc
depends on that flag being set correctly.

Until the TLS socket has completed its handshake protocol and is ready
to transmit data, it should not be declared connected. The distinction
between the two states 'TCP is unconnected' and 'TLS handshake is
incomplete' is a socket/transport setup detail as far as the RPC xprt
layer is concerned: just another set of intermediate states between
SYN_SENT and ESTABLISHED.

> > Once we've decided that TLS is the right thing to do, then we
> > shouldn't
> > declare to the RPC layer that the TLS-enabled transport is
> > connected
> > until the underlying transport connection is established, and the
> > TLS
> > handshake is done.
> 
> That logic is handled in patch 10/15.
> 
> Reconnecting and re-establishing a TLS session is handled in
> patches 11/15 and 12/15. Again, if the transport's policy setting
> is "must use TLS" then the client ensures that a TLS session is in
> use before allowing more RPC traffic on the new connection.
> 
> 
> --
> Chuck Lever
> 
> 
> 

-- 
Trond Myklebust
CTO, Hammerspace Inc
4984 El Camino Real, Suite 208
Los Altos, CA 94022
​
www.hammer.space


^ permalink raw reply

* [PATCH bpf-next v5 8/8] selftests/bpf: verify lsm_cgroup struct sock access
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

sk_priority & sk_mark are writable, the rest is readonly.

Add new ldx_offset fixups to lookup the offset of struct field.
Allow using test.kfunc regardless of prog_type.

One interesting thing here is that the verifier doesn't
really force me to add NULL checks anywhere :-/

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/test_verifier.c   | 54 ++++++++++++++++++-
 .../selftests/bpf/verifier/lsm_cgroup.c       | 34 ++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/verifier/lsm_cgroup.c

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index a2cd236c32eb..d6bc55c54aaa 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -75,6 +75,12 @@ struct kfunc_btf_id_pair {
 	int insn_idx;
 };
 
+struct ldx_offset {
+	const char *strct;
+	const char *field;
+	int insn_idx;
+};
+
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
@@ -102,6 +108,7 @@ struct bpf_test {
 	int fixup_map_ringbuf[MAX_FIXUPS];
 	int fixup_map_timer[MAX_FIXUPS];
 	struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
+	struct ldx_offset fixup_ldx[MAX_FIXUPS];
 	/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
 	 * Can be a tab-separated sequence of expected strings. An empty string
 	 * means no log verification.
@@ -755,6 +762,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_map_ringbuf = test->fixup_map_ringbuf;
 	int *fixup_map_timer = test->fixup_map_timer;
 	struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
+	struct ldx_offset *fixup_ldx = test->fixup_ldx;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -967,6 +975,50 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_kfunc_btf_id++;
 		} while (fixup_kfunc_btf_id->kfunc);
 	}
+
+	if (fixup_ldx->strct) {
+		const struct btf_member *memb;
+		const struct btf_type *tp;
+		const char *name;
+		struct btf *btf;
+		int btf_id;
+		int off;
+		int i;
+
+		btf = btf__load_vmlinux_btf();
+
+		do {
+			off = -1;
+			if (!btf)
+				goto next_ldx;
+
+			btf_id = btf__find_by_name_kind(btf,
+							fixup_ldx->strct,
+							BTF_KIND_STRUCT);
+			if (btf_id < 0)
+				goto next_ldx;
+
+			tp = btf__type_by_id(btf, btf_id);
+			memb = btf_members(tp);
+
+			for (i = 0; i < btf_vlen(tp); i++) {
+				name = btf__name_by_offset(btf,
+							   memb->name_off);
+				if (strcmp(fixup_ldx->field, name) == 0) {
+					off = memb->offset / 8;
+					break;
+				}
+				memb++;
+			}
+
+next_ldx:
+			prog[fixup_ldx->insn_idx].off = off;
+			fixup_ldx++;
+
+		} while (fixup_ldx->strct);
+
+		btf__free(btf);
+	}
 }
 
 struct libcap {
@@ -1131,7 +1183,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		opts.log_level = 4;
 	opts.prog_flags = pflags;
 
-	if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
+	if (test->kfunc) {
 		int attach_btf_id;
 
 		attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
diff --git a/tools/testing/selftests/bpf/verifier/lsm_cgroup.c b/tools/testing/selftests/bpf/verifier/lsm_cgroup.c
new file mode 100644
index 000000000000..af0efe783511
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/lsm_cgroup.c
@@ -0,0 +1,34 @@
+#define SK_WRITABLE_FIELD(tp, field, size, res) \
+{ \
+	.descr = field, \
+	.insns = { \
+		/* r1 = *(u64 *)(r1 + 0) */ \
+		BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), \
+		/* r1 = *(u64 *)(r1 + offsetof(struct socket, sk)) */ \
+		BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), \
+		/* r2 = *(u64 *)(r1 + offsetof(struct sock, <field>)) */ \
+		BPF_LDX_MEM(size, BPF_REG_2, BPF_REG_1, 0), \
+		/* *(u64 *)(r1 + offsetof(struct sock, <field>)) = r2 */ \
+		BPF_STX_MEM(size, BPF_REG_1, BPF_REG_2, 0), \
+		BPF_MOV64_IMM(BPF_REG_0, 1), \
+		BPF_EXIT_INSN(), \
+	}, \
+	.result = res, \
+	.errstr = res ? "no write support to 'struct sock' at off" : "", \
+	.prog_type = BPF_PROG_TYPE_LSM, \
+	.expected_attach_type = BPF_LSM_CGROUP, \
+	.kfunc = "socket_post_create", \
+	.fixup_ldx = { \
+		{ "socket", "sk", 1 }, \
+		{ tp, field, 2 }, \
+		{ tp, field, 3 }, \
+	}, \
+}
+
+SK_WRITABLE_FIELD("sock_common", "skc_family", BPF_H, REJECT),
+SK_WRITABLE_FIELD("sock", "sk_sndtimeo", BPF_DW, REJECT),
+SK_WRITABLE_FIELD("sock", "sk_priority", BPF_W, ACCEPT),
+SK_WRITABLE_FIELD("sock", "sk_mark", BPF_W, ACCEPT),
+SK_WRITABLE_FIELD("sock", "sk_pacing_rate", BPF_DW, REJECT),
+
+#undef SK_WRITABLE_FIELD
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* [PATCH bpf-next v5 7/8] selftests/bpf: lsm_cgroup functional test
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

Functional test that exercises the following:

1. apply default sk_priority policy
2. permit TX-only AF_PACKET socket
3. cgroup attach/detach/replace
4. reusing trampoline shim

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 .../selftests/bpf/prog_tests/lsm_cgroup.c     | 213 ++++++++++++++++++
 .../testing/selftests/bpf/progs/lsm_cgroup.c  | 126 +++++++++++
 2 files changed, 339 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
 create mode 100644 tools/testing/selftests/bpf/progs/lsm_cgroup.c

diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
new file mode 100644
index 000000000000..fbd6ed29f7d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+
+#include "lsm_cgroup.skel.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+void test_lsm_cgroup(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int cgroup_fd, cgroup_fd2, err, fd, prio;
+	int listen_fd, client_fd, accepted_fd;
+	struct lsm_cgroup *skel = NULL;
+	int post_create_prog_fd2 = -1;
+	int post_create_prog_fd = -1;
+	int bind_link_fd2 = -1;
+	int bind_prog_fd2 = -1;
+	int alloc_prog_fd = -1;
+	int bind_prog_fd = -1;
+	int bind_link_fd = -1;
+	int clone_prog_fd = -1;
+	socklen_t socklen;
+
+	cgroup_fd = test__join_cgroup("/sock_policy");
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+		goto close_skel;
+
+	cgroup_fd2 = create_and_get_cgroup("/sock_policy2");
+	if (!ASSERT_GE(cgroup_fd2, 0, "create second cgroup"))
+		goto close_skel;
+
+	skel = lsm_cgroup__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		goto close_cgroup;
+
+	post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+	post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2);
+	bind_prog_fd = bpf_program__fd(skel->progs.socket_bind);
+	bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2);
+	alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc);
+	clone_prog_fd = bpf_program__fd(skel->progs.socket_clone);
+
+	err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+	if (!ASSERT_OK(err, "attach alloc_prog_fd"))
+		goto detach_cgroup;
+
+	err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+	if (!ASSERT_OK(err, "attach clone_prog_fd"))
+		goto detach_cgroup;
+
+	/* Make sure replacing works.
+	 */
+
+	err = bpf_prog_attach(post_create_prog_fd, cgroup_fd,
+			      BPF_LSM_CGROUP, 0);
+	if (!ASSERT_OK(err, "attach post_create_prog_fd"))
+		goto close_cgroup;
+
+	attach_opts.replace_prog_fd = post_create_prog_fd;
+	err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd,
+				   BPF_LSM_CGROUP, &attach_opts);
+	if (!ASSERT_OK(err, "prog replace post_create_prog_fd"))
+		goto detach_cgroup;
+
+	/* Try the same attach/replace via link API.
+	 */
+
+	bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd,
+				       BPF_LSM_CGROUP, NULL);
+	if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd"))
+		goto detach_cgroup;
+
+	update_opts.old_prog_fd = bind_prog_fd;
+	update_opts.flags = BPF_F_REPLACE;
+
+	err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts);
+	if (!ASSERT_OK(err, "link update bind_prog_fd"))
+		goto detach_cgroup;
+
+	/* Attach another instance of bind program to another cgroup.
+	 * This should trigger the reuse of the trampoline shim (two
+	 * programs attaching to the same btf_id).
+	 */
+
+	bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2,
+					BPF_LSM_CGROUP, NULL);
+	if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2"))
+		goto detach_cgroup;
+
+	/* AF_UNIX is prohibited.
+	 */
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	ASSERT_LT(fd, 0, "socket(AF_UNIX)");
+
+	/* AF_INET6 gets default policy (sk_priority).
+	 */
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+		goto detach_cgroup;
+
+	prio = 0;
+	socklen = sizeof(prio);
+	ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+		  "getsockopt");
+	ASSERT_EQ(prio, 123, "sk_priority");
+
+	close(fd);
+
+	/* TX-only AF_PACKET is allowed.
+	 */
+
+	ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0,
+		  "socket(AF_PACKET, ..., ETH_P_ALL)");
+
+	fd = socket(AF_PACKET, SOCK_RAW, 0);
+	ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)");
+
+	/* TX-only AF_PACKET can not be rebound.
+	 */
+
+	struct sockaddr_ll sa = {
+		.sll_family = AF_PACKET,
+		.sll_protocol = htons(ETH_P_ALL),
+	};
+	ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0,
+		  "bind(ETH_P_ALL)");
+
+	close(fd);
+
+	/* Trigger passive open.
+	 */
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	ASSERT_GE(listen_fd, 0, "start_server");
+	client_fd = connect_to_fd(listen_fd, 0);
+	ASSERT_GE(client_fd, 0, "connect_to_fd");
+	accepted_fd = accept(listen_fd, NULL, NULL);
+	ASSERT_GE(accepted_fd, 0, "accept");
+
+	prio = 0;
+	socklen = sizeof(prio);
+	ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+		  "getsockopt");
+	ASSERT_EQ(prio, 234, "sk_priority");
+
+	/* These are replaced and never called. */
+	ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create");
+	ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind");
+
+	/* AF_INET6+SOCK_STREAM
+	 * AF_PACKET+SOCK_RAW
+	 * listen_fd
+	 * client_fd
+	 * accepted_fd
+	 */
+	ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
+
+	/* start_server
+	 * bind(ETH_P_ALL)
+	 */
+	ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2");
+	/* Single accept(). */
+	ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone");
+
+	/* AF_UNIX+SOCK_STREAM (failed)
+	 * AF_INET6+SOCK_STREAM
+	 * AF_PACKET+SOCK_RAW (failed)
+	 * AF_PACKET+SOCK_RAW
+	 * listen_fd
+	 * client_fd
+	 * accepted_fd
+	 */
+	ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc");
+
+	/* Make sure other cgroup doesn't trigger the programs.
+	 */
+
+	if (!ASSERT_OK(join_cgroup(""), "join root cgroup"))
+		goto detach_cgroup;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+		goto detach_cgroup;
+
+	prio = 0;
+	socklen = sizeof(prio);
+	ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+		  "getsockopt");
+	ASSERT_EQ(prio, 0, "sk_priority");
+
+	close(fd);
+
+detach_cgroup:
+	ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd,
+				   BPF_LSM_CGROUP), 0, "detach_create");
+	close(bind_link_fd);
+	/* Don't close bind_link_fd2, exercise cgroup release cleanup. */
+	ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd,
+				   BPF_LSM_CGROUP), 0, "detach_alloc");
+	ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd,
+				   BPF_LSM_CGROUP), 0, "detach_clone");
+
+close_cgroup:
+	close(cgroup_fd);
+close_skel:
+	lsm_cgroup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
new file mode 100644
index 000000000000..70f4ffd90d49
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef AF_PACKET
+#define AF_PACKET 17
+#endif
+
+#ifndef AF_UNIX
+#define AF_UNIX 1
+#endif
+
+#ifndef EPERM
+#define EPERM 1
+#endif
+
+int called_socket_post_create;
+int called_socket_post_create2;
+int called_socket_bind;
+int called_socket_bind2;
+int called_socket_alloc;
+int called_socket_clone;
+
+static __always_inline int real_create(struct socket *sock, int family,
+				       int protocol)
+{
+	struct sock *sk;
+
+	/* Reject non-tx-only AF_PACKET.
+	 */
+	if (family == AF_PACKET && protocol != 0)
+		return 0; /* EPERM */
+
+	sk = sock->sk;
+	if (!sk)
+		return 1;
+
+	/* The rest of the sockets get default policy.
+	 */
+	sk->sk_priority = 123;
+	return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+	     int type, int protocol, int kern)
+{
+	called_socket_post_create++;
+	return real_create(sock, family, protocol);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create2, struct socket *sock, int family,
+	     int type, int protocol, int kern)
+{
+	called_socket_post_create2++;
+	return real_create(sock, family, protocol);
+}
+
+static __always_inline int real_bind(struct socket *sock,
+				     struct sockaddr *address,
+				     int addrlen)
+{
+	struct sockaddr_ll sa = {};
+
+	if (sock->sk->__sk_common.skc_family != AF_PACKET)
+		return 1;
+
+	if (sock->sk->sk_kern_sock)
+		return 1;
+
+	bpf_probe_read_kernel(&sa, sizeof(sa), address);
+	if (sa.sll_protocol)
+		return 0; /* EPERM */
+
+	return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+	     int addrlen)
+{
+	called_socket_bind++;
+	return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address,
+	     int addrlen)
+{
+	called_socket_bind2++;
+	return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */
+SEC("lsm_cgroup/sk_alloc_security")
+int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority)
+{
+	called_socket_alloc++;
+	if (family == AF_UNIX)
+		return 0; /* EPERM */
+	return 1;
+}
+
+/* __cgroup_bpf_run_lsm_sock */
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+	called_socket_clone++;
+
+	if (!newsk)
+		return 1;
+
+	/* Accepted request sockets get a different priority.
+	 */
+	newsk->sk_priority = 234;
+	return 1;
+}
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* [PATCH bpf-next v5 6/8] libbpf: add lsm_cgoup_sock type
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

lsm_cgroup/ is the prefix for BPF_LSM_CGROUP.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/bpf/bpftool/common.c | 1 +
 tools/lib/bpf/libbpf.c     | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c740142c24d8..a7a913784c47 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -66,6 +66,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 	[BPF_TRACE_FEXIT]		= "fexit",
 	[BPF_MODIFY_RETURN]		= "mod_ret",
 	[BPF_LSM_MAC]			= "lsm_mac",
+	[BPF_LSM_CGROUP]		= "lsm_cgroup",
 	[BPF_SK_LOOKUP]			= "sk_lookup",
 	[BPF_TRACE_ITER]		= "trace_iter",
 	[BPF_XDP_DEVMAP]		= "xdp_devmap",
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index bf4f7ac54ebf..912cfa13814d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8701,6 +8701,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("freplace/",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
 	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
+	SEC_DEF("lsm_cgroup/",	LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
 	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
 	SEC_DEF("iter.s/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
@@ -9122,6 +9123,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
 		*kind = BTF_KIND_TYPEDEF;
 		break;
 	case BPF_LSM_MAC:
+	case BPF_LSM_CGROUP:
 		*prefix = BTF_LSM_PREFIX;
 		*kind = BTF_KIND_FUNC;
 		break;
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* [PATCH bpf-next v5 5/8] bpf: allow writing to a subset of sock fields from lsm progtype
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

For now, allow only the obvious ones, like sk_priority and sk_mark.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 kernel/bpf/bpf_lsm.c  | 58 +++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c |  3 ++-
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 7f8a631da90f..d49663685946 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -301,7 +301,65 @@ bool bpf_lsm_is_sleepable_hook(u32 btf_id)
 const struct bpf_prog_ops lsm_prog_ops = {
 };
 
+static int lsm_btf_struct_access(struct bpf_verifier_log *log,
+					const struct btf *btf,
+					const struct btf_type *t, int off,
+					int size, enum bpf_access_type atype,
+					u32 *next_btf_id,
+					enum bpf_type_flag *flag)
+{
+	const struct btf_type *sock_type;
+	struct btf *btf_vmlinux;
+	s32 type_id;
+	size_t end;
+
+	if (atype == BPF_READ)
+		return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+					 flag);
+
+	btf_vmlinux = bpf_get_btf_vmlinux();
+	if (!btf_vmlinux) {
+		bpf_log(log, "no vmlinux btf\n");
+		return -EOPNOTSUPP;
+	}
+
+	type_id = btf_find_by_name_kind(btf_vmlinux, "sock", BTF_KIND_STRUCT);
+	if (type_id < 0) {
+		bpf_log(log, "'struct sock' not found in vmlinux btf\n");
+		return -EINVAL;
+	}
+
+	sock_type = btf_type_by_id(btf_vmlinux, type_id);
+
+	if (t != sock_type) {
+		bpf_log(log, "only 'struct sock' writes are supported\n");
+		return -EACCES;
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct sock, sk_priority):
+		end = offsetofend(struct sock, sk_priority);
+		break;
+	case bpf_ctx_range(struct sock, sk_mark):
+		end = offsetofend(struct sock, sk_mark);
+		break;
+	default:
+		bpf_log(log, "no write support to 'struct sock' at off %d\n", off);
+		return -EACCES;
+	}
+
+	if (off + size > end) {
+		bpf_log(log,
+			"write access at off %d with size %d beyond the member of 'struct sock' ended at %zu\n",
+			off, size, end);
+		return -EACCES;
+	}
+
+	return NOT_INIT;
+}
+
 const struct bpf_verifier_ops lsm_verifier_ops = {
 	.get_func_proto = bpf_lsm_func_proto,
 	.is_valid_access = btf_ctx_access,
+	.btf_struct_access = lsm_btf_struct_access,
 };
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cc84954846d7..0d6d5be30a36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12872,7 +12872,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 				insn->code = BPF_LDX | BPF_PROBE_MEM |
 					BPF_SIZE((insn)->code);
 				env->prog->aux->num_exentries++;
-			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
+			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS &&
+				   resolve_prog_type(env->prog) != BPF_PROG_TYPE_LSM) {
 				verbose(env, "Writes through BTF pointers are not allowed\n");
 				return -EINVAL;
 			}
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* [PATCH bpf-next v5 4/8] bpf: minimize number of allocated lsm slots per program
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

Previous patch adds 1:1 mapping between all 211 LSM hooks
and bpf_cgroup program array. Instead of reserving a slot per
possible hook, reserve 10 slots per cgroup for lsm programs.
Those slots are dynamically allocated on demand and reclaimed.

It should be possible to eventually extend this idea to all hooks if
the memory consumption is unacceptable and shrink overall effective
programs array.

struct cgroup_bpf {
	struct bpf_prog_array *    effective[33];        /*     0   264 */
	/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
	struct hlist_head          progs[33];            /*   264   264 */
	/* --- cacheline 8 boundary (512 bytes) was 16 bytes ago --- */
	u8                         flags[33];            /*   528    33 */

	/* XXX 7 bytes hole, try to pack */

	struct list_head           storages;             /*   568    16 */
	/* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */
	struct bpf_prog_array *    inactive;             /*   584     8 */
	struct percpu_ref          refcnt;               /*   592    16 */
	struct work_struct         release_work;         /*   608    72 */

	/* size: 680, cachelines: 11, members: 7 */
	/* sum members: 673, holes: 1, sum holes: 7 */
	/* last cacheline: 40 bytes */
};

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup-defs.h |   4 +-
 include/linux/bpf_lsm.h         |   6 --
 kernel/bpf/bpf_lsm.c            |   5 --
 kernel/bpf/cgroup.c             | 108 +++++++++++++++++++++++++++-----
 4 files changed, 96 insertions(+), 27 deletions(-)

diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index d5a70a35dace..603f0a51eb3f 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -10,7 +10,9 @@
 
 struct bpf_prog_array;
 
-#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */
+/* Maximum number of concurrently attachable per-cgroup LSM hooks.
+ */
+#define CGROUP_LSM_NUM 10
 
 enum cgroup_bpf_attach_type {
 	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 7f0e59f5f9be..613de44aa429 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -43,7 +43,6 @@ extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
 
 int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
-int bpf_lsm_hook_idx(u32 btf_id);
 
 #else /* !CONFIG_BPF_LSM */
 
@@ -74,11 +73,6 @@ static inline int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
 	return -ENOENT;
 }
 
-static inline int bpf_lsm_hook_idx(u32 btf_id)
-{
-	return -EINVAL;
-}
-
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 2161cba1fe0c..7f8a631da90f 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -93,11 +93,6 @@ int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
 	return 0;
 }
 
-int bpf_lsm_hook_idx(u32 btf_id)
-{
-	return btf_id_set_index(&bpf_lsm_hooks, btf_id);
-}
-
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 			const struct bpf_prog *prog)
 {
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ba0e0c7a661d..539d17f7c9b2 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -106,10 +106,13 @@ unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
 	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	if (likely(cgrp))
+	if (likely(cgrp)) {
+		rcu_read_lock(); /* See bpf_lsm_attach_type_get(). */
 		ret = bpf_prog_run_array_cg(&cgrp->bpf,
 					    shim_prog->aux->cgroup_atype,
 					    ctx, bpf_prog_run, 0);
+		rcu_read_unlock();
+	}
 	return ret;
 }
 
@@ -128,10 +131,13 @@ unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
 	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
 
 	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
-	if (likely(cgrp))
+	if (likely(cgrp)) {
+		rcu_read_lock(); /* See bpf_lsm_attach_type_get(). */
 		ret = bpf_prog_run_array_cg(&cgrp->bpf,
 					    shim_prog->aux->cgroup_atype,
 					    ctx, bpf_prog_run, 0);
+		rcu_read_unlock();
+	}
 	return ret;
 }
 
@@ -148,7 +154,7 @@ unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
 	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
 	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
 
-	rcu_read_lock();
+	rcu_read_lock(); /* See bpf_lsm_attach_type_get(). */
 	cgrp = task_dfl_cgroup(current);
 	if (likely(cgrp))
 		ret = bpf_prog_run_array_cg(&cgrp->bpf,
@@ -159,15 +165,68 @@ unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
 }
 
 #ifdef CONFIG_BPF_LSM
+/* Readers are protected by rcu+synchronize_rcu.
+ * Writers are protected by cgroup_mutex.
+ */
+int cgroup_lsm_atype_usecnt[CGROUP_LSM_NUM];
+u32 cgroup_lsm_atype_btf_id[CGROUP_LSM_NUM];
+
 static enum cgroup_bpf_attach_type bpf_lsm_attach_type_get(u32 attach_btf_id)
 {
-	return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id);
+	int i;
+
+	WARN_ON_ONCE(!mutex_is_locked(&cgroup_mutex));
+
+	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype_btf_id); i++) {
+		if (cgroup_lsm_atype_btf_id[i] != attach_btf_id)
+			continue;
+
+		cgroup_lsm_atype_usecnt[i]++;
+		return CGROUP_LSM_START + i;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype_usecnt); i++) {
+		if (cgroup_lsm_atype_usecnt[i] != 0)
+			continue;
+
+		cgroup_lsm_atype_btf_id[i] = attach_btf_id;
+		cgroup_lsm_atype_usecnt[i] = 1;
+		return CGROUP_LSM_START + i;
+	}
+
+	return -E2BIG;
+}
+
+static void bpf_lsm_attach_type_put(u32 attach_btf_id)
+{
+	int i;
+
+	WARN_ON_ONCE(!mutex_is_locked(&cgroup_mutex));
+
+	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype_btf_id); i++) {
+		if (cgroup_lsm_atype_btf_id[i] != attach_btf_id)
+			continue;
+
+		if (--cgroup_lsm_atype_usecnt[i] <= 0) {
+			/* Wait for any existing users to finish.
+			 */
+			synchronize_rcu();
+			WARN_ON_ONCE(cgroup_lsm_atype_usecnt[i] < 0);
+		}
+		return;
+	}
+
+	WARN_ON_ONCE(1);
 }
 #else
 static enum cgroup_bpf_attach_type bpf_lsm_attach_type_get(u32 attach_btf_id)
 {
 	return -EOPNOTSUPP;
 }
+
+static void bpf_lsm_attach_type_put(u32 attach_btf_id)
+{
+}
 #endif /* CONFIG_BPF_LSM */
 
 void cgroup_bpf_offline(struct cgroup *cgrp)
@@ -264,6 +323,7 @@ static void bpf_cgroup_lsm_shim_release(struct bpf_prog *prog,
 		return;
 
 	bpf_trampoline_unlink_cgroup_shim(prog);
+	bpf_lsm_attach_type_put(prog->aux->attach_btf_id);
 }
 
 /**
@@ -663,27 +723,37 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 
 	progs = &cgrp->bpf.progs[atype];
 
-	if (!hierarchy_allows_attach(cgrp, atype))
-		return -EPERM;
+	if (!hierarchy_allows_attach(cgrp, atype)) {
+		err = -EPERM;
+		goto cleanup_attach_type;
+	}
 
-	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
+	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags) {
 		/* Disallow attaching non-overridable on top
 		 * of existing overridable in this cgroup.
 		 * Disallow attaching multi-prog if overridable or none
 		 */
-		return -EPERM;
+		err = -EPERM;
+		goto cleanup_attach_type;
+	}
 
-	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
-		return -E2BIG;
+	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) {
+		err = -E2BIG;
+		goto cleanup_attach_type;
+	}
 
 	pl = find_attach_entry(progs, prog, link, replace_prog,
 			       flags & BPF_F_ALLOW_MULTI);
-	if (IS_ERR(pl))
-		return PTR_ERR(pl);
+	if (IS_ERR(pl)) {
+		err = PTR_ERR(pl);
+		goto cleanup_attach_type;
+	}
 
 	if (bpf_cgroup_storages_alloc(storage, new_storage, type,
-				      prog ? : link->link.prog, cgrp))
-		return -ENOMEM;
+				      prog ? : link->link.prog, cgrp)) {
+		err = -ENOMEM;
+		goto cleanup_attach_type;
+	}
 
 	if (pl) {
 		old_prog = pl->prog;
@@ -693,7 +763,8 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
 			bpf_cgroup_storages_free(new_storage);
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto cleanup_attach_type;
 		}
 		if (hlist_empty(progs))
 			hlist_add_head(&pl->node, progs);
@@ -745,6 +816,13 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 		hlist_del(&pl->node);
 		kfree(pl);
 	}
+
+cleanup_attach_type:
+	if (type == BPF_LSM_CGROUP) {
+		struct bpf_prog *p = prog ? : link->link.prog;
+
+		bpf_lsm_attach_type_put(p->aux->attach_btf_id);
+	}
 	return err;
 }
 
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* [PATCH v1] RISC-V: defconfigs: Add syzkaller's config as a defconfig
From: Palmer Dabbelt @ 2022-04-19 17:49 UTC (permalink / raw)
  To: dvyukov, nogikh, syzkaller
  Cc: Paul Walmsley, Palmer Dabbelt, aou, ast, daniel, andrii, kafai,
	songliubraving, yhs, john.fastabend, kpsingh, mic, linux-riscv,
	linux-kernel, netdev, bpf, linux-security-module, Palmer Dabbelt

From: Palmer Dabbelt <palmer@rivosinc.com>

This is just the skzkaller configuration, pulled out via savedefconfig.
There have been some build breakages around here, this should make it
easier for users to test.

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

---

It feels like there's a cleaner way to do this, essentially I've just
taken
<https://raw.githubusercontent.com/google/syzkaller/master/dashboard/config/linux/upstream-riscv64-kasan.config>
and saved it as a defconfig.  I'm not sure that's the best way to do it, but
this is failing to boot for me so at least this way other folks can see what's
going on.
---
 arch/riscv/configs/syzkaller_defconfig | 1506 ++++++++++++++++++++++++
 1 file changed, 1506 insertions(+)
 create mode 100644 arch/riscv/configs/syzkaller_defconfig

diff --git a/arch/riscv/configs/syzkaller_defconfig b/arch/riscv/configs/syzkaller_defconfig
new file mode 100644
index 000000000000..907d40230598
--- /dev/null
+++ b/arch/riscv/configs/syzkaller_defconfig
@@ -0,0 +1,1506 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_PREEMPT=y
+CONFIG_VIRT_CPU_ACCOUNTING_GEN=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_PSI=y
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=16
+CONFIG_NUMA_BALANCING=y
+CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+# CONFIG_PROC_PID_CPUSET is not set
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_USERFAULTFD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_SOC_MICROCHIP_POLARFIRE=y
+CONFIG_SOC_SIFIVE=y
+CONFIG_SOC_VIRT=y
+CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_CMDLINE="earlyprintk=serial net.ifnames=0 sysctl.kernel.hung_task_all_cpu_backtrace=1 ima_policy=tcb nf-conntrack-ftp.ports=20000 nf-conntrack-tftp.ports=20000 nf-conntrack-sip.ports=20000 nf-conntrack-irc.ports=20000 nf-conntrack-sane.ports=20000 binder.debug_mask=0 rcupdate.rcu_expedited=1 no_hash_pointers page_owner=on sysctl.vm.nr_hugepages=4 sysctl.vm.nr_overcommit_hugepages=4 secretmem.enable=1 sysctl.max_rcu_stall_to_panic=1 dummy_hcd.num=2 watchdog_thresh=165 workqueue.watchdog_thresh=420 sysctl.net.core.netdev_unregister_timeout_secs=420 panic_on_warn=1"
+CONFIG_CMDLINE_EXTEND=y
+CONFIG_PM=y
+CONFIG_PM_DEBUG=y
+CONFIG_CPU_IDLE=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_MODULE_SIG_ALL is not set
+CONFIG_BLK_DEV_ZONED=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
+CONFIG_IOSCHED_BFQ=y
+CONFIG_BFQ_GROUP_IOSCHED=y
+CONFIG_BFQ_CGROUP_DEBUG=y
+CONFIG_BINFMT_FLAT=y
+CONFIG_BINFMT_MISC=y
+# CONFIG_SPARSEMEM_VMEMMAP is not set
+CONFIG_KSM=y
+CONFIG_CMA=y
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC=y
+CONFIG_PERCPU_STATS=y
+CONFIG_ANON_VMA_NAME=y
+CONFIG_DAMON=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_PADDR=y
+CONFIG_DAMON_DBGFS=y
+CONFIG_DAMON_RECLAIM=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_TLS=y
+CONFIG_TLS_DEVICE=y
+CONFIG_TLS_TOE=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_INTERFACE=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_SMC=y
+CONFIG_SMC_DIAG=y
+CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_FIB_TRIE_STATS=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_NET_IPVTI=y
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_INET_ESPINTCP=y
+CONFIG_INET_IPCOMP=y
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_RAW_DIAG=y
+CONFIG_INET_DIAG_DESTROY=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=y
+CONFIG_TCP_CONG_WESTWOOD=y
+CONFIG_TCP_CONG_HTCP=y
+CONFIG_TCP_CONG_HSTCP=y
+CONFIG_TCP_CONG_HYBLA=y
+CONFIG_TCP_CONG_NV=y
+CONFIG_TCP_CONG_SCALABLE=y
+CONFIG_TCP_CONG_LP=y
+CONFIG_TCP_CONG_VENO=y
+CONFIG_TCP_CONG_YEAH=y
+CONFIG_TCP_CONG_ILLINOIS=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TCP_CONG_CDG=y
+CONFIG_TCP_CONG_BBR=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=y
+CONFIG_INET6_ESP=y
+CONFIG_INET6_ESP_OFFLOAD=y
+CONFIG_INET6_ESPINTCP=y
+CONFIG_INET6_IPCOMP=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_ILA=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPV6_SIT_6RD=y
+CONFIG_IPV6_GRE=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SEG6_HMAC=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_NETLABEL=y
+CONFIG_MPTCP=y
+CONFIG_NETWORK_SECMARK=y
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_EGRESS is not set
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+# CONFIG_NF_CONNTRACK_PROCFS is not set
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+# CONFIG_NF_CT_PROTO_DCCP is not set
+# CONFIG_NF_CT_PROTO_SCTP is not set
+CONFIG_NF_CONNTRACK_AMANDA=y
+CONFIG_NF_CONNTRACK_FTP=y
+CONFIG_NF_CONNTRACK_H323=y
+CONFIG_NF_CONNTRACK_IRC=y
+CONFIG_NF_CONNTRACK_NETBIOS_NS=y
+CONFIG_NF_CONNTRACK_SNMP=y
+CONFIG_NF_CONNTRACK_PPTP=y
+CONFIG_NF_CONNTRACK_SANE=y
+CONFIG_NF_CONNTRACK_SIP=y
+CONFIG_NF_CONNTRACK_TFTP=y
+CONFIG_NF_CT_NETLINK=y
+CONFIG_NF_CT_NETLINK_TIMEOUT=y
+CONFIG_NF_CT_NETLINK_HELPER=y
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
+CONFIG_NF_TABLES=y
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=y
+CONFIG_NFT_CT=y
+CONFIG_NFT_FLOW_OFFLOAD=y
+CONFIG_NFT_CONNLIMIT=y
+CONFIG_NFT_LOG=y
+CONFIG_NFT_LIMIT=y
+CONFIG_NFT_MASQ=y
+CONFIG_NFT_REDIR=y
+CONFIG_NFT_NAT=y
+CONFIG_NFT_TUNNEL=y
+CONFIG_NFT_OBJREF=y
+CONFIG_NFT_QUEUE=y
+CONFIG_NFT_QUOTA=y
+CONFIG_NFT_REJECT=y
+CONFIG_NFT_COMPAT=y
+CONFIG_NFT_HASH=y
+CONFIG_NFT_FIB_INET=y
+CONFIG_NFT_XFRM=y
+CONFIG_NFT_SOCKET=y
+CONFIG_NFT_OSF=y
+CONFIG_NFT_TPROXY=y
+CONFIG_NFT_SYNPROXY=y
+CONFIG_NFT_DUP_NETDEV=y
+CONFIG_NFT_FWD_NETDEV=y
+CONFIG_NFT_FIB_NETDEV=y
+CONFIG_NFT_REJECT_NETDEV=y
+CONFIG_NF_FLOW_TABLE_INET=y
+CONFIG_NF_FLOW_TABLE=y
+CONFIG_NETFILTER_XT_TARGET_AUDIT=y
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=y
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=y
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
+CONFIG_NETFILTER_XT_TARGET_DSCP=y
+CONFIG_NETFILTER_XT_TARGET_HMARK=y
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y
+CONFIG_NETFILTER_XT_TARGET_LED=y
+CONFIG_NETFILTER_XT_TARGET_LOG=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NETFILTER_XT_TARGET_NFLOG=y
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=y
+CONFIG_NETFILTER_XT_TARGET_TEE=y
+CONFIG_NETFILTER_XT_TARGET_TPROXY=y
+CONFIG_NETFILTER_XT_TARGET_TRACE=y
+CONFIG_NETFILTER_XT_TARGET_SECMARK=y
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=y
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_CGROUP=y
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=y
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=y
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_CPU=y
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=y
+CONFIG_NETFILTER_XT_MATCH_DSCP=y
+CONFIG_NETFILTER_XT_MATCH_ESP=y
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y
+CONFIG_NETFILTER_XT_MATCH_HELPER=y
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=y
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MATCH_LIMIT=y
+CONFIG_NETFILTER_XT_MATCH_MAC=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y
+CONFIG_NETFILTER_XT_MATCH_NFACCT=y
+CONFIG_NETFILTER_XT_MATCH_OSF=y
+CONFIG_NETFILTER_XT_MATCH_OWNER=y
+CONFIG_NETFILTER_XT_MATCH_POLICY=y
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y
+CONFIG_NETFILTER_XT_MATCH_QUOTA=y
+CONFIG_NETFILTER_XT_MATCH_RATEEST=y
+CONFIG_NETFILTER_XT_MATCH_REALM=y
+CONFIG_NETFILTER_XT_MATCH_RECENT=y
+# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
+CONFIG_NETFILTER_XT_MATCH_SOCKET=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_MATCH_STRING=y
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=y
+CONFIG_NETFILTER_XT_MATCH_TIME=y
+CONFIG_NETFILTER_XT_MATCH_U32=y
+CONFIG_NFT_DUP_IPV4=y
+CONFIG_NFT_FIB_IPV4=y
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NF_LOG_ARP=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_MATCH_AH=y
+CONFIG_IP_NF_MATCH_ECN=y
+CONFIG_IP_NF_MATCH_RPFILTER=y
+CONFIG_IP_NF_MATCH_TTL=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_REJECT=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_NF_TARGET_NETMAP=y
+CONFIG_IP_NF_TARGET_REDIRECT=y
+CONFIG_IP_NF_MANGLE=y
+CONFIG_IP_NF_TARGET_CLUSTERIP=y
+CONFIG_IP_NF_TARGET_ECN=y
+CONFIG_IP_NF_TARGET_TTL=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_SECURITY=y
+CONFIG_IP_NF_ARPTABLES=y
+CONFIG_IP_NF_ARPFILTER=y
+CONFIG_IP_NF_ARP_MANGLE=y
+CONFIG_NFT_DUP_IPV6=y
+CONFIG_NFT_FIB_IPV6=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_MATCH_AH=y
+CONFIG_IP6_NF_MATCH_EUI64=y
+CONFIG_IP6_NF_MATCH_FRAG=y
+CONFIG_IP6_NF_MATCH_OPTS=y
+CONFIG_IP6_NF_MATCH_HL=y
+CONFIG_IP6_NF_MATCH_IPV6HEADER=y
+CONFIG_IP6_NF_MATCH_MH=y
+CONFIG_IP6_NF_MATCH_RPFILTER=y
+CONFIG_IP6_NF_MATCH_RT=y
+CONFIG_IP6_NF_MATCH_SRH=y
+CONFIG_IP6_NF_TARGET_HL=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_TARGET_REJECT=y
+CONFIG_IP6_NF_TARGET_SYNPROXY=y
+CONFIG_IP6_NF_MANGLE=y
+CONFIG_IP6_NF_RAW=y
+CONFIG_IP6_NF_SECURITY=y
+CONFIG_IP6_NF_NAT=y
+CONFIG_IP6_NF_TARGET_MASQUERADE=y
+CONFIG_IP6_NF_TARGET_NPT=y
+CONFIG_RDS=y
+CONFIG_RDS_RDMA=y
+CONFIG_RDS_TCP=y
+CONFIG_L2TP=y
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=y
+CONFIG_L2TP_ETH=y
+CONFIG_NET_DSA=y
+CONFIG_NET_DSA_TAG_BRCM=y
+CONFIG_NET_DSA_TAG_BRCM_PREPEND=y
+CONFIG_NET_DSA_TAG_MTK=y
+CONFIG_NET_DSA_TAG_QCA=y
+CONFIG_NET_DSA_TAG_RTL4_A=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_VLAN_8021Q_MVRP=y
+CONFIG_LLC2=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_SCH_HFSC=y
+CONFIG_NET_SCH_PRIO=y
+CONFIG_NET_SCH_MULTIQ=y
+CONFIG_NET_SCH_RED=y
+CONFIG_NET_SCH_SFB=y
+CONFIG_NET_SCH_SFQ=y
+CONFIG_NET_SCH_TEQL=y
+CONFIG_NET_SCH_TBF=y
+CONFIG_NET_SCH_CBS=y
+CONFIG_NET_SCH_ETF=y
+CONFIG_NET_SCH_TAPRIO=y
+CONFIG_NET_SCH_GRED=y
+CONFIG_NET_SCH_DSMARK=y
+CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_DRR=y
+CONFIG_NET_SCH_MQPRIO=y
+CONFIG_NET_SCH_SKBPRIO=y
+CONFIG_NET_SCH_CHOKE=y
+CONFIG_NET_SCH_QFQ=y
+CONFIG_NET_SCH_CODEL=y
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_SCH_CAKE=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_NET_SCH_HHF=y
+CONFIG_NET_SCH_PIE=y
+CONFIG_NET_SCH_FQ_PIE=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_PLUG=y
+CONFIG_NET_SCH_ETS=y
+CONFIG_NET_SCH_DEFAULT=y
+CONFIG_NET_CLS_BASIC=y
+CONFIG_NET_CLS_TCINDEX=y
+CONFIG_NET_CLS_ROUTE4=y
+CONFIG_NET_CLS_FW=y
+CONFIG_NET_CLS_U32=y
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=y
+CONFIG_NET_CLS_RSVP6=y
+CONFIG_NET_CLS_FLOW=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=y
+CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=y
+CONFIG_NET_EMATCH_NBYTE=y
+CONFIG_NET_EMATCH_U32=y
+CONFIG_NET_EMATCH_META=y
+CONFIG_NET_EMATCH_TEXT=y
+CONFIG_NET_EMATCH_CANID=y
+CONFIG_NET_EMATCH_IPT=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=y
+CONFIG_NET_ACT_SAMPLE=y
+CONFIG_NET_ACT_IPT=y
+CONFIG_NET_ACT_NAT=y
+CONFIG_NET_ACT_PEDIT=y
+CONFIG_NET_ACT_SIMP=y
+CONFIG_NET_ACT_SKBEDIT=y
+CONFIG_NET_ACT_CSUM=y
+CONFIG_NET_ACT_VLAN=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_ACT_CONNMARK=y
+CONFIG_NET_ACT_CTINFO=y
+CONFIG_NET_ACT_SKBMOD=y
+CONFIG_NET_ACT_IFE=y
+CONFIG_NET_ACT_TUNNEL_KEY=y
+CONFIG_NET_ACT_CT=y
+CONFIG_NET_ACT_GATE=y
+CONFIG_NET_IFE_SKBMARK=y
+CONFIG_NET_IFE_SKBPRIO=y
+CONFIG_NET_IFE_SKBTCINDEX=y
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_DCB=y
+CONFIG_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_HSR=y
+CONFIG_NET_NCSI=y
+# CONFIG_PCPU_DEV_REFCNT is not set
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_NET_DROP_MONITOR=y
+CONFIG_CAN=y
+CONFIG_CAN_J1939=y
+CONFIG_CAN_ISOTP=y
+CONFIG_CAN_VCAN=y
+CONFIG_CAN_VXCAN=y
+CONFIG_CAN_SLCAN=y
+CONFIG_CAN_IFI_CANFD=y
+CONFIG_CAN_8DEV_USB=y
+CONFIG_CAN_EMS_USB=y
+CONFIG_CAN_ESD_USB2=y
+CONFIG_CAN_GS_USB=y
+CONFIG_CAN_KVASER_USB=y
+CONFIG_CAN_MCBA_USB=y
+CONFIG_CAN_PEAK_USB=y
+CONFIG_AF_KCM=y
+CONFIG_RFKILL=y
+CONFIG_RFKILL_INPUT=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_NET_9P_RDMA=y
+CONFIG_CEPH_LIB=y
+CONFIG_CEPH_LIB_USE_DNS_RESOLVER=y
+CONFIG_NFC=y
+CONFIG_NFC_DIGITAL=y
+CONFIG_NFC_NCI=y
+CONFIG_NFC_NCI_UART=y
+CONFIG_NFC_HCI=y
+CONFIG_NFC_SHDLC=y
+CONFIG_NFC_SIM=y
+CONFIG_NFC_PORT100=y
+CONFIG_NFC_VIRTUAL_NCI=y
+CONFIG_NFC_FDP=y
+CONFIG_NFC_PN533_USB=y
+CONFIG_NFC_MRVL_USB=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_HOTPLUG_PCI_PCIE=y
+CONFIG_PCIEAER=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCIE_XILINX=y
+CONFIG_PCIE_FU740=y
+CONFIG_PCI_ENDPOINT=y
+CONFIG_PCCARD=y
+# CONFIG_PCMCIA_LOAD_CIS is not set
+# CONFIG_CARDBUS is not set
+CONFIG_UEVENT_HELPER=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
+CONFIG_FW_LOADER_COMPRESS=y
+CONFIG_DEBUG_DEVRES=y
+CONFIG_CONNECTOR=y
+# CONFIG_PROC_EVENTS is not set
+CONFIG_GOOGLE_FIRMWARE=y
+CONFIG_GOOGLE_COREBOOT_TABLE=y
+CONFIG_GOOGLE_MEMCONSOLE_COREBOOT=y
+CONFIG_GOOGLE_VPD=y
+# CONFIG_EFI_VARS_PSTORE is not set
+CONFIG_MTD=y
+# CONFIG_MTD_OF_PARTS is not set
+CONFIG_MTD_BLOCK=y
+CONFIG_FTL=y
+CONFIG_MTD_SLRAM=y
+CONFIG_MTD_PHRAM=y
+CONFIG_MTD_MTDRAM=y
+CONFIG_MTDRAM_TOTAL_SIZE=128
+CONFIG_MTDRAM_ERASE_SIZE=4
+CONFIG_MTD_BLOCK2MTD=y
+CONFIG_MTD_UBI=y
+CONFIG_PARPORT=y
+CONFIG_BLK_DEV_NULL_BLK=y
+CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION=y
+CONFIG_ZRAM=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_ATA_OVER_ETH=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RNBD_CLIENT=y
+CONFIG_BLK_DEV_NVME=y
+CONFIG_NVME_MULTIPATH=y
+CONFIG_NVME_RDMA=y
+CONFIG_NVME_FC=y
+CONFIG_NVME_TCP=y
+CONFIG_NVME_TARGET=y
+CONFIG_NVME_TARGET_LOOP=y
+CONFIG_NVME_TARGET_RDMA=y
+CONFIG_NVME_TARGET_FC=y
+CONFIG_NVME_TARGET_FCLOOP=y
+CONFIG_NVME_TARGET_TCP=y
+CONFIG_EEPROM_93CX6=y
+CONFIG_MISC_RTSX_USB=y
+CONFIG_RAID_ATTRS=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=y
+CONFIG_SCSI_SAS_ATA=y
+# CONFIG_SCSI_SAS_HOST_SMP is not set
+CONFIG_SCSI_HPSA=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_ATA_PIIX=y
+CONFIG_PATA_AMD=y
+CONFIG_PATA_OLDPIIX=y
+CONFIG_PATA_SCH=y
+CONFIG_ATA_GENERIC=y
+CONFIG_MD=y
+# CONFIG_MD_AUTODETECT is not set
+CONFIG_MD_LINEAR=y
+CONFIG_MD_MULTIPATH=y
+CONFIG_BCACHE=y
+CONFIG_BLK_DEV_DM=y
+CONFIG_DM_CRYPT=y
+CONFIG_DM_SNAPSHOT=y
+CONFIG_DM_THIN_PROVISIONING=y
+CONFIG_DM_CACHE=y
+CONFIG_DM_WRITECACHE=y
+CONFIG_DM_CLONE=y
+CONFIG_DM_MIRROR=y
+CONFIG_DM_RAID=y
+CONFIG_DM_ZERO=y
+CONFIG_DM_MULTIPATH=y
+CONFIG_DM_MULTIPATH_QL=y
+CONFIG_DM_MULTIPATH_ST=y
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=y
+CONFIG_DM_VERITY=y
+CONFIG_DM_VERITY_FEC=y
+CONFIG_DM_INTEGRITY=y
+CONFIG_DM_ZONED=y
+CONFIG_TARGET_CORE=y
+CONFIG_FIREWIRE=y
+CONFIG_FIREWIRE_OHCI=y
+CONFIG_FIREWIRE_SBP2=y
+CONFIG_FIREWIRE_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=y
+CONFIG_DUMMY=y
+CONFIG_WIREGUARD=y
+CONFIG_EQUALIZER=y
+CONFIG_NET_FC=y
+CONFIG_IFB=y
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_IPVLAN=y
+CONFIG_IPVTAP=y
+CONFIG_VXLAN=y
+CONFIG_GENEVE=y
+CONFIG_BAREUDP=y
+CONFIG_GTP=y
+CONFIG_MACSEC=y
+CONFIG_NETCONSOLE=y
+CONFIG_TUN=y
+CONFIG_TUN_VNET_CROSS_LE=y
+CONFIG_VETH=y
+CONFIG_VIRTIO_NET=y
+CONFIG_NLMON=y
+CONFIG_NET_VRF=y
+CONFIG_VSOCKMON=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_FUJITSU is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
+CONFIG_E100=y
+CONFIG_E1000E=y
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_MLX4_DEBUG is not set
+# CONFIG_MLX4_CORE_GEN2 is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROCHIP is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETERION is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
+# CONFIG_NET_VENDOR_PENSANDO is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+# CONFIG_NET_VENDOR_XIRCOM is not set
+CONFIG_FDDI=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_USB_CATC=y
+CONFIG_USB_KAWETH=y
+CONFIG_USB_PEGASUS=y
+CONFIG_USB_RTL8150=y
+CONFIG_USB_RTL8152=y
+CONFIG_USB_LAN78XX=y
+CONFIG_USB_USBNET=y
+CONFIG_USB_NET_CDC_EEM=y
+CONFIG_USB_NET_HUAWEI_CDC_NCM=y
+CONFIG_USB_NET_CDC_MBIM=y
+CONFIG_USB_NET_DM9601=y
+CONFIG_USB_NET_SR9700=y
+CONFIG_USB_NET_SR9800=y
+CONFIG_USB_NET_SMSC75XX=y
+CONFIG_USB_NET_SMSC95XX=y
+CONFIG_USB_NET_GL620A=y
+CONFIG_USB_NET_PLUSB=y
+CONFIG_USB_NET_MCS7830=y
+CONFIG_USB_NET_RNDIS_HOST=y
+CONFIG_USB_ALI_M5632=y
+CONFIG_USB_AN2720=y
+CONFIG_USB_EPSON2888=y
+CONFIG_USB_KC2190=y
+CONFIG_USB_NET_CX82310_ETH=y
+CONFIG_USB_NET_KALMIA=y
+CONFIG_USB_NET_QMI_WWAN=y
+CONFIG_USB_HSO=y
+CONFIG_USB_NET_INT51X1=y
+CONFIG_USB_IPHETH=y
+CONFIG_USB_SIERRA_NET=y
+CONFIG_USB_VL600=y
+CONFIG_USB_NET_CH9200=y
+# CONFIG_WLAN is not set
+CONFIG_VMXNET3=y
+CONFIG_USB4_NET=y
+CONFIG_NETDEVSIM=y
+# CONFIG_INPUT_LEDS is not set
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_JOYDEV=y
+CONFIG_INPUT_EVDEV=y
+# CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS is not set
+CONFIG_MOUSE_APPLETOUCH=y
+CONFIG_MOUSE_BCM5974=y
+CONFIG_MOUSE_SYNAPTICS_USB=y
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_IFORCE=y
+CONFIG_JOYSTICK_IFORCE_USB=y
+CONFIG_JOYSTICK_XPAD=y
+CONFIG_JOYSTICK_XPAD_FF=y
+CONFIG_JOYSTICK_XPAD_LEDS=y
+CONFIG_INPUT_TABLET=y
+CONFIG_TABLET_USB_ACECAD=y
+CONFIG_TABLET_USB_AIPTEK=y
+CONFIG_TABLET_USB_HANWANG=y
+CONFIG_TABLET_USB_KBTAB=y
+CONFIG_TABLET_USB_PEGASUS=y
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_USB_COMPOSITE=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_ATI_REMOTE2=y
+CONFIG_INPUT_KEYSPAN_REMOTE=y
+CONFIG_INPUT_POWERMATE=y
+CONFIG_INPUT_YEALINK=y
+CONFIG_INPUT_CM109=y
+CONFIG_INPUT_UINPUT=y
+CONFIG_INPUT_IMS_PCU=y
+CONFIG_USERIO=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_DMA is not set
+# CONFIG_SERIAL_8250_EXAR is not set
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_N_HDLC=y
+CONFIG_N_GSM=y
+CONFIG_NOZOMI=y
+CONFIG_NULL_TTY=y
+CONFIG_SERIAL_DEV_BUS=y
+CONFIG_TTY_PRINTK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
+# CONFIG_DEVMEM is not set
+# CONFIG_DEVPORT is not set
+# CONFIG_HW_RANDOM_TPM is not set
+CONFIG_RANDOM_TRUST_BOOTLOADER=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C_MUX_REG=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_I2C_DIOLAN_U2C=y
+CONFIG_I2C_DLN2=y
+CONFIG_I2C_ROBOTFUZZ_OSIF=y
+CONFIG_I2C_TINY_USB=y
+CONFIG_I2C_VIPERBOARD=y
+CONFIG_I2C_SLAVE=y
+CONFIG_I2C_SLAVE_EEPROM=y
+CONFIG_SPI=y
+CONFIG_SPI_DLN2=y
+CONFIG_SPI_SIFIVE=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_GPIO_DLN2=y
+CONFIG_GPIO_VIPERBOARD=y
+CONFIG_CHARGER_ISP1704=y
+CONFIG_CHARGER_BQ24190=y
+CONFIG_THERMAL=y
+CONFIG_THERMAL_NETLINK=y
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_THERMAL_OF is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED is not set
+CONFIG_USBPCWATCHDOG=y
+CONFIG_SSB=y
+# CONFIG_SSB_PCIHOST is not set
+CONFIG_BCMA=y
+# CONFIG_BCMA_HOST_PCI is not set
+# CONFIG_BCMA_DRIVER_PCI is not set
+CONFIG_MFD_DLN2=y
+CONFIG_LPC_ICH=y
+CONFIG_MFD_VIPERBOARD=y
+CONFIG_MFD_RETU=y
+CONFIG_TWL4030_CORE=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_TWL4030=y
+CONFIG_RC_CORE=y
+# CONFIG_RC_MAP is not set
+CONFIG_RC_DEVICES=y
+CONFIG_IR_IGORPLUGUSB=y
+CONFIG_IR_IGUANA=y
+CONFIG_IR_IMON=y
+CONFIG_IR_MCEUSB=y
+CONFIG_IR_REDRAT3=y
+CONFIG_IR_STREAMZAP=y
+CONFIG_IR_TTUSBIR=y
+CONFIG_RC_ATI_REMOTE=y
+CONFIG_MEDIA_CEC_SUPPORT=y
+CONFIG_USB_PULSE8_CEC=y
+CONFIG_USB_RAINSHADOW_CEC=y
+CONFIG_DRM=y
+CONFIG_DRM_DP_AUX_CHARDEV=y
+CONFIG_DRM_DEBUG_MM=y
+# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
+CONFIG_DRM_I2C_CH7006=y
+CONFIG_DRM_I2C_SIL164=y
+CONFIG_DRM_RADEON=y
+CONFIG_DRM_NOUVEAU=y
+CONFIG_DRM_VGEM=y
+CONFIG_DRM_VKMS=y
+CONFIG_DRM_UDL=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM_BOCHS=y
+CONFIG_DRM_CIRRUS_QEMU=y
+CONFIG_DRM_SIMPLEDRM=y
+CONFIG_FB=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_VIRTUAL=y
+CONFIG_LCD_CLASS_DEVICE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+# CONFIG_SND_PCM_TIMER is not set
+CONFIG_SND_HRTIMER=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_PROC_FS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQ_DUMMY=y
+CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_SEQ_HRTIMER_DEFAULT is not set
+CONFIG_SND_DUMMY=y
+CONFIG_SND_ALOOP=y
+CONFIG_SND_VIRMIDI=y
+CONFIG_SND_HDA_INTEL=y
+CONFIG_SND_HDA_HWDEP=y
+CONFIG_SND_HDA_INPUT_BEEP=y
+CONFIG_SND_HDA_PATCH_LOADER=y
+CONFIG_SND_HDA_CODEC_REALTEK=y
+CONFIG_SND_HDA_CODEC_ANALOG=y
+CONFIG_SND_HDA_CODEC_SIGMATEL=y
+CONFIG_SND_HDA_CODEC_VIA=y
+CONFIG_SND_HDA_CODEC_HDMI=y
+CONFIG_SND_HDA_CODEC_CIRRUS=y
+CONFIG_SND_HDA_CODEC_CONEXANT=y
+CONFIG_SND_HDA_CODEC_CA0110=y
+CONFIG_SND_HDA_CODEC_CA0132=y
+# CONFIG_SND_HDA_CODEC_CA0132_DSP is not set
+CONFIG_SND_HDA_CODEC_CMEDIA=y
+CONFIG_SND_HDA_CODEC_SI3054=y
+CONFIG_SND_HDA_PREALLOC_SIZE=0
+# CONFIG_SND_SPI is not set
+CONFIG_SND_USB_AUDIO=y
+CONFIG_SND_USB_UA101=y
+CONFIG_SND_USB_CAIAQ=y
+CONFIG_SND_USB_CAIAQ_INPUT=y
+CONFIG_SND_USB_6FIRE=y
+CONFIG_SND_USB_HIFACE=y
+CONFIG_SND_BCD2000=y
+CONFIG_SND_USB_POD=y
+CONFIG_SND_USB_PODHD=y
+CONFIG_SND_USB_TONEPORT=y
+CONFIG_SND_USB_VARIAX=y
+# CONFIG_SND_FIREWIRE is not set
+# CONFIG_SND_PCMCIA is not set
+CONFIG_SND_VIRTIO=y
+CONFIG_HID_BATTERY_STRENGTH=y
+CONFIG_HIDRAW=y
+CONFIG_UHID=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_ACCUTOUCH=y
+CONFIG_HID_ACRUX=y
+CONFIG_HID_ACRUX_FF=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_APPLEIR=y
+CONFIG_HID_ASUS=y
+CONFIG_HID_AUREAL=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_BETOP_FF=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CORSAIR=y
+CONFIG_HID_PRODIKEYS=y
+CONFIG_HID_CMEDIA=y
+CONFIG_HID_CP2112=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_DRAGONRISE_FF=y
+CONFIG_HID_EMS_FF=y
+CONFIG_HID_ELECOM=y
+CONFIG_HID_ELO=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GEMBIRD=y
+CONFIG_HID_GFRM=y
+CONFIG_HID_HOLTEK=y
+CONFIG_HOLTEK_FF=y
+CONFIG_HID_GT683R=y
+CONFIG_HID_KEYTOUCH=y
+CONFIG_HID_KYE=y
+CONFIG_HID_UCLOGIC=y
+CONFIG_HID_WALTOP=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_ICADE=y
+CONFIG_HID_ITE=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LCPOWER=y
+CONFIG_HID_LENOVO=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_LOGITECH_DJ=y
+CONFIG_LOGITECH_FF=y
+CONFIG_LOGIRUMBLEPAD2_FF=y
+CONFIG_LOGIG940_FF=y
+CONFIG_HID_MAGICMOUSE=y
+CONFIG_HID_MAYFLASH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_NTI=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_PANTHERLORD_FF=y
+CONFIG_HID_PENMOUNT=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_PICOLCD=y
+CONFIG_HID_PICOLCD_FB=y
+CONFIG_HID_PICOLCD_BACKLIGHT=y
+CONFIG_HID_PICOLCD_LCD=y
+CONFIG_HID_PICOLCD_LEDS=y
+CONFIG_HID_PICOLCD_CIR=y
+CONFIG_HID_PLANTRONICS=y
+CONFIG_HID_PRIMAX=y
+CONFIG_HID_RETRODE=y
+CONFIG_HID_ROCCAT=y
+CONFIG_HID_SAITEK=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_SONY_FF=y
+CONFIG_HID_SPEEDLINK=y
+CONFIG_HID_STEELSERIES=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_RMI=y
+CONFIG_HID_GREENASIA=y
+CONFIG_GREENASIA_FF=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_SMARTJOYPLUS_FF=y
+CONFIG_HID_TIVO=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THINGM=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_THRUSTMASTER_FF=y
+CONFIG_HID_UDRAW_PS3=y
+CONFIG_HID_WACOM=y
+CONFIG_HID_WIIMOTE=y
+CONFIG_HID_XINMO=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_ZEROPLUS_FF=y
+CONFIG_HID_ZYDACRON=y
+CONFIG_HID_SENSOR_HUB=y
+CONFIG_HID_SENSOR_CUSTOM_SENSOR=y
+CONFIG_HID_ALPS=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB_LED_TRIG=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_FEW_INIT_RETRIES=y
+CONFIG_USB_DYNAMIC_MINORS=y
+CONFIG_USB_OTG=y
+CONFIG_USB_OTG_FSM=y
+CONFIG_USB_LEDS_TRIGGER_USBPORT=y
+CONFIG_USB_MON=y
+CONFIG_USB_C67X00_HCD=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_DBGCAP=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OXU210HP_HCD=y
+CONFIG_USB_ISP116X_HCD=y
+CONFIG_USB_FOTG210_HCD=y
+CONFIG_USB_MAX3421_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=y
+CONFIG_USB_U132_HCD=y
+CONFIG_USB_SL811_HCD=y
+CONFIG_USB_SL811_HCD_ISO=y
+CONFIG_USB_SL811_CS=y
+CONFIG_USB_R8A66597_HCD=y
+CONFIG_USB_HCD_BCMA=y
+CONFIG_USB_HCD_SSB=y
+CONFIG_USB_PRINTER=y
+CONFIG_USB_TMC=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_STORAGE_REALTEK=y
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_USBAT=y
+CONFIG_USB_STORAGE_SDDR09=y
+CONFIG_USB_STORAGE_SDDR55=y
+CONFIG_USB_STORAGE_JUMPSHOT=y
+CONFIG_USB_STORAGE_ALAUDA=y
+CONFIG_USB_STORAGE_ONETOUCH=y
+CONFIG_USB_STORAGE_KARMA=y
+CONFIG_USB_STORAGE_CYPRESS_ATACB=y
+CONFIG_USB_STORAGE_ENE_UB6250=y
+CONFIG_USB_UAS=y
+CONFIG_USB_MDC800=y
+CONFIG_USB_MICROTEK=y
+CONFIG_USB_MUSB_HDRC=y
+CONFIG_MUSB_PIO_ONLY=y
+CONFIG_USB_DWC3=y
+CONFIG_USB_DWC3_ULPI=y
+CONFIG_USB_DWC3_GADGET=y
+# CONFIG_USB_DWC3_HAPS is not set
+CONFIG_USB_DWC2=y
+CONFIG_USB_DWC2_HOST=y
+CONFIG_USB_DWC2_PCI=y
+CONFIG_USB_CHIPIDEA=y
+CONFIG_USB_CHIPIDEA_UDC=y
+CONFIG_USB_CHIPIDEA_HOST=y
+CONFIG_USB_ISP1760=y
+CONFIG_USB_USS720=y
+CONFIG_USB_SERIAL=y
+CONFIG_USB_SERIAL_CONSOLE=y
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_SIMPLE=y
+CONFIG_USB_SERIAL_AIRCABLE=y
+CONFIG_USB_SERIAL_ARK3116=y
+CONFIG_USB_SERIAL_BELKIN=y
+CONFIG_USB_SERIAL_CH341=y
+CONFIG_USB_SERIAL_WHITEHEAT=y
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=y
+CONFIG_USB_SERIAL_CP210X=y
+CONFIG_USB_SERIAL_CYPRESS_M8=y
+CONFIG_USB_SERIAL_EMPEG=y
+CONFIG_USB_SERIAL_FTDI_SIO=y
+CONFIG_USB_SERIAL_VISOR=y
+CONFIG_USB_SERIAL_IPAQ=y
+CONFIG_USB_SERIAL_IR=y
+CONFIG_USB_SERIAL_EDGEPORT=y
+CONFIG_USB_SERIAL_EDGEPORT_TI=y
+CONFIG_USB_SERIAL_F81232=y
+CONFIG_USB_SERIAL_F8153X=y
+CONFIG_USB_SERIAL_GARMIN=y
+CONFIG_USB_SERIAL_IPW=y
+CONFIG_USB_SERIAL_IUU=y
+CONFIG_USB_SERIAL_KEYSPAN_PDA=y
+CONFIG_USB_SERIAL_KEYSPAN=y
+CONFIG_USB_SERIAL_KLSI=y
+CONFIG_USB_SERIAL_KOBIL_SCT=y
+CONFIG_USB_SERIAL_MCT_U232=y
+CONFIG_USB_SERIAL_METRO=y
+CONFIG_USB_SERIAL_MOS7720=y
+CONFIG_USB_SERIAL_MOS7715_PARPORT=y
+CONFIG_USB_SERIAL_MOS7840=y
+CONFIG_USB_SERIAL_MXUPORT=y
+CONFIG_USB_SERIAL_NAVMAN=y
+CONFIG_USB_SERIAL_PL2303=y
+CONFIG_USB_SERIAL_OTI6858=y
+CONFIG_USB_SERIAL_QCAUX=y
+CONFIG_USB_SERIAL_QUALCOMM=y
+CONFIG_USB_SERIAL_SPCP8X5=y
+CONFIG_USB_SERIAL_SAFE=y
+CONFIG_USB_SERIAL_SIERRAWIRELESS=y
+CONFIG_USB_SERIAL_SYMBOL=y
+CONFIG_USB_SERIAL_TI=y
+CONFIG_USB_SERIAL_CYBERJACK=y
+CONFIG_USB_SERIAL_OPTION=y
+CONFIG_USB_SERIAL_OMNINET=y
+CONFIG_USB_SERIAL_OPTICON=y
+CONFIG_USB_SERIAL_XSENS_MT=y
+CONFIG_USB_SERIAL_WISHBONE=y
+CONFIG_USB_SERIAL_SSU100=y
+CONFIG_USB_SERIAL_QT2=y
+CONFIG_USB_SERIAL_UPD78F0730=y
+CONFIG_USB_SERIAL_XR=y
+CONFIG_USB_SERIAL_DEBUG=y
+CONFIG_USB_EMI62=y
+CONFIG_USB_EMI26=y
+CONFIG_USB_ADUTUX=y
+CONFIG_USB_SEVSEG=y
+CONFIG_USB_LEGOTOWER=y
+CONFIG_USB_LCD=y
+CONFIG_USB_CYPRESS_CY7C63=y
+CONFIG_USB_CYTHERM=y
+CONFIG_USB_IDMOUSE=y
+CONFIG_USB_FTDI_ELAN=y
+CONFIG_USB_APPLEDISPLAY=y
+CONFIG_USB_SISUSBVGA=y
+CONFIG_USB_LD=y
+CONFIG_USB_TRANCEVIBRATOR=y
+CONFIG_USB_IOWARRIOR=y
+CONFIG_USB_TEST=y
+CONFIG_USB_EHSET_TEST_FIXTURE=y
+CONFIG_USB_ISIGHTFW=y
+CONFIG_USB_YUREX=y
+CONFIG_USB_HUB_USB251XB=y
+CONFIG_USB_HSIC_USB3503=y
+CONFIG_USB_HSIC_USB4604=y
+CONFIG_USB_LINK_LAYER_TEST=y
+CONFIG_USB_CHAOSKEY=y
+CONFIG_USB_GPIO_VBUS=y
+CONFIG_TAHVO_USB=y
+CONFIG_TAHVO_USB_HOST_BY_DEFAULT=y
+CONFIG_USB_ISP1301=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_GADGET_DEBUG_FILES=y
+CONFIG_USB_GADGET_DEBUG_FS=y
+CONFIG_USB_GADGET_VBUS_DRAW=500
+CONFIG_U_SERIAL_CONSOLE=y
+CONFIG_USB_FOTG210_UDC=y
+CONFIG_USB_GR_UDC=y
+CONFIG_USB_R8A66597=y
+CONFIG_USB_PXA27X=y
+CONFIG_USB_MV_UDC=y
+CONFIG_USB_MV_U3D=y
+CONFIG_USB_BDC_UDC=y
+CONFIG_USB_AMD5536UDC=y
+CONFIG_USB_NET2272=y
+CONFIG_USB_NET2272_DMA=y
+CONFIG_USB_NET2280=y
+CONFIG_USB_GOKU=y
+CONFIG_USB_EG20T=y
+CONFIG_USB_DUMMY_HCD=y
+CONFIG_USB_CONFIGFS=y
+CONFIG_USB_CONFIGFS_SERIAL=y
+CONFIG_USB_CONFIGFS_ACM=y
+CONFIG_USB_CONFIGFS_OBEX=y
+CONFIG_USB_CONFIGFS_NCM=y
+CONFIG_USB_CONFIGFS_ECM=y
+CONFIG_USB_CONFIGFS_ECM_SUBSET=y
+CONFIG_USB_CONFIGFS_RNDIS=y
+CONFIG_USB_CONFIGFS_EEM=y
+CONFIG_USB_CONFIGFS_MASS_STORAGE=y
+CONFIG_USB_CONFIGFS_F_LB_SS=y
+CONFIG_USB_CONFIGFS_F_FS=y
+CONFIG_USB_CONFIGFS_F_UAC1=y
+CONFIG_USB_CONFIGFS_F_UAC1_LEGACY=y
+CONFIG_USB_CONFIGFS_F_UAC2=y
+CONFIG_USB_CONFIGFS_F_MIDI=y
+CONFIG_USB_CONFIGFS_F_HID=y
+CONFIG_USB_CONFIGFS_F_PRINTER=y
+CONFIG_USB_CONFIGFS_F_TCM=y
+CONFIG_USB_GADGETFS=y
+CONFIG_USB_RAW_GADGET=y
+CONFIG_TYPEC=y
+CONFIG_TYPEC_TCPM=y
+CONFIG_TYPEC_TCPCI=y
+CONFIG_TYPEC_FUSB302=y
+CONFIG_TYPEC_UCSI=y
+CONFIG_TYPEC_TPS6598X=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_CADENCE=y
+CONFIG_MMC_SPI=y
+CONFIG_MMC_VUB300=y
+CONFIG_MMC_USHC=y
+CONFIG_MMC_REALTEK_USB=y
+CONFIG_MEMSTICK=y
+CONFIG_MEMSTICK_REALTEK_USB=y
+CONFIG_INFINIBAND=y
+CONFIG_INFINIBAND_USER_MAD=y
+CONFIG_INFINIBAND_USER_ACCESS=y
+CONFIG_MLX4_INFINIBAND=y
+CONFIG_RDMA_RXE=y
+CONFIG_RDMA_SIW=y
+CONFIG_INFINIBAND_IPOIB=y
+CONFIG_INFINIBAND_IPOIB_CM=y
+CONFIG_INFINIBAND_SRP=y
+CONFIG_INFINIBAND_ISER=y
+CONFIG_INFINIBAND_RTRS_CLIENT=y
+CONFIG_EDAC=y
+# CONFIG_EDAC_LEGACY_SYSFS is not set
+CONFIG_RTC_CLASS=y
+# CONFIG_RTC_HCTOSYS is not set
+# CONFIG_RTC_NVMEM is not set
+CONFIG_RTC_DRV_HID_SENSOR_TIME=y
+CONFIG_DMADEVICES=y
+CONFIG_ASYNC_TX_DMA=y
+CONFIG_SW_SYNC=y
+CONFIG_UDMABUF=y
+CONFIG_DMABUF_MOVE_NOTIFY=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DMABUF_HEAPS_CMA=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VDPA=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_VDPA=y
+CONFIG_VP_VDPA=y
+CONFIG_VHOST_NET=y
+CONFIG_VHOST_VSOCK=y
+CONFIG_VHOST_VDPA=y
+CONFIG_VHOST_CROSS_ENDIAN_LEGACY=y
+CONFIG_GREYBUS=y
+CONFIG_GREYBUS_ES2=y
+CONFIG_COMEDI=y
+CONFIG_COMEDI_USB_DRIVERS=y
+CONFIG_COMEDI_DT9812=y
+CONFIG_COMEDI_NI_USB6501=y
+CONFIG_COMEDI_USBDUX=y
+CONFIG_COMEDI_USBDUXFAST=y
+CONFIG_COMEDI_USBDUXSIGMA=y
+CONFIG_COMEDI_VMK80XX=y
+CONFIG_STAGING=y
+CONFIG_GREYBUS_HID=y
+CONFIG_GREYBUS_BRIDGED_PHY=y
+CONFIG_GREYBUS_USB=y
+CONFIG_RPMSG_CHAR=y
+CONFIG_RPMSG_VIRTIO=y
+CONFIG_IIO=y
+CONFIG_HID_SENSOR_ACCEL_3D=y
+CONFIG_DLN2_ADC=y
+CONFIG_VIPERBOARD_ADC=y
+CONFIG_HID_SENSOR_GYRO_3D=y
+CONFIG_HID_SENSOR_HUMIDITY=y
+CONFIG_HID_SENSOR_ALS=y
+CONFIG_HID_SENSOR_PROX=y
+CONFIG_HID_SENSOR_MAGNETOMETER_3D=y
+CONFIG_HID_SENSOR_INCLINOMETER_3D=y
+CONFIG_HID_SENSOR_DEVICE_ROTATION=y
+CONFIG_HID_SENSOR_PRESS=y
+CONFIG_HID_SENSOR_TEMP=y
+CONFIG_PHY_CPCAP_USB=y
+CONFIG_PHY_QCOM_USB_HS=y
+CONFIG_PHY_QCOM_USB_HSIC=y
+CONFIG_PHY_SAMSUNG_USB2=y
+CONFIG_PHY_TUSB1210=y
+CONFIG_USB4=y
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDER_IPC=y
+CONFIG_ANDROID_BINDERFS=y
+CONFIG_ANDROID_BINDER_DEVICES="binder0,binder1"
+CONFIG_COUNTER=y
+CONFIG_MOST=y
+CONFIG_VALIDATE_FS_PARSER=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_FS_ENCRYPTION=y
+CONFIG_FS_VERITY=y
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y
+CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V2=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_VIRTIO_FS=y
+CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_EFIVAR_FS=y
+CONFIG_ECRYPT_FS=y
+CONFIG_ECRYPT_FS_MESSAGING=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_LZO_COMPRESS=y
+CONFIG_PSTORE_LZ4_COMPRESS=y
+CONFIG_PSTORE_LZ4HC_COMPRESS=y
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=y
+CONFIG_NLS_CODEPAGE_775=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_CODEPAGE_852=y
+CONFIG_NLS_CODEPAGE_855=y
+CONFIG_NLS_CODEPAGE_857=y
+CONFIG_NLS_CODEPAGE_860=y
+CONFIG_NLS_CODEPAGE_861=y
+CONFIG_NLS_CODEPAGE_862=y
+CONFIG_NLS_CODEPAGE_863=y
+CONFIG_NLS_CODEPAGE_864=y
+CONFIG_NLS_CODEPAGE_865=y
+CONFIG_NLS_CODEPAGE_866=y
+CONFIG_NLS_CODEPAGE_869=y
+CONFIG_NLS_CODEPAGE_936=y
+CONFIG_NLS_CODEPAGE_950=y
+CONFIG_NLS_CODEPAGE_932=y
+CONFIG_NLS_CODEPAGE_949=y
+CONFIG_NLS_CODEPAGE_874=y
+CONFIG_NLS_ISO8859_8=y
+CONFIG_NLS_CODEPAGE_1250=y
+CONFIG_NLS_CODEPAGE_1251=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=y
+CONFIG_NLS_ISO8859_3=y
+CONFIG_NLS_ISO8859_4=y
+CONFIG_NLS_ISO8859_5=y
+CONFIG_NLS_ISO8859_6=y
+CONFIG_NLS_ISO8859_7=y
+CONFIG_NLS_ISO8859_9=y
+CONFIG_NLS_ISO8859_13=y
+CONFIG_NLS_ISO8859_14=y
+CONFIG_NLS_ISO8859_15=y
+CONFIG_NLS_KOI8_R=y
+CONFIG_NLS_KOI8_U=y
+CONFIG_NLS_MAC_ROMAN=y
+CONFIG_NLS_MAC_CELTIC=y
+CONFIG_NLS_MAC_CENTEURO=y
+CONFIG_NLS_MAC_CROATIAN=y
+CONFIG_NLS_MAC_CYRILLIC=y
+CONFIG_NLS_MAC_GAELIC=y
+CONFIG_NLS_MAC_GREEK=y
+CONFIG_NLS_MAC_ICELAND=y
+CONFIG_NLS_MAC_INUIT=y
+CONFIG_NLS_MAC_ROMANIAN=y
+CONFIG_NLS_MAC_TURKISH=y
+CONFIG_NLS_UTF8=y
+CONFIG_DLM=y
+CONFIG_UNICODE=y
+CONFIG_KEYS_REQUEST_CACHE=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
+CONFIG_TRUSTED_KEYS=y
+CONFIG_KEY_DH_OPERATIONS=y
+CONFIG_KEY_NOTIFICATIONS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_INFINIBAND=y
+CONFIG_SECURITY_NETWORK_XFRM=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_SECURITY_TOMOYO=y
+CONFIG_SECURITY_TOMOYO_MAX_ACCEPT_ENTRY=64
+CONFIG_SECURITY_TOMOYO_MAX_AUDIT_LOG=32
+CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING=y
+CONFIG_SECURITY_APPARMOR=y
+CONFIG_SECURITY_APPARMOR_DEBUG=y
+CONFIG_SECURITY_YAMA=y
+CONFIG_SECURITY_SAFESETID=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_IMA=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+# CONFIG_IMA_APPRAISE_BOOTPARAM is not set
+CONFIG_IMA_APPRAISE_MODSIG=y
+# CONFIG_IMA_TRUSTED_KEYRING is not set
+CONFIG_EVM=y
+CONFIG_EVM_ADD_XATTRS=y
+CONFIG_DEFAULT_SECURITY_APPARMOR=y
+CONFIG_LSM="landlock,lockdown,yama,safesetid,integrity,tomoyo,apparmor,bpf"
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_CRYPTO_USER=y
+CONFIG_CRYPTO_PCRYPT=y
+CONFIG_CRYPTO_ECRDSA=y
+CONFIG_CRYPTO_SM2=y
+CONFIG_CRYPTO_CURVE25519=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_CRYPTO_AEGIS128=y
+CONFIG_CRYPTO_CFB=y
+CONFIG_CRYPTO_LRW=y
+CONFIG_CRYPTO_OFB=y
+CONFIG_CRYPTO_KEYWRAP=y
+CONFIG_CRYPTO_ADIANTUM=y
+CONFIG_CRYPTO_XCBC=y
+CONFIG_CRYPTO_VMAC=y
+CONFIG_CRYPTO_XXHASH=y
+CONFIG_CRYPTO_BLAKE2S=y
+CONFIG_CRYPTO_MICHAEL_MIC=y
+CONFIG_CRYPTO_RMD160=y
+CONFIG_CRYPTO_SHA3=y
+CONFIG_CRYPTO_SM3=y
+CONFIG_CRYPTO_WP512=y
+CONFIG_CRYPTO_AES_TI=y
+CONFIG_CRYPTO_ANUBIS=y
+CONFIG_CRYPTO_ARC4=y
+CONFIG_CRYPTO_BLOWFISH=y
+CONFIG_CRYPTO_CAMELLIA=y
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_KHAZAD=y
+CONFIG_CRYPTO_SEED=y
+CONFIG_CRYPTO_SM4=y
+CONFIG_CRYPTO_TEA=y
+CONFIG_CRYPTO_TWOFISH=y
+CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_DRBG_HASH=y
+CONFIG_CRYPTO_DRBG_CTR=y
+CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_CRYPTO_USER_API_RNG=y
+CONFIG_CRYPTO_USER_API_AEAD=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_PKCS8_PRIVATE_KEY_PARSER=y
+CONFIG_PKCS7_TEST_KEY=y
+CONFIG_SIGNED_PE_FILE_VERIFICATION=y
+CONFIG_SECONDARY_TRUSTED_KEYRING=y
+# CONFIG_RAID6_PQ_BENCHMARK is not set
+CONFIG_CRC4=y
+CONFIG_DMA_CMA=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PRINTK_CALLER=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_UBSAN=y
+# CONFIG_UBSAN_BOOL is not set
+# CONFIG_UBSAN_ENUM is not set
+# CONFIG_UBSAN_ALIGNMENT is not set
+# CONFIG_UBSAN_SANITIZE_ALL is not set
+CONFIG_NET_DEV_REFCNT_TRACKER=y
+CONFIG_NET_NS_REFCNT_TRACKER=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_PAGE_OWNER=y
+CONFIG_PAGE_POISONING=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_VMACACHE=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_VIRTUAL=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_KASAN=y
+CONFIG_KFENCE=y
+CONFIG_KFENCE_STATIC_KEYS=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PANIC_TIMEOUT=86400
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=420
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+CONFIG_WQ_WATCHDOG=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_DEBUG_TIMEKEEPING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP_BITS=16
+CONFIG_LOCKDEP_CHAINS_BITS=17
+CONFIG_LOCKDEP_STACK_TRACE_BITS=20
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_IRQFLAGS=y
+CONFIG_DEBUG_PLIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=300
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_EQS_DEBUG=y
+# CONFIG_FTRACE is not set
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAILSLAB=y
+CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAULT_INJECTION_USERCOPY=y
+CONFIG_FAIL_MAKE_REQUEST=y
+CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAIL_FUTEX=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_KCOV=y
+CONFIG_KCOV_ENABLE_COMPARISONS=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
+CONFIG_MEMTEST=y
-- 
2.34.1


^ permalink raw reply related

* [PATCH bpf-next v5 2/8] bpf: convert cgroup_bpf.progs to hlist
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev, Jakub Sitnicki
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

This lets us reclaim some space to be used by new cgroup lsm slots.

Before:
struct cgroup_bpf {
	struct bpf_prog_array *    effective[23];        /*     0   184 */
	/* --- cacheline 2 boundary (128 bytes) was 56 bytes ago --- */
	struct list_head           progs[23];            /*   184   368 */
	/* --- cacheline 8 boundary (512 bytes) was 40 bytes ago --- */
	u32                        flags[23];            /*   552    92 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 10 boundary (640 bytes) was 8 bytes ago --- */
	struct list_head           storages;             /*   648    16 */
	struct bpf_prog_array *    inactive;             /*   664     8 */
	struct percpu_ref          refcnt;               /*   672    16 */
	struct work_struct         release_work;         /*   688    32 */

	/* size: 720, cachelines: 12, members: 7 */
	/* sum members: 716, holes: 1, sum holes: 4 */
	/* last cacheline: 16 bytes */
};

After:
struct cgroup_bpf {
	struct bpf_prog_array *    effective[23];        /*     0   184 */
	/* --- cacheline 2 boundary (128 bytes) was 56 bytes ago --- */
	struct hlist_head          progs[23];            /*   184   184 */
	/* --- cacheline 5 boundary (320 bytes) was 48 bytes ago --- */
	u8                         flags[23];            /*   368    23 */

	/* XXX 1 byte hole, try to pack */

	/* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
	struct list_head           storages;             /*   392    16 */
	struct bpf_prog_array *    inactive;             /*   408     8 */
	struct percpu_ref          refcnt;               /*   416    16 */
	struct work_struct         release_work;         /*   432    72 */

	/* size: 504, cachelines: 8, members: 7 */
	/* sum members: 503, holes: 1, sum holes: 1 */
	/* last cacheline: 56 bytes */
};

Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup-defs.h |  4 +-
 include/linux/bpf-cgroup.h      |  2 +-
 kernel/bpf/cgroup.c             | 72 +++++++++++++++++++--------------
 3 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 695d1224a71b..5d268e76d8e6 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -47,8 +47,8 @@ struct cgroup_bpf {
 	 * have either zero or one element
 	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
 	 */
-	struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
-	u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
+	struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+	u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
 
 	/* list of cgroup shared storages */
 	struct list_head storages;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 88a51b242adc..69b611f7b2c7 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -95,7 +95,7 @@ struct bpf_cgroup_link {
 };
 
 struct bpf_prog_list {
-	struct list_head node;
+	struct hlist_node node;
 	struct bpf_prog *prog;
 	struct bpf_cgroup_link *link;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 0cb6211fcb58..aaf9e36f2736 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -184,11 +184,12 @@ static void cgroup_bpf_release(struct work_struct *work)
 	mutex_lock(&cgroup_mutex);
 
 	for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
-		struct list_head *progs = &cgrp->bpf.progs[atype];
-		struct bpf_prog_list *pl, *pltmp;
+		struct hlist_head *progs = &cgrp->bpf.progs[atype];
+		struct bpf_prog_list *pl;
+		struct hlist_node *pltmp;
 
-		list_for_each_entry_safe(pl, pltmp, progs, node) {
-			list_del(&pl->node);
+		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
+			hlist_del(&pl->node);
 			if (pl->prog)
 				bpf_prog_put(pl->prog);
 			if (pl->link)
@@ -244,12 +245,12 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
 /* count number of elements in the list.
  * it's slow but the list cannot be long
  */
-static u32 prog_list_length(struct list_head *head)
+static u32 prog_list_length(struct hlist_head *head)
 {
 	struct bpf_prog_list *pl;
 	u32 cnt = 0;
 
-	list_for_each_entry(pl, head, node) {
+	hlist_for_each_entry(pl, head, node) {
 		if (!prog_list_prog(pl))
 			continue;
 		cnt++;
@@ -318,7 +319,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
 		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
 			continue;
 
-		list_for_each_entry(pl, &p->bpf.progs[atype], node) {
+		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
 			if (!prog_list_prog(pl))
 				continue;
 
@@ -369,7 +370,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 		cgroup_bpf_get(p);
 
 	for (i = 0; i < NR; i++)
-		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);
 
 	INIT_LIST_HEAD(&cgrp->bpf.storages);
 
@@ -445,7 +446,7 @@ static int update_effective_progs(struct cgroup *cgrp,
 
 #define BPF_CGROUP_MAX_PROGS 64
 
-static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
 					       struct bpf_prog *prog,
 					       struct bpf_cgroup_link *link,
 					       struct bpf_prog *replace_prog,
@@ -455,12 +456,12 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
 
 	/* single-attach case */
 	if (!allow_multi) {
-		if (list_empty(progs))
+		if (hlist_empty(progs))
 			return NULL;
-		return list_first_entry(progs, typeof(*pl), node);
+		return hlist_entry(progs->first, typeof(*pl), node);
 	}
 
-	list_for_each_entry(pl, progs, node) {
+	hlist_for_each_entry(pl, progs, node) {
 		if (prog && pl->prog == prog && prog != replace_prog)
 			/* disallow attaching the same prog twice */
 			return ERR_PTR(-EINVAL);
@@ -471,7 +472,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
 
 	/* direct prog multi-attach w/ replacement case */
 	if (replace_prog) {
-		list_for_each_entry(pl, progs, node) {
+		hlist_for_each_entry(pl, progs, node) {
 			if (pl->prog == replace_prog)
 				/* a match found */
 				return pl;
@@ -507,7 +508,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog_list *pl;
-	struct list_head *progs;
+	struct hlist_head *progs;
 	int err;
 
 	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
@@ -530,7 +531,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	if (!hierarchy_allows_attach(cgrp, atype))
 		return -EPERM;
 
-	if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
+	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
 		/* Disallow attaching non-overridable on top
 		 * of existing overridable in this cgroup.
 		 * Disallow attaching multi-prog if overridable or none
@@ -552,12 +553,22 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	if (pl) {
 		old_prog = pl->prog;
 	} else {
+		struct hlist_node *last = NULL;
+
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
 			bpf_cgroup_storages_free(new_storage);
 			return -ENOMEM;
 		}
-		list_add_tail(&pl->node, progs);
+		if (hlist_empty(progs))
+			hlist_add_head(&pl->node, progs);
+		else
+			hlist_for_each(last, progs) {
+				if (last->next)
+					continue;
+				hlist_add_behind(&pl->node, last);
+				break;
+			}
 	}
 
 	pl->prog = prog;
@@ -583,7 +594,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	}
 	bpf_cgroup_storages_free(new_storage);
 	if (!old_prog) {
-		list_del(&pl->node);
+		hlist_del(&pl->node);
 		kfree(pl);
 	}
 	return err;
@@ -614,7 +625,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
 	struct cgroup_subsys_state *css;
 	struct bpf_prog_array *progs;
 	struct bpf_prog_list *pl;
-	struct list_head *head;
+	struct hlist_head *head;
 	struct cgroup *cg;
 	int pos;
 
@@ -630,7 +641,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
 				continue;
 
 			head = &cg->bpf.progs[atype];
-			list_for_each_entry(pl, head, node) {
+			hlist_for_each_entry(pl, head, node) {
 				if (!prog_list_prog(pl))
 					continue;
 				if (pl->link == link)
@@ -664,7 +675,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog *old_prog;
 	struct bpf_prog_list *pl;
-	struct list_head *progs;
+	struct hlist_head *progs;
 	bool found = false;
 
 	atype = to_cgroup_bpf_attach_type(link->type);
@@ -676,7 +687,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
 	if (link->link.prog->type != new_prog->type)
 		return -EINVAL;
 
-	list_for_each_entry(pl, progs, node) {
+	hlist_for_each_entry(pl, progs, node) {
 		if (pl->link == link) {
 			found = true;
 			break;
@@ -715,7 +726,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
 	return ret;
 }
 
-static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
 					       struct bpf_prog *prog,
 					       struct bpf_cgroup_link *link,
 					       bool allow_multi)
@@ -723,14 +734,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
 	struct bpf_prog_list *pl;
 
 	if (!allow_multi) {
-		if (list_empty(progs))
+		if (hlist_empty(progs))
 			/* report error when trying to detach and nothing is attached */
 			return ERR_PTR(-ENOENT);
 
 		/* to maintain backward compatibility NONE and OVERRIDE cgroups
 		 * allow detaching with invalid FD (prog==NULL) in legacy mode
 		 */
-		return list_first_entry(progs, typeof(*pl), node);
+		return hlist_entry(progs->first, typeof(*pl), node);
 	}
 
 	if (!prog && !link)
@@ -740,7 +751,7 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
 		return ERR_PTR(-EINVAL);
 
 	/* find the prog or link and detach it */
-	list_for_each_entry(pl, progs, node) {
+	hlist_for_each_entry(pl, progs, node) {
 		if (pl->prog == prog && pl->link == link)
 			return pl;
 	}
@@ -764,7 +775,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog *old_prog;
 	struct bpf_prog_list *pl;
-	struct list_head *progs;
+	struct hlist_head *progs;
 	u32 flags;
 	int err;
 
@@ -793,9 +804,10 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 		goto cleanup;
 
 	/* now can actually delete it from this cgroup list */
-	list_del(&pl->node);
+	hlist_del(&pl->node);
+
 	kfree(pl);
-	if (list_empty(progs))
+	if (hlist_empty(progs))
 		/* last program was detached, reset flags to zero */
 		cgrp->bpf.flags[atype] = 0;
 	if (old_prog)
@@ -829,7 +841,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	enum bpf_attach_type type = attr->query.attach_type;
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog_array *effective;
-	struct list_head *progs;
+	struct hlist_head *progs;
 	struct bpf_prog *prog;
 	int cnt, ret = 0, i;
 	u32 flags;
@@ -868,7 +880,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		u32 id;
 
 		i = 0;
-		list_for_each_entry(pl, progs, node) {
+		hlist_for_each_entry(pl, progs, node) {
 			prog = prog_list_prog(pl);
 			id = prog->aux->id;
 			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* [PATCH bpf-next v5 3/8] bpf: per-cgroup lsm flavor
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

Allow attaching to lsm hooks in the cgroup context.

Attaching to per-cgroup LSM works exactly like attaching
to other per-cgroup hooks. New BPF_LSM_CGROUP is added
to trigger new mode; the actual lsm hook we attach to is
signaled via existing attach_btf_id.

For the hooks that have 'struct socket' as its first argument,
we use the cgroup associated with that socket. For the rest,
we use 'current' cgroup (this is all on default hierarchy == v2 only).
Note that for the hooks that work on 'struct sock' we still
take the cgroup from 'current' because most of the time,
the 'sock' argument is not properly initialized.

Behind the scenes, we allocate a shim program that is attached
to the trampoline and runs cgroup effective BPF programs array.
This shim has some rudimentary ref counting and can be shared
between several programs attaching to the same per-cgroup lsm hook.

Note that this patch bloats cgroup size because we add 211
cgroup_bpf_attach_type(s) for simplicity sake. This will be
addressed in the subsequent patch.

Also note that we only add non-sleepable flavor for now. To enable
sleepable use-cases, BPF_PROG_RUN_ARRAY_CG has to grab trace rcu,
shim programs have to be freed via trace rcu, cgroup_bpf.effective
should be also trace-rcu-managed + maybe some other changes that
I'm not aware of.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup-defs.h |   6 +
 include/linux/bpf-cgroup.h      |   7 ++
 include/linux/bpf.h             |  13 +++
 include/linux/bpf_lsm.h         |  14 +++
 include/uapi/linux/bpf.h        |   1 +
 kernel/bpf/bpf_lsm.c            |  63 +++++++++++
 kernel/bpf/btf.c                |  11 ++
 kernel/bpf/cgroup.c             | 189 ++++++++++++++++++++++++++++++--
 kernel/bpf/syscall.c            |  10 ++
 kernel/bpf/trampoline.c         | 145 ++++++++++++++++++++++++
 kernel/bpf/verifier.c           |   1 +
 tools/include/uapi/linux/bpf.h  |   1 +
 12 files changed, 449 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 5d268e76d8e6..d5a70a35dace 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -10,6 +10,8 @@
 
 struct bpf_prog_array;
 
+#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */
+
 enum cgroup_bpf_attach_type {
 	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
 	CGROUP_INET_INGRESS = 0,
@@ -35,6 +37,10 @@ enum cgroup_bpf_attach_type {
 	CGROUP_INET4_GETSOCKNAME,
 	CGROUP_INET6_GETSOCKNAME,
 	CGROUP_INET_SOCK_RELEASE,
+#ifdef CONFIG_BPF_LSM
+	CGROUP_LSM_START,
+	CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
+#endif
 	MAX_CGROUP_BPF_ATTACH_TYPE
 };
 
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 69b611f7b2c7..2a3482161ca2 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,6 +23,13 @@ struct ctl_table;
 struct ctl_table_header;
 struct task_struct;
 
+unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
+				       const struct bpf_insn *insn);
+unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
+					 const struct bpf_insn *insn);
+unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
+					  const struct bpf_insn *insn);
+
 #ifdef CONFIG_CGROUP_BPF
 
 #define CGROUP_ATYPE(type) \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ab2ba5f4bc9c..20328c8fe24f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -980,6 +980,7 @@ struct bpf_prog_aux {
 	u64 load_time; /* ns since boottime */
 	u32 verified_insns;
 	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+	int cgroup_atype; /* enum cgroup_bpf_attach_type */
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
 	void *security;
@@ -1117,6 +1118,9 @@ struct bpf_dummy_ops {
 int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
 			    union bpf_attr __user *uattr);
 #endif
+int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+				    struct bpf_attach_target_info *tgt_info);
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog);
 #else
 static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
 {
@@ -1140,6 +1144,14 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
 {
 	return -EINVAL;
 }
+static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+						  struct bpf_attach_target_info *tgt_info)
+{
+	return -EOPNOTSUPP;
+}
+static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+}
 #endif
 
 struct bpf_array {
@@ -2282,6 +2294,7 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len);
 
 struct btf_id_set;
 bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
+int btf_id_set_index(const struct btf_id_set *set, u32 id);
 
 #define MAX_BPRINTF_VARARGS		12
 
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 479c101546ad..7f0e59f5f9be 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -42,6 +42,9 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
 
+int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
+int bpf_lsm_hook_idx(u32 btf_id);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -65,6 +68,17 @@ static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
+					   bpf_func_t *bpf_func)
+{
+	return -ENOENT;
+}
+
+static inline int bpf_lsm_hook_idx(u32 btf_id)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..bbe48a2dd852 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -998,6 +998,7 @@ enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
+	BPF_LSM_CGROUP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 064eccba641d..2161cba1fe0c 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -16,6 +16,7 @@
 #include <linux/bpf_local_storage.h>
 #include <linux/btf_ids.h>
 #include <linux/ima.h>
+#include <linux/bpf-cgroup.h>
 
 /* For every LSM hook that allows attachment of BPF programs, declare a nop
  * function where a BPF program can be attached.
@@ -35,6 +36,68 @@ BTF_SET_START(bpf_lsm_hooks)
 #undef LSM_HOOK
 BTF_SET_END(bpf_lsm_hooks)
 
+/* List of LSM hooks that should operate on 'current' cgroup regardless
+ * of function signature.
+ */
+BTF_SET_START(bpf_lsm_current_hooks)
+/* operate on freshly allocated sk without any cgroup association */
+BTF_ID(func, bpf_lsm_sk_alloc_security)
+BTF_ID(func, bpf_lsm_sk_free_security)
+BTF_SET_END(bpf_lsm_current_hooks)
+
+int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
+			     bpf_func_t *bpf_func)
+{
+	const struct btf_type *first_arg_type;
+	const struct btf_type *sock_type;
+	const struct btf_type *sk_type;
+	const struct btf *btf_vmlinux;
+	const struct btf_param *args;
+	s32 type_id;
+
+	if (!prog->aux->attach_func_proto ||
+	    !btf_type_is_func_proto(prog->aux->attach_func_proto))
+		return -EINVAL;
+
+	if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
+	    btf_id_set_contains(&bpf_lsm_current_hooks,
+				prog->aux->attach_btf_id)) {
+		*bpf_func = __cgroup_bpf_run_lsm_current;
+		return 0;
+	}
+
+	args = btf_params(prog->aux->attach_func_proto);
+
+	btf_vmlinux = bpf_get_btf_vmlinux();
+	if (!btf_vmlinux)
+		return -EINVAL;
+
+	type_id = btf_find_by_name_kind(btf_vmlinux, "socket", BTF_KIND_STRUCT);
+	if (type_id < 0)
+		return -EINVAL;
+	sock_type = btf_type_by_id(btf_vmlinux, type_id);
+
+	type_id = btf_find_by_name_kind(btf_vmlinux, "sock", BTF_KIND_STRUCT);
+	if (type_id < 0)
+		return -EINVAL;
+	sk_type = btf_type_by_id(btf_vmlinux, type_id);
+
+	first_arg_type = btf_type_resolve_ptr(btf_vmlinux, args[0].type, NULL);
+	if (first_arg_type == sock_type)
+		*bpf_func = __cgroup_bpf_run_lsm_socket;
+	else if (first_arg_type == sk_type)
+		*bpf_func = __cgroup_bpf_run_lsm_sock;
+	else
+		*bpf_func = __cgroup_bpf_run_lsm_current;
+
+	return 0;
+}
+
+int bpf_lsm_hook_idx(u32 btf_id)
+{
+	return btf_id_set_index(&bpf_lsm_hooks, btf_id);
+}
+
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 			const struct bpf_prog *prog)
 {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0918a39279f6..4199de31f49c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4971,6 +4971,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 
 	if (arg == nr_args) {
 		switch (prog->expected_attach_type) {
+		case BPF_LSM_CGROUP:
 		case BPF_LSM_MAC:
 		case BPF_TRACE_FEXIT:
 			/* When LSM programs are attached to void LSM hooks
@@ -6396,6 +6397,16 @@ static int btf_id_cmp_func(const void *a, const void *b)
 	return *pa - *pb;
 }
 
+int btf_id_set_index(const struct btf_id_set *set, u32 id)
+{
+	const u32 *p;
+
+	p = bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func);
+	if (!p)
+		return -1;
+	return p - set->ids;
+}
+
 bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
 {
 	return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index aaf9e36f2736..ba0e0c7a661d 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -14,6 +14,9 @@
 #include <linux/string.h>
 #include <linux/bpf.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/btf_ids.h>
+#include <linux/bpf_lsm.h>
+#include <linux/bpf_verifier.h>
 #include <net/sock.h>
 #include <net/bpf_sk_storage.h>
 
@@ -88,6 +91,85 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
 	return run_ctx.retval;
 }
 
+unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
+				       const struct bpf_insn *insn)
+{
+	const struct bpf_prog *shim_prog;
+	struct sock *sk;
+	struct cgroup *cgrp;
+	int ret = 0;
+	u64 *regs;
+
+	regs = (u64 *)ctx;
+	sk = (void *)(unsigned long)regs[BPF_REG_0];
+	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	if (likely(cgrp))
+		ret = bpf_prog_run_array_cg(&cgrp->bpf,
+					    shim_prog->aux->cgroup_atype,
+					    ctx, bpf_prog_run, 0);
+	return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
+					 const struct bpf_insn *insn)
+{
+	const struct bpf_prog *shim_prog;
+	struct socket *sock;
+	struct cgroup *cgrp;
+	int ret = 0;
+	u64 *regs;
+
+	regs = (u64 *)ctx;
+	sock = (void *)(unsigned long)regs[BPF_REG_0];
+	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
+	if (likely(cgrp))
+		ret = bpf_prog_run_array_cg(&cgrp->bpf,
+					    shim_prog->aux->cgroup_atype,
+					    ctx, bpf_prog_run, 0);
+	return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
+					  const struct bpf_insn *insn)
+{
+	const struct bpf_prog *shim_prog;
+	struct cgroup *cgrp;
+	int ret = 0;
+
+	if (unlikely(!current))
+		return 0;
+
+	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+	rcu_read_lock();
+	cgrp = task_dfl_cgroup(current);
+	if (likely(cgrp))
+		ret = bpf_prog_run_array_cg(&cgrp->bpf,
+					    shim_prog->aux->cgroup_atype,
+					    ctx, bpf_prog_run, 0);
+	rcu_read_unlock();
+	return ret;
+}
+
+#ifdef CONFIG_BPF_LSM
+static enum cgroup_bpf_attach_type bpf_lsm_attach_type_get(u32 attach_btf_id)
+{
+	return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id);
+}
+#else
+static enum cgroup_bpf_attach_type bpf_lsm_attach_type_get(u32 attach_btf_id)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_BPF_LSM */
+
 void cgroup_bpf_offline(struct cgroup *cgrp)
 {
 	cgroup_get(cgrp);
@@ -155,6 +237,14 @@ static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
 		bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
 }
 
+static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
+{
+	enum bpf_cgroup_storage_type stype;
+
+	for_each_cgroup_storage_type(stype)
+		bpf_cgroup_storage_unlink(storages[stype]);
+}
+
 /* Called when bpf_cgroup_link is auto-detached from dying cgroup.
  * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
  * doesn't free link memory, which will eventually be done by bpf_link's
@@ -166,6 +256,16 @@ static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
 	link->cgroup = NULL;
 }
 
+static void bpf_cgroup_lsm_shim_release(struct bpf_prog *prog,
+					enum cgroup_bpf_attach_type atype)
+{
+	if (prog->aux->cgroup_atype < CGROUP_LSM_START ||
+	    prog->aux->cgroup_atype > CGROUP_LSM_END)
+		return;
+
+	bpf_trampoline_unlink_cgroup_shim(prog);
+}
+
 /**
  * cgroup_bpf_release() - put references of all bpf programs and
  *                        release all cgroup bpf data
@@ -190,10 +290,18 @@ static void cgroup_bpf_release(struct work_struct *work)
 
 		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
 			hlist_del(&pl->node);
-			if (pl->prog)
+			if (pl->prog) {
+				if (atype == BPF_LSM_CGROUP)
+					bpf_cgroup_lsm_shim_release(pl->prog,
+								    atype);
 				bpf_prog_put(pl->prog);
-			if (pl->link)
+			}
+			if (pl->link) {
+				if (atype == BPF_LSM_CGROUP)
+					bpf_cgroup_lsm_shim_release(pl->link->link.prog,
+								    atype);
 				bpf_cgroup_link_auto_detach(pl->link);
+			}
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
 		}
@@ -506,6 +614,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
 	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+	struct bpf_attach_target_info tgt_info = {};
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog_list *pl;
 	struct hlist_head *progs;
@@ -522,9 +631,35 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 		/* replace_prog implies BPF_F_REPLACE, and vice versa */
 		return -EINVAL;
 
-	atype = to_cgroup_bpf_attach_type(type);
-	if (atype < 0)
-		return -EINVAL;
+	if (type == BPF_LSM_CGROUP) {
+		struct bpf_prog *p = prog ? : link->link.prog;
+
+		if (replace_prog) {
+			/* Reusing shim from the original program.
+			 */
+			if (replace_prog->aux->attach_btf_id !=
+			    p->aux->attach_btf_id)
+				return -EINVAL;
+
+			atype = replace_prog->aux->cgroup_atype;
+		} else {
+			err = bpf_check_attach_target(NULL, p, NULL,
+						      p->aux->attach_btf_id,
+						      &tgt_info);
+			if (err)
+				return -EINVAL;
+
+			atype = bpf_lsm_attach_type_get(p->aux->attach_btf_id);
+			if (atype < 0)
+				return atype;
+		}
+
+		p->aux->cgroup_atype = atype;
+	} else {
+		atype = to_cgroup_bpf_attach_type(type);
+		if (atype < 0)
+			return -EINVAL;
+	}
 
 	progs = &cgrp->bpf.progs[atype];
 
@@ -580,13 +715,26 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	if (err)
 		goto cleanup;
 
+	bpf_cgroup_storages_link(new_storage, cgrp, type);
+
+	if (type == BPF_LSM_CGROUP && !old_prog) {
+		struct bpf_prog *p = prog ? : link->link.prog;
+
+		err = bpf_trampoline_link_cgroup_shim(p, &tgt_info);
+		if (err)
+			goto cleanup_trampoline;
+	}
+
 	if (old_prog)
 		bpf_prog_put(old_prog);
 	else
 		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
-	bpf_cgroup_storages_link(new_storage, cgrp, type);
+
 	return 0;
 
+cleanup_trampoline:
+	bpf_cgroup_storages_unlink(new_storage);
+
 cleanup:
 	if (old_prog) {
 		pl->prog = old_prog;
@@ -678,9 +826,13 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
 	struct hlist_head *progs;
 	bool found = false;
 
-	atype = to_cgroup_bpf_attach_type(link->type);
-	if (atype < 0)
-		return -EINVAL;
+	if (link->type == BPF_LSM_CGROUP) {
+		atype = link->link.prog->aux->cgroup_atype;
+	} else {
+		atype = to_cgroup_bpf_attach_type(link->type);
+		if (atype < 0)
+			return -EINVAL;
+	}
 
 	progs = &cgrp->bpf.progs[atype];
 
@@ -696,6 +848,9 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
 	if (!found)
 		return -ENOENT;
 
+	if (link->type == BPF_LSM_CGROUP)
+		new_prog->aux->cgroup_atype = atype;
+
 	old_prog = xchg(&link->link.prog, new_prog);
 	replace_effective_prog(cgrp, atype, link);
 	bpf_prog_put(old_prog);
@@ -779,9 +934,15 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	u32 flags;
 	int err;
 
-	atype = to_cgroup_bpf_attach_type(type);
-	if (atype < 0)
-		return -EINVAL;
+	if (type == BPF_LSM_CGROUP) {
+		struct bpf_prog *p = prog ? : link->link.prog;
+
+		atype = p->aux->cgroup_atype;
+	} else {
+		atype = to_cgroup_bpf_attach_type(type);
+		if (atype < 0)
+			return -EINVAL;
+	}
 
 	progs = &cgrp->bpf.progs[atype];
 	flags = cgrp->bpf.flags[atype];
@@ -803,6 +964,10 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	if (err)
 		goto cleanup;
 
+	if (type == BPF_LSM_CGROUP)
+		bpf_cgroup_lsm_shim_release(prog ? : link->link.prog,
+					    atype);
+
 	/* now can actually delete it from this cgroup list */
 	hlist_del(&pl->node);
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e9621cfa09f2..d94f4951154e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3139,6 +3139,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 		return prog->enforce_expected_attach_type &&
 			prog->expected_attach_type != attach_type ?
 			-EINVAL : 0;
+	case BPF_PROG_TYPE_LSM:
+		if (prog->expected_attach_type != BPF_LSM_CGROUP)
+			return -EINVAL;
+		return 0;
+
 	default:
 		return 0;
 	}
@@ -3194,6 +3199,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 		return BPF_PROG_TYPE_SK_LOOKUP;
 	case BPF_XDP:
 		return BPF_PROG_TYPE_XDP;
+	case BPF_LSM_CGROUP:
+		return BPF_PROG_TYPE_LSM;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -3247,6 +3254,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_LSM:
 		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 		break;
 	default:
@@ -3284,6 +3292,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_LSM:
 		return cgroup_bpf_prog_detach(attr, ptype);
 	default:
 		return -EINVAL;
@@ -4317,6 +4326,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 	case BPF_PROG_TYPE_CGROUP_DEVICE:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+	case BPF_PROG_TYPE_LSM:
 		ret = cgroup_bpf_link_attach(attr, prog);
 		break;
 	case BPF_PROG_TYPE_TRACING:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 0c4fd194e801..c76dfa4ea2d9 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -11,6 +11,8 @@
 #include <linux/rcupdate_wait.h>
 #include <linux/module.h>
 #include <linux/static_call.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf_lsm.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -485,6 +487,149 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 	return err;
 }
 
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+static struct bpf_prog *cgroup_shim_alloc(const struct bpf_prog *prog,
+					  bpf_func_t bpf_func)
+{
+	struct bpf_prog *p;
+
+	p = bpf_prog_alloc(1, 0);
+	if (!p)
+		return NULL;
+
+	p->jited = false;
+	p->bpf_func = bpf_func;
+
+	p->aux->cgroup_atype = prog->aux->cgroup_atype;
+	p->aux->attach_func_proto = prog->aux->attach_func_proto;
+	p->aux->attach_btf_id = prog->aux->attach_btf_id;
+	p->aux->attach_btf = prog->aux->attach_btf;
+	btf_get(p->aux->attach_btf);
+	p->type = BPF_PROG_TYPE_LSM;
+	p->expected_attach_type = BPF_LSM_MAC;
+	bpf_prog_inc(p);
+
+	return p;
+}
+
+static struct bpf_prog *cgroup_shim_find(struct bpf_trampoline *tr,
+					 bpf_func_t bpf_func)
+{
+	const struct bpf_prog_aux *aux;
+	int kind;
+
+	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
+			struct bpf_prog *p = aux->prog;
+
+			if (p->bpf_func == bpf_func)
+				return p;
+		}
+	}
+
+	return NULL;
+}
+
+int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+				    struct bpf_attach_target_info *tgt_info)
+{
+	struct bpf_prog *shim_prog = NULL;
+	struct bpf_trampoline *tr;
+	bpf_func_t bpf_func;
+	u64 key;
+	int err;
+
+	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+					 prog->aux->attach_btf_id);
+
+	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+	if (err)
+		return err;
+
+	tr = bpf_trampoline_get(key, tgt_info);
+	if (!tr)
+		return  -ENOMEM;
+
+	mutex_lock(&tr->mutex);
+
+	shim_prog = cgroup_shim_find(tr, bpf_func);
+	if (shim_prog) {
+		/* Reusing existing shim attached by the other program.
+		 */
+		bpf_prog_inc(shim_prog);
+		mutex_unlock(&tr->mutex);
+		return 0;
+	}
+
+	/* Allocate and install new shim.
+	 */
+
+	shim_prog = cgroup_shim_alloc(prog, bpf_func);
+	if (!shim_prog) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = __bpf_trampoline_link_prog(shim_prog, tr);
+	if (err)
+		goto out;
+
+	mutex_unlock(&tr->mutex);
+
+	return 0;
+out:
+	if (shim_prog)
+		bpf_prog_put(shim_prog);
+
+	mutex_unlock(&tr->mutex);
+	return err;
+}
+
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+	struct bpf_prog *shim_prog;
+	struct bpf_trampoline *tr;
+	bpf_func_t bpf_func;
+	u64 key;
+	int err;
+
+	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+					 prog->aux->attach_btf_id);
+
+	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+	if (err)
+		return;
+
+	tr = bpf_trampoline_lookup(key);
+	if (!tr)
+		return;
+
+	mutex_lock(&tr->mutex);
+
+	shim_prog = cgroup_shim_find(tr, bpf_func);
+	if (shim_prog) {
+		/* We use shim_prog refcnt for tracking whether to
+		 * remove the shim program from the trampoline.
+		 * Trampoline's mutex is held while refcnt is
+		 * added/subtracted so we don't need to care about
+		 * potential races.
+		 */
+
+		if (atomic64_read(&shim_prog->aux->refcnt) == 1)
+			WARN_ON_ONCE(__bpf_trampoline_unlink_prog(shim_prog, tr));
+
+		bpf_prog_put(shim_prog);
+	}
+
+	mutex_unlock(&tr->mutex);
+
+	bpf_trampoline_put(tr); /* bpf_trampoline_lookup */
+
+	if (shim_prog)
+		bpf_trampoline_put(tr);
+}
+#endif
+
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9c1a02b82ecd..cc84954846d7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14197,6 +14197,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 		fallthrough;
 	case BPF_MODIFY_RETURN:
 	case BPF_LSM_MAC:
+	case BPF_LSM_CGROUP:
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
 		if (!btf_type_is_func(t)) {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d14b10b85e51..bbe48a2dd852 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -998,6 +998,7 @@ enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
+	BPF_LSM_CGROUP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* [PATCH bpf-next v5 0/8] bpf: cgroup_sock lsm flavor
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf
  Cc: ast, daniel, andrii, Stanislav Fomichev, kafai, kpsingh, jakub

This series implements new lsm flavor for attaching per-cgroup programs to
existing lsm hooks. The cgroup is taken out of 'current', unless
the first argument of the hook is 'struct socket'. In this case,
the cgroup association is taken out of socket. The attachment
looks like a regular per-cgroup attachment: we add new BPF_LSM_CGROUP
attach type which, together with attach_btf_id, signals per-cgroup lsm.
Behind the scenes, we allocate trampoline shim program and
attach to lsm. This program looks up cgroup from current/socket
and runs cgroup's effective prog array. The rest of the per-cgroup BPF
stays the same: hierarchy, local storage, retval conventions
(return 1 == success).

Current limitations:
* haven't considered sleepable bpf; can be extended later on
* not sure the verifier does the right thing with null checks;
  see latest selftest for details
* total of 10 (global) per-cgroup LSM attach points; this bloats
  bpf_cgroup a bit

Cc: ast@kernel.org
Cc: daniel@iogearbox.net
Cc: kafai@fb.com
Cc: kpsingh@kernel.org
Cc: jakub@cloudflare.com

v5:
- __cgroup_bpf_run_lsm_socket remove NULL sock/sk checks (Martin KaFai Lau)
- __cgroup_bpf_run_lsm_{socket,current} s/prog/shim_prog/ (Martin)
- make sure bpf_lsm_find_cgroup_shim works for hooks without args (Martin)
- __cgroup_bpf_attach make sure attach_btf_id is the same when replacing (Martin)
- call bpf_cgroup_lsm_shim_release only for LSM_CGROUP (Martin)
- drop BPF_LSM_CGROUP from bpf_attach_type_to_tramp (Martin)
- drop jited check from cgroup_shim_find (Martin)
- new patch to convert cgroup_bpf to hlist_node (Jakub Sitnicki)
- new shim flavor for 'struct sock' + list of exceptions (Martin)

v4:
- fix build when jit is on but syscall is off

v3:
- add BPF_LSM_CGROUP to bpftool
- use simple int instead of refcnt_t (to avoid use-after-free
  false positive)

v2:
- addressed build bot failures

Stanislav Fomichev (8):
  bpf: add bpf_func_t and trampoline helpers
  bpf: convert cgroup_bpf.progs to hlist
  bpf: per-cgroup lsm flavor
  bpf: minimize number of allocated lsm slots per program
  bpf: allow writing to a subset of sock fields from lsm progtype
  libbpf: add lsm_cgoup_sock type
  selftests/bpf: lsm_cgroup functional test
  selftests/bpf: verify lsm_cgroup struct sock access

 include/linux/bpf-cgroup-defs.h               |  12 +-
 include/linux/bpf-cgroup.h                    |   9 +-
 include/linux/bpf.h                           |  24 +-
 include/linux/bpf_lsm.h                       |   8 +
 include/uapi/linux/bpf.h                      |   1 +
 kernel/bpf/bpf_lsm.c                          | 116 ++++++
 kernel/bpf/btf.c                              |  11 +
 kernel/bpf/cgroup.c                           | 361 +++++++++++++++---
 kernel/bpf/syscall.c                          |  10 +
 kernel/bpf/trampoline.c                       | 206 ++++++++--
 kernel/bpf/verifier.c                         |   4 +-
 tools/bpf/bpftool/common.c                    |   1 +
 tools/include/uapi/linux/bpf.h                |   1 +
 tools/lib/bpf/libbpf.c                        |   2 +
 .../selftests/bpf/prog_tests/lsm_cgroup.c     | 213 +++++++++++
 .../testing/selftests/bpf/progs/lsm_cgroup.c  | 126 ++++++
 tools/testing/selftests/bpf/test_verifier.c   |  54 ++-
 .../selftests/bpf/verifier/lsm_cgroup.c       |  34 ++
 18 files changed, 1102 insertions(+), 91 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
 create mode 100644 tools/testing/selftests/bpf/progs/lsm_cgroup.c
 create mode 100644 tools/testing/selftests/bpf/verifier/lsm_cgroup.c

-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply

* [PATCH bpf-next v5 1/8] bpf: add bpf_func_t and trampoline helpers
From: Stanislav Fomichev @ 2022-04-19 19:00 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev
In-Reply-To: <20220419190053.3395240-1-sdf@google.com>

I'll be adding lsm cgroup specific helpers that grab
trampoline mutex.

No functional changes.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf.h     | 11 ++++---
 kernel/bpf/trampoline.c | 63 +++++++++++++++++++++++------------------
 2 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7bf441563ffc..ab2ba5f4bc9c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -52,6 +52,8 @@ typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
 					struct bpf_iter_aux_info *aux);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+typedef unsigned int (*bpf_func_t)(const void *,
+				   const struct bpf_insn *);
 struct bpf_iter_seq_info {
 	const struct seq_operations *seq_ops;
 	bpf_iter_init_seq_priv_t init_seq_private;
@@ -798,8 +800,7 @@ struct bpf_dispatcher {
 static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
 	const void *ctx,
 	const struct bpf_insn *insnsi,
-	unsigned int (*bpf_func)(const void *,
-				 const struct bpf_insn *))
+	bpf_func_t bpf_func)
 {
 	return bpf_func(ctx, insnsi);
 }
@@ -827,8 +828,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 	noinline __nocfi unsigned int bpf_dispatcher_##name##_func(	\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
-		unsigned int (*bpf_func)(const void *,			\
-					 const struct bpf_insn *))	\
+		bpf_func_t bpf_func)					\
 	{								\
 		return bpf_func(ctx, insnsi);				\
 	}								\
@@ -839,8 +839,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 	unsigned int bpf_dispatcher_##name##_func(			\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
-		unsigned int (*bpf_func)(const void *,			\
-					 const struct bpf_insn *));	\
+		bpf_func_t bpf_func);					\
 	extern struct bpf_dispatcher bpf_dispatcher_##name;
 #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
 #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index ada97751ae1b..0c4fd194e801 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -407,42 +407,34 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
 	}
 }
 
-int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+static int __bpf_trampoline_link_prog(struct bpf_prog *prog,
+				      struct bpf_trampoline *tr)
 {
 	enum bpf_tramp_prog_type kind;
 	int err = 0;
 	int cnt;
 
 	kind = bpf_attach_type_to_tramp(prog);
-	mutex_lock(&tr->mutex);
-	if (tr->extension_prog) {
+	if (tr->extension_prog)
 		/* cannot attach fentry/fexit if extension prog is attached.
 		 * cannot overwrite extension prog either.
 		 */
-		err = -EBUSY;
-		goto out;
-	}
+		return -EBUSY;
+
 	cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
 	if (kind == BPF_TRAMP_REPLACE) {
 		/* Cannot attach extension if fentry/fexit are in use. */
-		if (cnt) {
-			err = -EBUSY;
-			goto out;
-		}
+		if (cnt)
+			return -EBUSY;
 		tr->extension_prog = prog;
-		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
-					 prog->bpf_func);
-		goto out;
-	}
-	if (cnt >= BPF_MAX_TRAMP_PROGS) {
-		err = -E2BIG;
-		goto out;
+		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
+					  prog->bpf_func);
 	}
-	if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
+	if (cnt >= BPF_MAX_TRAMP_PROGS)
+		return -E2BIG;
+	if (!hlist_unhashed(&prog->aux->tramp_hlist))
 		/* prog already linked */
-		err = -EBUSY;
-		goto out;
-	}
+		return -EBUSY;
 	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
 	tr->progs_cnt[kind]++;
 	err = bpf_trampoline_update(tr);
@@ -450,30 +442,45 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 		hlist_del_init(&prog->aux->tramp_hlist);
 		tr->progs_cnt[kind]--;
 	}
-out:
+	return err;
+}
+
+int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+{
+	int err;
+
+	mutex_lock(&tr->mutex);
+	err = __bpf_trampoline_link_prog(prog, tr);
 	mutex_unlock(&tr->mutex);
 	return err;
 }
 
-/* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+static int __bpf_trampoline_unlink_prog(struct bpf_prog *prog,
+					struct bpf_trampoline *tr)
 {
 	enum bpf_tramp_prog_type kind;
 	int err;
 
 	kind = bpf_attach_type_to_tramp(prog);
-	mutex_lock(&tr->mutex);
 	if (kind == BPF_TRAMP_REPLACE) {
 		WARN_ON_ONCE(!tr->extension_prog);
 		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
 					 tr->extension_prog->bpf_func, NULL);
 		tr->extension_prog = NULL;
-		goto out;
+		return err;
 	}
 	hlist_del_init(&prog->aux->tramp_hlist);
 	tr->progs_cnt[kind]--;
-	err = bpf_trampoline_update(tr);
-out:
+	return bpf_trampoline_update(tr);
+}
+
+/* bpf_trampoline_unlink_prog() should never fail. */
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
+{
+	int err;
+
+	mutex_lock(&tr->mutex);
+	err = __bpf_trampoline_unlink_prog(prog, tr);
 	mutex_unlock(&tr->mutex);
 	return err;
 }
-- 
2.36.0.rc0.470.gd361397f0d-goog


^ permalink raw reply related

* Re: [PATCH RFC 00/15] Prototype implementation of RPC-with-TLS
From: Chuck Lever III @ 2022-04-19 18:53 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-cifs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvme@lists.infradead.org, netdev@vger.kernel.org,
	simo@redhat.com, ak@tempesta-tech.com, Linux NFS Mailing List,
	borisp@nvidia.com
In-Reply-To: <8597368113bcc38e605e9bbd11916a0ac8b7852d.camel@hammerspace.com>



> On Apr 19, 2022, at 2:48 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Tue, 2022-04-19 at 16:00 +0000, Chuck Lever III wrote:
>> Hi Trond-
>> 
>> Thanks for the early review!
>> 
>> 
>>> On Apr 18, 2022, at 11:31 PM, Trond Myklebust
>>> <trondmy@hammerspace.com> wrote:
>>> 
>>> On Mon, 2022-04-18 at 12:51 -0400, Chuck Lever wrote:
>>>> This series implements RPC-with-TLS in the Linux kernel:
>>>> 
>>>> https://datatracker.ietf.org/doc/draft-ietf-nfsv4-rpc-tls/
>>>> 
>>>> This prototype is based on the previously posted mechanism for
>>>> providing a TLS handshake facility to in-kernel TLS consumers.
>>>> 
>>>> For the purpose of demonstration, the Linux NFS client is
>>>> modified
>>>> to add a new mount option: xprtsec = [ none|auto|tls ] . Updates
>>>> to the nfs(5) man page are being developed separately.
>>>> 
>>> 
>>> I'm fine with having a userspace level 'auto' option if that's a
>>> requirement for someone, however I see no reason why we would need
>>> to
>>> implement that in the kernel.
>>> 
>>> Let's just have a robust mechanism for immediately returning an
>>> error
>>> if the user supplies a 'tls' option on the client that the server
>>> doesn't support, and let the negotiation policy be worked out in
>>> userspace by the 'mount.nfs' utility. Otherwise we'll rathole into
>>> another twisty maze of policy decisions that generate kernel level
>>> CVEs
>>> instead of a set of more gentle fixes.
>> 
>> Noted.
>> 
>> However, one of Rick's preferences is that "auto" not use
>> transport-layer security unless the server requires it via
>> a SECINFO/MNT pseudoflavor, which only the kernel would be
>> privy to. I'll have to think about whether we want to make
>> that happen.
> 
> That sounds like a terrible protocol hack. TLS is not an authentication
> flavour but a transport level protection.

Fair enough. We've been discussing this on nfsv4@ietf.org, and
it's certainly not written in stone yet.

I invite you to join the conversation and share your concerns
(and possibly any alternative solutions you might have).


> That said, I don't see how this invalidates my argument. When told to
> use TLS, the kernel client can still return a mount time error if the
> server fails to advertise support through this pseudoflavour and leave
> it up to userspace to decide how to deal with that.

Sure. I'm just saying I haven't thought it through yet. I don't
think it will be a problem to move more (or all) of the transport
security policy to mount.nfs.


--
Chuck Lever




^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox