Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v3 2/4] sctp: Add ip option support
From: Marcelo Ricardo Leitner @ 2017-12-22 13:05 UTC (permalink / raw)
  To: selinux, netdev, linux-sctp, linux-security-module
  Cc: paul, vyasevich, nhorman, sds, eparis, marcelo.leitner,
	richard_c_haines
In-Reply-To: <cover.1513940757.git.marcelo.leitner@gmail.com>

From: Richard Haines <richard_c_haines@btinternet.com>

Add ip option support to allow LSM security modules to utilise CIPSO/IPv4
and CALIPSO/IPv6 services.

Signed-off-by: Richard Haines <richard_c_haines@btinternet.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 include/net/sctp/sctp.h    |  4 +++-
 include/net/sctp/structs.h |  2 ++
 net/sctp/chunk.c           | 13 ++++++++-----
 net/sctp/ipv6.c            | 42 +++++++++++++++++++++++++++++++++++-------
 net/sctp/output.c          |  5 ++++-
 net/sctp/protocol.c        | 36 ++++++++++++++++++++++++++++++++++++
 net/sctp/socket.c          |  9 +++++++--
 7 files changed, 95 insertions(+), 16 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d7d8cba014697602832fe20e414b632104c9f239..1b2f40a3a87875c10647fc768372cabea61fe3b8 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -436,9 +436,11 @@ static inline int sctp_list_single_entry(struct list_head *head)
 static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
 {
 	struct sctp_sock *sp = sctp_sk(asoc->base.sk);
+	struct sctp_af *af = sp->pf->af;
 	int frag = pmtu;
 
-	frag -= sp->pf->af->net_header_len;
+	frag -= af->ip_options_len(asoc->base.sk);
+	frag -= af->net_header_len;
 	frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk);
 
 	if (asoc->user_frag)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0477945de1a3cf5c27348e99d9a30e02c491d1de..9942ed5159448c924f0f018abeea9bab93fc3437 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -461,6 +461,7 @@ struct sctp_af {
 	void		(*ecn_capable)(struct sock *sk);
 	__u16		net_header_len;
 	int		sockaddr_len;
+	int		(*ip_options_len)(struct sock *sk);
 	sa_family_t	sa_family;
 	struct list_head list;
 };
@@ -485,6 +486,7 @@ struct sctp_pf {
 	int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
 	void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
 	void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
+	void (*copy_ip_options)(struct sock *sk, struct sock *newsk);
 	struct sctp_af *af;
 };
 
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3afac275ee82dbec825dd71378dffe69a53718a7..9d130f447f636c034e4232a9e6426ffce07007ca 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -153,7 +153,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu
 	chunk->msg = msg;
 }
 
-
 /* A data chunk can have a maximum payload of (2^16 - 20).  Break
  * down any such message into smaller chunks.  Opportunistically, fragment
  * the chunks down to the current MTU constraints.  We may get refragmented
@@ -170,6 +169,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	struct list_head *pos, *temp;
 	struct sctp_chunk *chunk;
 	struct sctp_datamsg *msg;
+	struct sctp_sock *sp;
+	struct sctp_af *af;
 	int err;
 
 	msg = sctp_datamsg_new(GFP_KERNEL);
@@ -188,9 +189,12 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	/* This is the biggest possible DATA chunk that can fit into
 	 * the packet
 	 */
-	max_data = asoc->pathmtu -
-		   sctp_sk(asoc->base.sk)->pf->af->net_header_len -
-		   sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+	sp = sctp_sk(asoc->base.sk);
+	af = sp->pf->af;
+	max_data = asoc->pathmtu - af->net_header_len -
+		   sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk) -
+		   af->ip_options_len(asoc->base.sk);
+
 	max_data = SCTP_TRUNC4(max_data);
 
 	/* If the the peer requested that we authenticate DATA chunks
@@ -210,7 +214,6 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 
 	/* Set first_len and then account for possible bundles on first frag */
 	first_len = max_data;
-
 	/* Check to see if we have a pending SACK and try to let it be bundled
 	 * with this message.  Do this if we don't have any data queued already.
 	 * To check that, look at out_qlen and retransmit list.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 51c4887695909d171285b98ce1be779a3adedbab..3baede99a06d17b3f7a0826df4874c9c5af77617 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -423,6 +423,38 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 	rcu_read_unlock();
 }
 
+/* Copy over any ip options */
+static void sctp_v6_copy_ip_options(struct sock *sk, struct sock *newsk)
+{
+	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt;
+
+	newnp = inet6_sk(newsk);
+
+	rcu_read_lock();
+	opt = rcu_dereference(np->opt);
+	if (opt)
+		opt = ipv6_dup_options(newsk, opt);
+	RCU_INIT_POINTER(newnp->opt, opt);
+	rcu_read_unlock();
+}
+
+/* Account for the IP options */
+static int sctp_v6_ip_options_len(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt;
+	int len = 0;
+
+	rcu_read_lock();
+	opt = rcu_dereference(np->opt);
+	if (opt)
+		len = opt->opt_flen + opt->opt_nflen;
+
+	rcu_read_unlock();
+	return len;
+}
+
 /* Initialize a sockaddr_storage from in incoming skb. */
 static void sctp_v6_from_skb(union sctp_addr *addr, struct sk_buff *skb,
 			     int is_saddr)
@@ -662,7 +694,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 	struct sock *newsk;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct sctp6_sock *newsctp6sk;
-	struct ipv6_txoptions *opt;
 
 	newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
 	if (!newsk)
@@ -685,12 +716,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
 	newnp->ipv6_ac_list = NULL;
 	newnp->ipv6_fl_list = NULL;
 
-	rcu_read_lock();
-	opt = rcu_dereference(np->opt);
-	if (opt)
-		opt = ipv6_dup_options(newsk, opt);
-	RCU_INIT_POINTER(newnp->opt, opt);
-	rcu_read_unlock();
+	sctp_v6_copy_ip_options(sk, newsk);
 
 	/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
 	 * and getpeername().
@@ -1033,6 +1059,7 @@ static struct sctp_af sctp_af_inet6 = {
 	.ecn_capable	   = sctp_v6_ecn_capable,
 	.net_header_len	   = sizeof(struct ipv6hdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
+	.ip_options_len	   = sctp_v6_ip_options_len,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_ipv6_setsockopt,
 	.compat_getsockopt = compat_ipv6_getsockopt,
@@ -1051,6 +1078,7 @@ static struct sctp_pf sctp_pf_inet6 = {
 	.addr_to_user  = sctp_v6_addr_to_user,
 	.to_sk_saddr   = sctp_v6_to_sk_saddr,
 	.to_sk_daddr   = sctp_v6_to_sk_daddr,
+	.copy_ip_options = sctp_v6_copy_ip_options,
 	.af            = &sctp_af_inet6,
 };
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 4a865cd06d76cd5b2aa417de618da3203f7b53e4..2b39c704e1e5b99597a2cfdd35f75c78288d943b 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -151,7 +151,10 @@ void sctp_packet_init(struct sctp_packet *packet,
 	INIT_LIST_HEAD(&packet->chunk_list);
 	if (asoc) {
 		struct sctp_sock *sp = sctp_sk(asoc->base.sk);
-		overhead = sp->pf->af->net_header_len;
+		struct sctp_af *af = sp->pf->af;
+
+		overhead = af->net_header_len +
+			   af->ip_options_len(asoc->base.sk);
 	} else {
 		overhead = sizeof(struct ipv6hdr);
 	}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fcd80feb293f61bd734988f037aa8f210880fb1d..cde051a2ed84b4085447b30af5809e2507a69277 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -237,6 +237,38 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
 	return error;
 }
 
+/* Copy over any ip options */
+static void sctp_v4_copy_ip_options(struct sock *sk, struct sock *newsk)
+{
+	struct inet_sock *newinet, *inet = inet_sk(sk);
+	struct ip_options_rcu *inet_opt, *newopt = NULL;
+
+	newinet = inet_sk(newsk);
+
+	rcu_read_lock();
+	inet_opt = rcu_dereference(inet->inet_opt);
+	if (inet_opt) {
+		newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+				      inet_opt->opt.optlen, GFP_ATOMIC);
+		if (newopt)
+			memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+			       inet_opt->opt.optlen);
+	}
+	RCU_INIT_POINTER(newinet->inet_opt, newopt);
+	rcu_read_unlock();
+}
+
+/* Account for the IP options */
+static int sctp_v4_ip_options_len(struct sock *sk)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	if (inet->inet_opt)
+		return inet->inet_opt->opt.optlen;
+	else
+		return 0;
+}
+
 /* Initialize a sctp_addr from in incoming skb.  */
 static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
 			     int is_saddr)
@@ -590,6 +622,8 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
 	sctp_copy_sock(newsk, sk, asoc);
 	sock_reset_flag(newsk, SOCK_ZAPPED);
 
+	sctp_v4_copy_ip_options(sk, newsk);
+
 	newinet = inet_sk(newsk);
 
 	newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
@@ -1008,6 +1042,7 @@ static struct sctp_pf sctp_pf_inet = {
 	.addr_to_user  = sctp_v4_addr_to_user,
 	.to_sk_saddr   = sctp_v4_to_sk_saddr,
 	.to_sk_daddr   = sctp_v4_to_sk_daddr,
+	.copy_ip_options = sctp_v4_copy_ip_options,
 	.af            = &sctp_af_inet
 };
 
@@ -1092,6 +1127,7 @@ static struct sctp_af sctp_af_inet = {
 	.ecn_capable	   = sctp_v4_ecn_capable,
 	.net_header_len	   = sizeof(struct iphdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in),
+	.ip_options_len	   = sctp_v4_ip_options_len,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_ip_setsockopt,
 	.compat_getsockopt = compat_ip_getsockopt,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d4730ada7f3233367be7a0e3bb10e286a25602c8..274082cf49a8380fda06866c631cdc22dc4f157b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3125,8 +3125,11 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 
 	if (asoc) {
 		if (val == 0) {
+			struct sctp_af *af = sp->pf->af;
+
 			val = asoc->pathmtu;
-			val -= sp->pf->af->net_header_len;
+			val -= af->ip_options_len(asoc->base.sk);
+			val -= af->net_header_len;
 			val -= sizeof(struct sctphdr) +
 					sizeof(struct sctp_data_chunk);
 		}
@@ -4929,9 +4932,11 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	sctp_copy_sock(sock->sk, sk, asoc);
 
 	/* Make peeled-off sockets more like 1-1 accepted sockets.
-	 * Set the daddr and initialize id to something more random
+	 * Set the daddr and initialize id to something more random and also
+	 * copy over any ip options.
 	 */
 	sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+	sp->pf->copy_ip_options(sk, sock->sk);
 
 	/* Populate the fields of the newsk from the oldsk and migrate the
 	 * asoc to the newsk.
-- 
2.14.3

^ permalink raw reply related

* [PATCH v3 1/4] security: Add support for SCTP security hooks
From: Marcelo Ricardo Leitner @ 2017-12-22 13:05 UTC (permalink / raw)
  To: selinux, netdev, linux-sctp, linux-security-module
  Cc: paul, vyasevich, nhorman, sds, eparis, marcelo.leitner,
	richard_c_haines
In-Reply-To: <cover.1513940757.git.marcelo.leitner@gmail.com>

From: Richard Haines <richard_c_haines@btinternet.com>

The SCTP security hooks are explained in:
Documentation/security/LSM-sctp.rst

Signed-off-by: Richard Haines <richard_c_haines@btinternet.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 Documentation/security/LSM-sctp.rst | 194 ++++++++++++++++++++++++++++++++++++
 include/linux/lsm_hooks.h           |  35 +++++++
 include/linux/security.h            |  25 +++++
 security/security.c                 |  22 ++++
 4 files changed, 276 insertions(+)
 create mode 100644 Documentation/security/LSM-sctp.rst

diff --git a/Documentation/security/LSM-sctp.rst b/Documentation/security/LSM-sctp.rst
new file mode 100644
index 0000000000000000000000000000000000000000..61373672ce9f63bbd52d953500f44cdf3427c3f0
--- /dev/null
+++ b/Documentation/security/LSM-sctp.rst
@@ -0,0 +1,194 @@
+SCTP LSM Support
+================
+
+For security module support, three sctp specific hooks have been implemented::
+
+    security_sctp_assoc_request()
+    security_sctp_bind_connect()
+    security_sctp_sk_clone()
+
+Also the following security hook has been utilised::
+
+    security_inet_conn_established()
+
+The usage of these hooks is described below, with the SELinux implementation
+described in ``Documentation/security/SELinux-sctp.rst``
+
+
+security_sctp_assoc_request()
+-----------------------------
+This new hook passes the ``@ep`` and ``@chunk->skb`` (the association INIT
+packet) to the security module. Returns 0 on success, error on failure.
+::
+
+    @ep - pointer to sctp endpoint structure.
+    @skb - pointer to skbuff of association packet.
+
+The security module performs the following operations:
+     IF this is the first association on ``@ep->base.sk``, then set the peer
+     sid to that in ``@skb``. This will ensure there is only one peer sid
+     assigned to ``@ep->base.sk`` that may support multiple associations.
+
+     ELSE validate the ``@ep->base.sk peer_sid`` against the ``@skb peer sid``
+     to determine whether the association should be allowed or denied.
+
+     Set the sctp ``@ep sid`` to socket's sid (from ``ep->base.sk``) with
+     MLS portion taken from ``@skb peer sid``. This will be used by SCTP
+     TCP style sockets and peeled off connections as they cause a new socket
+     to be generated.
+
+     If IP security options are configured (CIPSO/CALIPSO), then the ip
+     options are set on the socket.
+
+
+security_sctp_bind_connect()
+-----------------------------
+This new hook passes one or more ipv4/ipv6 addresses to the security module
+for validation based on the ``@optname`` that will result in either a bind or
+connect service as shown in the permission check tables below.
+Returns 0 on success, error on failure.
+::
+
+    @sk      - Pointer to sock structure.
+    @optname - Name of the option to validate.
+    @address - One or more ipv4 / ipv6 addresses.
+    @addrlen - The total length of address(s). This is calculated on each
+               ipv4 or ipv6 address using sizeof(struct sockaddr_in) or
+               sizeof(struct sockaddr_in6).
+
+  ------------------------------------------------------------------
+  |                     BIND Type Checks                           |
+  |       @optname             |         @address contains         |
+  |----------------------------|-----------------------------------|
+  | SCTP_SOCKOPT_BINDX_ADD     | One or more ipv4 / ipv6 addresses |
+  | SCTP_PRIMARY_ADDR          | Single ipv4 or ipv6 address       |
+  | SCTP_SET_PEER_PRIMARY_ADDR | Single ipv4 or ipv6 address       |
+  ------------------------------------------------------------------
+
+  ------------------------------------------------------------------
+  |                   CONNECT Type Checks                          |
+  |       @optname             |         @address contains         |
+  |----------------------------|-----------------------------------|
+  | SCTP_SOCKOPT_CONNECTX      | One or more ipv4 / ipv6 addresses |
+  | SCTP_PARAM_ADD_IP          | One or more ipv4 / ipv6 addresses |
+  | SCTP_SENDMSG_CONNECT       | Single ipv4 or ipv6 address       |
+  | SCTP_PARAM_SET_PRIMARY     | Single ipv4 or ipv6 address       |
+  ------------------------------------------------------------------
+
+A summary of the ``@optname`` entries is as follows::
+
+    SCTP_SOCKOPT_BINDX_ADD - Allows additional bind addresses to be
+                             associated after (optionally) calling
+                             bind(3).
+                             sctp_bindx(3) adds a set of bind
+                             addresses on a socket.
+
+    SCTP_SOCKOPT_CONNECTX - Allows the allocation of multiple
+                            addresses for reaching a peer
+                            (multi-homed).
+                            sctp_connectx(3) initiates a connection
+                            on an SCTP socket using multiple
+                            destination addresses.
+
+    SCTP_SENDMSG_CONNECT  - Initiate a connection that is generated by a
+                            sendmsg(2) or sctp_sendmsg(3) on a new association.
+
+    SCTP_PRIMARY_ADDR     - Set local primary address.
+
+    SCTP_SET_PEER_PRIMARY_ADDR - Request peer sets address as
+                                 association primary.
+
+    SCTP_PARAM_ADD_IP          - These are used when Dynamic Address
+    SCTP_PARAM_SET_PRIMARY     - Reconfiguration is enabled as explained below.
+
+
+To support Dynamic Address Reconfiguration the following parameters must be
+enabled on both endpoints (or use the appropriate **setsockopt**\(2))::
+
+    /proc/sys/net/sctp/addip_enable
+    /proc/sys/net/sctp/addip_noauth_enable
+
+then the following *_PARAM_*'s are sent to the peer in an
+ASCONF chunk when the corresponding ``@optname``'s are present::
+
+          @optname                      ASCONF Parameter
+         ----------                    ------------------
+    SCTP_SOCKOPT_BINDX_ADD     ->   SCTP_PARAM_ADD_IP
+    SCTP_SET_PEER_PRIMARY_ADDR ->   SCTP_PARAM_SET_PRIMARY
+
+
+security_sctp_sk_clone()
+-------------------------
+This new hook is called whenever a new socket is created by **accept**\(2)
+(i.e. a TCP style socket) or when a socket is 'peeled off' e.g. userspace
+calls **sctp_peeloff**\(3). ``security_sctp_sk_clone()`` will set the new
+socket's sid and peer sid to that contained in the ``@ep sid`` and
+``@ep peer sid`` respectively.
+::
+
+    @ep - pointer to old sctp endpoint structure.
+    @sk - pointer to old sock structure.
+    @newsk - pointer to new sock structure.
+
+
+security_inet_conn_established()
+---------------------------------
+This hook has been added to the receive COOKIE ACK processing where it sets
+the connection's peer sid to that in ``@skb``::
+
+    @sk  - pointer to sock structure.
+    @skb - pointer to skbuff of the COOKIE ACK packet.
+
+
+Security Hooks used for Association Establishment
+=================================================
+The following diagram shows the use of ``security_sctp_bind_connect()``,
+``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when
+establishing an association.
+::
+
+      SCTP endpoint "A"                                SCTP endpoint "Z"
+      =================                                =================
+    sctp_sf_do_prm_asoc()
+ Association setup can be initiated
+ by a connect(2), sctp_connectx(3),
+ sendmsg(2) or sctp_sendmsg(3).
+ These will result in a call to
+ security_sctp_bind_connect() to
+ initiate an association to
+ SCTP peer endpoint "Z".
+         INIT --------------------------------------------->
+                                                   sctp_sf_do_5_1B_init()
+                                                 Respond to an INIT chunk.
+                                             SCTP peer endpoint "A" is
+                                             asking for an association. Call
+                                             security_sctp_assoc_request()
+                                             to set the peer label if first
+                                             association.
+                                             If not first association, check
+                                             whether allowed, IF so send:
+          <----------------------------------------------- INIT ACK
+          |                                  ELSE audit event and silently
+          |                                       discard the packet.
+          |
+    COOKIE ECHO ------------------------------------------>
+                                                          |
+                                                          |
+                                                          |
+          <------------------------------------------- COOKIE ACK
+          |                                               |
+    sctp_sf_do_5_1E_ca                                    |
+ Call security_inet_conn_established()                    |
+ to set the correct peer sid.                             |
+          |                                               |
+          |                               If SCTP_SOCKET_TCP or peeled off
+          |                               socket security_sctp_sk_clone() is
+          |                               called to clone the new socket.
+          |                                               |
+      ESTABLISHED                                    ESTABLISHED
+          |                                               |
+    ------------------------------------------------------------------
+    |                     Association Established                    |
+    ------------------------------------------------------------------
+
+
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index c9258124e41757187cdb8b2f83c5901966345902..92ee9c6c604212ce38590bd2e5fcba55617b9c04 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -906,6 +906,32 @@
  *	associated with the TUN device's security structure.
  *	@security pointer to the TUN devices's security structure.
  *
+ * Security hooks for SCTP
+ *
+ * @sctp_assoc_request:
+ *	If first association, then set the peer sid to that in @skb. If
+ *	@sctp_cid is from an INIT chunk, then set the sctp endpoint sid to
+ *	socket's sid (ep->base.sk) with MLS portion taken from peer sid.
+ *	@ep pointer to sctp endpoint structure.
+ *	@skb pointer to skbuff of association packet.
+ *	Return 0 on success, error on failure.
+ * @sctp_bind_connect:
+ *	Validate permissions required for each address associated with sock
+ *	@sk. Depending on @optname, the addresses will be treated as either
+ *	for a connect or bind service. The @addrlen is calculated on each
+ *	ipv4 and ipv6 address using sizeof(struct sockaddr_in) or
+ *	sizeof(struct sockaddr_in6).
+ *	@sk pointer to sock structure.
+ *	@optname name of the option to validate.
+ *	@address list containing one or more ipv4/ipv6 addresses.
+ *	@addrlen total length of address(s).
+ *	Return 0 on success, error on failure.
+ * @sctp_sk_clone:
+ *	Sets the new child socket's sid to the old endpoint sid.
+ *	@ep pointer to old sctp endpoint structure.
+ *	@sk pointer to old sock structure.
+ *	@newsk pointer to new sock structure.
+ *
  * Security hooks for Infiniband
  *
  * @ib_pkey_access:
@@ -1631,6 +1657,12 @@ union security_list_options {
 	int (*tun_dev_attach_queue)(void *security);
 	int (*tun_dev_attach)(struct sock *sk, void *security);
 	int (*tun_dev_open)(void *security);
+	int (*sctp_assoc_request)(struct sctp_endpoint *ep,
+				  struct sk_buff *skb);
+	int (*sctp_bind_connect)(struct sock *sk, int optname,
+				 struct sockaddr *address, int addrlen);
+	void (*sctp_sk_clone)(struct sctp_endpoint *ep, struct sock *sk,
+			      struct sock *newsk);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
@@ -1869,6 +1901,9 @@ struct security_hook_heads {
 	struct list_head tun_dev_attach_queue;
 	struct list_head tun_dev_attach;
 	struct list_head tun_dev_open;
+	struct list_head sctp_assoc_request;
+	struct list_head sctp_bind_connect;
+	struct list_head sctp_sk_clone;
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_INFINIBAND
 	struct list_head ib_pkey_access;
diff --git a/include/linux/security.h b/include/linux/security.h
index ce6265960d6c430a90e1ad3c3749d0a438ecaca9..51f6cc2417f278674dfbd434587af805cb0c03d3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -115,6 +115,7 @@ struct xfrm_policy;
 struct xfrm_state;
 struct xfrm_user_sec_ctx;
 struct seq_file;
+struct sctp_endpoint;
 
 #ifdef CONFIG_MMU
 extern unsigned long mmap_min_addr;
@@ -1229,6 +1230,11 @@ int security_tun_dev_create(void);
 int security_tun_dev_attach_queue(void *security);
 int security_tun_dev_attach(struct sock *sk, void *security);
 int security_tun_dev_open(void *security);
+int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb);
+int security_sctp_bind_connect(struct sock *sk, int optname,
+			       struct sockaddr *address, int addrlen);
+void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk,
+			    struct sock *newsk);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -1421,6 +1427,25 @@ static inline int security_tun_dev_open(void *security)
 {
 	return 0;
 }
+
+static inline int security_sctp_assoc_request(struct sctp_endpoint *ep,
+					      struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline int security_sctp_bind_connect(struct sock *sk, int optname,
+					     struct sockaddr *address,
+					     int addrlen)
+{
+	return 0;
+}
+
+static inline void security_sctp_sk_clone(struct sctp_endpoint *ep,
+					  struct sock *sk,
+					  struct sock *newsk)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/security/security.c b/security/security.c
index 4bf0f571b4ef94df1d3c44b7fed6b7b651c1924f..1400678f6b72b36123f2fa2b909f35d257a62cd4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1472,6 +1472,7 @@ void security_inet_conn_established(struct sock *sk,
 {
 	call_void_hook(inet_conn_established, sk, skb);
 }
+EXPORT_SYMBOL(security_inet_conn_established);
 
 int security_secmark_relabel_packet(u32 secid)
 {
@@ -1527,6 +1528,27 @@ int security_tun_dev_open(void *security)
 }
 EXPORT_SYMBOL(security_tun_dev_open);
 
+int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb)
+{
+	return call_int_hook(sctp_assoc_request, 0, ep, skb);
+}
+EXPORT_SYMBOL(security_sctp_assoc_request);
+
+int security_sctp_bind_connect(struct sock *sk, int optname,
+			       struct sockaddr *address, int addrlen)
+{
+	return call_int_hook(sctp_bind_connect, 0, sk, optname,
+			     address, addrlen);
+}
+EXPORT_SYMBOL(security_sctp_bind_connect);
+
+void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk,
+			    struct sock *newsk)
+{
+	call_void_hook(sctp_sk_clone, ep, sk, newsk);
+}
+EXPORT_SYMBOL(security_sctp_sk_clone);
+
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
-- 
2.14.3

^ permalink raw reply related

* [PATCH v3 0/4] Add SELinux SCTP protocol support
From: Marcelo Ricardo Leitner @ 2017-12-22 13:05 UTC (permalink / raw)
  To: selinux, netdev, linux-sctp, linux-security-module
  Cc: paul, vyasevich, nhorman, sds, eparis, marcelo.leitner,
	richard_c_haines

Posting on behalf of Richard Haines. Patchset is based on
selinux-tree/next. Some small conflicts are expected when merging with
current net-next due to I-Data patches, including one at
include/uapi/linux/sctp.h, on which the fix is to update the define
SCTP_SENDMSG_CONNECT to a higher number.
Below is the original cover letter from Richard, and the changes from
v2->v3.

The kernel patches have been built on Fedora 27 with kernel 4.13.12 plus
the following userspace patches to enable testing:

1) Updates to libsepol 2.7 to support the sctp portcon statement.
   The patch is available from:
         http://arctic.selinuxproject.org/~rhaines/selinux-sctp/
         selinux-Add-support-for-the-SCTP-portcon-keyword.patch

2) Updates to the SELinux Test Suite adding SCTP tests. Please read the
   selinux-testsuite/README.sctp for details. The patch is available from:
         http://arctic.selinuxproject.org/~rhaines/selinux-sctp/
         selinux-testsuite-Add-SCTP-test-support.patch

3) Updates to lksctp-tools that show SELinux info in sctp_darn and
   sctp_test. It also contains a minor patch for test_1_to_1_connect.c
   as when CIPSO/CALIPSO configured, NetLabel returns a different error
   code for illegal addresses in test 5. The patch is available from:
         http://arctic.selinuxproject.org/~rhaines/selinux-sctp/
         lksctp-tools-Add-SELinux-support-to-sctp_test-and-sc.patch

All SCTP lksctp-tools/src/func_tests run correctly in enforcing mode.

All SCTP regression tests "./sctp-tests run" run correctly in enforcing
mode. These tests are obtained from: https://github.com/sctp/sctp-tests

The selinux-testsuite patch also adds remote tests (that need some manual
configuration). These are useful for testing CIPSO/CALIPSO over a network
with a number of categories to produce large ip option fields with various
message sizes forcing fragmentation etc.

Changes since RFC Patch:
Removed the NetLabel patch (was [RFC PATCH 4/5] netlabel: Add SCTP support)
as re-engineered. However this patchset will require the NetLabel
patch at [1] to fully run the SCTP selinux-testsuite.

PATCH 1/4
Remove unused parameter from security_sctp_assoc_request().
Reformat and update LSM-sctp.rst documentation.
PATCH 2/4
Add variables and RCU locks as requested in [2] to support IP options.
PATCH 3/4
Added security_sctp_assoc_request() hook to sctp_sf_do_unexpected_init()
and sctp_sf_do_5_2_4_dupcook().
Removed security_sctp_assoc_request() hook from sctp_sf_do_5_1C_ack() as
no longer required.
PATCH 4/4
Reformat and update SELinux-sctp.rst documentation.
Remove bindx and connectx permissions.
Rework selinux_socket_connect() and selinux_netlbl_socket_connect() to
utilise helpers for code reuse.
Add spinlock to selinux_sctp_assoc_request().
Remove unused parameter from security_sctp_assoc_request().
Use address->sa_family == AF_INET in *_bind and *_connect to ensure
correct address type.
Minor cleanups.

Changes since v2 post by Richard:
Updated sctp_frag_point() to also consider the ip options len.

[1] https://marc.info/?l=selinux&m=151061619115945&w=2
[2] https://marc.info/?l=selinux&m=150962470215797&w=2

Richard Haines (4):
  security: Add support for SCTP security hooks
  sctp: Add ip option support
  sctp: Add LSM hooks
  selinux: Add SCTP support

 Documentation/security/LSM-sctp.rst     | 194 ++++++++++++++++++++++
 Documentation/security/SELinux-sctp.rst | 104 ++++++++++++
 include/linux/lsm_hooks.h               |  35 ++++
 include/linux/security.h                |  25 +++
 include/net/sctp/sctp.h                 |   4 +-
 include/net/sctp/structs.h              |  12 ++
 include/uapi/linux/sctp.h               |   1 +
 net/sctp/chunk.c                        |  13 +-
 net/sctp/ipv6.c                         |  42 ++++-
 net/sctp/output.c                       |   5 +-
 net/sctp/protocol.c                     |  36 +++++
 net/sctp/sm_make_chunk.c                |  12 ++
 net/sctp/sm_statefuns.c                 |  18 +++
 net/sctp/socket.c                       |  70 +++++++-
 security/security.c                     |  22 +++
 security/selinux/hooks.c                | 278 +++++++++++++++++++++++++++++---
 security/selinux/include/classmap.h     |   2 +-
 security/selinux/include/netlabel.h     |  15 +-
 security/selinux/include/objsec.h       |   4 +
 security/selinux/netlabel.c             | 128 +++++++++++++--
 20 files changed, 971 insertions(+), 49 deletions(-)
 create mode 100644 Documentation/security/LSM-sctp.rst
 create mode 100644 Documentation/security/SELinux-sctp.rst

-- 
2.14.3

^ permalink raw reply

* Re: [PATCH net] RDS: Check cmsg_len before dereferencing CMSG_DATA
From: Yuval Shaia @ 2017-12-22 12:55 UTC (permalink / raw)
  To: Avinash Repaka
  Cc: Santosh Shilimkar, David S. Miller, netdev, linux-rdma, rds-devel,
	linux-kernel
In-Reply-To: <1513916224-9445-1-git-send-email-avinash.repaka@oracle.com>

On Thu, Dec 21, 2017 at 08:17:04PM -0800, Avinash Repaka wrote:
> RDS currently doesn't check if the length of the control message is
> large enough to hold the required data, before dereferencing the control
> message data. This results in following crash:
> 
> BUG: KASAN: stack-out-of-bounds in rds_rdma_bytes net/rds/send.c:1013
> [inline]
> BUG: KASAN: stack-out-of-bounds in rds_sendmsg+0x1f02/0x1f90
> net/rds/send.c:1066
> Read of size 8 at addr ffff8801c928fb70 by task syzkaller455006/3157
> 
> CPU: 0 PID: 3157 Comm: syzkaller455006 Not tainted 4.15.0-rc3+ #161
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0x194/0x257 lib/dump_stack.c:53
>  print_address_description+0x73/0x250 mm/kasan/report.c:252
>  kasan_report_error mm/kasan/report.c:351 [inline]
>  kasan_report+0x25b/0x340 mm/kasan/report.c:409
>  __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
>  rds_rdma_bytes net/rds/send.c:1013 [inline]
>  rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066
>  sock_sendmsg_nosec net/socket.c:628 [inline]
>  sock_sendmsg+0xca/0x110 net/socket.c:638
>  ___sys_sendmsg+0x320/0x8b0 net/socket.c:2018
>  __sys_sendmmsg+0x1ee/0x620 net/socket.c:2108
>  SYSC_sendmmsg net/socket.c:2139 [inline]
>  SyS_sendmmsg+0x35/0x60 net/socket.c:2134
>  entry_SYSCALL_64_fastpath+0x1f/0x96
> RIP: 0033:0x43fe49
> RSP: 002b:00007fffbe244ad8 EFLAGS: 00000217 ORIG_RAX: 0000000000000133
> RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fe49
> RDX: 0000000000000001 RSI: 000000002020c000 RDI: 0000000000000003
> RBP: 00000000006ca018 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004017b0
> R13: 0000000000401840 R14: 0000000000000000 R15: 0000000000000000
> 
> To fix this, we verify that the cmsg_len is large enough to hold the
> data to be read, before proceeding further.
> 
> Reported-by: syzbot <syzkaller-bugs@googlegroups.com>
> Signed-off-by: Avinash Repaka <avinash.repaka@oracle.com>
> ---
>  net/rds/send.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/net/rds/send.c b/net/rds/send.c
> index b52cdc8..f72466c 100644
> --- a/net/rds/send.c
> +++ b/net/rds/send.c
> @@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
>  			continue;
>  
>  		if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
> +			if (cmsg->cmsg_len <
> +			    CMSG_LEN(sizeof(struct rds_rdma_args)))
> +				return -EINVAL;
>  			args = CMSG_DATA(cmsg);
>  			*rdma_bytes += args->remote_vec.bytes;
>  		}

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>

> -- 
> 2.4.11
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [patch net] i40e: flower: Fix return value for unsupported offload
From: Jiri Pirko @ 2017-12-22 11:45 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, mlxsw, amritha.nambiar,
	jeffrey.t.kirsher, intel-wired-lan

From: Jiri Pirko <jiri@mellanox.com>

When filter configuration is not supported, drivers should return
-EOPNOTSUPP so the core can react correctly.

Fixes: 2f4b411a3d67 ("i40e: Enable cloud filters via tc-flower")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 321d8be..9da2069 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7356,7 +7356,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
 
 	if (tc < 0) {
 		dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
-		return -EINVAL;
+		return -EOPNOTSUPP;
 	}
 
 	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
-- 
2.9.5

^ permalink raw reply related

* [RFC PATCH bpf-next 3/3] error-injection: Separate error-injection from kprobe
From: Masami Hiramatsu @ 2017-12-22 11:39 UTC (permalink / raw)
  To: Alexei Starovoitov, Josef Bacik
  Cc: rostedt, mingo, davem, netdev, linux-kernel, ast, kernel-team,
	daniel, linux-btrfs, darrick.wong, mhiramat, Josef Bacik
In-Reply-To: <151394269314.5598.6344820223613246849.stgit@devbox>

The error-injection framework is not limited to use by
kprobes or bpf. Other kernel subsystems, e.g. livepatch,
ftrace etc., can use it freely for checking the safety of
error injection.
So this separates the error-injection framework from kprobes.

Some differences have been made:

- "kprobe" word is removed from any APIs/structures.
- BPF_ALLOW_ERROR_INJECTION() is renamed to
  ALLOW_ERROR_INJECTION() since it is not limited to BPF either.
- CONFIG_FUNCTION_ERROR_INJECTION is the config item of this
  feature. It is automatically enabled if the arch supports
  error injection feature for kprobe or ftrace etc.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 arch/Kconfig                           |    2 
 arch/x86/Kconfig                       |    2 
 arch/x86/include/asm/error-injection.h |   12 ++
 arch/x86/kernel/kprobes/ftrace.c       |   14 --
 arch/x86/lib/Makefile                  |    2 
 arch/x86/lib/error-inject.c            |   19 +++
 fs/btrfs/disk-io.c                     |    2 
 fs/btrfs/free-space-cache.c            |    2 
 include/asm-generic/error-injection.h  |   20 +++
 include/asm-generic/vmlinux.lds.h      |   14 +-
 include/linux/bpf.h                    |   12 --
 include/linux/error-injection.h        |   21 +++
 include/linux/kprobes.h                |    1 
 include/linux/module.h                 |    6 -
 kernel/kprobes.c                       |  163 --------------------------
 kernel/module.c                        |    8 +
 kernel/trace/Kconfig                   |    2 
 kernel/trace/bpf_trace.c               |    2 
 kernel/trace/trace_kprobe.c            |    3 
 lib/Kconfig.debug                      |    4 +
 lib/Makefile                           |    1 
 lib/error-inject.c                     |  200 ++++++++++++++++++++++++++++++++
 22 files changed, 302 insertions(+), 210 deletions(-)
 create mode 100644 arch/x86/include/asm/error-injection.h
 create mode 100644 arch/x86/lib/error-inject.c
 create mode 100644 include/asm-generic/error-injection.h
 create mode 100644 include/linux/error-injection.h
 create mode 100644 lib/error-inject.c

diff --git a/arch/Kconfig b/arch/Kconfig
index d3f4aaf9cb7a..97376accfb14 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -196,7 +196,7 @@ config HAVE_OPTPROBES
 config HAVE_KPROBES_ON_FTRACE
 	bool
 
-config HAVE_KPROBE_OVERRIDE
+config HAVE_FUNCTION_ERROR_INJECTION
 	bool
 
 config HAVE_NMI
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 04d66e6fa447..fc519e3ae754 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -154,7 +154,7 @@ config X86
 	select HAVE_KERNEL_XZ
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
-	select HAVE_KPROBE_OVERRIDE
+	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_KRETPROBES
 	select HAVE_KVM
 	select HAVE_LIVEPATCH			if X86_64
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
new file mode 100644
index 000000000000..d89759a0354c
--- /dev/null
+++ b/arch/x86/include/asm/error-injection.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ERROR_INJECTION_H
+#define _ASM_ERROR_INJECTION_H
+
+#include <asm/linkage.h>
+#include <asm/ptrace.h>
+#include <asm-generic/error-injection.h>
+
+asmlinkage void just_return_func(void);
+void override_function_to_return(struct pt_regs *regs);
+
+#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 1ea748d682fd..8dc0161cec8f 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -97,17 +97,3 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
 	p->ainsn.boostable = false;
 	return 0;
 }
-
-asmlinkage void override_func(void);
-asm(
-	".type override_func, @function\n"
-	"override_func:\n"
-	"	ret\n"
-	".size override_func, .-override_func\n"
-);
-
-void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
-{
-	regs->ip = (unsigned long)&override_func;
-}
-NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b61170e..081f09435d28 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,8 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o
+
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
new file mode 100644
index 000000000000..1998d4ae161e
--- /dev/null
+++ b/arch/x86/lib/error-inject.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+asmlinkage void just_return_func(void);
+
+asm(
+	".type just_return_func, @function\n"
+	"just_return_func:\n"
+	"	ret\n"
+	".size just_return_func, .-just_return_func\n"
+);
+
+void override_function_to_return(struct pt_regs *regs)
+{
+	regs->ip = (unsigned long)&just_return_func;
+}
+NOKPROBE_SYMBOL(override_function_to_return);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5da18ebc9222..5c540129ad81 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3124,7 +3124,7 @@ int open_ctree(struct super_block *sb,
 		goto fail_block_groups;
 	goto retry_root_backup;
 }
-BPF_ALLOW_ERROR_INJECTION(open_ctree);
+ALLOW_ERROR_INJECTION(open_ctree);
 
 static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index fb1382893bfc..2a75e088b215 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -333,7 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
 
 	return 0;
 }
-BPF_ALLOW_ERROR_INJECTION(io_ctl_init);
+ALLOW_ERROR_INJECTION(io_ctl_init);
 
 static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
 {
diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h
new file mode 100644
index 000000000000..08352c9d9f97
--- /dev/null
+++ b/include/asm-generic/error-injection.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_ERROR_INJECTION_H
+#define _ASM_GENERIC_ERROR_INJECTION_H
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+/*
+ * Whitelist generating macro. Specify functions which can be
+ * error-injectable using this macro.
+ */
+#define ALLOW_ERROR_INJECTION(fname)					\
+static unsigned long __used						\
+	__attribute__((__section__("_error_injection_whitelist")))	\
+	_eil_addr_##fname = (unsigned long)fname;
+#else
+#define ALLOW_ERROR_INJECTION(fname)
+#endif
+#endif
+
+#endif /* _ASM_GENERIC_ERROR_INJECTION_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index a2e8582d094a..fad8b8c4210c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -136,13 +136,13 @@
 #define KPROBE_BLACKLIST()
 #endif
 
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-#define ERROR_INJECT_LIST()	. = ALIGN(8);						\
-				VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .;	\
-				KEEP(*(_kprobe_error_inject_list))			\
-				VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+#define ERROR_INJECT_WHITELIST()	. = ALIGN(8);					\
+				VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;	\
+				KEEP(*(_error_injection_whitelist))			\
+				VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .;
 #else
-#define ERROR_INJECT_LIST()
+#define ERROR_INJECT_WHITELIST()
 #endif
 
 #ifdef CONFIG_EVENT_TRACING
@@ -573,7 +573,7 @@
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
 	KPROBE_BLACKLIST()						\
-	ERROR_INJECT_LIST()						\
+	ERROR_INJECT_WHITELIST()					\
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index da54ef644fcd..6426a6a81b3e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -16,6 +16,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/numa.h>
 #include <linux/wait.h>
+#include <linux/error-injection.h>
 
 struct perf_event;
 struct bpf_prog;
@@ -583,15 +584,4 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-#define BPF_ALLOW_ERROR_INJECTION(fname)				\
-static unsigned long __used						\
-	__attribute__((__section__("_kprobe_error_inject_list")))	\
-	_eil_addr_##fname = (unsigned long)fname;
-#else
-#define BPF_ALLOW_ERROR_INJECTION(fname)
-#endif
-#endif
-
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h
new file mode 100644
index 000000000000..130a67c50dac
--- /dev/null
+++ b/include/linux/error-injection.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ERROR_INJECTION_H
+#define _LINUX_ERROR_INJECTION_H
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+
+#include <asm/error-injection.h>
+
+extern bool within_error_injection_list(unsigned long addr);
+
+#else /* !CONFIG_FUNCTION_ERROR_INJECTION */
+
+#include <asm-generic/error-injection.h>
+static inline bool within_error_injection_list(unsigned long addr)
+{
+	return false;
+}
+
+#endif
+
+#endif /* _LINUX_ERROR_INJECTION_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 963fd364f3d6..9440a2fc8893 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -271,7 +271,6 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
 extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
 
 extern bool within_kprobe_blacklist(unsigned long addr);
-extern bool within_kprobe_error_injection_list(unsigned long addr);
 
 struct kprobe_insn_cache {
 	struct mutex mutex;
diff --git a/include/linux/module.h b/include/linux/module.h
index 548fa09fa806..792e51d83bda 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -476,9 +476,9 @@ struct module {
 	unsigned int num_ctors;
 #endif
 
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-	unsigned int num_kprobe_ei_funcs;
-	unsigned long *kprobe_ei_funcs;
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+	unsigned int num_ei_funcs;
+	unsigned long *ei_funcs;
 #endif
 } ____cacheline_aligned __randomize_layout;
 #ifndef MODULE_ARCH_INIT
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b4aab48ad258..da2ccf142358 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -83,16 +83,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
 	return &(kretprobe_table_locks[hash].lock);
 }
 
-/* List of symbols that can be overriden for error injection. */
-static LIST_HEAD(kprobe_error_injection_list);
-static DEFINE_MUTEX(kprobe_ei_mutex);
-struct kprobe_ei_entry {
-	struct list_head list;
-	unsigned long start_addr;
-	unsigned long end_addr;
-	void *priv;
-};
-
 /* Blacklist -- list of struct kprobe_blacklist_entry */
 static LIST_HEAD(kprobe_blacklist);
 
@@ -1404,17 +1394,6 @@ bool within_kprobe_blacklist(unsigned long addr)
 	return false;
 }
 
-bool within_kprobe_error_injection_list(unsigned long addr)
-{
-	struct kprobe_ei_entry *ent;
-
-	list_for_each_entry(ent, &kprobe_error_injection_list, list) {
-		if (addr >= ent->start_addr && addr < ent->end_addr)
-			return true;
-	}
-	return false;
-}
-
 /*
  * If we have a symbol_name argument, look it up and add the offset field
  * to it. This way, we can specify a relative address to a symbol.
@@ -2189,86 +2168,6 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
 	return 0;
 }
 
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-/* Markers of the _kprobe_error_inject_list section */
-extern unsigned long __start_kprobe_error_inject_list[];
-extern unsigned long __stop_kprobe_error_inject_list[];
-
-/*
- * Lookup and populate the kprobe_error_injection_list.
- *
- * For safety reasons we only allow certain functions to be overriden with
- * bpf_error_injection, so we need to populate the list of the symbols that have
- * been marked as safe for overriding.
- */
-static void populate_kprobe_error_injection_list(unsigned long *start,
-						 unsigned long *end,
-						 void *priv)
-{
-	unsigned long *iter;
-	struct kprobe_ei_entry *ent;
-	unsigned long entry, offset = 0, size = 0;
-
-	mutex_lock(&kprobe_ei_mutex);
-	for (iter = start; iter < end; iter++) {
-		entry = arch_deref_entry_point((void *)*iter);
-
-		if (!kernel_text_address(entry) ||
-		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
-			pr_err("Failed to find error inject entry at %p\n",
-				(void *)entry);
-			continue;
-		}
-
-		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
-		if (!ent)
-			break;
-		ent->start_addr = entry;
-		ent->end_addr = entry + size;
-		ent->priv = priv;
-		INIT_LIST_HEAD(&ent->list);
-		list_add_tail(&ent->list, &kprobe_error_injection_list);
-	}
-	mutex_unlock(&kprobe_ei_mutex);
-}
-
-static void __init populate_kernel_kprobe_ei_list(void)
-{
-	populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
-					     __stop_kprobe_error_inject_list,
-					     NULL);
-}
-
-static void module_load_kprobe_ei_list(struct module *mod)
-{
-	if (!mod->num_kprobe_ei_funcs)
-		return;
-	populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
-					     mod->kprobe_ei_funcs +
-					     mod->num_kprobe_ei_funcs, mod);
-}
-
-static void module_unload_kprobe_ei_list(struct module *mod)
-{
-	struct kprobe_ei_entry *ent, *n;
-	if (!mod->num_kprobe_ei_funcs)
-		return;
-
-	mutex_lock(&kprobe_ei_mutex);
-	list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
-		if (ent->priv == mod) {
-			list_del_init(&ent->list);
-			kfree(ent);
-		}
-	}
-	mutex_unlock(&kprobe_ei_mutex);
-}
-#else
-static inline void __init populate_kernel_kprobe_ei_list(void) {}
-static inline void module_load_kprobe_ei_list(struct module *m) {}
-static inline void module_unload_kprobe_ei_list(struct module *m) {}
-#endif
-
 /* Module notifier call back, checking kprobes on the module */
 static int kprobes_module_callback(struct notifier_block *nb,
 				   unsigned long val, void *data)
@@ -2279,11 +2178,6 @@ static int kprobes_module_callback(struct notifier_block *nb,
 	unsigned int i;
 	int checkcore = (val == MODULE_STATE_GOING);
 
-	if (val == MODULE_STATE_COMING)
-		module_load_kprobe_ei_list(mod);
-	else if (val == MODULE_STATE_GOING)
-		module_unload_kprobe_ei_list(mod);
-
 	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
 		return NOTIFY_DONE;
 
@@ -2346,8 +2240,6 @@ static int __init init_kprobes(void)
 		pr_err("Please take care of using kprobes.\n");
 	}
 
-	populate_kernel_kprobe_ei_list();
-
 	if (kretprobe_blacklist_size) {
 		/* lookup the function address from its name */
 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -2515,56 +2407,6 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
 	.release        = seq_release,
 };
 
-/*
- * kprobes/error_injection_list -- shows which functions can be overriden for
- * error injection.
- * */
-static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
-{
-	mutex_lock(&kprobe_ei_mutex);
-	return seq_list_start(&kprobe_error_injection_list, *pos);
-}
-
-static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
-{
-	mutex_unlock(&kprobe_ei_mutex);
-}
-
-static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	return seq_list_next(v, &kprobe_error_injection_list, pos);
-}
-
-static int kprobe_ei_seq_show(struct seq_file *m, void *v)
-{
-	char buffer[KSYM_SYMBOL_LEN];
-	struct kprobe_ei_entry *ent =
-		list_entry(v, struct kprobe_ei_entry, list);
-
-	sprint_symbol(buffer, ent->start_addr);
-	seq_printf(m, "%s\n", buffer);
-	return 0;
-}
-
-static const struct seq_operations kprobe_ei_seq_ops = {
-	.start = kprobe_ei_seq_start,
-	.next  = kprobe_ei_seq_next,
-	.stop  = kprobe_ei_seq_stop,
-	.show  = kprobe_ei_seq_show,
-};
-
-static int kprobe_ei_open(struct inode *inode, struct file *filp)
-{
-	return seq_open(filp, &kprobe_ei_seq_ops);
-}
-
-static const struct file_operations debugfs_kprobe_ei_ops = {
-	.open           = kprobe_ei_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = seq_release,
-};
-
 static void arm_all_kprobes(void)
 {
 	struct hlist_head *head;
@@ -2706,11 +2548,6 @@ static int __init debugfs_kprobe_init(void)
 	if (!file)
 		goto error;
 
-	file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
-				  &debugfs_kprobe_ei_ops);
-	if (!file)
-		goto error;
-
 	return 0;
 
 error:
diff --git a/kernel/module.c b/kernel/module.c
index bd695bfdc5c4..588f86a1f9c3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3118,10 +3118,10 @@ static int find_module_sections(struct module *mod, struct load_info *info)
 					     sizeof(*mod->ftrace_callsites),
 					     &mod->num_ftrace_callsites);
 #endif
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-	mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
-					    sizeof(*mod->kprobe_ei_funcs),
-					    &mod->num_kprobe_ei_funcs);
+#ifdef CONFIG_FUNCTION_ERROR_INJECT
+	mod->ei_funcs = section_objs(info, "_error_injection_whitelist",
+					    sizeof(*mod->ei_funcs),
+					    &mod->num_ei_funcs);
 #endif
 	mod->extable = section_objs(info, "__ex_table",
 				    sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6400e1bf97c5..a356b8c1f830 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -533,7 +533,7 @@ config FUNCTION_PROFILER
 config BPF_KPROBE_OVERRIDE
 	bool "Enable BPF programs to override a kprobed function"
 	depends on BPF_EVENTS
-	depends on HAVE_KPROBE_OVERRIDE
+	depends on FUNCTION_ERROR_INJECT
 	default n
 	help
 	 Allows BPF to override the execution of a probed function and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cefa9b0e396c..36d40b2a1010 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(trace_call_bpf);
 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
 {
 	regs_set_return_value(regs, rc);
-	arch_ftrace_kprobe_override_function(regs);
+	override_function_to_return(regs);
 	return 0;
 }
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a7c7035963f2..23f88a062965 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/uaccess.h>
 #include <linux/rculist.h>
+#include <linux/error-injection.h>
 
 #include "trace_probe.h"
 
@@ -106,7 +107,7 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call)
 	} else {
 		addr = (unsigned long)tk->rp.kp.addr;
 	}
-	return within_kprobe_error_injection_list(addr);
+	return within_error_injection_list(addr);
 }
 
 static int register_kprobe_event(struct trace_kprobe *tk);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9d5b78aad4c5..fe88ac0f003c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1500,6 +1500,10 @@ config FAULT_INJECTION
 	  Provide fault-injection framework.
 	  For more details, see Documentation/fault-injection/.
 
+config FUNCTION_ERROR_INJECTION
+	def_bool y
+	depends on HAVE_FUNCTION_ERROR_INJECTION
+
 config FAILSLAB
 	bool "Fault-injection capability for kmalloc"
 	depends on FAULT_INJECTION
diff --git a/lib/Makefile b/lib/Makefile
index a6c8529dd9b2..75ec13778cd8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
 obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
 	of-reconfig-notifier-error-inject.o
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
diff --git a/lib/error-inject.c b/lib/error-inject.c
new file mode 100644
index 000000000000..80c791d77593
--- /dev/null
+++ b/lib/error-inject.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+// error-inject.c: Function-level error injection table
+#include <linux/error-injection.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+/* Whitelist of symbols that can be overridden for error injection. */
+static LIST_HEAD(error_injection_list);
+static DEFINE_MUTEX(ei_mutex);
+struct ei_entry {
+	struct list_head list;
+	unsigned long start_addr;
+	unsigned long end_addr;
+	void *priv;
+};
+
+bool within_error_injection_list(unsigned long addr)
+{
+	struct ei_entry *ent;
+
+	list_for_each_entry(ent, &error_injection_list, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Lookup and populate the error_injection_list.
+ *
+ * For safety reasons we only allow certain functions to be overridden with
+ * bpf_error_injection, so we need to populate the list of the symbols that have
+ * been marked as safe for overriding.
+ */
+static void populate_error_injection_list(unsigned long *start,
+					  unsigned long *end, void *priv)
+{
+	unsigned long *iter;
+	struct ei_entry *ent;
+	unsigned long entry, offset = 0, size = 0;
+
+	mutex_lock(&ei_mutex);
+	for (iter = start; iter < end; iter++) {
+		entry = arch_deref_entry_point((void *)*iter);
+
+		if (!kernel_text_address(entry) ||
+		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+			pr_err("Failed to find error inject entry at %p\n",
+				(void *)entry);
+			continue;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			break;
+		ent->start_addr = entry;
+		ent->end_addr = entry + size;
+		ent->priv = priv;
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &error_injection_list);
+	}
+	mutex_unlock(&ei_mutex);
+}
+
+/* Markers of the _error_injection_whitelist section */
+extern unsigned long __start_error_injection_whitelist[];
+extern unsigned long __stop_error_injection_whitelist[];
+
+static void __init populate_kernel_ei_list(void)
+{
+	populate_error_injection_list(__start_error_injection_whitelist,
+				      __stop_error_injection_whitelist,
+				      NULL);
+}
+
+static void module_load_ei_list(struct module *mod)
+{
+	if (!mod->num_ei_funcs)
+		return;
+
+	populate_error_injection_list(mod->ei_funcs,
+				      mod->ei_funcs + mod->num_ei_funcs, mod);
+}
+
+static void module_unload_ei_list(struct module *mod)
+{
+	struct ei_entry *ent, *n;
+
+	if (!mod->num_ei_funcs)
+		return;
+
+	mutex_lock(&ei_mutex);
+	list_for_each_entry_safe(ent, n, &error_injection_list, list) {
+		if (ent->priv == mod) {
+			list_del_init(&ent->list);
+			kfree(ent);
+		}
+	}
+	mutex_unlock(&ei_mutex);
+}
+
+/* Module notifier call back, checking error injection table on the module */
+static int ei_module_callback(struct notifier_block *nb,
+			      unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	if (val == MODULE_STATE_COMING)
+		module_load_ei_list(mod);
+	else if (val == MODULE_STATE_GOING)
+		module_unload_ei_list(mod);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ei_module_nb = {
+	.notifier_call = ei_module_callback,
+	.priority = 0
+};
+
+/*
+ * error_injection/list -- shows which functions can be overridden for
+ * error injection.
+ */
+static void *ei_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&ei_mutex);
+	return seq_list_start(&error_injection_list, *pos);
+}
+
+static void ei_seq_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&ei_mutex);
+}
+
+static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &error_injection_list, pos);
+}
+
+static int ei_seq_show(struct seq_file *m, void *v)
+{
+	char buffer[KSYM_SYMBOL_LEN];
+	struct ei_entry *ent = list_entry(v, struct ei_entry, list);
+
+	sprint_symbol(buffer, ent->start_addr);
+	seq_printf(m, "%s\n", buffer);
+	return 0;
+}
+
+static const struct seq_operations ei_seq_ops = {
+	.start = ei_seq_start,
+	.next  = ei_seq_next,
+	.stop  = ei_seq_stop,
+	.show  = ei_seq_show,
+};
+
+static int ei_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &ei_seq_ops);
+}
+
+static const struct file_operations debugfs_ei_ops = {
+	.open           = ei_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static int __init ei_debugfs_init(void)
+{
+	struct dentry *dir, *file;
+
+	dir = debugfs_create_dir("error_injection", NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops);
+	if (!file) {
+		debugfs_remove(dir);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int __init init_error_injection(void)
+{
+	populate_kernel_ei_list();
+	if (!register_module_notifier(&ei_module_nb))
+		ei_debugfs_init();
+
+	return 0;
+}
+module_init(init_error_injection);

^ permalink raw reply related

* [RFC PATCH bpf-next 2/3] tracing/kprobe: bpf: Compare instruction pointer with original one
From: Masami Hiramatsu @ 2017-12-22 11:39 UTC (permalink / raw)
  To: Alexei Starovoitov, Josef Bacik
  Cc: rostedt, mingo, davem, netdev, linux-kernel, ast, kernel-team,
	daniel, linux-btrfs, darrick.wong, mhiramat, Josef Bacik
In-Reply-To: <151394269314.5598.6344820223613246849.stgit@devbox>

Compare the instruction pointer with the original one on the
stack instead of using the per-cpu bpf_kprobe_override flag.

This patch also consolidates reset_current_kprobe() and
preempt_enable_no_resched() blocks. Those can be done
in one place.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 kernel/trace/bpf_trace.c    |    1 -
 kernel/trace/trace_kprobe.c |   21 +++++++--------------
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d663660f8392..cefa9b0e396c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -83,7 +83,6 @@ EXPORT_SYMBOL_GPL(trace_call_bpf);
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
 {
-	__this_cpu_write(bpf_kprobe_override, 1);
 	regs_set_return_value(regs, rc);
 	arch_ftrace_kprobe_override_function(regs);
 	return 0;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 265e3e27e8dc..a7c7035963f2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -42,8 +42,6 @@ struct trace_kprobe {
 	(offsetof(struct trace_kprobe, tp.args) +	\
 	(sizeof(struct probe_arg) * (n)))
 
-DEFINE_PER_CPU(int, bpf_kprobe_override);
-
 static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
 {
 	return tk->rp.handler != NULL;
@@ -1204,6 +1202,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 	int rctx;
 
 	if (bpf_prog_array_valid(call)) {
+		unsigned long orig_ip = instruction_pointer(regs);
 		int ret;
 
 		ret = trace_call_bpf(call, regs);
@@ -1211,12 +1210,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 		/*
 		 * We need to check and see if we modified the pc of the
 		 * pt_regs, and if so clear the kprobe and return 1 so that we
-		 * don't do the instruction skipping.  Also reset our state so
-		 * we are clean the next pass through.
+		 * don't do the single stepping.
+		 * The ftrace kprobe handler leaves it up to us to re-enable
+		 * preemption here before returning if we've modified the ip.
 		 */
-		if (__this_cpu_read(bpf_kprobe_override)) {
-			__this_cpu_write(bpf_kprobe_override, 0);
+		if (orig_ip != instruction_pointer(regs)) {
 			reset_current_kprobe();
+			preempt_enable_no_resched();
 			return 1;
 		}
 		if (!ret)
@@ -1324,15 +1324,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
 #ifdef CONFIG_PERF_EVENTS
-	if (tk->tp.flags & TP_FLAG_PROFILE) {
+	if (tk->tp.flags & TP_FLAG_PROFILE)
 		ret = kprobe_perf_func(tk, regs);
-		/*
-		 * The ftrace kprobe handler leaves it up to us to re-enable
-		 * preemption here before returning if we've modified the ip.
-		 */
-		if (ret)
-			preempt_enable_no_resched();
-	}
 #endif
 	return ret;
 }


^ permalink raw reply related

* [RFC PATCH bpf-next 1/3] tracing/kprobe: bpf: Check error injectable event is on function entry
From: Masami Hiramatsu @ 2017-12-22 11:38 UTC (permalink / raw)
  To: Alexei Starovoitov, Josef Bacik
  Cc: rostedt, mingo, davem, netdev, linux-kernel, ast, kernel-team,
	daniel, linux-btrfs, darrick.wong, mhiramat, Josef Bacik
In-Reply-To: <151394269314.5598.6344820223613246849.stgit@devbox>

Check whether an error-injectable event is on function entry or not.
Currently it checks whether the event is an ftrace-based kprobe or not,
but that is wrong. It should check whether the event is on the entry
of the target function. Since error injection will override a function
to just return with a modified return value, that operation must
be done before the target function starts setting up its stack frame.

As a side effect, bpf error injection no longer needs to depend on
the function tracer. It can work with sw-breakpoint based kprobe
events too.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 kernel/trace/Kconfig        |    2 --
 kernel/trace/bpf_trace.c    |    6 +++---
 kernel/trace/trace_kprobe.c |    8 +++++---
 kernel/trace/trace_probe.h  |   12 ++++++------
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ae3a2d519e50..6400e1bf97c5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -533,9 +533,7 @@ config FUNCTION_PROFILER
 config BPF_KPROBE_OVERRIDE
 	bool "Enable BPF programs to override a kprobed function"
 	depends on BPF_EVENTS
-	depends on KPROBES_ON_FTRACE
 	depends on HAVE_KPROBE_OVERRIDE
-	depends on DYNAMIC_FTRACE_WITH_REGS
 	default n
 	help
 	 Allows BPF to override the execution of a probed function and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f6d2327ecb59..d663660f8392 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -800,11 +800,11 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
 	int ret = -EEXIST;
 
 	/*
-	 * Kprobe override only works for ftrace based kprobes, and only if they
-	 * are on the opt-in list.
+	 * Kprobe override only works if they are on the function entry,
+	 * and only if they are on the opt-in list.
 	 */
 	if (prog->kprobe_override &&
-	    (!trace_kprobe_ftrace(event->tp_event) ||
+	    (!trace_kprobe_on_func_entry(event->tp_event) ||
 	     !trace_kprobe_error_injectable(event->tp_event)))
 		return -EINVAL;
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 91f4b57dab82..265e3e27e8dc 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -88,13 +88,15 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
 	return nhit;
 }
 
-int trace_kprobe_ftrace(struct trace_event_call *call)
+bool trace_kprobe_on_func_entry(struct trace_event_call *call)
 {
 	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
-	return kprobe_ftrace(&tk->rp.kp);
+
+	return kprobe_on_func_entry(tk->rp.kp.addr, tk->rp.kp.symbol_name,
+				    tk->rp.kp.offset);
 }
 
-int trace_kprobe_error_injectable(struct trace_event_call *call)
+bool trace_kprobe_error_injectable(struct trace_event_call *call)
 {
 	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
 	unsigned long addr;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5e54d748c84c..e101c5bb9eda 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,8 +252,8 @@ struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
 void free_symbol_cache(struct symbol_cache *sc);
 struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
-int trace_kprobe_ftrace(struct trace_event_call *call);
-int trace_kprobe_error_injectable(struct trace_event_call *call);
+bool trace_kprobe_on_func_entry(struct trace_event_call *call);
+bool trace_kprobe_error_injectable(struct trace_event_call *call);
 #else
 /* uprobes do not support symbol fetch methods */
 #define fetch_symbol_u8			NULL
@@ -280,14 +280,14 @@ alloc_symbol_cache(const char *sym, long offset)
 	return NULL;
 }
 
-static inline int trace_kprobe_ftrace(struct trace_event_call *call)
+static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
 {
-	return 0;
+	return false;
 }
 
-static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
+static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
 {
-	return 0;
+	return false;
 }
 #endif /* CONFIG_KPROBE_EVENTS */
 

^ permalink raw reply related

* [RFC PATCH bpf-next 0/3] Separate error injection framework from kprobes
From: Masami Hiramatsu @ 2017-12-22 11:38 UTC (permalink / raw)
  To: Alexei Starovoitov, Josef Bacik
  Cc: rostedt, mingo, davem, netdev, linux-kernel, ast, kernel-team,
	daniel, linux-btrfs, darrick.wong, mhiramat, Josef Bacik

Hi Josef and Alexei,

Here are the patches which describe what I think is a more "natural"
introduction of error injection APIs. Basically, what I did in
this series is to separate error injection from kprobes and put
it in a new small error-injection subsystem, which currently
provides whitelists and a just-return function stub.

There are 2 main reasons why I separate it from kprobes.

 - kprobes users can modify the execution path not only at
   error-injection whitelist functions but also at other
   functions. I don't want to suggest to users that such a
   limitation comes from kprobes itself.

 - This error injection information is also useful for
   ftrace (function-hook) and livepatch. It should not
   be limited by CONFIG_KPROBES.

So I introduced CONFIG_FUNCTION_ERROR_INJECTION for this feature.

This series also has some improvement suggestions.

 - [1/3] The "kprobe override function" feature is not limited to
   ftrace-based kprobes; you can use it on sw-breakpoint
   based kprobes too. Also, you must check that the kprobe is on the
   entry of the function, right before the stackframe is set up.

 - [2/3] If we store the original instruction pointer and compare
   it with regs->ip, we don't need the per-cpu bpf_kprobe_override.
   Also, reset_current_kprobe() and preempt_enable_no_resched()
   do not need to be separated.

Any thoughts?

If it is good, I will also add a MAINTAINERS entry for this feature
and add some testcases using kprobes and ftrace to inject
errors. (And maybe we also need a document on how to use it.)

BTW, it seems there are many error injection frameworks in
lib/. We may also consider these distinctions.

Thank you,

---

Masami Hiramatsu (3):
      tracing/kprobe: bpf: Check error injectable event is on function entry
      tracing/kprobe: bpf: Compare instruction pointer with original one
      error-injection: Separate error-injection from kprobe


 arch/Kconfig                           |    2 
 arch/x86/Kconfig                       |    2 
 arch/x86/include/asm/error-injection.h |   12 ++
 arch/x86/kernel/kprobes/ftrace.c       |   14 --
 arch/x86/lib/Makefile                  |    2 
 arch/x86/lib/error-inject.c            |   19 +++
 fs/btrfs/disk-io.c                     |    2 
 fs/btrfs/free-space-cache.c            |    2 
 include/asm-generic/error-injection.h  |   20 +++
 include/asm-generic/vmlinux.lds.h      |   14 +-
 include/linux/bpf.h                    |   12 --
 include/linux/error-injection.h        |   21 +++
 include/linux/kprobes.h                |    1 
 include/linux/module.h                 |    6 -
 kernel/kprobes.c                       |  163 --------------------------
 kernel/module.c                        |    8 +
 kernel/trace/Kconfig                   |    4 -
 kernel/trace/bpf_trace.c               |    9 +
 kernel/trace/trace_kprobe.c            |   32 ++---
 kernel/trace/trace_probe.h             |   12 +-
 lib/Kconfig.debug                      |    4 +
 lib/Makefile                           |    1 
 lib/error-inject.c                     |  200 ++++++++++++++++++++++++++++++++
 23 files changed, 323 insertions(+), 239 deletions(-)
 create mode 100644 arch/x86/include/asm/error-injection.h
 create mode 100644 arch/x86/lib/error-inject.c
 create mode 100644 include/asm-generic/error-injection.h
 create mode 100644 include/linux/error-injection.h
 create mode 100644 lib/error-inject.c

^ permalink raw reply

* [PATCH 9/9] xfrm: update the stats documentation
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Add a couple of stats that aren't in the documentation file
and rework the top description to be a little more readable.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 Documentation/networking/xfrm_proc.txt | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/xfrm_proc.txt b/Documentation/networking/xfrm_proc.txt
index d0d8bafa9016..2eae619ab67b 100644
--- a/Documentation/networking/xfrm_proc.txt
+++ b/Documentation/networking/xfrm_proc.txt
@@ -5,13 +5,15 @@ Masahide NAKAMURA <nakam@linux-ipv6.org>
 
 Transformation Statistics
 -------------------------
-xfrm_proc is a statistics shown factor dropped by transformation
-for developer.
-It is a counter designed from current transformation source code
-and defined like linux private MIB.
 
-Inbound statistics
-~~~~~~~~~~~~~~~~~~
+The xfrm_proc code is a set of statistics showing numbers of packets
+dropped by the transformation code and why.  These counters are defined
+as part of the linux private MIB.  These counters can be viewed in
+/proc/net/xfrm_stat.
+
+
+Inbound errors
+~~~~~~~~~~~~~~
 XfrmInError:
 	All errors which is not matched others
 XfrmInBufferError:
@@ -46,6 +48,10 @@ XfrmInPolBlock:
 	Policy discards
 XfrmInPolError:
 	Policy error
+XfrmAcquireError:
+	State hasn't been fully acquired before use
+XfrmFwdHdrError:
+	Forward routing of a packet is not allowed
 
 Outbound errors
 ~~~~~~~~~~~~~~~
@@ -72,3 +78,5 @@ XfrmOutPolDead:
 	Policy is dead
 XfrmOutPolError:
 	Policy error
+XfrmOutStateInvalid:
+	State is invalid, perhaps expired
-- 
2.14.1

^ permalink raw reply related

* [PATCH 8/9] xfrm: wrap xfrmdev_ops with offload config
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

There's no reason to define netdev->xfrmdev_ops if
the offload facility is not CONFIG'd in.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c82d207ebc97..352066e4eeef 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1726,7 +1726,7 @@ struct net_device {
 	const struct ndisc_ops *ndisc_ops;
 #endif
 
-#ifdef CONFIG_XFRM
+#ifdef CONFIG_XFRM_OFFLOAD
 	const struct xfrmdev_ops *xfrmdev_ops;
 #endif
 
-- 
2.14.1

^ permalink raw reply related

* pull request (net-next): ipsec-next 2017-12-22
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev

1) Separate ESP handling from segmentation for GRO packets.
   This unifies the IPsec GSO and non GSO codepath.

2) Add asynchronous callbacks for xfrm on layer 2. This
   adds the necessary infrastructure to core networking.

3) Allow to use the layer2 IPsec GSO codepath for software
   crypto, all infrastructure is there now.

4) Also allow IPsec GSO with software crypto for local sockets.

5) Don't require synchronous crypto fallback on IPsec offloading,
   it is not needed anymore.

6) Check for xdo_dev_state_free and only call it if implemented.
   From Shannon Nelson.

7) Check for the required add and delete functions when a driver
   registers xdo_dev_ops. From Shannon Nelson.

8) Define xfrmdev_ops only with offload config.
   From Shannon Nelson.

9) Update the xfrm stats documentation.
   From Shannon Nelson.

Please pull or let me know if there are problems.

Thanks!

The following changes since commit f39a5c01c3d24f2f61ec9d8c7d7e81f9aca506ce:

  Merge branch 'nfp-flower-add-Geneve-tunnel-support' (2017-12-19 14:52:13 -0500)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git master

for you to fetch changes up to 1a4bb1d14f7c0c4df418d08eb8e24d1c0e54b06a:

  xfrm: update the stats documentation (2017-12-22 06:45:48 +0100)

----------------------------------------------------------------
Shannon Nelson (4):
      xfrm: check for xdo_dev_state_free
      xfrm: check for xdo_dev_ops add and delete
      xfrm: wrap xfrmdev_ops with offload config
      xfrm: update the stats documentation

Steffen Klassert (5):
      xfrm: Separate ESP handling from segmentation for GRO packets.
      net: Add asynchronous callbacks for xfrm on layer 2.
      xfrm: Allow to use the layer2 IPsec GSO codepath for software crypto.
      xfrm: Allow IPsec GSO with software crypto for local sockets.
      esp: Don't require synchronous crypto fallback on offloading anymore.

 Documentation/networking/xfrm_proc.txt |  20 ++--
 include/linux/netdevice.h              |   8 +-
 include/net/xfrm.h                     |  29 ++++-
 net/core/dev.c                         |  19 ++--
 net/ipv4/esp4.c                        |  36 ++++---
 net/ipv4/esp4_offload.c                |  73 ++++---------
 net/ipv4/xfrm4_mode_tunnel.c           |   5 +-
 net/ipv6/esp6.c                        |  36 ++++---
 net/ipv6/esp6_offload.c                |  80 +++++---------
 net/ipv6/xfrm6_mode_tunnel.c           |   5 +-
 net/packet/af_packet.c                 |   3 +-
 net/sched/sch_generic.c                |  16 ++-
 net/xfrm/xfrm_device.c                 | 186 ++++++++++++++++++++++++++++-----
 13 files changed, 326 insertions(+), 190 deletions(-)

^ permalink raw reply

* [PATCH 2/9] net: Add asynchronous callbacks for xfrm on layer 2.
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

This patch implements asynchronous crypto callbacks
and a backlog handler that can be used when IPsec
is done at layer 2 in the TX path. It also extends
the skb validate functions so that we can update
the driver transmit return codes based on async
crypto operation or to indicate that we queued the
packet in a backlog queue.

Joint work with: Aviv Heller <avivh@mellanox.com>

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/netdevice.h |   6 ++-
 include/net/xfrm.h        |  22 ++++++++--
 net/core/dev.c            |  16 +++++---
 net/ipv4/esp4.c           |  24 +++++++++--
 net/ipv6/esp6.c           |  24 +++++++++--
 net/packet/af_packet.c    |   3 +-
 net/sched/sch_generic.c   |  16 +++++++-
 net/xfrm/xfrm_device.c    | 100 +++++++++++++++++++++++++++++++++++++---------
 8 files changed, 175 insertions(+), 36 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc4ce7456e38..c82d207ebc97 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2793,7 +2793,9 @@ struct softnet_data {
 	struct Qdisc		*output_queue;
 	struct Qdisc		**output_queue_tailp;
 	struct sk_buff		*completion_queue;
-
+#ifdef CONFIG_XFRM_OFFLOAD
+	struct sk_buff_head	xfrm_backlog;
+#endif
 #ifdef CONFIG_RPS
 	/* input_queue_head should be written by cpu owning this struct,
 	 * and only read by other cpus. Worth using a cache line.
@@ -3325,7 +3327,7 @@ int dev_get_phys_port_id(struct net_device *dev,
 int dev_get_phys_port_name(struct net_device *dev,
 			   char *name, size_t len);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
-struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index df7f3d0ac4a1..2517c4f7781a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1051,6 +1051,7 @@ struct xfrm_offload {
 #define	XFRM_GSO_SEGMENT	16
 #define	XFRM_GRO		32
 #define	XFRM_ESP_NO_TRAILER	64
+#define	XFRM_DEV_RESUME		128
 
 	__u32			status;
 #define CRYPTO_SUCCESS				1
@@ -1874,21 +1875,28 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
 {
 	return skb->sp->xvec[skb->sp->len - 1];
 }
+#endif
+
 static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
 {
+#ifdef CONFIG_XFRM
 	struct sec_path *sp = skb->sp;
 
 	if (!sp || !sp->olen || sp->len != sp->olen)
 		return NULL;
 
 	return &sp->ovec[sp->olen - 1];
-}
+#else
+	return NULL;
 #endif
+}
 
 void __net_init xfrm_dev_init(void);
 
 #ifdef CONFIG_XFRM_OFFLOAD
-struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
+void xfrm_dev_resume(struct sk_buff *skb);
+void xfrm_dev_backlog(struct softnet_data *sd);
+struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again);
 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		       struct xfrm_user_offload *xuo);
 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
@@ -1929,7 +1937,15 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 	}
 }
 #else
-static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+static inline void xfrm_dev_resume(struct sk_buff *skb)
+{
+}
+
+static inline void xfrm_dev_backlog(struct softnet_data *sd)
+{
+}
+
+static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
 {
 	return skb;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index fb7a24a373d1..821dd8cb7169 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3059,7 +3059,7 @@ int skb_csum_hwoffload_help(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_csum_hwoffload_help);
 
-static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
 {
 	netdev_features_t features;
 
@@ -3099,7 +3099,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 		}
 	}
 
-	skb = validate_xmit_xfrm(skb, features);
+	skb = validate_xmit_xfrm(skb, features, again);
 
 	return skb;
 
@@ -3110,7 +3110,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	return NULL;
 }
 
-struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
+struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
 {
 	struct sk_buff *next, *head = NULL, *tail;
 
@@ -3121,7 +3121,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
 		/* in case skb wont be segmented, point to itself */
 		skb->prev = skb;
 
-		skb = validate_xmit_skb(skb, dev);
+		skb = validate_xmit_skb(skb, dev, again);
 		if (!skb)
 			continue;
 
@@ -3448,6 +3448,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 	struct netdev_queue *txq;
 	struct Qdisc *q;
 	int rc = -ENOMEM;
+	bool again = false;
 
 	skb_reset_mac_header(skb);
 
@@ -3509,7 +3510,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 				     XMIT_RECURSION_LIMIT))
 				goto recursion_alert;
 
-			skb = validate_xmit_skb(skb, dev);
+			skb = validate_xmit_skb(skb, dev, &again);
 			if (!skb)
 				goto out;
 
@@ -4193,6 +4194,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 				spin_unlock(root_lock);
 		}
 	}
+
+	xfrm_dev_backlog(sd);
 }
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
@@ -8874,6 +8877,9 @@ static int __init net_dev_init(void)
 
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
+#ifdef CONFIG_XFRM_OFFLOAD
+		skb_queue_head_init(&sd->xfrm_backlog);
+#endif
 		INIT_LIST_HEAD(&sd->poll_list);
 		sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d57aa64fa7c7..7948833dc204 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -121,14 +121,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
+	struct xfrm_offload *xo = xfrm_offload(skb);
 	void *tmp;
-	struct dst_entry *dst = skb_dst(skb);
-	struct xfrm_state *x = dst->xfrm;
+	struct xfrm_state *x;
+
+	if (xo && (xo->flags & XFRM_DEV_RESUME))
+		x = skb->sp->xvec[skb->sp->len - 1];
+	else
+		x = skb_dst(skb)->xfrm;
 
 	tmp = ESP_SKB_CB(skb)->tmp;
 	esp_ssg_unref(x, tmp);
 	kfree(tmp);
-	xfrm_output_resume(skb, err);
+
+	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+		if (err) {
+			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+			kfree_skb(skb);
+			return;
+		}
+
+		skb_push(skb, skb->data - skb_mac_header(skb));
+		secpath_reset(skb);
+		xfrm_dev_resume(skb);
+	} else {
+		xfrm_output_resume(skb, err);
+	}
 }
 
 /* Move ESP header back into place. */
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a902ff8f59be..08a424fa8009 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -141,14 +141,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
+	struct xfrm_offload *xo = xfrm_offload(skb);
 	void *tmp;
-	struct dst_entry *dst = skb_dst(skb);
-	struct xfrm_state *x = dst->xfrm;
+	struct xfrm_state *x;
+
+	if (xo && (xo->flags & XFRM_DEV_RESUME))
+		x = skb->sp->xvec[skb->sp->len - 1];
+	else
+		x = skb_dst(skb)->xfrm;
 
 	tmp = ESP_SKB_CB(skb)->tmp;
 	esp_ssg_unref(x, tmp);
 	kfree(tmp);
-	xfrm_output_resume(skb, err);
+
+	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+		if (err) {
+			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+			kfree_skb(skb);
+			return;
+		}
+
+		skb_push(skb, skb->data - skb_mac_header(skb));
+		secpath_reset(skb);
+		xfrm_dev_resume(skb);
+	} else {
+		xfrm_output_resume(skb, err);
+	}
 }
 
 /* Move ESP header back into place. */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index da215e5c1399..ee7aa0ba3a67 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -247,12 +247,13 @@ static int packet_direct_xmit(struct sk_buff *skb)
 	struct sk_buff *orig_skb = skb;
 	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
+	bool again = false;
 
 	if (unlikely(!netif_running(dev) ||
 		     !netif_carrier_ok(dev)))
 		goto drop;
 
-	skb = validate_xmit_skb_list(skb, dev);
+	skb = validate_xmit_skb_list(skb, dev, &again);
 	if (skb != orig_skb)
 		goto drop;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 876fab2604b8..f9a8761f0ff2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -32,6 +32,7 @@
 #include <net/pkt_sched.h>
 #include <net/dst.h>
 #include <trace/events/qdisc.h>
+#include <net/xfrm.h>
 
 /* Qdisc to use by default */
 const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
@@ -230,6 +231,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 
 		/* skb in gso_skb were already validated */
 		*validate = false;
+		if (xfrm_offload(skb))
+			*validate = true;
 		/* check the reason of requeuing without tx lock first */
 		txq = skb_get_tx_queue(txq->dev, skb);
 		if (!netif_xmit_frozen_or_stopped(txq)) {
@@ -285,6 +288,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		     spinlock_t *root_lock, bool validate)
 {
 	int ret = NETDEV_TX_BUSY;
+	bool again = false;
 
 	/* And release qdisc */
 	if (root_lock)
@@ -292,7 +296,17 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 
 	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
 	if (validate)
-		skb = validate_xmit_skb_list(skb, dev);
+		skb = validate_xmit_skb_list(skb, dev, &again);
+
+#ifdef CONFIG_XFRM_OFFLOAD
+	if (unlikely(again)) {
+		if (root_lock)
+			spin_lock(root_lock);
+
+		dev_requeue_skb(skb, q);
+		return false;
+	}
+#endif
 
 	if (likely(skb)) {
 		HARD_TX_LOCK(dev, txq, smp_processor_id());
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index a5a7a716c465..fc8ab9f71127 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -23,12 +23,13 @@
 #include <linux/notifier.h>
 
 #ifdef CONFIG_XFRM_OFFLOAD
-struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
 {
 	int err;
-	__u32 seq;
+	unsigned long flags;
 	struct xfrm_state *x;
 	struct sk_buff *skb2;
+	struct softnet_data *sd;
 	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
@@ -42,6 +43,16 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
 	if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
 		return skb;
 
+	local_irq_save(flags);
+	sd = this_cpu_ptr(&softnet_data);
+	err = !skb_queue_empty(&sd->xfrm_backlog);
+	local_irq_restore(flags);
+
+	if (err) {
+		*again = true;
+		return skb;
+	}
+
 	if (skb_is_gso(skb)) {
 		struct net_device *dev = skb->dev;
 
@@ -54,23 +65,26 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
 
 			segs = skb_gso_segment(skb, esp_features);
 			if (IS_ERR(segs)) {
-				XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
 				kfree_skb(skb);
+				atomic_long_inc(&dev->tx_dropped);
 				return NULL;
 			} else {
 				consume_skb(skb);
 				skb = segs;
 			}
-		} else {
-			return skb;
 		}
 	}
 
 	if (!skb->next) {
 		x->outer_mode->xmit(x, skb);
 
+		xo->flags |= XFRM_DEV_RESUME;
+
 		err = x->type_offload->xmit(x, skb, esp_features);
 		if (err) {
+			if (err == -EINPROGRESS)
+				return NULL;
+
 			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
 			kfree_skb(skb);
 			return NULL;
@@ -82,36 +96,37 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
 	}
 
 	skb2 = skb;
-	seq = xo->seq.low;
 
 	do {
 		struct sk_buff *nskb = skb2->next;
+		skb2->next = NULL;
 
 		xo = xfrm_offload(skb2);
-		xo->flags |= XFRM_GSO_SEGMENT;
-		xo->seq.low = seq;
-		xo->seq.hi = xfrm_replay_seqhi(x, seq);
-
-		if(!(features & NETIF_F_HW_ESP))
-			xo->flags |= CRYPTO_FALLBACK;
+		xo->flags |= XFRM_DEV_RESUME;
 
 		x->outer_mode->xmit(x, skb2);
 
 		err = x->type_offload->xmit(x, skb2, esp_features);
-		if (err) {
+		if (!err) {
+			skb2->next = nskb;
+		} else if (err != -EINPROGRESS) {
 			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
 			skb2->next = nskb;
 			kfree_skb_list(skb2);
 			return NULL;
-		}
+		} else {
+			if (skb == skb2)
+				skb = nskb;
+
+			if (!skb)
+				return NULL;
 
-		if (!skb_is_gso(skb2))
-			seq++;
-		else
-			seq += skb_shinfo(skb2)->gso_segs;
+			goto skip_push;
+		}
 
 		skb_push(skb2, skb2->data - skb_mac_header(skb2));
 
+skip_push:
 		skb2 = nskb;
 	} while (skb2);
 
@@ -207,6 +222,55 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 	return true;
 }
 EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+
+void xfrm_dev_resume(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	int ret = NETDEV_TX_BUSY;
+	struct netdev_queue *txq;
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	rcu_read_lock();
+	txq = netdev_pick_tx(dev, skb, NULL);
+
+	HARD_TX_LOCK(dev, txq, smp_processor_id());
+	if (!netif_xmit_frozen_or_stopped(txq))
+		skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+	HARD_TX_UNLOCK(dev, txq);
+
+	if (!dev_xmit_complete(ret)) {
+		local_irq_save(flags);
+		sd = this_cpu_ptr(&softnet_data);
+		skb_queue_tail(&sd->xfrm_backlog, skb);
+		raise_softirq_irqoff(NET_TX_SOFTIRQ);
+		local_irq_restore(flags);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(xfrm_dev_resume);
+
+void xfrm_dev_backlog(struct softnet_data *sd)
+{
+	struct sk_buff_head *xfrm_backlog = &sd->xfrm_backlog;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+
+	if (skb_queue_empty(xfrm_backlog))
+		return;
+
+	__skb_queue_head_init(&list);
+
+	spin_lock(&xfrm_backlog->lock);
+	skb_queue_splice_init(xfrm_backlog, &list);
+	spin_unlock(&xfrm_backlog->lock);
+
+	while (!skb_queue_empty(&list)) {
+		skb = __skb_dequeue(&list);
+		xfrm_dev_resume(skb);
+	}
+
+}
 #endif
 
 static int xfrm_dev_register(struct net_device *dev)
-- 
2.14.1

^ permalink raw reply related

* [PATCH 6/9] xfrm: check for xdo_dev_state_free
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

The current XFRM code assumes that we've implemented the
xdo_dev_state_free() callback, even if it is meaningless to the driver.
This patch adds a check for it before calling, as done in other APIs,
to prevent a NULL function pointer kernel crash.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 357764a2bb4e..079ea9455bcd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1933,7 +1933,8 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 	 struct net_device *dev = xso->dev;
 
 	if (dev && dev->xfrmdev_ops) {
-		dev->xfrmdev_ops->xdo_dev_state_free(x);
+		if (dev->xfrmdev_ops->xdo_dev_state_free)
+			dev->xfrmdev_ops->xdo_dev_state_free(x);
 		xso->dev = NULL;
 		dev_put(dev);
 	}
-- 
2.14.1

^ permalink raw reply related

* [PATCH 7/9] xfrm: check for xdo_dev_ops add and delete
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

This adds a check for the required add and delete functions up front
at registration time to be sure both are defined.

Since both the features check and the registration check are looking
at the same things, break out the check for both to call.

Lastly, for some reason the feature check was setting xfrmdev_ops to
NULL if the NETIF_F_HW_ESP bit was missing, which would probably
surprise the driver later if the driver turned its NETIF_F_HW_ESP bit
back on.  We shouldn't be messing with the driver's callback list, so
we stop doing that with this patch.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_device.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 20a96181867a..75982506617b 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -273,17 +273,31 @@ void xfrm_dev_backlog(struct softnet_data *sd)
 }
 #endif
 
-static int xfrm_dev_register(struct net_device *dev)
+static int xfrm_api_check(struct net_device *dev)
 {
-	if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
-		return NOTIFY_BAD;
+#ifdef CONFIG_XFRM_OFFLOAD
 	if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
 	    !(dev->features & NETIF_F_HW_ESP))
 		return NOTIFY_BAD;
 
+	if ((dev->features & NETIF_F_HW_ESP) &&
+	    (!(dev->xfrmdev_ops &&
+	       dev->xfrmdev_ops->xdo_dev_state_add &&
+	       dev->xfrmdev_ops->xdo_dev_state_delete)))
+		return NOTIFY_BAD;
+#else
+	if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM))
+		return NOTIFY_BAD;
+#endif
+
 	return NOTIFY_DONE;
 }
 
+static int xfrm_dev_register(struct net_device *dev)
+{
+	return xfrm_api_check(dev);
+}
+
 static int xfrm_dev_unregister(struct net_device *dev)
 {
 	xfrm_policy_cache_flush();
@@ -292,16 +306,7 @@ static int xfrm_dev_unregister(struct net_device *dev)
 
 static int xfrm_dev_feat_change(struct net_device *dev)
 {
-	if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
-		return NOTIFY_BAD;
-	else if (!(dev->features & NETIF_F_HW_ESP))
-		dev->xfrmdev_ops = NULL;
-
-	if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
-	    !(dev->features & NETIF_F_HW_ESP))
-		return NOTIFY_BAD;
-
-	return NOTIFY_DONE;
+	return xfrm_api_check(dev);
 }
 
 static int xfrm_dev_down(struct net_device *dev)
-- 
2.14.1

^ permalink raw reply related

* [PATCH 1/9] xfrm: Separate ESP handling from segmentation for GRO packets.
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

We change the ESP GSO handlers to only segment the packets.
The ESP handling and encryption is deferred to validate_xmit_xfrm()
where this is done for non GRO packets too. This makes the code
more robust and prepares for asynchronous crypto handling.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h           |  6 +--
 net/core/dev.c               |  5 +--
 net/ipv4/esp4_offload.c      | 73 +++++++++++--------------------------
 net/ipv4/xfrm4_mode_tunnel.c |  5 +--
 net/ipv6/esp6_offload.c      | 80 ++++++++++++----------------------------
 net/ipv6/xfrm6_mode_tunnel.c |  5 +--
 net/xfrm/xfrm_device.c       | 87 +++++++++++++++++++++++++++++++++++++++-----
 7 files changed, 129 insertions(+), 132 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1ec0c4760646..df7f3d0ac4a1 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1888,7 +1888,7 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
 void __net_init xfrm_dev_init(void);
 
 #ifdef CONFIG_XFRM_OFFLOAD
-int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
+struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
 int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		       struct xfrm_user_offload *xuo);
 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
@@ -1929,9 +1929,9 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 	}
 }
 #else
-static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+static inline struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
-	return 0;
+	return skb;
 }
 
 static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo)
diff --git a/net/core/dev.c b/net/core/dev.c
index c7db39926769..fb7a24a373d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3083,9 +3083,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 		    __skb_linearize(skb))
 			goto out_kfree_skb;
 
-		if (validate_xmit_xfrm(skb, features))
-			goto out_kfree_skb;
-
 		/* If packet is not checksummed and device does not
 		 * support checksumming for this protocol, complete
 		 * checksumming here.
@@ -3102,6 +3099,8 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 		}
 	}
 
+	skb = validate_xmit_xfrm(skb, features);
+
 	return skb;
 
 out_kfree_skb:
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index f8b918c766b0..c359f3cfeec3 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -108,75 +108,36 @@ static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
 static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
 				        netdev_features_t features)
 {
-	__u32 seq;
-	int err = 0;
-	struct sk_buff *skb2;
 	struct xfrm_state *x;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
 	if (!xo)
-		goto out;
-
-	seq = xo->seq.low;
+		return ERR_PTR(-EINVAL);
 
 	x = skb->sp->xvec[skb->sp->len - 1];
 	aead = x->data;
 	esph = ip_esp_hdr(skb);
 
 	if (esph->spi != x->id.spi)
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
 
 	skb->encap_hdr_csum = 1;
 
-	if (!(features & NETIF_F_HW_ESP))
+	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+	    (x->xso.dev != skb->dev))
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
 
-	segs = x->outer_mode->gso_segment(x, skb, esp_features);
-	if (IS_ERR_OR_NULL(segs))
-		goto out;
-
-	__skb_pull(skb, skb->data - skb_mac_header(skb));
-
-	skb2 = segs;
-	do {
-		struct sk_buff *nskb = skb2->next;
-
-		xo = xfrm_offload(skb2);
-		xo->flags |= XFRM_GSO_SEGMENT;
-		xo->seq.low = seq;
-		xo->seq.hi = xfrm_replay_seqhi(x, seq);
+	xo->flags |= XFRM_GSO_SEGMENT;
 
-		if(!(features & NETIF_F_HW_ESP))
-			xo->flags |= CRYPTO_FALLBACK;
-
-		x->outer_mode->xmit(x, skb2);
-
-		err = x->type_offload->xmit(x, skb2, esp_features);
-		if (err) {
-			kfree_skb_list(segs);
-			return ERR_PTR(err);
-		}
-
-		if (!skb_is_gso(skb2))
-			seq++;
-		else
-			seq += skb_shinfo(skb2)->gso_segs;
-
-		skb_push(skb2, skb2->mac_len);
-		skb2 = nskb;
-	} while (skb2);
-
-out:
-	return segs;
+	return x->outer_mode->gso_segment(x, skb, esp_features);
 }
 
 static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -203,6 +164,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features_
 	struct crypto_aead *aead;
 	struct esp_info esp;
 	bool hw_offload = true;
+	__u32 seq;
 
 	esp.inplace = true;
 
@@ -241,23 +203,30 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features_
 			return esp.nfrags;
 	}
 
+	seq = xo->seq.low;
+
 	esph = esp.esph;
 	esph->spi = x->id.spi;
 
 	skb_push(skb, -skb_network_offset(skb));
 
 	if (xo->flags & XFRM_GSO_SEGMENT) {
-		esph->seq_no = htonl(xo->seq.low);
-	} else {
-		ip_hdr(skb)->tot_len = htons(skb->len);
-		ip_send_check(ip_hdr(skb));
+		esph->seq_no = htonl(seq);
+
+		if (!skb_is_gso(skb))
+			xo->seq.low++;
+		else
+			xo->seq.low += skb_shinfo(skb)->gso_segs;
 	}
 
+	esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32));
+
+	ip_hdr(skb)->tot_len = htons(skb->len);
+	ip_send_check(ip_hdr(skb));
+
 	if (hw_offload)
 		return 0;
 
-	esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
-
 	err = esp_output_tail(x, skb, &esp);
 	if (err)
 		return err;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 7d885a44dc9d..8affc6d83d58 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -105,18 +105,15 @@ static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x,
 {
 	__skb_push(skb, skb->mac_len);
 	return skb_mac_gso_segment(skb, features);
-
 }
 
 static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
-	if (xo->flags & XFRM_GSO_SEGMENT) {
-		skb->network_header = skb->network_header - x->props.header_len;
+	if (xo->flags & XFRM_GSO_SEGMENT)
 		skb->transport_header = skb->network_header +
 					sizeof(struct iphdr);
-	}
 
 	skb_reset_mac_len(skb);
 	pskb_pull(skb, skb->mac_len + x->props.header_len);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 333a478aa161..0bb7d54cf2cb 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -135,75 +135,36 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
 static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
 				        netdev_features_t features)
 {
-	__u32 seq;
-	int err = 0;
-	struct sk_buff *skb2;
 	struct xfrm_state *x;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
-	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
 	if (!xo)
-		goto out;
-
-	seq = xo->seq.low;
+		return ERR_PTR(-EINVAL);
 
 	x = skb->sp->xvec[skb->sp->len - 1];
 	aead = x->data;
 	esph = ip_esp_hdr(skb);
 
 	if (esph->spi != x->id.spi)
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
-		goto out;
+		return ERR_PTR(-EINVAL);
 
 	__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
 
 	skb->encap_hdr_csum = 1;
 
-	if (!(features & NETIF_F_HW_ESP))
+	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+	    (x->xso.dev != skb->dev))
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
 
-	segs = x->outer_mode->gso_segment(x, skb, esp_features);
-	if (IS_ERR_OR_NULL(segs))
-		goto out;
-
-	__skb_pull(skb, skb->data - skb_mac_header(skb));
-
-	skb2 = segs;
-	do {
-		struct sk_buff *nskb = skb2->next;
-
-		xo = xfrm_offload(skb2);
-		xo->flags |= XFRM_GSO_SEGMENT;
-		xo->seq.low = seq;
-		xo->seq.hi = xfrm_replay_seqhi(x, seq);
-
-		if(!(features & NETIF_F_HW_ESP))
-			xo->flags |= CRYPTO_FALLBACK;
-
-		x->outer_mode->xmit(x, skb2);
-
-		err = x->type_offload->xmit(x, skb2, esp_features);
-		if (err) {
-			kfree_skb_list(segs);
-			return ERR_PTR(err);
-		}
-
-		if (!skb_is_gso(skb2))
-			seq++;
-		else
-			seq += skb_shinfo(skb2)->gso_segs;
-
-		skb_push(skb2, skb2->mac_len);
-		skb2 = nskb;
-	} while (skb2);
+	xo->flags |= XFRM_GSO_SEGMENT;
 
-out:
-	return segs;
+	return x->outer_mode->gso_segment(x, skb, esp_features);
 }
 
 static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -222,6 +183,7 @@ static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
 
 static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features_t features)
 {
+	int len;
 	int err;
 	int alen;
 	int blksize;
@@ -230,6 +192,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 	struct crypto_aead *aead;
 	struct esp_info esp;
 	bool hw_offload = true;
+	__u32 seq;
 
 	esp.inplace = true;
 
@@ -265,28 +228,33 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 			return esp.nfrags;
 	}
 
+	seq = xo->seq.low;
+
 	esph = ip_esp_hdr(skb);
 	esph->spi = x->id.spi;
 
 	skb_push(skb, -skb_network_offset(skb));
 
 	if (xo->flags & XFRM_GSO_SEGMENT) {
-		esph->seq_no = htonl(xo->seq.low);
-	} else {
-		int len;
-
-		len = skb->len - sizeof(struct ipv6hdr);
-		if (len > IPV6_MAXPLEN)
-			len = 0;
+		esph->seq_no = htonl(seq);
 
-		ipv6_hdr(skb)->payload_len = htons(len);
+		if (!skb_is_gso(skb))
+			xo->seq.low++;
+		else
+			xo->seq.low += skb_shinfo(skb)->gso_segs;
 	}
 
+	esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+	len = skb->len - sizeof(struct ipv6hdr);
+	if (len > IPV6_MAXPLEN)
+		len = 0;
+
+	ipv6_hdr(skb)->payload_len = htons(len);
+
 	if (hw_offload)
 		return 0;
 
-	esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
-
 	err = esp6_output_tail(x, skb, &esp);
 	if (err)
 		return err;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index e66b94f46532..4e12859bc2ee 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -105,17 +105,14 @@ static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
 {
 	__skb_push(skb, skb->mac_len);
 	return skb_mac_gso_segment(skb, features);
-
 }
 
 static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
-	if (xo->flags & XFRM_GSO_SEGMENT) {
-		skb->network_header = skb->network_header - x->props.header_len;
+	if (xo->flags & XFRM_GSO_SEGMENT)
 		skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
-	}
 
 	skb_reset_mac_len(skb);
 	pskb_pull(skb, skb->mac_len + x->props.header_len);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 00641b611aed..a5a7a716c465 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -23,32 +23,99 @@
 #include <linux/notifier.h>
 
 #ifdef CONFIG_XFRM_OFFLOAD
-int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
+struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
 {
 	int err;
+	__u32 seq;
 	struct xfrm_state *x;
+	struct sk_buff *skb2;
+	netdev_features_t esp_features = features;
 	struct xfrm_offload *xo = xfrm_offload(skb);
 
-	if (skb_is_gso(skb))
-		return 0;
+	if (!xo)
+		return skb;
 
-	if (xo) {
-		x = skb->sp->xvec[skb->sp->len - 1];
-		if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
-			return 0;
+	if (!(features & NETIF_F_HW_ESP))
+		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+
+	x = skb->sp->xvec[skb->sp->len - 1];
+	if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
+		return skb;
+
+	if (skb_is_gso(skb)) {
+		struct net_device *dev = skb->dev;
+
+		if (unlikely(!x->xso.offload_handle || (x->xso.dev != dev))) {
+			struct sk_buff *segs;
+
+			/* Packet got rerouted, fixup features and segment it. */
+			esp_features = esp_features & ~(NETIF_F_HW_ESP
+							| NETIF_F_GSO_ESP);
 
+			segs = skb_gso_segment(skb, esp_features);
+			if (IS_ERR(segs)) {
+				XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+				kfree_skb(skb);
+				return NULL;
+			} else {
+				consume_skb(skb);
+				skb = segs;
+			}
+		} else {
+			return skb;
+		}
+	}
+
+	if (!skb->next) {
 		x->outer_mode->xmit(x, skb);
 
-		err = x->type_offload->xmit(x, skb, features);
+		err = x->type_offload->xmit(x, skb, esp_features);
 		if (err) {
 			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
-			return err;
+			kfree_skb(skb);
+			return NULL;
 		}
 
 		skb_push(skb, skb->data - skb_mac_header(skb));
+
+		return skb;
 	}
 
-	return 0;
+	skb2 = skb;
+	seq = xo->seq.low;
+
+	do {
+		struct sk_buff *nskb = skb2->next;
+
+		xo = xfrm_offload(skb2);
+		xo->flags |= XFRM_GSO_SEGMENT;
+		xo->seq.low = seq;
+		xo->seq.hi = xfrm_replay_seqhi(x, seq);
+
+		if(!(features & NETIF_F_HW_ESP))
+			xo->flags |= CRYPTO_FALLBACK;
+
+		x->outer_mode->xmit(x, skb2);
+
+		err = x->type_offload->xmit(x, skb2, esp_features);
+		if (err) {
+			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+			skb2->next = nskb;
+			kfree_skb_list(skb2);
+			return NULL;
+		}
+
+		if (!skb_is_gso(skb2))
+			seq++;
+		else
+			seq += skb_shinfo(skb2)->gso_segs;
+
+		skb_push(skb2, skb2->data - skb_mac_header(skb2));
+
+		skb2 = nskb;
+	} while (skb2);
+
+	return skb;
 }
 EXPORT_SYMBOL_GPL(validate_xmit_xfrm);
 
-- 
2.14.1

^ permalink raw reply related

* [PATCH 5/9] esp: Don't require synchronous crypto fallback on offloading anymore.
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

We support asynchronous crypto on layer 2 ESP now.
So no need to force synchronous crypto fallback on
offloading anymore.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4.c | 12 ++----------
 net/ipv6/esp6.c | 12 ++----------
 2 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 7948833dc204..6f00e43120a8 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -843,17 +843,13 @@ static int esp_init_aead(struct xfrm_state *x)
 	char aead_name[CRYPTO_MAX_ALG_NAME];
 	struct crypto_aead *aead;
 	int err;
-	u32 mask = 0;
 
 	err = -ENAMETOOLONG;
 	if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
 		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
 		goto error;
 
-	if (x->xso.offload_handle)
-		mask |= CRYPTO_ALG_ASYNC;
-
-	aead = crypto_alloc_aead(aead_name, 0, mask);
+	aead = crypto_alloc_aead(aead_name, 0, 0);
 	err = PTR_ERR(aead);
 	if (IS_ERR(aead))
 		goto error;
@@ -883,7 +879,6 @@ static int esp_init_authenc(struct xfrm_state *x)
 	char authenc_name[CRYPTO_MAX_ALG_NAME];
 	unsigned int keylen;
 	int err;
-	u32 mask = 0;
 
 	err = -EINVAL;
 	if (!x->ealg)
@@ -909,10 +904,7 @@ static int esp_init_authenc(struct xfrm_state *x)
 			goto error;
 	}
 
-	if (x->xso.offload_handle)
-		mask |= CRYPTO_ALG_ASYNC;
-
-	aead = crypto_alloc_aead(authenc_name, 0, mask);
+	aead = crypto_alloc_aead(authenc_name, 0, 0);
 	err = PTR_ERR(aead);
 	if (IS_ERR(aead))
 		goto error;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 08a424fa8009..7c888c6e53a9 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -752,17 +752,13 @@ static int esp_init_aead(struct xfrm_state *x)
 	char aead_name[CRYPTO_MAX_ALG_NAME];
 	struct crypto_aead *aead;
 	int err;
-	u32 mask = 0;
 
 	err = -ENAMETOOLONG;
 	if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
 		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
 		goto error;
 
-	if (x->xso.offload_handle)
-		mask |= CRYPTO_ALG_ASYNC;
-
-	aead = crypto_alloc_aead(aead_name, 0, mask);
+	aead = crypto_alloc_aead(aead_name, 0, 0);
 	err = PTR_ERR(aead);
 	if (IS_ERR(aead))
 		goto error;
@@ -792,7 +788,6 @@ static int esp_init_authenc(struct xfrm_state *x)
 	char authenc_name[CRYPTO_MAX_ALG_NAME];
 	unsigned int keylen;
 	int err;
-	u32 mask = 0;
 
 	err = -EINVAL;
 	if (!x->ealg)
@@ -818,10 +813,7 @@ static int esp_init_authenc(struct xfrm_state *x)
 			goto error;
 	}
 
-	if (x->xso.offload_handle)
-		mask |= CRYPTO_ALG_ASYNC;
-
-	aead = crypto_alloc_aead(authenc_name, 0, mask);
+	aead = crypto_alloc_aead(authenc_name, 0, 0);
 	err = PTR_ERR(aead);
 	if (IS_ERR(aead))
 		goto error;
-- 
2.14.1

^ permalink raw reply related

* [PATCH 3/9] xfrm: Allow to use the layer2 IPsec GSO codepath for software crypto.
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

We now have support for asynchronous crypto operations in the layer 2 TX
path. This was the missing part to allow the GSO codepath for software
crypto, so allow this codepath now.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index fc8ab9f71127..20a96181867a 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -202,8 +202,8 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 	if (!x->type_offload || x->encap)
 		return false;

-	if ((x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev)) &&
-	     !xdst->child->xfrm && x->type->get_mtu) {
+	if ((!dev || (x->xso.offload_handle && (dev == xfrm_dst_path(dst)->dev))) &&
+	     (!xdst->child->xfrm && x->type->get_mtu)) {
 		mtu = x->type->get_mtu(x, xdst->child_mtu_cached);

 		if (skb->len <= mtu)
-- 
2.14.1

^ permalink raw reply related

* [PATCH 4/9] xfrm: Allow IPsec GSO with software crypto for local sockets.
From: Steffen Klassert @ 2017-12-22 11:24 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <20171222112439.12476-1-steffen.klassert@secunet.com>

With support of async crypto operations in the GSO codepath
we have everything in place to allow GSO for local sockets.
This patch enables the GSO codepath.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2517c4f7781a..357764a2bb4e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1910,6 +1910,8 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
 		return false;

 	xdst = (struct xfrm_dst *) dst;
+	if (!x->xso.offload_handle && !xdst->child->xfrm)
+		return true;
 	if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
 	    !xdst->child->xfrm)
 		return true;
-- 
2.14.1

^ permalink raw reply related

* Re: INFO: task hung in bpf_exit_net
From: Dmitry Vyukov @ 2017-12-22 11:24 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: David Ahern, syzbot, LKML, Ingo Molnar, Peter Zijlstra,
	syzkaller-bugs, David Miller, Florian Westphal, Daniel Borkmann,
	Xin Long, jakub.kicinski, mschiffer, Vladislav Yasevich,
	Jiri Benc, netdev, Neil Horman, linux-sctp
In-Reply-To: <20171222111602.GB32765@localhost.localdomain>

On Fri, Dec 22, 2017 at 12:16 PM, Marcelo Ricardo Leitner
<marcelo.leitner@gmail.com> wrote:
> On Fri, Dec 22, 2017 at 11:58:08AM +0100, Dmitry Vyukov wrote:
>> On Tue, Dec 19, 2017 at 7:20 PM, David Ahern <dsahern@gmail.com> wrote:
>> > On 12/19/17 5:47 AM, Dmitry Vyukov wrote:
>> >> On Tue, Dec 19, 2017 at 1:36 PM, syzbot
>> >> <bot+21b498fc12cf2041655f8e1eeae0733807d794b3@syzkaller.appspotmail.com>
>> >> wrote:
>> >>> Hello,
>> >>>
>> >>> syzkaller hit the following crash on
>> >>> 7ceb97a071e80f1b5e4cd5a36de135612a836388
>> >>> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>> >>> compiler: gcc (GCC) 7.1.1 20170620
>> >>> .config is attached
>> >>> Raw console output is attached.
>> >>>
>> >>> Unfortunately, I don't have any reproducer for this bug yet.
>> >>>
>> >>>
>> >>> sctp: sctp_transport_update_pmtu: Reported pmtu 508 too low, using default
>> >>> minimum of 512
>> >>> INFO: task kworker/u4:0:5 blocked for more than 120 seconds.
>> >>>       Not tainted 4.15.0-rc2-next-20171205+ #59
>> >>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>> >>> kworker/u4:0    D15808     5      2 0x80000000
>> >>> Workqueue: netns cleanup_net
>> >>> Call Trace:
>> >>>  context_switch kernel/sched/core.c:2800 [inline]
>> >>>  __schedule+0x8eb/0x2060 kernel/sched/core.c:3376
>> >>>  schedule+0xf5/0x430 kernel/sched/core.c:3435
>> >>>  schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3493
>> >>>  __mutex_lock_common kernel/locking/mutex.c:833 [inline]
>> >>>  __mutex_lock+0xaad/0x1a80 kernel/locking/mutex.c:893
>> >>>  mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>> >>>  rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74
>> >>>  tc_action_net_exit include/net/act_api.h:125 [inline]
>> >>>  bpf_exit_net+0x1a2/0x340 net/sched/act_bpf.c:408
>> >>>  ops_exit_list.isra.6+0xae/0x150 net/core/net_namespace.c:142
>> >>>  cleanup_net+0x5c7/0xb60 net/core/net_namespace.c:484
>> >>>  process_one_work+0xbfd/0x1bc0 kernel/workqueue.c:2113
>> >>>  worker_thread+0x223/0x1990 kernel/workqueue.c:2247
>> >>>  kthread+0x37a/0x440 kernel/kthread.c:238
>> >>>  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:517
>> >>>
>> >>> Showing all locks held in the system:
>> >>> 4 locks held by kworker/u4:0/5:
>> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>> >>> __write_once_size include/linux/compiler.h:212 [inline]
>> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>> >>> atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
>> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>> >>> atomic_long_set include/asm-generic/atomic-long.h:57 [inline]
>> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>> >>> set_work_data kernel/workqueue.c:619 [inline]
>> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>> >>> set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline]
>> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>> >>> process_one_work+0xad4/0x1bc0 kernel/workqueue.c:2084
>> >>>  #1:  (net_cleanup_work){+.+.}, at: [<000000006c7c48a3>]
>> >>> process_one_work+0xb2f/0x1bc0 kernel/workqueue.c:2088
>> >>>  #2:  (net_mutex){+.+.}, at: [<00000000bf4709f3>] cleanup_net+0x247/0xb60
>> >>> net/core/net_namespace.c:450
>> >>>  #3:  (rtnl_mutex){+.+.}, at: [<0000000053390f0b>] rtnl_lock+0x17/0x20
>> >>> net/core/rtnetlink.c:74
>> >>> 3 locks held by kworker/1:0/17:
>> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>> >>> __write_once_size include/linux/compiler.h:212 [inline]
>> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>> >>> atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
>> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>> >>> atomic_long_set include/asm-generic/atomic-long.h:57 [inline]
>> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>> >>> set_work_data kernel/workqueue.c:619 [inline]
>> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>> >>> set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline]
>> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>> >>> process_one_work+0xad4/0x1bc0 kernel/workqueue.c:2084
>> >>>  #1:  ((addr_chk_work).work){+.+.}, at: [<000000006c7c48a3>]
>> >>> process_one_work+0xb2f/0x1bc0 kernel/workqueue.c:2088
>> >>>  #2:  (rtnl_mutex){+.+.}, at: [<0000000053390f0b>] rtnl_lock+0x17/0x20
>> >>> net/core/rtnetlink.c:74
>> >>> 2 locks held by khungtaskd/675:
>> >>>  #0:  (rcu_read_lock){....}, at: [<00000000587c8471>]
>> >>> check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline]
>> >>>  #0:  (rcu_read_lock){....}, at: [<00000000587c8471>] watchdog+0x1c5/0xd60
>> >>> kernel/hung_task.c:249
>> >>>  #1:  (tasklist_lock){.+.+}, at: [<000000005288685e>]
>> >>> debug_show_all_locks+0xd3/0x400 kernel/locking/lockdep.c:4554
>> >>> 1 lock held by rsyslogd/2974:
>> >>>  #0:  (&f->f_pos_lock){+.+.}, at: [<0000000011e00499>]
>> >>> __fdget_pos+0x131/0x1a0 fs/file.c:770
>> >>> 2 locks held by getty/3056:
>> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>> >>> 2 locks held by getty/3057:
>> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>> >>> 2 locks held by getty/3058:
>> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>> >>> 2 locks held by getty/3059:
>> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>> >>> 2 locks held by getty/3060:
>> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>> >>> 2 locks held by getty/3061:
>> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>> >>> 2 locks held by getty/3062:
>> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>> >>>
>> >>> =============================================
>> >>>
>> >>> NMI backtrace for cpu 0
>> >>> CPU: 0 PID: 675 Comm: khungtaskd Not tainted 4.15.0-rc2-next-20171205+ #59
>> >>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>> >>> Google 01/01/2011
>> >>> Call Trace:
>> >>>  __dump_stack lib/dump_stack.c:17 [inline]
>> >>>  dump_stack+0x194/0x257 lib/dump_stack.c:53
>> >>>  nmi_cpu_backtrace+0x1d2/0x210 lib/nmi_backtrace.c:103
>> >>>  nmi_trigger_cpumask_backtrace+0x122/0x180 lib/nmi_backtrace.c:62
>> >>>  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
>> >>>  trigger_all_cpu_backtrace include/linux/nmi.h:138 [inline]
>> >>>  check_hung_task kernel/hung_task.c:132 [inline]
>> >>>  check_hung_uninterruptible_tasks kernel/hung_task.c:190 [inline]
>> >>>  watchdog+0x90c/0xd60 kernel/hung_task.c:249
>> >>>  kthread+0x37a/0x440 kernel/kthread.c:238
>> >>>  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:517
>> >>> Sending NMI from CPU 0 to CPUs 1:
>> >>> NMI backtrace for cpu 1
>> >>> CPU: 1 PID: 13156 Comm: syz-executor7 Not tainted 4.15.0-rc2-next-20171205+
>> >>> #59
>> >>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>> >>> Google 01/01/2011
>> >>> task: 000000005209c120 task.stack: 00000000ffaab0e8
>> >>> RIP: 0010:inb arch/x86/include/asm/io.h:348 [inline]
>> >>> RIP: 0010:io_serial_in+0x6b/0x90 drivers/tty/serial/8250/8250_port.c:434
>> >>> RSP: 0018:ffff8801c0a16e70 EFLAGS: 00000002
>> >>> RAX: dffffc0000000000 RBX: 00000000000003fd RCX: 0000000000000000
>> >>> RDX: 00000000000003fd RSI: ffffc90003745000 RDI: ffffffff87cf1a40
>> >>> RBP: ffff8801c0a16e80 R08: 0000000000000005 R09: 000000000000000c
>> >>> R10: 0000000000000000 R11: ffffffff8748dd20 R12: ffffffff87cf1a00
>> >>> R13: 0000000000000020 R14: fffffbfff0f9e387 R15: fffffbfff0f9e34a
>> >>> FS:  00007f6d52e3f700(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000
>> >>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> >>> CR2: 000000c42005de80 CR3: 00000001c368a000 CR4: 00000000001406e0
>> >>> DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000
>> >>> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
>> >>> Call Trace:
>> >>>  serial_in drivers/tty/serial/8250/8250.h:111 [inline]
>> >>>  wait_for_xmitr+0x93/0x1e0 drivers/tty/serial/8250/8250_port.c:2033
>> >>>  serial8250_console_putchar+0x1f/0x60
>> >>> drivers/tty/serial/8250/8250_port.c:3170
>> >>>  uart_console_write+0xac/0xe0 drivers/tty/serial/serial_core.c:1858
>> >>>  serial8250_console_write+0x647/0xa20
>> >>> drivers/tty/serial/8250/8250_port.c:3236
>> >>>  univ8250_console_write+0x5f/0x70 drivers/tty/serial/8250/8250_core.c:590
>> >>>  call_console_drivers kernel/printk/printk.c:1574 [inline]
>> >>>  console_unlock+0x788/0xd70 kernel/printk/printk.c:2233
>> >>>  vprintk_emit+0x4ad/0x590 kernel/printk/printk.c:1757
>> >>>  vprintk_default+0x28/0x30 kernel/printk/printk.c:1796
>> >>>  vprintk_func+0x57/0xc0 kernel/printk/printk_safe.c:379
>> >>>  printk+0xaa/0xca kernel/printk/printk.c:1829
>> >>>  nla_parse+0x374/0x3d0 lib/nlattr.c:257
>> >>>  nlmsg_parse include/net/netlink.h:398 [inline]
>> >>>  nl80211_dump_wiphy_parse.isra.37.constprop.83+0x138/0x5c0
>> >>> net/wireless/nl80211.c:1920
>> >>>  nl80211_dump_interface+0x596/0x820 net/wireless/nl80211.c:2660
>> >>>  genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:480
>> >>>  netlink_dump+0x48c/0xce0 net/netlink/af_netlink.c:2186
>> >>>  __netlink_dump_start+0x4f0/0x6d0 net/netlink/af_netlink.c:2283
>> >>>  genl_family_rcv_msg+0xd27/0xfc0 net/netlink/genetlink.c:548
>> >>>  genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:624
>> >>>  netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2405
>> >>>  genl_rcv+0x28/0x40 net/netlink/genetlink.c:635
>> >>>  netlink_unicast_kernel net/netlink/af_netlink.c:1272 [inline]
>> >>>  netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1298
>> >>>  netlink_sendmsg+0xa4a/0xe70 net/netlink/af_netlink.c:1861
>> >>>  sock_sendmsg_nosec net/socket.c:636 [inline]
>> >>>  sock_sendmsg+0xca/0x110 net/socket.c:646
>> >>>  sock_write_iter+0x320/0x5e0 net/socket.c:915
>> >>>  call_write_iter include/linux/fs.h:1776 [inline]
>> >>>  new_sync_write fs/read_write.c:469 [inline]
>> >>>  __vfs_write+0x68a/0x970 fs/read_write.c:482
>> >>>  vfs_write+0x18f/0x510 fs/read_write.c:544
>> >>>  SYSC_write fs/read_write.c:589 [inline]
>> >>>  SyS_write+0xef/0x220 fs/read_write.c:581
>> >>>  entry_SYSCALL_64_fastpath+0x1f/0x96
>> >>> RIP: 0033:0x4529d9
>> >>> RSP: 002b:00007f6d52e3ec58 EFLAGS: 00000212 ORIG_RAX: 0000000000000001
>> >>> RAX: ffffffffffffffda RBX: 00007f6d52e3f700 RCX: 00000000004529d9
>> >>> RDX: 0000000000000024 RSI: 0000000020454000 RDI: 0000000000000016
>> >>> RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
>> >>> R10: 0000000000000000 R11: 0000000000000212 R12: 0000000000000000
>> >>> R13: 0000000000a6f7ff R14: 00007f6d52e3f9c0 R15: 0000000000000000
>> >>> Code: 24 d9 00 00 00 49 8d 7c 24 40 48 b8 00 00 00 00 00 fc ff df 48 89 fa
>> >>> 48 c1 ea 03 d3 e3 80 3c 02 00 75 17 41 03 5c 24 40 89 da ec <5b> 0f b6 c0 41
>> >>> 5c 5d c3 e8 38 b0 18 ff eb c2 e8 91 b0 18 ff eb
>> >>>
>> >>>
>> >>> ---
>> >>> This bug is generated by a dumb bot. It may contain errors.
>> >>> See https://goo.gl/tpsmEJ for details.
>> >>> Direct all questions to syzkaller@googlegroups.com.
>> >>> Please credit me with: Reported-by: syzbot <syzkaller@googlegroups.com>
>> >>>
>> >>> syzbot will keep track of this bug report.
>> >>> Once a fix for this bug is merged into any tree, reply to this email with:
>> >>> #syz fix: exact-commit-title
>> >>> To mark this as a duplicate of another syzbot report, please reply with:
>> >>> #syz dup: exact-subject-of-another-report
>> >>> If it's a one-off invalid bug report, please reply with:
>> >>> #syz invalid
>> >>> Note: if the crash happens again, it will cause creation of a new bug
>> >>> report.
>> >>> Note: all commands must start from beginning of the line in the email body.
>> >>>
>> >>> --
>> >>> You received this message because you are subscribed to the Google Groups
>> >>> "syzkaller-bugs" group.
>> >>> To unsubscribe from this group and stop receiving emails from it, send an
>> >>> email to syzkaller-bugs+unsubscribe@googlegroups.com.
>> >>> To view this discussion on the web visit
>> >>> https://groups.google.com/d/msgid/syzkaller-bugs/001a1143fd00a8cc790560b0b552%40google.com.
>> >>> For more options, visit https://groups.google.com/d/optout.
>> >>
>> >>
>> >> This looks like +rtnetlink issue.
>> >>
>> >
>> > Same with this one, perhaps related to / fixed by:
>> >     http://patchwork.ozlabs.org/patch/850957/
>> >
>>
>>
>>
>> Looking at the log, this one seems to be an infinite loop in SCTP code
>> with console output in it. Kernel is busy printing a gazillion of:
>
> Do you have a link for such log? I don't seem to have received the
> initial syzbot email, so I don't have its attachments. Or if you may,
> please fwd it to me/list.

https://groups.google.com/forum/#!searchin/syzkaller-bugs/%22INFO$3A$20task$20hung$20in$20bpf_exit_net%22%7Csort:date



> Thanks.
>
>>
>> [  176.491099] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
>> low, using default minimum of 512
>> ** 110 printk messages dropped **
>> [  176.503409] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
>> low, using default minimum of 512
>> ** 103 printk messages dropped **
>> ...
>> [  246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
>> low, using default minimum of 512
>> [  246.742484] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
>> low, using default minimum of 512
>> [  246.742590] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
>> low, using default minimum of 512
>>
>> Looks like a different issue.
>>

^ permalink raw reply

* Re: [PATCHv2 net] l2tp: fix missing print session offset info
From: Lorenzo Bianconi @ 2017-12-22 11:24 UTC (permalink / raw)
  To: Hangbin Liu; +Cc: netdev, James Chapman, David S. Miller
In-Reply-To: <1513913532-8090-1-git-send-email-liuhangbin@gmail.com>

> Fixes: 309795f4bec ("l2tp: Add netlink control API for L2TP")
> Reported-by: Jianlin Shi <jishi@redhat.com>
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  net/l2tp/l2tp_netlink.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
> index a1f24fb..36378b4 100644
> --- a/net/l2tp/l2tp_netlink.c
> +++ b/net/l2tp/l2tp_netlink.c
> @@ -761,6 +761,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
>
>         if ((session->ifname[0] &&
>              nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) ||
> +           (session->offset &&
> +            nla_put_u16(skb, L2TP_ATTR_OFFSET, session->offset)) ||
>             (session->cookie_len &&
>              nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len,
>                      &session->cookie[0])) ||
> --
> 2.5.5
>

Hi David,

Please hold off on this patch for a while; I will send it in a patchset
that addresses some offset-related issues.
Regards,

Lorenzo

^ permalink raw reply

* Kopie van: "Fw :pingyanwu
From: Bergjuffer.nl @ 2017-12-22 10:56 UTC (permalink / raw)
  To: netdev

Kopie van:

Dit is een aanvraag via http://www.bergjuffer.nl/ van:
tangquegao <netdev@vger.kernel.org>

【太阳城集团】： www.3330780.com/? 一路真诚相伴，注册送28，存10送18，存50送28。
电子游艺投注1元起，天天返水1.8%无上限；公司存款倾情回馈1%起，笔笔皆有送！
您想要的女神范都在这里，百家乐你准备好了吗？
+QQ：362563309 获得〖特邀〗存款开运金（最高8888元）。
闭门群动息，积雪透疏林。有客寒方觉，无声晓已深。 
蚕饥使君马，雁避将军箭。宝柱惜离弦，流黄悲赤县。 
孟轲分邪正，眸子看了眊.杳然粹而清，可以镇浮躁， 
从军古云乐，谈笑青油幕。灯明夜观棋，月暗秋城柝。 ——李正封

^ permalink raw reply

* Re: INFO: task hung in bpf_exit_net
From: Marcelo Ricardo Leitner @ 2017-12-22 11:16 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: David Ahern, syzbot, LKML, Ingo Molnar, Peter Zijlstra,
	syzkaller-bugs, David Miller, Florian Westphal, Daniel Borkmann,
	Xin Long, jakub.kicinski, mschiffer, Vladislav Yasevich,
	Jiri Benc, netdev, Neil Horman, linux-sctp
In-Reply-To: <CACT4Y+aaSX4S3KHJjqkiQhhOZAtEN_fMD1m_Ve3rz4u4x9KSWg@mail.gmail.com>

On Fri, Dec 22, 2017 at 11:58:08AM +0100, Dmitry Vyukov wrote:
> On Tue, Dec 19, 2017 at 7:20 PM, David Ahern <dsahern@gmail.com> wrote:
> > On 12/19/17 5:47 AM, Dmitry Vyukov wrote:
> >> On Tue, Dec 19, 2017 at 1:36 PM, syzbot
> >> <bot+21b498fc12cf2041655f8e1eeae0733807d794b3@syzkaller.appspotmail.com>
> >> wrote:
> >>> Hello,
> >>>
> >>> syzkaller hit the following crash on
> >>> 7ceb97a071e80f1b5e4cd5a36de135612a836388
> >>> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
> >>> compiler: gcc (GCC) 7.1.1 20170620
> >>> .config is attached
> >>> Raw console output is attached.
> >>>
> >>> Unfortunately, I don't have any reproducer for this bug yet.
> >>>
> >>>
> >>> sctp: sctp_transport_update_pmtu: Reported pmtu 508 too low, using default
> >>> minimum of 512
> >>> INFO: task kworker/u4:0:5 blocked for more than 120 seconds.
> >>>       Not tainted 4.15.0-rc2-next-20171205+ #59
> >>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> >>> kworker/u4:0    D15808     5      2 0x80000000
> >>> Workqueue: netns cleanup_net
> >>> Call Trace:
> >>>  context_switch kernel/sched/core.c:2800 [inline]
> >>>  __schedule+0x8eb/0x2060 kernel/sched/core.c:3376
> >>>  schedule+0xf5/0x430 kernel/sched/core.c:3435
> >>>  schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3493
> >>>  __mutex_lock_common kernel/locking/mutex.c:833 [inline]
> >>>  __mutex_lock+0xaad/0x1a80 kernel/locking/mutex.c:893
> >>>  mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
> >>>  rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74
> >>>  tc_action_net_exit include/net/act_api.h:125 [inline]
> >>>  bpf_exit_net+0x1a2/0x340 net/sched/act_bpf.c:408
> >>>  ops_exit_list.isra.6+0xae/0x150 net/core/net_namespace.c:142
> >>>  cleanup_net+0x5c7/0xb60 net/core/net_namespace.c:484
> >>>  process_one_work+0xbfd/0x1bc0 kernel/workqueue.c:2113
> >>>  worker_thread+0x223/0x1990 kernel/workqueue.c:2247
> >>>  kthread+0x37a/0x440 kernel/kthread.c:238
> >>>  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:517
> >>>
> >>> Showing all locks held in the system:
> >>> 4 locks held by kworker/u4:0/5:
> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
> >>> __write_once_size include/linux/compiler.h:212 [inline]
> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
> >>> atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
> >>> atomic_long_set include/asm-generic/atomic-long.h:57 [inline]
> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
> >>> set_work_data kernel/workqueue.c:619 [inline]
> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
> >>> set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline]
> >>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
> >>> process_one_work+0xad4/0x1bc0 kernel/workqueue.c:2084
> >>>  #1:  (net_cleanup_work){+.+.}, at: [<000000006c7c48a3>]
> >>> process_one_work+0xb2f/0x1bc0 kernel/workqueue.c:2088
> >>>  #2:  (net_mutex){+.+.}, at: [<00000000bf4709f3>] cleanup_net+0x247/0xb60
> >>> net/core/net_namespace.c:450
> >>>  #3:  (rtnl_mutex){+.+.}, at: [<0000000053390f0b>] rtnl_lock+0x17/0x20
> >>> net/core/rtnetlink.c:74
> >>> 3 locks held by kworker/1:0/17:
> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
> >>> __write_once_size include/linux/compiler.h:212 [inline]
> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
> >>> atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
> >>> atomic_long_set include/asm-generic/atomic-long.h:57 [inline]
> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
> >>> set_work_data kernel/workqueue.c:619 [inline]
> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
> >>> set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline]
> >>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
> >>> process_one_work+0xad4/0x1bc0 kernel/workqueue.c:2084
> >>>  #1:  ((addr_chk_work).work){+.+.}, at: [<000000006c7c48a3>]
> >>> process_one_work+0xb2f/0x1bc0 kernel/workqueue.c:2088
> >>>  #2:  (rtnl_mutex){+.+.}, at: [<0000000053390f0b>] rtnl_lock+0x17/0x20
> >>> net/core/rtnetlink.c:74
> >>> 2 locks held by khungtaskd/675:
> >>>  #0:  (rcu_read_lock){....}, at: [<00000000587c8471>]
> >>> check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline]
> >>>  #0:  (rcu_read_lock){....}, at: [<00000000587c8471>] watchdog+0x1c5/0xd60
> >>> kernel/hung_task.c:249
> >>>  #1:  (tasklist_lock){.+.+}, at: [<000000005288685e>]
> >>> debug_show_all_locks+0xd3/0x400 kernel/locking/lockdep.c:4554
> >>> 1 lock held by rsyslogd/2974:
> >>>  #0:  (&f->f_pos_lock){+.+.}, at: [<0000000011e00499>]
> >>> __fdget_pos+0x131/0x1a0 fs/file.c:770
> >>> 2 locks held by getty/3056:
> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> >>> 2 locks held by getty/3057:
> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> >>> 2 locks held by getty/3058:
> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> >>> 2 locks held by getty/3059:
> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> >>> 2 locks held by getty/3060:
> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> >>> 2 locks held by getty/3061:
> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> >>> 2 locks held by getty/3062:
> >>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
> >>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
> >>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
> >>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
> >>>
> >>> =============================================
> >>>
> >>> NMI backtrace for cpu 0
> >>> CPU: 0 PID: 675 Comm: khungtaskd Not tainted 4.15.0-rc2-next-20171205+ #59
> >>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> >>> Google 01/01/2011
> >>> Call Trace:
> >>>  __dump_stack lib/dump_stack.c:17 [inline]
> >>>  dump_stack+0x194/0x257 lib/dump_stack.c:53
> >>>  nmi_cpu_backtrace+0x1d2/0x210 lib/nmi_backtrace.c:103
> >>>  nmi_trigger_cpumask_backtrace+0x122/0x180 lib/nmi_backtrace.c:62
> >>>  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
> >>>  trigger_all_cpu_backtrace include/linux/nmi.h:138 [inline]
> >>>  check_hung_task kernel/hung_task.c:132 [inline]
> >>>  check_hung_uninterruptible_tasks kernel/hung_task.c:190 [inline]
> >>>  watchdog+0x90c/0xd60 kernel/hung_task.c:249
> >>>  kthread+0x37a/0x440 kernel/kthread.c:238
> >>>  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:517
> >>> Sending NMI from CPU 0 to CPUs 1:
> >>> NMI backtrace for cpu 1
> >>> CPU: 1 PID: 13156 Comm: syz-executor7 Not tainted 4.15.0-rc2-next-20171205+
> >>> #59
> >>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> >>> Google 01/01/2011
> >>> task: 000000005209c120 task.stack: 00000000ffaab0e8
> >>> RIP: 0010:inb arch/x86/include/asm/io.h:348 [inline]
> >>> RIP: 0010:io_serial_in+0x6b/0x90 drivers/tty/serial/8250/8250_port.c:434
> >>> RSP: 0018:ffff8801c0a16e70 EFLAGS: 00000002
> >>> RAX: dffffc0000000000 RBX: 00000000000003fd RCX: 0000000000000000
> >>> RDX: 00000000000003fd RSI: ffffc90003745000 RDI: ffffffff87cf1a40
> >>> RBP: ffff8801c0a16e80 R08: 0000000000000005 R09: 000000000000000c
> >>> R10: 0000000000000000 R11: ffffffff8748dd20 R12: ffffffff87cf1a00
> >>> R13: 0000000000000020 R14: fffffbfff0f9e387 R15: fffffbfff0f9e34a
> >>> FS:  00007f6d52e3f700(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000
> >>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >>> CR2: 000000c42005de80 CR3: 00000001c368a000 CR4: 00000000001406e0
> >>> DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000
> >>> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
> >>> Call Trace:
> >>>  serial_in drivers/tty/serial/8250/8250.h:111 [inline]
> >>>  wait_for_xmitr+0x93/0x1e0 drivers/tty/serial/8250/8250_port.c:2033
> >>>  serial8250_console_putchar+0x1f/0x60
> >>> drivers/tty/serial/8250/8250_port.c:3170
> >>>  uart_console_write+0xac/0xe0 drivers/tty/serial/serial_core.c:1858
> >>>  serial8250_console_write+0x647/0xa20
> >>> drivers/tty/serial/8250/8250_port.c:3236
> >>>  univ8250_console_write+0x5f/0x70 drivers/tty/serial/8250/8250_core.c:590
> >>>  call_console_drivers kernel/printk/printk.c:1574 [inline]
> >>>  console_unlock+0x788/0xd70 kernel/printk/printk.c:2233
> >>>  vprintk_emit+0x4ad/0x590 kernel/printk/printk.c:1757
> >>>  vprintk_default+0x28/0x30 kernel/printk/printk.c:1796
> >>>  vprintk_func+0x57/0xc0 kernel/printk/printk_safe.c:379
> >>>  printk+0xaa/0xca kernel/printk/printk.c:1829
> >>>  nla_parse+0x374/0x3d0 lib/nlattr.c:257
> >>>  nlmsg_parse include/net/netlink.h:398 [inline]
> >>>  nl80211_dump_wiphy_parse.isra.37.constprop.83+0x138/0x5c0
> >>> net/wireless/nl80211.c:1920
> >>>  nl80211_dump_interface+0x596/0x820 net/wireless/nl80211.c:2660
> >>>  genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:480
> >>>  netlink_dump+0x48c/0xce0 net/netlink/af_netlink.c:2186
> >>>  __netlink_dump_start+0x4f0/0x6d0 net/netlink/af_netlink.c:2283
> >>>  genl_family_rcv_msg+0xd27/0xfc0 net/netlink/genetlink.c:548
> >>>  genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:624
> >>>  netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2405
> >>>  genl_rcv+0x28/0x40 net/netlink/genetlink.c:635
> >>>  netlink_unicast_kernel net/netlink/af_netlink.c:1272 [inline]
> >>>  netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1298
> >>>  netlink_sendmsg+0xa4a/0xe70 net/netlink/af_netlink.c:1861
> >>>  sock_sendmsg_nosec net/socket.c:636 [inline]
> >>>  sock_sendmsg+0xca/0x110 net/socket.c:646
> >>>  sock_write_iter+0x320/0x5e0 net/socket.c:915
> >>>  call_write_iter include/linux/fs.h:1776 [inline]
> >>>  new_sync_write fs/read_write.c:469 [inline]
> >>>  __vfs_write+0x68a/0x970 fs/read_write.c:482
> >>>  vfs_write+0x18f/0x510 fs/read_write.c:544
> >>>  SYSC_write fs/read_write.c:589 [inline]
> >>>  SyS_write+0xef/0x220 fs/read_write.c:581
> >>>  entry_SYSCALL_64_fastpath+0x1f/0x96
> >>> RIP: 0033:0x4529d9
> >>> RSP: 002b:00007f6d52e3ec58 EFLAGS: 00000212 ORIG_RAX: 0000000000000001
> >>> RAX: ffffffffffffffda RBX: 00007f6d52e3f700 RCX: 00000000004529d9
> >>> RDX: 0000000000000024 RSI: 0000000020454000 RDI: 0000000000000016
> >>> RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
> >>> R10: 0000000000000000 R11: 0000000000000212 R12: 0000000000000000
> >>> R13: 0000000000a6f7ff R14: 00007f6d52e3f9c0 R15: 0000000000000000
> >>> Code: 24 d9 00 00 00 49 8d 7c 24 40 48 b8 00 00 00 00 00 fc ff df 48 89 fa
> >>> 48 c1 ea 03 d3 e3 80 3c 02 00 75 17 41 03 5c 24 40 89 da ec <5b> 0f b6 c0 41
> >>> 5c 5d c3 e8 38 b0 18 ff eb c2 e8 91 b0 18 ff eb
> >>>
> >>>
> >>> ---
> >>> This bug is generated by a dumb bot. It may contain errors.
> >>> See https://goo.gl/tpsmEJ for details.
> >>> Direct all questions to syzkaller@googlegroups.com.
> >>> Please credit me with: Reported-by: syzbot <syzkaller@googlegroups.com>
> >>>
> >>> syzbot will keep track of this bug report.
> >>> Once a fix for this bug is merged into any tree, reply to this email with:
> >>> #syz fix: exact-commit-title
> >>> To mark this as a duplicate of another syzbot report, please reply with:
> >>> #syz dup: exact-subject-of-another-report
> >>> If it's a one-off invalid bug report, please reply with:
> >>> #syz invalid
> >>> Note: if the crash happens again, it will cause creation of a new bug
> >>> report.
> >>> Note: all commands must start from beginning of the line in the email body.
> >>>
> >>> --
> >>> You received this message because you are subscribed to the Google Groups
> >>> "syzkaller-bugs" group.
> >>> To unsubscribe from this group and stop receiving emails from it, send an
> >>> email to syzkaller-bugs+unsubscribe@googlegroups.com.
> >>> To view this discussion on the web visit
> >>> https://groups.google.com/d/msgid/syzkaller-bugs/001a1143fd00a8cc790560b0b552%40google.com.
> >>> For more options, visit https://groups.google.com/d/optout.
> >>
> >>
> >> This looks like +rtnetlink issue.
> >>
> >
> > Same with this one, perhaps related to / fixed by:
> >     http://patchwork.ozlabs.org/patch/850957/
> >
> 
> 
> 
> Looking at the log, this one seems to be an infinite loop in SCTP code
> with console output in it. Kernel is busy printing a gazillion of:

Do you have a link to such a log? I don't seem to have received the
initial syzbot email, so I don't have its attachments. Or, if you can,
please forward it to me/the list.

Thanks.

> 
> [  176.491099] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
> low, using default minimum of 512
> ** 110 printk messages dropped **
> [  176.503409] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
> low, using default minimum of 512
> ** 103 printk messages dropped **
> ...
> [  246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
> low, using default minimum of 512
> [  246.742484] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
> low, using default minimum of 512
> [  246.742590] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
> low, using default minimum of 512
> 
> Looks like a different issue.
> 

^ permalink raw reply

* Driver i40e issues changing NIC queue runtime under high-load
From: Jesper Dangaard Brouer @ 2017-12-22 11:04 UTC (permalink / raw)
  To: Jeff Kirsher, Björn Töpel, netdev@vger.kernel.org,
	intel-wired-lan
  Cc: brouer, Karlsson, Magnus

Hi Intel,

I discovered an issue with the driver i40e, when changing the number
of NIC queues, while running a high-load packet generator, and while
having an XDP program loaded.

Tested on clean latest net-next kernel at commit 0a80f0c26bf5
 - kernel 4.15.0-rc3-net-next-01003-g0a80f0c26bf5

The NIC goes into a fault state after reporting "PF reset failed, -15"
in dmesg. See below:

 i40e 0000:04:00.0: PF reset failed, -15
 i40e 0000:04:00.0: User requested queue count/HW max RSS count:  2/64
 i40e 0000:04:00.0: ignoring delete macvlan error on PF, err I40E_ERR_QUEUE_EMPTY, aq_err OK
 i40e 0000:04:00.0: PF reset failed, -15

The net_device is in a strange state, with ifconfig showing all zero
counters.  The driver ethtool stats show packets, but nothing reaches
the kernel. Loading a new xdp prog also shows zero counters (thus NIC
HW must drop these packets).

The workaround is to wait for a long while, and then change the number
of queues again.
 * If it didn't work you see:
     "i40e 0000:04:00.0: PF reset failed, -15"
 * If it worked you see:
     "i40e 0000:04:00.0: User requested queue count/HW max RSS count:  6/64"

Could some Intel people take a closer look, and explain why the HW goes
into this state? (and explain why it recovers...)

Reproducer setup info:
----------------------
Running xdp program: samples/bpf/xdp1

Tested on latest net-next kernel at commit 0a80f0c26bf5, clean kernel
without any of my patches.
 - kernel 4.15.0-rc3-net-next-01003-g0a80f0c26bf5

Packet generator script: pktgen_sample04_many_flows.sh
 with 12 threads (-t12) generating around 12 Mpps.

Command used for changing NIC queues (--set-channels|-L):

 ethtool -L i40e1 combined 2

The NIC ethtool stats report RX packets, but nothing reaches the kernel:

 Show adapter(s) (i40e1) statistics (ONLY that changed!)
 Ethtool(i40e1   ) stat:    809566977 (    809,566,977) <= port.rx_bytes /sec
 Ethtool(i40e1   ) stat:     12649480 (     12,649,480) <= port.rx_size_64 /sec
 Ethtool(i40e1   ) stat:     12649479 (     12,649,479) <= port.rx_unicast /sec

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Could some people take a closer look, wh

^ permalink raw reply

* Re: INFO: task hung in bpf_exit_net
From: Dmitry Vyukov @ 2017-12-22 10:58 UTC (permalink / raw)
  To: David Ahern
  Cc: syzbot, LKML, Ingo Molnar, Peter Zijlstra, syzkaller-bugs,
	David Miller, Florian Westphal, Daniel Borkmann, Xin Long,
	jakub.kicinski, mschiffer, Vladislav Yasevich, Jiri Benc, netdev,
	Neil Horman, linux-sctp
In-Reply-To: <f519a1f0-8166-027e-e063-13aa718ce4e4@gmail.com>

On Tue, Dec 19, 2017 at 7:20 PM, David Ahern <dsahern@gmail.com> wrote:
> On 12/19/17 5:47 AM, Dmitry Vyukov wrote:
>> On Tue, Dec 19, 2017 at 1:36 PM, syzbot
>> <bot+21b498fc12cf2041655f8e1eeae0733807d794b3@syzkaller.appspotmail.com>
>> wrote:
>>> Hello,
>>>
>>> syzkaller hit the following crash on
>>> 7ceb97a071e80f1b5e4cd5a36de135612a836388
>>> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>>> compiler: gcc (GCC) 7.1.1 20170620
>>> .config is attached
>>> Raw console output is attached.
>>>
>>> Unfortunately, I don't have any reproducer for this bug yet.
>>>
>>>
>>> sctp: sctp_transport_update_pmtu: Reported pmtu 508 too low, using default
>>> minimum of 512
>>> INFO: task kworker/u4:0:5 blocked for more than 120 seconds.
>>>       Not tainted 4.15.0-rc2-next-20171205+ #59
>>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>>> kworker/u4:0    D15808     5      2 0x80000000
>>> Workqueue: netns cleanup_net
>>> Call Trace:
>>>  context_switch kernel/sched/core.c:2800 [inline]
>>>  __schedule+0x8eb/0x2060 kernel/sched/core.c:3376
>>>  schedule+0xf5/0x430 kernel/sched/core.c:3435
>>>  schedule_preempt_disabled+0x10/0x20 kernel/sched/core.c:3493
>>>  __mutex_lock_common kernel/locking/mutex.c:833 [inline]
>>>  __mutex_lock+0xaad/0x1a80 kernel/locking/mutex.c:893
>>>  mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908
>>>  rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74
>>>  tc_action_net_exit include/net/act_api.h:125 [inline]
>>>  bpf_exit_net+0x1a2/0x340 net/sched/act_bpf.c:408
>>>  ops_exit_list.isra.6+0xae/0x150 net/core/net_namespace.c:142
>>>  cleanup_net+0x5c7/0xb60 net/core/net_namespace.c:484
>>>  process_one_work+0xbfd/0x1bc0 kernel/workqueue.c:2113
>>>  worker_thread+0x223/0x1990 kernel/workqueue.c:2247
>>>  kthread+0x37a/0x440 kernel/kthread.c:238
>>>  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:517
>>>
>>> Showing all locks held in the system:
>>> 4 locks held by kworker/u4:0/5:
>>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>>> __write_once_size include/linux/compiler.h:212 [inline]
>>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>>> atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
>>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>>> atomic_long_set include/asm-generic/atomic-long.h:57 [inline]
>>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>>> set_work_data kernel/workqueue.c:619 [inline]
>>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>>> set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline]
>>>  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000b9f061a2>]
>>> process_one_work+0xad4/0x1bc0 kernel/workqueue.c:2084
>>>  #1:  (net_cleanup_work){+.+.}, at: [<000000006c7c48a3>]
>>> process_one_work+0xb2f/0x1bc0 kernel/workqueue.c:2088
>>>  #2:  (net_mutex){+.+.}, at: [<00000000bf4709f3>] cleanup_net+0x247/0xb60
>>> net/core/net_namespace.c:450
>>>  #3:  (rtnl_mutex){+.+.}, at: [<0000000053390f0b>] rtnl_lock+0x17/0x20
>>> net/core/rtnetlink.c:74
>>> 3 locks held by kworker/1:0/17:
>>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>>> __write_once_size include/linux/compiler.h:212 [inline]
>>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>>> atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
>>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>>> atomic_long_set include/asm-generic/atomic-long.h:57 [inline]
>>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>>> set_work_data kernel/workqueue.c:619 [inline]
>>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>>> set_work_pool_and_clear_pending kernel/workqueue.c:646 [inline]
>>>  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000b9f061a2>]
>>> process_one_work+0xad4/0x1bc0 kernel/workqueue.c:2084
>>>  #1:  ((addr_chk_work).work){+.+.}, at: [<000000006c7c48a3>]
>>> process_one_work+0xb2f/0x1bc0 kernel/workqueue.c:2088
>>>  #2:  (rtnl_mutex){+.+.}, at: [<0000000053390f0b>] rtnl_lock+0x17/0x20
>>> net/core/rtnetlink.c:74
>>> 2 locks held by khungtaskd/675:
>>>  #0:  (rcu_read_lock){....}, at: [<00000000587c8471>]
>>> check_hung_uninterruptible_tasks kernel/hung_task.c:175 [inline]
>>>  #0:  (rcu_read_lock){....}, at: [<00000000587c8471>] watchdog+0x1c5/0xd60
>>> kernel/hung_task.c:249
>>>  #1:  (tasklist_lock){.+.+}, at: [<000000005288685e>]
>>> debug_show_all_locks+0xd3/0x400 kernel/locking/lockdep.c:4554
>>> 1 lock held by rsyslogd/2974:
>>>  #0:  (&f->f_pos_lock){+.+.}, at: [<0000000011e00499>]
>>> __fdget_pos+0x131/0x1a0 fs/file.c:770
>>> 2 locks held by getty/3056:
>>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>>> 2 locks held by getty/3057:
>>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>>> 2 locks held by getty/3058:
>>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>>> 2 locks held by getty/3059:
>>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>>> 2 locks held by getty/3060:
>>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>>> 2 locks held by getty/3061:
>>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>>> 2 locks held by getty/3062:
>>>  #0:  (&tty->ldisc_sem){++++}, at: [<00000000b9fd70a9>]
>>> ldsem_down_read+0x37/0x40 drivers/tty/tty_ldsem.c:365
>>>  #1:  (&ldata->atomic_read_lock){+.+.}, at: [<00000000abb3bd08>]
>>> n_tty_read+0x2f2/0x1a10 drivers/tty/n_tty.c:2131
>>>
>>> =============================================
>>>
>>> NMI backtrace for cpu 0
>>> CPU: 0 PID: 675 Comm: khungtaskd Not tainted 4.15.0-rc2-next-20171205+ #59
>>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>>> Google 01/01/2011
>>> Call Trace:
>>>  __dump_stack lib/dump_stack.c:17 [inline]
>>>  dump_stack+0x194/0x257 lib/dump_stack.c:53
>>>  nmi_cpu_backtrace+0x1d2/0x210 lib/nmi_backtrace.c:103
>>>  nmi_trigger_cpumask_backtrace+0x122/0x180 lib/nmi_backtrace.c:62
>>>  arch_trigger_cpumask_backtrace+0x14/0x20 arch/x86/kernel/apic/hw_nmi.c:38
>>>  trigger_all_cpu_backtrace include/linux/nmi.h:138 [inline]
>>>  check_hung_task kernel/hung_task.c:132 [inline]
>>>  check_hung_uninterruptible_tasks kernel/hung_task.c:190 [inline]
>>>  watchdog+0x90c/0xd60 kernel/hung_task.c:249
>>>  kthread+0x37a/0x440 kernel/kthread.c:238
>>>  ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:517
>>> Sending NMI from CPU 0 to CPUs 1:
>>> NMI backtrace for cpu 1
>>> CPU: 1 PID: 13156 Comm: syz-executor7 Not tainted 4.15.0-rc2-next-20171205+
>>> #59
>>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>>> Google 01/01/2011
>>> task: 000000005209c120 task.stack: 00000000ffaab0e8
>>> RIP: 0010:inb arch/x86/include/asm/io.h:348 [inline]
>>> RIP: 0010:io_serial_in+0x6b/0x90 drivers/tty/serial/8250/8250_port.c:434
>>> RSP: 0018:ffff8801c0a16e70 EFLAGS: 00000002
>>> RAX: dffffc0000000000 RBX: 00000000000003fd RCX: 0000000000000000
>>> RDX: 00000000000003fd RSI: ffffc90003745000 RDI: ffffffff87cf1a40
>>> RBP: ffff8801c0a16e80 R08: 0000000000000005 R09: 000000000000000c
>>> R10: 0000000000000000 R11: ffffffff8748dd20 R12: ffffffff87cf1a00
>>> R13: 0000000000000020 R14: fffffbfff0f9e387 R15: fffffbfff0f9e34a
>>> FS:  00007f6d52e3f700(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000
>>> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> CR2: 000000c42005de80 CR3: 00000001c368a000 CR4: 00000000001406e0
>>> DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000
>>> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
>>> Call Trace:
>>>  serial_in drivers/tty/serial/8250/8250.h:111 [inline]
>>>  wait_for_xmitr+0x93/0x1e0 drivers/tty/serial/8250/8250_port.c:2033
>>>  serial8250_console_putchar+0x1f/0x60
>>> drivers/tty/serial/8250/8250_port.c:3170
>>>  uart_console_write+0xac/0xe0 drivers/tty/serial/serial_core.c:1858
>>>  serial8250_console_write+0x647/0xa20
>>> drivers/tty/serial/8250/8250_port.c:3236
>>>  univ8250_console_write+0x5f/0x70 drivers/tty/serial/8250/8250_core.c:590
>>>  call_console_drivers kernel/printk/printk.c:1574 [inline]
>>>  console_unlock+0x788/0xd70 kernel/printk/printk.c:2233
>>>  vprintk_emit+0x4ad/0x590 kernel/printk/printk.c:1757
>>>  vprintk_default+0x28/0x30 kernel/printk/printk.c:1796
>>>  vprintk_func+0x57/0xc0 kernel/printk/printk_safe.c:379
>>>  printk+0xaa/0xca kernel/printk/printk.c:1829
>>>  nla_parse+0x374/0x3d0 lib/nlattr.c:257
>>>  nlmsg_parse include/net/netlink.h:398 [inline]
>>>  nl80211_dump_wiphy_parse.isra.37.constprop.83+0x138/0x5c0
>>> net/wireless/nl80211.c:1920
>>>  nl80211_dump_interface+0x596/0x820 net/wireless/nl80211.c:2660
>>>  genl_lock_dumpit+0x68/0x90 net/netlink/genetlink.c:480
>>>  netlink_dump+0x48c/0xce0 net/netlink/af_netlink.c:2186
>>>  __netlink_dump_start+0x4f0/0x6d0 net/netlink/af_netlink.c:2283
>>>  genl_family_rcv_msg+0xd27/0xfc0 net/netlink/genetlink.c:548
>>>  genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:624
>>>  netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2405
>>>  genl_rcv+0x28/0x40 net/netlink/genetlink.c:635
>>>  netlink_unicast_kernel net/netlink/af_netlink.c:1272 [inline]
>>>  netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1298
>>>  netlink_sendmsg+0xa4a/0xe70 net/netlink/af_netlink.c:1861
>>>  sock_sendmsg_nosec net/socket.c:636 [inline]
>>>  sock_sendmsg+0xca/0x110 net/socket.c:646
>>>  sock_write_iter+0x320/0x5e0 net/socket.c:915
>>>  call_write_iter include/linux/fs.h:1776 [inline]
>>>  new_sync_write fs/read_write.c:469 [inline]
>>>  __vfs_write+0x68a/0x970 fs/read_write.c:482
>>>  vfs_write+0x18f/0x510 fs/read_write.c:544
>>>  SYSC_write fs/read_write.c:589 [inline]
>>>  SyS_write+0xef/0x220 fs/read_write.c:581
>>>  entry_SYSCALL_64_fastpath+0x1f/0x96
>>> RIP: 0033:0x4529d9
>>> RSP: 002b:00007f6d52e3ec58 EFLAGS: 00000212 ORIG_RAX: 0000000000000001
>>> RAX: ffffffffffffffda RBX: 00007f6d52e3f700 RCX: 00000000004529d9
>>> RDX: 0000000000000024 RSI: 0000000020454000 RDI: 0000000000000016
>>> RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
>>> R10: 0000000000000000 R11: 0000000000000212 R12: 0000000000000000
>>> R13: 0000000000a6f7ff R14: 00007f6d52e3f9c0 R15: 0000000000000000
>>> Code: 24 d9 00 00 00 49 8d 7c 24 40 48 b8 00 00 00 00 00 fc ff df 48 89 fa
>>> 48 c1 ea 03 d3 e3 80 3c 02 00 75 17 41 03 5c 24 40 89 da ec <5b> 0f b6 c0 41
>>> 5c 5d c3 e8 38 b0 18 ff eb c2 e8 91 b0 18 ff eb
>>>
>>>
>>> ---
>>> This bug is generated by a dumb bot. It may contain errors.
>>> See https://goo.gl/tpsmEJ for details.
>>> Direct all questions to syzkaller@googlegroups.com.
>>> Please credit me with: Reported-by: syzbot <syzkaller@googlegroups.com>
>>>
>>> syzbot will keep track of this bug report.
>>> Once a fix for this bug is merged into any tree, reply to this email with:
>>> #syz fix: exact-commit-title
>>> To mark this as a duplicate of another syzbot report, please reply with:
>>> #syz dup: exact-subject-of-another-report
>>> If it's a one-off invalid bug report, please reply with:
>>> #syz invalid
>>> Note: if the crash happens again, it will cause creation of a new bug
>>> report.
>>> Note: all commands must start from beginning of the line in the email body.
>>>
>>> --
>>> You received this message because you are subscribed to the Google Groups
>>> "syzkaller-bugs" group.
>>> To unsubscribe from this group and stop receiving emails from it, send an
>>> email to syzkaller-bugs+unsubscribe@googlegroups.com.
>>> To view this discussion on the web visit
>>> https://groups.google.com/d/msgid/syzkaller-bugs/001a1143fd00a8cc790560b0b552%40google.com.
>>> For more options, visit https://groups.google.com/d/optout.
>>
>>
>> This looks like +rtnetlink issue.
>>
>
> Same with this one, perhaps related to / fixed by:
>     http://patchwork.ozlabs.org/patch/850957/
>



Looking at the log, this one seems to be an infinite loop in SCTP code
with console output in it. The kernel is busy printing a gazillion of:

[  176.491099] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
low, using default minimum of 512
** 110 printk messages dropped **
[  176.503409] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
low, using default minimum of 512
** 103 printk messages dropped **
...
[  246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
low, using default minimum of 512
[  246.742484] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
low, using default minimum of 512
[  246.742590] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
low, using default minimum of 512

Looks like a different issue.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox