Netdev List
 help / color / mirror / Atom feed
* RE: [PATCH v2] net/socket: fix GCC8+ Wpacked-not-aligned warnings
From: David Laight @ 2019-07-30 14:59 UTC (permalink / raw)
  To: 'Qian Cai', davem@davemloft.net
  Cc: vyasevich@gmail.com, nhorman@tuxdriver.com,
	marcelo.leitner@gmail.com, linux-sctp@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <1564497488-3030-1-git-send-email-cai@lca.pw>

From: Qian Cai
> Sent: 30 July 2019 15:38
...
> Use an implicit alignment for "struct __kernel_sockaddr_storage" so it
> can keep the same alignments as a member in both packed and un-packed
> structures without breaking UAPI.
> 
> Suggested-by: David Laight <David.Laight@ACULAB.COM>
...

Although I suggested it needs a bit of tidy up.

Add a comment maybe:

/* The definition uses anonymous union and struct in order to
 * control the default alignment. */

>  struct __kernel_sockaddr_storage {
> -	__kernel_sa_family_t	ss_family;		/* address family */
> -	/* Following field(s) are implementation specific */
> -	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
> +	union {
> +		void *__align; /* implementation specific desired alignment */

Move the 'void *' below the struct so the first member is the 'public one.

> +		struct {
> +			__kernel_sa_family_t	ss_family; /* address family */
> +			/* Following field(s) are implementation specific */
> +			char __data[_K_SS_MAXSIZE - sizeof(unsigned short)];
>  				/* space to achieve desired size, */
>  				/* _SS_MAXSIZE value minus size of ss_family */
> -} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
> +			};
> +		};
> +};
> 
>  #endif /* _UAPI_LINUX_SOCKET_H */
> --
> 1.8.3.1

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply

* [PATCH net-next] rxrpc: Fix -Wframe-larger-than= warnings from on-stack crypto
From: David Howells @ 2019-07-30 14:56 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, linux-afs, linux-kernel, clang-built-linux, arnd,
	keescook

rxkad sometimes triggers a warning about oversized stack frames when
building with clang for a 32-bit architecture:

net/rxrpc/rxkad.c:243:12: error: stack frame size of 1088 bytes in function 'rxkad_secure_packet' [-Werror,-Wframe-larger-than=]
net/rxrpc/rxkad.c:501:12: error: stack frame size of 1088 bytes in function 'rxkad_verify_packet' [-Werror,-Wframe-larger-than=]

The problem is the combination of SYNC_SKCIPHER_REQUEST_ON_STACK() in
rxkad_verify_packet()/rxkad_secure_packet() with the relatively large
scatterlist in rxkad_verify_packet_1()/rxkad_secure_packet_encrypt().

The warning does not show up when using gcc, which does not inline the
functions as aggressively, but the problem is still the same.

Allocate the cipher buffers from the slab instead, caching the allocated
packet crypto request memory used for DATA packet crypto in the rxrpc_call
struct.

Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
cc: Herbert Xu <herbert@gondor.apana.org.au>
---

 net/rxrpc/ar-internal.h |    4 ++
 net/rxrpc/call_object.c |    4 +-
 net/rxrpc/insecure.c    |    5 ++
 net/rxrpc/rxkad.c       |  103 ++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 96 insertions(+), 20 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 80335b4ee4fd..bea2a02850af 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -226,6 +226,9 @@ struct rxrpc_security {
 	int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
 			     unsigned int, unsigned int, rxrpc_seq_t, u16);
 
+	/* Free crypto request on a call */
+	void (*free_call_crypto)(struct rxrpc_call *);
+
 	/* Locate the data in a received packet that has been verified. */
 	void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
 			    unsigned int *, unsigned int *);
@@ -557,6 +560,7 @@ struct rxrpc_call {
 	unsigned long		expect_term_by;	/* When we expect call termination by */
 	u32			next_rx_timo;	/* Timeout for next Rx packet (jif) */
 	u32			next_req_timo;	/* Timeout for next Rx request packet (jif) */
+	struct skcipher_request	*cipher_req;	/* Packet cipher request buffer */
 	struct timer_list	timer;		/* Combined event timer */
 	struct work_struct	processor;	/* Event processor */
 	rxrpc_notify_rx_t	notify_rx;	/* kernel service Rx notification function */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 217b12be9e08..60cbc81dc461 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -476,8 +476,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 
 	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
-	if (conn)
+	if (conn) {
 		rxrpc_disconnect_call(call);
+		conn->security->free_call_crypto(call);
+	}
 
 	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
 		rxrpc_free_skb(call->rxtx_buffer[i],
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index a29d26c273b5..f6c59f5fae9d 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -33,6 +33,10 @@ static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	return 0;
 }
 
+static void none_free_call_crypto(struct rxrpc_call *call)
+{
+}
+
 static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
 			     unsigned int *_offset, unsigned int *_len)
 {
@@ -83,6 +87,7 @@ const struct rxrpc_security rxrpc_no_security = {
 	.exit				= none_exit,
 	.init_connection_security	= none_init_connection_security,
 	.prime_packet_security		= none_prime_packet_security,
+	.free_call_crypto		= none_free_call_crypto,
 	.secure_packet			= none_secure_packet,
 	.verify_packet			= none_verify_packet,
 	.locate_data			= none_locate_data,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index ae8cd8926456..dbb109da1835 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -43,6 +43,7 @@ struct rxkad_level2_hdr {
  * packets
  */
 static struct crypto_sync_skcipher *rxkad_ci;
+static struct skcipher_request *rxkad_ci_req;
 static DEFINE_MUTEX(rxkad_ci_mutex);
 
 /*
@@ -99,8 +100,8 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
  */
 static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 {
+	struct skcipher_request *req;
 	struct rxrpc_key_token *token;
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
 	struct scatterlist sg;
 	struct rxrpc_crypt iv;
 	__be32 *tmpbuf;
@@ -115,6 +116,12 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 	if (!tmpbuf)
 		return -ENOMEM;
 
+	req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS);
+	if (!req) {
+		kfree(tmpbuf);
+		return -ENOMEM;
+	}
+
 	token = conn->params.key->payload.data[0];
 	memcpy(&iv, token->kad->session_key, sizeof(iv));
 
@@ -128,7 +135,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
 	crypto_skcipher_encrypt(req);
-	skcipher_request_zero(req);
+	skcipher_request_free(req);
 
 	memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv));
 	kfree(tmpbuf);
@@ -136,6 +143,35 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
 	return 0;
 }
 
+/*
+ * Allocate and prepare the crypto request on a call.  For any particular call,
+ * this is called serially for the packets, so no lock should be necessary.
+ */
+static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call)
+{
+	struct crypto_skcipher *tfm = &call->conn->cipher->base;
+	struct skcipher_request	*cipher_req = call->cipher_req;
+
+	if (!cipher_req) {
+		cipher_req = skcipher_request_alloc(tfm, GFP_NOFS);
+		if (!cipher_req)
+			return NULL;
+		call->cipher_req = cipher_req;
+	}
+
+	return cipher_req;
+}
+
+/*
+ * Clean up the crypto on a call.
+ */
+static void rxkad_free_call_crypto(struct rxrpc_call *call)
+{
+	if (call->cipher_req)
+		skcipher_request_free(call->cipher_req);
+	call->cipher_req = NULL;
+}
+
 /*
  * partially encrypt a packet (level 1 security)
  */
@@ -246,7 +282,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
 			       void *sechdr)
 {
 	struct rxrpc_skb_priv *sp;
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+	struct skcipher_request	*req;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg;
 	u32 x, y;
@@ -265,6 +301,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
 	if (ret < 0)
 		return ret;
 
+	req = rxkad_get_call_crypto(call);
+	if (!req)
+		return -ENOMEM;
+
 	/* continue encrypting from where we left off */
 	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
@@ -502,7 +542,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
 			       unsigned int offset, unsigned int len,
 			       rxrpc_seq_t seq, u16 expected_cksum)
 {
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+	struct skcipher_request	*req;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg;
 	bool aborted;
@@ -515,6 +555,10 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	if (!call->conn->cipher)
 		return 0;
 
+	req = rxkad_get_call_crypto(call);
+	if (!req)
+		return -ENOMEM;
+
 	/* continue encrypting from where we left off */
 	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
@@ -747,14 +791,18 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response)
 /*
  * encrypt the response packet
  */
-static void rxkad_encrypt_response(struct rxrpc_connection *conn,
-				   struct rxkad_response *resp,
-				   const struct rxkad_key *s2)
+static int rxkad_encrypt_response(struct rxrpc_connection *conn,
+				  struct rxkad_response *resp,
+				  const struct rxkad_key *s2)
 {
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
+	struct skcipher_request *req;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[1];
 
+	req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS);
+	if (!req)
+		return -ENOMEM;
+
 	/* continue encrypting from where we left off */
 	memcpy(&iv, s2->session_key, sizeof(iv));
 
@@ -764,7 +812,8 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
 	crypto_skcipher_encrypt(req);
-	skcipher_request_zero(req);
+	skcipher_request_free(req);
+	return 0;
 }
 
 /*
@@ -839,8 +888,9 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
 
 	/* calculate the response checksum and then do the encryption */
 	rxkad_calc_response_checksum(resp);
-	rxkad_encrypt_response(conn, resp, token->kad);
-	ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
+	ret = rxkad_encrypt_response(conn, resp, token->kad);
+	if (ret == 0)
+		ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
 	kfree(resp);
 	return ret;
 
@@ -1017,18 +1067,16 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
 				   struct rxkad_response *resp,
 				   const struct rxrpc_crypt *session_key)
 {
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
+	struct skcipher_request *req = rxkad_ci_req;
 	struct scatterlist sg[1];
 	struct rxrpc_crypt iv;
 
 	_enter(",,%08x%08x",
 	       ntohl(session_key->n[0]), ntohl(session_key->n[1]));
 
-	ASSERT(rxkad_ci != NULL);
-
 	mutex_lock(&rxkad_ci_mutex);
 	if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x,
-				   sizeof(*session_key)) < 0)
+					sizeof(*session_key)) < 0)
 		BUG();
 
 	memcpy(&iv, session_key, sizeof(iv));
@@ -1222,10 +1270,26 @@ static void rxkad_clear(struct rxrpc_connection *conn)
  */
 static int rxkad_init(void)
 {
+	struct crypto_sync_skcipher *tfm;
+	struct skcipher_request *req;
+
 	/* pin the cipher we need so that the crypto layer doesn't invoke
 	 * keventd to go get it */
-	rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
-	return PTR_ERR_OR_ZERO(rxkad_ci);
+	tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	req = skcipher_request_alloc(&tfm->base, GFP_KERNEL);
+	if (!req)
+		goto nomem_tfm;
+
+	rxkad_ci_req = req;
+	rxkad_ci = tfm;
+	return 0;
+
+nomem_tfm:
+	crypto_free_sync_skcipher(tfm);
+	return -ENOMEM;
 }
 
 /*
@@ -1233,8 +1297,8 @@ static int rxkad_init(void)
  */
 static void rxkad_exit(void)
 {
-	if (rxkad_ci)
-		crypto_free_sync_skcipher(rxkad_ci);
+	crypto_free_sync_skcipher(rxkad_ci);
+	skcipher_request_free(rxkad_ci_req);
 }
 
 /*
@@ -1249,6 +1313,7 @@ const struct rxrpc_security rxkad = {
 	.prime_packet_security		= rxkad_prime_packet_security,
 	.secure_packet			= rxkad_secure_packet,
 	.verify_packet			= rxkad_verify_packet,
+	.free_call_crypto		= rxkad_free_call_crypto,
 	.locate_data			= rxkad_locate_data,
 	.issue_challenge		= rxkad_issue_challenge,
 	.respond_to_challenge		= rxkad_respond_to_challenge,


^ permalink raw reply related

* [PATCH net 2/2] rxrpc: Fix the lack of notification when sendmsg() fails on a DATA packet
From: David Howells @ 2019-07-30 14:50 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, linux-afs, linux-kernel
In-Reply-To: <156449821120.9558.2821927090314866621.stgit@warthog.procyon.org.uk>

Fix the fact that a notification isn't sent to the recvmsg side to indicate
a call failed when sendmsg() fails to transmit a DATA packet with the error
ENETUNREACH, EHOSTUNREACH or ECONNREFUSED.

Without this notification, the afs client just sits there waiting for the
call to complete in some manner (which it's not now going to do), which
also pins the rxrpc call in place.

This can be seen if the client has a scope-level IPv6 address, but not a
global-level IPv6 address, and we try and transmit an operation to a
server's IPv6 address.

Looking in /proc/net/rxrpc/calls shows completed calls just sat there with
an abort code of RX_USER_ABORT and an error code of -ENETUNREACH.

Fixes: c54e43d752c7 ("rxrpc: Fix missing start of call timeout")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
---

 net/rxrpc/sendmsg.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 5d3f33ce6d41..bae14438f869 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -226,6 +226,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 			rxrpc_set_call_completion(call,
 						  RXRPC_CALL_LOCAL_ERROR,
 						  0, ret);
+			rxrpc_notify_socket(call);
 			goto out;
 		}
 		_debug("need instant resend %d", ret);


^ permalink raw reply related

* [PATCH net 1/2] rxrpc: Fix potential deadlock
From: David Howells @ 2019-07-30 14:50 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, linux-afs, linux-kernel
In-Reply-To: <156449821120.9558.2821927090314866621.stgit@warthog.procyon.org.uk>

There is a potential deadlock in rxrpc_peer_keepalive_dispatch() whereby
rxrpc_put_peer() is called with the peer_hash_lock held, but if it reduces
the peer's refcount to 0, rxrpc_put_peer() calls __rxrpc_put_peer() - which
the tries to take the already held lock.

Fix this by providing a version of rxrpc_put_peer() that can be called in
situations where the lock is already held.

The bug may produce the following lockdep report:

============================================
WARNING: possible recursive locking detected
5.2.0-next-20190718 #41 Not tainted
--------------------------------------------
kworker/0:3/21678 is trying to acquire lock:
00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: spin_lock_bh
/./include/linux/spinlock.h:343 [inline]
00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at:
__rxrpc_put_peer /net/rxrpc/peer_object.c:415 [inline]
00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at:
rxrpc_put_peer+0x2d3/0x6a0 /net/rxrpc/peer_object.c:435

but task is already holding lock:
00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at: spin_lock_bh
/./include/linux/spinlock.h:343 [inline]
00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at:
rxrpc_peer_keepalive_dispatch /net/rxrpc/peer_event.c:378 [inline]
00000000aa5eecdf (&(&rxnet->peer_hash_lock)->rlock){+.-.}, at:
rxrpc_peer_keepalive_worker+0x6b3/0xd02 /net/rxrpc/peer_event.c:430

Fixes: 330bdcfadcee ("rxrpc: Fix the keepalive generator [ver #2]")
Reported-by: syzbot+72af434e4b3417318f84@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
---

 net/rxrpc/ar-internal.h |    1 +
 net/rxrpc/peer_event.c  |    2 +-
 net/rxrpc/peer_object.c |   18 ++++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 80335b4ee4fd..822f45386e31 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1061,6 +1061,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *);
 struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
 struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
 void rxrpc_put_peer(struct rxrpc_peer *);
+void rxrpc_put_peer_locked(struct rxrpc_peer *);
 
 /*
  * proc.c
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 9f2f45c09e58..7666ec72d37e 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -378,7 +378,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
 		spin_lock_bh(&rxnet->peer_hash_lock);
 		list_add_tail(&peer->keepalive_link,
 			      &rxnet->peer_keepalive[slot & mask]);
-		rxrpc_put_peer(peer);
+		rxrpc_put_peer_locked(peer);
 	}
 
 	spin_unlock_bh(&rxnet->peer_hash_lock);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 9d3ce81cf8ae..9c3ac96f71cb 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -436,6 +436,24 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
 	}
 }
 
+/*
+ * Drop a ref on a peer record where the caller already holds the
+ * peer_hash_lock.
+ */
+void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	n = atomic_dec_return(&peer->usage);
+	trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+	if (n == 0) {
+		hash_del_rcu(&peer->hash_link);
+		list_del_init(&peer->keepalive_link);
+		kfree_rcu(peer, rcu);
+	}
+}
+
 /*
  * Make sure all peer records have been discarded.
  */


^ permalink raw reply related

* [PATCH net 0/2] rxrpc: Fixes
From: David Howells @ 2019-07-30 14:50 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, linux-afs, linux-kernel


Here are a couple of fixes for rxrpc:

 (1) Fix a potential deadlock in the peer keepalive dispatcher.

 (2) Fix a missing notification when a UDP sendmsg error occurs in rxrpc.


The patches are tagged here:

	git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git
	rxrpc-fixes-20190730

and can also be found on the following branch:

	http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=rxrpc-fixes

David
---
David Howells (2):
      rxrpc: Fix potential deadlock
      rxrpc: Fix the lack of notification when sendmsg() fails on a DATA packet


 net/rxrpc/ar-internal.h |    1 +
 net/rxrpc/peer_event.c  |    2 +-
 net/rxrpc/peer_object.c |   18 ++++++++++++++++++
 net/rxrpc/sendmsg.c     |    1 +
 4 files changed, 21 insertions(+), 1 deletion(-)


^ permalink raw reply

* Re: [PATCH v2] net/socket: fix GCC8+ Wpacked-not-aligned warnings
From: Marcelo Ricardo Leitner @ 2019-07-30 14:45 UTC (permalink / raw)
  To: Qian Cai
  Cc: davem, vyasevich, nhorman, David.Laight, linux-sctp, netdev,
	linux-kernel
In-Reply-To: <1564497488-3030-1-git-send-email-cai@lca.pw>

On Tue, Jul 30, 2019 at 10:38:08AM -0400, Qian Cai wrote:
> In file included from ./include/linux/sctp.h:42,
>                  from net/core/skbuff.c:47:
> ./include/uapi/linux/sctp.h:395:1: warning: alignment 4 of 'struct
> sctp_paddr_change' is less than 8 [-Wpacked-not-aligned]
>  } __attribute__((packed, aligned(4)));
>  ^
> ./include/uapi/linux/sctp.h:728:1: warning: alignment 4 of 'struct
> sctp_setpeerprim' is less than 8 [-Wpacked-not-aligned]
>  } __attribute__((packed, aligned(4)));
>  ^
> ./include/uapi/linux/sctp.h:727:26: warning: 'sspp_addr' offset 4 in
> 'struct sctp_setpeerprim' isn't aligned to 8 [-Wpacked-not-aligned]
>   struct sockaddr_storage sspp_addr;
>                           ^~~~~~~~~
> ./include/uapi/linux/sctp.h:741:1: warning: alignment 4 of 'struct
> sctp_prim' is less than 8 [-Wpacked-not-aligned]
>  } __attribute__((packed, aligned(4)));
>  ^
> ./include/uapi/linux/sctp.h:740:26: warning: 'ssp_addr' offset 4 in
> 'struct sctp_prim' isn't aligned to 8 [-Wpacked-not-aligned]
>   struct sockaddr_storage ssp_addr;
>                           ^~~~~~~~
> ./include/uapi/linux/sctp.h:792:1: warning: alignment 4 of 'struct
> sctp_paddrparams' is less than 8 [-Wpacked-not-aligned]
>  } __attribute__((packed, aligned(4)));
>  ^
> ./include/uapi/linux/sctp.h:784:26: warning: 'spp_address' offset 4 in
> 'struct sctp_paddrparams' isn't aligned to 8 [-Wpacked-not-aligned]
>   struct sockaddr_storage spp_address;
>                           ^~~~~~~~~~~
> ./include/uapi/linux/sctp.h:905:1: warning: alignment 4 of 'struct
> sctp_paddrinfo' is less than 8 [-Wpacked-not-aligned]
>  } __attribute__((packed, aligned(4)));
>  ^
> ./include/uapi/linux/sctp.h:899:26: warning: 'spinfo_address' offset 4
> in 'struct sctp_paddrinfo' isn't aligned to 8 [-Wpacked-not-aligned]
>   struct sockaddr_storage spinfo_address;
>                           ^~~~~~~~~~~~~~
> 
> This is because the commit 20c9c825b12f ("[SCTP] Fix SCTP socket options
> to work with 32-bit apps on 64-bit kernels.") added "packed, aligned(4)"
> GCC attributes to some structures but one of the members, i.e, "struct
> sockaddr_storage" in those structures has the attribute,
> "aligned(__alignof__ (struct sockaddr *)" which is 8-byte on 64-bit
> systems, so the commit overwrites the designed alignments for
> "sockaddr_storage".
> 
> To fix this, "struct sockaddr_storage" needs to be aligned to 4-byte as
> it is only used in those packed sctp structure which is part of UAPI,
> and "struct __kernel_sockaddr_storage" is used in some other
> places of UAPI that need not to change alignments in order to not
> breaking userspace.
> 
> Use an implicit alignment for "struct __kernel_sockaddr_storage" so it
> can keep the same alignments as a member in both packed and un-packed
> structures without breaking UAPI.
> 
> Suggested-by: David Laight <David.Laight@ACULAB.COM>
> Signed-off-by: Qian Cai <cai@lca.pw>
> ---
> 
> v2: Use an implicit alignment for "struct __kernel_sockaddr_storage".
> 
>  include/uapi/linux/socket.h | 15 +++++++++------
>  1 file changed, 9 insertions(+), 6 deletions(-)
> 

SCTP-wise, LGTM.

^ permalink raw reply

* Re: [PATCH net-next] be2net: disable bh with spin_lock in be_process_mcc
From: Willem de Bruijn @ 2019-07-30 14:45 UTC (permalink / raw)
  To: Denis Kirjanov
  Cc: sathya.perla, ajit.khaparde, sriharsha.basavapatna,
	Network Development, Denis Kirjanov
In-Reply-To: <20190730113226.39845-1-dkirjanov@suse.com>

On Tue, Jul 30, 2019 at 7:33 AM Denis Kirjanov <kda@linux-powerpc.org> wrote:
>
> Signed-off-by: Denis Kirjanov <kdav@linux-powerpc.org>

This is a partial revert of the previous change to these lines in 2012
in commit 072a9c486004 ("netpoll: revert 6bdb7fe3104 and fix be_poll()
instead").

The commit message is empty. Can you give some context as to why this
is needed and correct?

^ permalink raw reply

* [PATCH v2 2/3 net-next] net: Use skb_frag_off accessors
From: Jonathan Lemon @ 2019-07-30 14:40 UTC (permalink / raw)
  To: willy, davem, jakub.kicinski; +Cc: kernel-team, netdev
In-Reply-To: <20190730144034.444022-1-jonathan.lemon@gmail.com>

Use accessor functions for skb fragment's page_offset instead
of direct references, in preparation for bvec conversion.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/atm/eni.c                             |  2 +-
 drivers/hsi/clients/ssi_protocol.c            |  2 +-
 drivers/infiniband/hw/hfi1/vnic_sdma.c        |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c       |  3 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  2 +-
 .../ethernet/cavium/thunder/nicvf_queues.c    |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/sge.c      |  2 +-
 drivers/net/ethernet/emulex/benet/be_main.c   | 12 ++---
 .../ethernet/freescale/fs_enet/fs_enet-main.c |  2 +-
 drivers/net/ethernet/ibm/ibmvnic.c            |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   |  2 +-
 drivers/net/ethernet/intel/iavf/iavf_txrx.c   |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  4 +-
 drivers/net/ethernet/jme.c                    |  4 +-
 drivers/net/ethernet/marvell/mv643xx_eth.c    |  2 +-
 .../net/ethernet/myricom/myri10ge/myri10ge.c  |  6 +--
 drivers/net/ethernet/sfc/tx.c                 |  2 +-
 drivers/net/ethernet/sun/cassini.c            |  8 +--
 drivers/net/ethernet/sun/niu.c                |  2 +-
 drivers/net/ethernet/sun/sunvnet_common.c     |  4 +-
 drivers/net/ethernet/ti/netcp_core.c          |  2 +-
 drivers/net/hyperv/netvsc_drv.c               |  4 +-
 drivers/net/thunderbolt.c                     |  2 +-
 drivers/net/usb/usbnet.c                      |  2 +-
 drivers/net/vmxnet3/vmxnet3_drv.c             |  2 +-
 drivers/net/xen-netback/netback.c             |  6 +--
 drivers/net/xen-netfront.c                    |  8 +--
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c             |  2 +-
 drivers/scsi/fcoe/fcoe.c                      |  3 +-
 drivers/scsi/fcoe/fcoe_transport.c            |  2 +-
 drivers/scsi/qedf/qedf_main.c                 |  2 +-
 .../staging/unisys/visornic/visornic_main.c   |  2 +-
 drivers/target/iscsi/cxgbit/cxgbit_target.c   |  4 +-
 net/appletalk/ddp.c                           |  4 +-
 net/core/datagram.c                           |  6 +--
 net/core/dev.c                                |  2 +-
 net/core/pktgen.c                             |  2 +-
 net/core/skbuff.c                             | 54 ++++++++++---------
 net/ipv4/tcp.c                                |  6 +--
 net/ipv4/tcp_output.c                         |  2 +-
 net/kcm/kcmsock.c                             |  2 +-
 net/tls/tls_device.c                          |  8 +--
 net/tls/tls_device_fallback.c                 |  2 +-
 net/xfrm/xfrm_ipcomp.c                        |  2 +-
 44 files changed, 100 insertions(+), 98 deletions(-)

diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 79b718430cd1..b23d1e4bad33 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -1136,7 +1136,7 @@ DPRINTK("doing direct send\n"); /* @@@ well, this doesn't work anyway */
 			else
 				put_dma(tx->index,eni_dev->dma,&j,(unsigned long)
 				    skb_frag_page(&skb_shinfo(skb)->frags[i]) +
-					skb_shinfo(skb)->frags[i].page_offset,
+					skb_frag_off(&skb_shinfo(skb)->frags[i]),
 				    skb_frag_size(&skb_shinfo(skb)->frags[i]));
 	}
 	if (skb->len & 3) {
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
index c9e3f928b93d..0253e76f1df2 100644
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -182,7 +182,7 @@ static void ssip_skb_to_msg(struct sk_buff *skb, struct hsi_msg *msg)
 		BUG_ON(!sg);
 		frag = &skb_shinfo(skb)->frags[i];
 		sg_set_page(sg, skb_frag_page(frag), skb_frag_size(frag),
-				frag->page_offset);
+				skb_frag_off(frag));
 	}
 }
 
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 05a140504a99..7d90b900131b 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -108,7 +108,7 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
 		ret = sdma_txadd_page(sde->dd,
 				      &tx->txreq,
 				      skb_frag_page(frag),
-				      frag->page_offset,
+				      skb_frag_off(frag),
 				      skb_frag_size(frag));
 		if (unlikely(ret))
 			goto bail_txadd;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 78fa777c87b1..c332b4761816 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -293,7 +293,8 @@ int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		mapping[i + off] = ib_dma_map_page(ca,
 						 skb_frag_page(frag),
-						 frag->page_offset, skb_frag_size(frag),
+						 skb_frag_off(frag),
+						 skb_frag_size(frag),
 						 DMA_TO_DEVICE);
 		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
 			goto partial_error;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ac61c9352535..c23fbb34f0e9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -957,7 +957,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
 
 	frag = &skb_shinfo(skb)->frags[0];
 	skb_frag_size_sub(frag, payload);
-	frag->page_offset += payload;
+	skb_frag_off_add(frag, payload);
 	skb->data_len -= payload;
 	skb->tail += payload;
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index c0266a87794c..4ab57d33a87e 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1594,7 +1594,7 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
 		size = skb_frag_size(frag);
 		dma_addr = dma_map_page_attrs(&nic->pdev->dev,
 					      skb_frag_page(frag),
-					      frag->page_offset, size,
+					      skb_frag_off(frag), size,
 					      DMA_TO_DEVICE,
 					      DMA_ATTR_SKIP_CPU_SYNC);
 		if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 310a232e00f0..6dabbf1502c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -2182,7 +2182,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 
 	rx_frag += nr_frags;
 	__skb_frag_set_page(rx_frag, sd->pg_chunk.page);
-	rx_frag->page_offset = sd->pg_chunk.offset + offset;
+	skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset);
 	skb_frag_size_set(rx_frag, len);
 
 	skb->len += len;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index e00a94a03879..1c9883019767 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2346,8 +2346,8 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
 		memcpy(skb->data, start, hdr_len);
 		skb_shinfo(skb)->nr_frags = 1;
 		skb_frag_set_page(skb, 0, page_info->page);
-		skb_shinfo(skb)->frags[0].page_offset =
-					page_info->page_offset + hdr_len;
+		skb_frag_off_set(&skb_shinfo(skb)->frags[0],
+				 page_info->page_offset + hdr_len);
 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
 				  curr_frag_len - hdr_len);
 		skb->data_len = curr_frag_len - hdr_len;
@@ -2372,8 +2372,8 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
 			/* Fresh page */
 			j++;
 			skb_frag_set_page(skb, j, page_info->page);
-			skb_shinfo(skb)->frags[j].page_offset =
-							page_info->page_offset;
+			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
+					 page_info->page_offset);
 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
 			skb_shinfo(skb)->nr_frags++;
 		} else {
@@ -2454,8 +2454,8 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
 			/* First frag or Fresh page */
 			j++;
 			skb_frag_set_page(skb, j, page_info->page);
-			skb_shinfo(skb)->frags[j].page_offset =
-							page_info->page_offset;
+			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
+					 page_info->page_offset);
 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
 		} else {
 			put_page(page_info->page);
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 5fad73b2e123..3981c06f082f 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -501,7 +501,7 @@ fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		nr_frags = skb_shinfo(skb)->nr_frags;
 		frag = skb_shinfo(skb)->frags;
 		for (i = 0; i < nr_frags; i++, frag++) {
-			if (!IS_ALIGNED(frag->page_offset, 4)) {
+			if (!IS_ALIGNED(skb_frag_off(frag), 4)) {
 				is_aligned = 0;
 				break;
 			}
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 3da680073265..81a05ea38237 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1485,7 +1485,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 			memcpy(dst + cur,
 			       page_address(skb_frag_page(frag)) +
-			       frag->page_offset, skb_frag_size(frag));
+			       skb_frag_off(frag), skb_frag_size(frag));
 			cur += skb_frag_size(frag);
 		}
 	} else {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index f162252f01b5..e3f29dc8b290 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3306,7 +3306,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
 		 * descriptor associated with the fragment.
 		 */
 		if (stale_size > I40E_MAX_DATA_PER_TXD) {
-			int align_pad = -(stale->page_offset) &
+			int align_pad = -(skb_frag_off(stale)) &
 					(I40E_MAX_READ_REQ_SIZE - 1);
 
 			sum -= align_pad;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index fae7cd1c618a..7a30d5d5ef53 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -2205,7 +2205,7 @@ bool __iavf_chk_linearize(struct sk_buff *skb)
 		 * descriptor associated with the fragment.
 		 */
 		if (stale_size > IAVF_MAX_DATA_PER_TXD) {
-			int align_pad = -(stale->page_offset) &
+			int align_pad = -(skb_frag_off(stale)) &
 					(IAVF_MAX_READ_REQ_SIZE - 1);
 
 			sum -= align_pad;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index e12d23d1fa64..dc7b128c780e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1807,7 +1807,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
 
 	/* update all of the pointers */
 	skb_frag_size_sub(frag, pull_len);
-	frag->page_offset += pull_len;
+	skb_frag_off_add(frag, pull_len);
 	skb->data_len -= pull_len;
 	skb->tail += pull_len;
 }
@@ -1844,7 +1844,7 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
 
 		dma_sync_single_range_for_cpu(rx_ring->dev,
 					      IXGBE_CB(skb)->dma,
-					      frag->page_offset,
+					      skb_frag_off(frag),
 					      skb_frag_size(frag),
 					      DMA_FROM_DEVICE);
 	}
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 9c3ab00643bd..6d52cf5ce20e 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -2040,8 +2040,8 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx)
 		ctxbi = txbi + ((idx + i + 2) & (mask));
 
 		ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi,
-				skb_frag_page(frag),
-				frag->page_offset, skb_frag_size(frag), hidma);
+				      skb_frag_page(frag), skb_frag_off(frag),
+				      skb_frag_size(frag), hidma);
 		if (ret) {
 			jme_drop_tx_map(jme, idx, i);
 			goto out;
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 88ea5ac83c93..82ea55ae5053 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -659,7 +659,7 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb)
 	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
 		const skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag];
 
-		if (skb_frag_size(fragp) <= 8 && fragp->page_offset & 7)
+		if (skb_frag_size(fragp) <= 8 && skb_frag_off(fragp) & 7)
 			return 1;
 	}
 
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 9ead6ecb7586..99eaadba555f 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -1306,8 +1306,8 @@ myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb)
 		skb->len -= VLAN_HLEN;
 		skb->data_len -= VLAN_HLEN;
 		frag = skb_shinfo(skb)->frags;
-		frag->page_offset += VLAN_HLEN;
-		skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN);
+		skb_frag_off_add(frag, VLAN_HLEN);
+		skb_frag_size_sub(frag, VLAN_HLEN);
 	}
 }
 
@@ -1364,7 +1364,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum)
 	}
 
 	/* remove padding */
-	rx_frags[0].page_offset += MXGEFW_PAD;
+	skb_frag_off_add(&rx_frags[0], MXGEFW_PAD);
 	skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD);
 	len -= MXGEFW_PAD;
 
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 31ec56091a5d..65e81ec1b314 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -274,7 +274,7 @@ static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
 
 		vaddr = kmap_atomic(skb_frag_page(f));
 
-		efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset,
+		efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + skb_frag_off(f),
 					   skb_frag_size(f), copy_buf);
 		kunmap_atomic(vaddr);
 	}
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 6fc05c106afc..c91876f8c536 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -2034,7 +2034,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 
 		__skb_frag_set_page(frag, page->buffer);
 		__skb_frag_ref(frag);
-		frag->page_offset = off;
+		skb_frag_off_set(frag, off);
 		skb_frag_size_set(frag, hlen - swivel);
 
 		/* any more data? */
@@ -2058,7 +2058,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 
 			__skb_frag_set_page(frag, page->buffer);
 			__skb_frag_ref(frag);
-			frag->page_offset = 0;
+			skb_frag_off_set(frag, 0);
 			skb_frag_size_set(frag, hlen);
 			RX_USED_ADD(page, hlen + cp->crc_size);
 		}
@@ -2816,7 +2816,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 		mapping = skb_frag_dma_map(&cp->pdev->dev, fragp, 0, len,
 					   DMA_TO_DEVICE);
 
-		tabort = cas_calc_tabort(cp, fragp->page_offset, len);
+		tabort = cas_calc_tabort(cp, skb_frag_off(fragp), len);
 		if (unlikely(tabort)) {
 			void *addr;
 
@@ -2827,7 +2827,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 
 			addr = cas_page_map(skb_frag_page(fragp));
 			memcpy(tx_tiny_buf(cp, ring, entry),
-			       addr + fragp->page_offset + len - tabort,
+			       addr + skb_frag_off(fragp) + len - tabort,
 			       tabort);
 			cas_page_unmap(addr);
 			mapping = tx_tiny_map(cp, ring, entry, tentry);
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 0bc5863bffeb..f5fd1f3c07cc 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6695,7 +6695,7 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb,
 
 		len = skb_frag_size(frag);
 		mapping = np->ops->map_page(np->device, skb_frag_page(frag),
-					    frag->page_offset, len,
+					    skb_frag_off(frag), len,
 					    DMA_TO_DEVICE);
 
 		rp->tx_buffs[prod].skb = NULL;
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index baa3088b475c..646e67236b65 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1088,7 +1088,7 @@ static inline int vnet_skb_map(struct ldc_channel *lp, struct sk_buff *skb,
 			vaddr = kmap_atomic(skb_frag_page(f));
 			blen = skb_frag_size(f);
 			blen += 8 - (blen & 7);
-			err = ldc_map_single(lp, vaddr + f->page_offset,
+			err = ldc_map_single(lp, vaddr + skb_frag_off(f),
 					     blen, cookies + nc, ncookies - nc,
 					     map_perm);
 			kunmap_atomic(vaddr);
@@ -1124,7 +1124,7 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 
-		docopy |= f->page_offset & 7;
+		docopy |= skb_frag_off(f) & 7;
 	}
 	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
 	    skb_tailroom(skb) < pad ||
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 642843945031..1b2702f74455 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1116,7 +1116,7 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp)
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		struct page *page = skb_frag_page(frag);
-		u32 page_offset = frag->page_offset;
+		u32 page_offset = skb_frag_off(frag);
 		u32 buf_len = skb_frag_size(frag);
 		dma_addr_t desc_dma;
 		u32 desc_dma_32;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 3544e1991579..86884c863013 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -435,7 +435,7 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
 		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 
 		slots_used += fill_pg_buf(skb_frag_page(frag),
-					frag->page_offset,
+					skb_frag_off(frag),
 					skb_frag_size(frag), &pb[slots_used]);
 	}
 	return slots_used;
@@ -449,7 +449,7 @@ static int count_skb_frag_slots(struct sk_buff *skb)
 	for (i = 0; i < frags; i++) {
 		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 		unsigned long size = skb_frag_size(frag);
-		unsigned long offset = frag->page_offset;
+		unsigned long offset = skb_frag_off(frag);
 
 		/* Skip unused frames from start of page */
 		offset &= ~PAGE_MASK;
diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
index fcf31335a8b6..dacb4f680fd4 100644
--- a/drivers/net/thunderbolt.c
+++ b/drivers/net/thunderbolt.c
@@ -1005,7 +1005,7 @@ static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num,
 	const skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_num];
 
 	*len = skb_frag_size(frag);
-	return kmap_atomic(skb_frag_page(frag)) + frag->page_offset;
+	return kmap_atomic(skb_frag_page(frag)) + skb_frag_off(frag);
 }
 
 static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb,
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index ace7ffaf3913..58952a79b05f 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1328,7 +1328,7 @@ static int build_dma_sg(const struct sk_buff *skb, struct urb *urb)
 
 		total_len += skb_frag_size(f);
 		sg_set_page(&urb->sg[i + s], skb_frag_page(f), skb_frag_size(f),
-				f->page_offset);
+			    skb_frag_off(f));
 	}
 	urb->transfer_buffer_length = total_len;
 
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 03feaeae89cd..216acf37ca7c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -662,7 +662,7 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
 	BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 
 	__skb_frag_set_page(frag, rbi->page);
-	frag->page_offset = 0;
+	skb_frag_off_set(frag, 0);
 	skb_frag_size_set(frag, rcd->len);
 	skb->data_len += rcd->len;
 	skb->truesize += PAGE_SIZE;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a96c5c2a2c5a..3ef07b63613e 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -136,12 +136,12 @@ static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
 
 static u16 frag_get_pending_idx(skb_frag_t *frag)
 {
-	return (u16)frag->page_offset;
+	return (u16)skb_frag_off(frag);
 }
 
 static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
 {
-	frag->page_offset = pending_idx;
+	skb_frag_off_set(frag, pending_idx);
 }
 
 static inline pending_ring_idx_t pending_index(unsigned i)
@@ -1068,7 +1068,7 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
 
 		offset += len;
 		__skb_frag_set_page(&frags[i], page);
-		frags[i].page_offset = 0;
+		skb_frag_off_set(&frags[i], 0);
 		skb_frag_size_set(&frags[i], len);
 	}
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 8d33970a2950..b930d5f95222 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -531,7 +531,7 @@ static int xennet_count_skb_slots(struct sk_buff *skb)
 	for (i = 0; i < frags; i++) {
 		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 		unsigned long size = skb_frag_size(frag);
-		unsigned long offset = frag->page_offset;
+		unsigned long offset = skb_frag_off(frag);
 
 		/* Skip unused frames from start of page */
 		offset &= ~PAGE_MASK;
@@ -674,8 +674,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
 	/* Requests for all the frags. */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		tx = xennet_make_txreqs(queue, tx, skb,
-					skb_frag_page(frag), frag->page_offset,
+		tx = xennet_make_txreqs(queue, tx, skb, skb_frag_page(frag),
+					skb_frag_off(frag),
 					skb_frag_size(frag));
 	}
 
@@ -1040,7 +1040,7 @@ static int xennet_poll(struct napi_struct *napi, int budget)
 		if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
 			NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
 
-		skb_shinfo(skb)->frags[0].page_offset = rx->offset;
+		skb_frag_off_set(&skb_shinfo(skb)->frags[0], rx->offset);
 		skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
 		skb->data_len = rx->status;
 		skb->len += rx->status;
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 7796799bf04a..9ff9429395eb 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -346,7 +346,7 @@ static int bnx2fc_xmit(struct fc_lport *lport, struct fc_frame *fp)
 			return -ENOMEM;
 		}
 		frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
-		cp = kmap_atomic(skb_frag_page(frag)) + frag->page_offset;
+		cp = kmap_atomic(skb_frag_page(frag)) + skb_frag_off(frag);
 	} else {
 		cp = skb_put(skb, tlen);
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 00dd47bcbb1e..587d4bbb7d22 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1522,8 +1522,7 @@ static int fcoe_xmit(struct fc_lport *lport, struct fc_frame *fp)
 			return -ENOMEM;
 		}
 		frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
-		cp = kmap_atomic(skb_frag_page(frag))
-			+ frag->page_offset;
+		cp = kmap_atomic(skb_frag_page(frag)) + skb_frag_off(frag);
 	} else {
 		cp = skb_put(skb, tlen);
 	}
diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c
index d0550384cc38..a20ddc301c89 100644
--- a/drivers/scsi/fcoe/fcoe_transport.c
+++ b/drivers/scsi/fcoe/fcoe_transport.c
@@ -318,7 +318,7 @@ u32 fcoe_fc_crc(struct fc_frame *fp)
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		frag = &skb_shinfo(skb)->frags[i];
-		off = frag->page_offset;
+		off = skb_frag_off(frag);
 		len = skb_frag_size(frag);
 		while (len > 0) {
 			clen = min(len, PAGE_SIZE - (off & ~PAGE_MASK));
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index a42babde036d..42542720962f 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -1077,7 +1077,7 @@ static int qedf_xmit(struct fc_lport *lport, struct fc_frame *fp)
 			return -ENOMEM;
 		}
 		frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
-		cp = kmap_atomic(skb_frag_page(frag)) + frag->page_offset;
+		cp = kmap_atomic(skb_frag_page(frag)) + skb_frag_off(frag);
 	} else {
 		cp = skb_put(skb, tlen);
 	}
diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c
index b889b04a6e25..6fa7726185de 100644
--- a/drivers/staging/unisys/visornic/visornic_main.c
+++ b/drivers/staging/unisys/visornic/visornic_main.c
@@ -284,7 +284,7 @@ static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
 		for (frag = 0; frag < numfrags; frag++) {
 			count = add_physinfo_entries(page_to_pfn(
 				  skb_frag_page(&skb_shinfo(skb)->frags[frag])),
-				  skb_shinfo(skb)->frags[frag].page_offset,
+				  skb_frag_off(&skb_shinfo(skb)->frags[frag]),
 				  skb_frag_size(&skb_shinfo(skb)->frags[frag]),
 				  count, frags_max, frags);
 			/* add_physinfo_entries only returns
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c
index c25315431ad0..fcdc4211e3c2 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_target.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c
@@ -900,7 +900,7 @@ cxgbit_handle_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
 
 		sg_init_table(&ccmd->sg, 1);
 		sg_set_page(&ccmd->sg, skb_frag_page(dfrag),
-				skb_frag_size(dfrag), dfrag->page_offset);
+				skb_frag_size(dfrag), skb_frag_off(dfrag));
 		get_page(skb_frag_page(dfrag));
 
 		cmd->se_cmd.t_data_sg = &ccmd->sg;
@@ -1403,7 +1403,7 @@ static void cxgbit_lro_skb_dump(struct sk_buff *skb)
 			pdu_cb->ddigest, pdu_cb->frags);
 	for (i = 0; i < ssi->nr_frags; i++)
 		pr_info("skb 0x%p, frag %d, off %u, sz %u.\n",
-			skb, i, ssi->frags[i].page_offset,
+			skb, i, skb_frag_off(&ssi->frags[i]),
 			skb_frag_size(&ssi->frags[i]));
 }
 
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index a8cb6b2e20c1..4072e9d394d6 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -953,8 +953,8 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap_atomic(skb_frag_page(frag));
-			sum = atalk_sum_partial(vaddr + frag->page_offset +
-						  offset - start, copy, sum);
+			sum = atalk_sum_partial(vaddr + skb_frag_off(frag) +
+						offset - start, copy, sum);
 			kunmap_atomic(vaddr);
 
 			if (!(len -= copy))
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 45a162ef5e02..4cc8dc5db2b7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -442,8 +442,8 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
 
 			if (copy > len)
 				copy = len;
-			n = cb(vaddr + frag->page_offset +
-				offset - start, copy, data, to);
+			n = cb(vaddr + skb_frag_off(frag) + offset - start,
+			       copy, data, to);
 			kunmap(page);
 			offset += n;
 			if (n != copy)
@@ -573,7 +573,7 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			copied = copy_page_from_iter(skb_frag_page(frag),
-					  frag->page_offset + offset - start,
+					  skb_frag_off(frag) + offset - start,
 					  copy, from);
 			if (copied != copy)
 				goto fault;
diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..e2a11c62197b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5481,7 +5481,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 	skb->data_len -= grow;
 	skb->tail += grow;
 
-	pinfo->frags[0].page_offset += grow;
+	skb_frag_off_add(&pinfo->frags[0], grow);
 	skb_frag_size_sub(&pinfo->frags[0], grow);
 
 	if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bb9915291644..c5dbdc87342a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2652,7 +2652,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
 			}
 			get_page(pkt_dev->page);
 			skb_frag_set_page(skb, i, pkt_dev->page);
-			skb_shinfo(skb)->frags[i].page_offset = 0;
+			skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0);
 			/*last fragment, fill rest of data*/
 			if (i == (frags - 1))
 				skb_frag_size_set(&skb_shinfo(skb)->frags[i],
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0b788df5a75b..ea8e8d332d85 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -785,7 +785,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
 		struct page *p;
 		u8 *vaddr;
 
-		skb_frag_foreach_page(frag, frag->page_offset,
+		skb_frag_foreach_page(frag, skb_frag_off(frag),
 				      skb_frag_size(frag), p, p_off, p_len,
 				      copied) {
 			seg_len = min_t(int, p_len, len);
@@ -1375,7 +1375,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
 		struct page *p;
 		u8 *vaddr;
 
-		skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
+		skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
 				      p, p_off, p_len, copied) {
 			u32 copy, done = 0;
 			vaddr = kmap_atomic(p);
@@ -2144,10 +2144,12 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
 			skb_frag_unref(skb, i);
 			eat -= size;
 		} else {
-			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
+
+			*frag = skb_shinfo(skb)->frags[i];
 			if (eat) {
-				skb_shinfo(skb)->frags[k].page_offset += eat;
-				skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+				skb_frag_off_add(frag, eat);
+				skb_frag_size_sub(frag, eat);
 				if (!i)
 					goto end;
 				eat = 0;
@@ -2219,7 +2221,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 				copy = len;
 
 			skb_frag_foreach_page(f,
-					      f->page_offset + offset - start,
+					      skb_frag_off(f) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
 				memcpy(to + copied, vaddr + p_off, p_len);
@@ -2395,7 +2397,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(skb_frag_page(f),
-				     f->page_offset, skb_frag_size(f),
+				     skb_frag_off(f), skb_frag_size(f),
 				     offset, len, spd, false, sk, pipe))
 			return true;
 	}
@@ -2498,7 +2500,7 @@ int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
 
 		while (slen) {
 			ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
-						     frag->page_offset + offset,
+						     skb_frag_off(frag) + offset,
 						     slen, MSG_DONTWAIT);
 			if (ret <= 0)
 				goto error;
@@ -2580,7 +2582,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 				copy = len;
 
 			skb_frag_foreach_page(frag,
-					      frag->page_offset + offset - start,
+					      skb_frag_off(frag) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
 				memcpy(vaddr + p_off, from + copied, p_len);
@@ -2660,7 +2662,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 				copy = len;
 
 			skb_frag_foreach_page(frag,
-					      frag->page_offset + offset - start,
+					      skb_frag_off(frag) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
 				csum2 = INDIRECT_CALL_1(ops->update,
@@ -2759,7 +2761,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 				copy = len;
 
 			skb_frag_foreach_page(frag,
-					      frag->page_offset + offset - start,
+					      skb_frag_off(frag) + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
 				csum2 = csum_partial_copy_nocheck(vaddr + p_off,
@@ -3234,7 +3236,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
 				 * 2. Split is accurately. We make this.
 				 */
 				skb_frag_ref(skb, i);
-				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
+				skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
 				skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
 				skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
 				skb_shinfo(skb)->nr_frags++;
@@ -3316,7 +3318,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 	 */
 	if (!to ||
 	    !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
-			      fragfrom->page_offset)) {
+			      skb_frag_off(fragfrom))) {
 		merge = -1;
 	} else {
 		merge = to - 1;
@@ -3333,7 +3335,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 
 			skb_frag_size_add(fragto, shiftlen);
 			skb_frag_size_sub(fragfrom, shiftlen);
-			fragfrom->page_offset += shiftlen;
+			skb_frag_off_add(fragfrom, shiftlen);
 
 			goto onlymerged;
 		}
@@ -3364,11 +3366,11 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 
 		} else {
 			__skb_frag_ref(fragfrom);
-			fragto->bv_page = fragfrom->bv_page;
-			fragto->page_offset = fragfrom->page_offset;
+			skb_frag_page_copy(fragto, fragfrom);
+			skb_frag_off_copy(fragto, fragfrom);
 			skb_frag_size_set(fragto, todo);
 
-			fragfrom->page_offset += todo;
+			skb_frag_off_add(fragfrom, todo);
 			skb_frag_size_sub(fragfrom, todo);
 			todo = 0;
 
@@ -3493,7 +3495,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
 			if (!st->frag_data)
 				st->frag_data = kmap_atomic(skb_frag_page(frag));
 
-			*data = (u8 *) st->frag_data + frag->page_offset +
+			*data = (u8 *) st->frag_data + skb_frag_off(frag) +
 				(abs_offset - st->stepped_offset);
 
 			return block_limit - abs_offset;
@@ -3630,8 +3632,8 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
 
 	page = virt_to_head_page(frag_skb->head);
 	__skb_frag_set_page(&head_frag, page);
-	head_frag.page_offset = frag_skb->data -
-		(unsigned char *)page_address(page);
+	skb_frag_off_set(&head_frag, frag_skb->data -
+			 (unsigned char *)page_address(page));
 	skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
 	return head_frag;
 }
@@ -3875,7 +3877,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 			size = skb_frag_size(nskb_frag);
 
 			if (pos < offset) {
-				nskb_frag->page_offset += offset - pos;
+				skb_frag_off_add(nskb_frag, offset - pos);
 				skb_frag_size_sub(nskb_frag, offset - pos);
 			}
 
@@ -3996,7 +3998,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 			*--frag = *--frag2;
 		} while (--i);
 
-		frag->page_offset += offset;
+		skb_frag_off_add(frag, offset);
 		skb_frag_size_sub(frag, offset);
 
 		/* all fragments truesize : remove (head size + sk_buff) */
@@ -4026,7 +4028,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
 
 		__skb_frag_set_page(frag, page);
-		frag->page_offset = first_offset;
+		skb_frag_off_set(frag, first_offset);
 		skb_frag_size_set(frag, first_size);
 
 		memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
@@ -4042,7 +4044,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	if (offset > headlen) {
 		unsigned int eat = offset - headlen;
 
-		skbinfo->frags[0].page_offset += eat;
+		skb_frag_off_add(&skbinfo->frags[0], eat);
 		skb_frag_size_sub(&skbinfo->frags[0], eat);
 		skb->data_len -= eat;
 		skb->len -= eat;
@@ -4167,7 +4169,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len,
 			if (copy > len)
 				copy = len;
 			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
-					frag->page_offset+offset-start);
+				    skb_frag_off(frag) + offset - start);
 			elt++;
 			if (!(len -= copy))
 				return elt;
@@ -5838,7 +5840,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 				 *    where splitting is expensive.
 				 * 2. Split is accurately. We make this.
 				 */
-				shinfo->frags[0].page_offset += off - pos;
+				skb_frag_off_add(&shinfo->frags[0], off - pos);
 				skb_frag_size_sub(&shinfo->frags[0], off - pos);
 			}
 			skb_frag_ref(skb, i);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f62f0e7e3cdd..a0a66321c0ee 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1782,12 +1782,12 @@ static int tcp_zerocopy_receive(struct sock *sk,
 				frags++;
 			}
 		}
-		if (skb_frag_size(frags) != PAGE_SIZE || frags->page_offset) {
+		if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) {
 			int remaining = zc->recv_skip_hint;
 			int size = skb_frag_size(frags);
 
 			while (remaining && (size != PAGE_SIZE ||
-					     frags->page_offset)) {
+					     skb_frag_off(frags))) {
 				remaining -= size;
 				frags++;
 				size = skb_frag_size(frags);
@@ -3784,7 +3784,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 
 	for (i = 0; i < shi->nr_frags; ++i) {
 		const skb_frag_t *f = &shi->frags[i];
-		unsigned int offset = f->page_offset;
+		unsigned int offset = skb_frag_off(f);
 		struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
 
 		sg_set_page(&sg, page, skb_frag_size(f),
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6e4afc48d7bb..e6d02e05bb1c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,7 +1402,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
 		} else {
 			shinfo->frags[k] = shinfo->frags[i];
 			if (eat) {
-				shinfo->frags[k].page_offset += eat;
+				skb_frag_off_add(&shinfo->frags[k], eat);
 				skb_frag_size_sub(&shinfo->frags[k], eat);
 				eat = 0;
 			}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 05f63c4300e9..4ff75c3a8d6e 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -642,7 +642,7 @@ static int kcm_write_msgs(struct kcm_sock *kcm)
 
 			ret = kernel_sendpage(psock->sk->sk_socket,
 					      skb_frag_page(frag),
-					      frag->page_offset + frag_offset,
+					      skb_frag_off(frag) + frag_offset,
 					      skb_frag_size(frag) - frag_offset,
 					      MSG_DONTWAIT);
 			if (ret <= 0) {
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 4ec8a06fa5d1..d184230665eb 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -244,12 +244,12 @@ static void tls_append_frag(struct tls_record_info *record,
 
 	frag = &record->frags[record->num_frags - 1];
 	if (skb_frag_page(frag) == pfrag->page &&
-	    frag->page_offset + skb_frag_size(frag) == pfrag->offset) {
+	    skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) {
 		skb_frag_size_add(frag, size);
 	} else {
 		++frag;
 		__skb_frag_set_page(frag, pfrag->page);
-		frag->page_offset = pfrag->offset;
+		skb_frag_off_set(frag, pfrag->offset);
 		skb_frag_size_set(frag, size);
 		++record->num_frags;
 		get_page(pfrag->page);
@@ -301,7 +301,7 @@ static int tls_push_record(struct sock *sk,
 		frag = &record->frags[i];
 		sg_unmark_end(&offload_ctx->sg_tx_data[i]);
 		sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
-			    skb_frag_size(frag), frag->page_offset);
+			    skb_frag_size(frag), skb_frag_off(frag));
 		sk_mem_charge(sk, skb_frag_size(frag));
 		get_page(skb_frag_page(frag));
 	}
@@ -324,7 +324,7 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
 
 	frag = &record->frags[0];
 	__skb_frag_set_page(frag, pfrag->page);
-	frag->page_offset = pfrag->offset;
+	skb_frag_off_set(frag, pfrag->offset);
 	skb_frag_size_set(frag, prepend_size);
 
 	get_page(pfrag->page);
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 9070d68a92a4..28895333701e 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -273,7 +273,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
 
 		__skb_frag_ref(frag);
 		sg_set_page(sg_in + i, skb_frag_page(frag),
-			    skb_frag_size(frag), frag->page_offset);
+			    skb_frag_size(frag), skb_frag_off(frag));
 
 		remaining -= skb_frag_size(frag);
 
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 32c364d3bfb3..4d422447aadc 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -85,7 +85,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 		if (dlen < len)
 			len = dlen;
 
-		frag->page_offset = 0;
+		skb_frag_off_set(frag, 0);
 		skb_frag_size_set(frag, len);
 		memcpy(skb_frag_address(frag), scratch, len);
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 1/3 net-next] linux: Add skb_frag_t page_offset accessors
From: Jonathan Lemon @ 2019-07-30 14:40 UTC (permalink / raw)
  To: willy, davem, jakub.kicinski; +Cc: kernel-team, netdev
In-Reply-To: <20190730144034.444022-1-jonathan.lemon@gmail.com>

Add skb_frag_off(), skb_frag_off_add(), skb_frag_off_set(),
and skb_frag_off_copy() accessors for page_offset.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/linux/skbuff.h | 67 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 8 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 718742b1c505..2957cdd6c032 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -312,7 +312,7 @@ extern int sysctl_max_skb_frags;
 typedef struct bio_vec skb_frag_t;
 
 /**
- * skb_frag_size - Returns the size of a skb fragment
+ * skb_frag_size() - Returns the size of a skb fragment
  * @frag: skb fragment
  */
 static inline unsigned int skb_frag_size(const skb_frag_t *frag)
@@ -321,7 +321,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
 }
 
 /**
- * skb_frag_size_set - Sets the size of a skb fragment
+ * skb_frag_size_set() - Sets the size of a skb fragment
  * @frag: skb fragment
  * @size: size of fragment
  */
@@ -331,7 +331,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
 }
 
 /**
- * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
+ * skb_frag_size_add() - Increments the size of a skb fragment by @delta
  * @frag: skb fragment
  * @delta: value to add
  */
@@ -341,7 +341,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
 }
 
 /**
- * skb_frag_size_sub - Decrements the size of a skb fragment by %delta
+ * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta
  * @frag: skb fragment
  * @delta: value to subtract
  */
@@ -2857,6 +2857,46 @@ static inline void skb_propagate_pfmemalloc(struct page *page,
 		skb->pfmemalloc = true;
 }
 
+/**
+ * skb_frag_off() - Returns the offset of a skb fragment
+ * @frag: the paged fragment
+ */
+static inline unsigned int skb_frag_off(const skb_frag_t *frag)
+{
+	return frag->page_offset;
+}
+
+/**
+ * skb_frag_off_add() - Increments the offset of a skb fragment by @delta
+ * @frag: skb fragment
+ * @delta: value to add
+ */
+static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
+{
+	frag->page_offset += delta;
+}
+
+/**
+ * skb_frag_off_set() - Sets the offset of a skb fragment
+ * @frag: skb fragment
+ * @offset: offset of fragment
+ */
+static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
+{
+	frag->page_offset = offset;
+}
+
+/**
+ * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment
+ * @fragto: skb fragment where offset is set
+ * @fragfrom: skb fragment offset is copied from
+ */
+static inline void skb_frag_off_copy(skb_frag_t *fragto,
+				     const skb_frag_t *fragfrom)
+{
+	fragto->page_offset = fragfrom->page_offset;
+}
+
 /**
  * skb_frag_page - retrieve the page referred to by a paged fragment
  * @frag: the paged fragment
@@ -2923,7 +2963,7 @@ static inline void skb_frag_unref(struct sk_buff *skb, int f)
  */
 static inline void *skb_frag_address(const skb_frag_t *frag)
 {
-	return page_address(skb_frag_page(frag)) + frag->page_offset;
+	return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
 }
 
 /**
@@ -2939,7 +2979,18 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
 	if (unlikely(!ptr))
 		return NULL;
 
-	return ptr + frag->page_offset;
+	return ptr + skb_frag_off(frag);
+}
+
+/**
+ * skb_frag_page_copy() - sets the page in a fragment from another fragment
+ * @fragto: skb fragment where page is set
+ * @fragfrom: skb fragment page is copied from
+ */
+static inline void skb_frag_page_copy(skb_frag_t *fragto,
+				      const skb_frag_t *fragfrom)
+{
+	fragto->bv_page = fragfrom->bv_page;
 }
 
 /**
@@ -2987,7 +3038,7 @@ static inline dma_addr_t skb_frag_dma_map(struct device *dev,
 					  enum dma_data_direction dir)
 {
 	return dma_map_page(dev, skb_frag_page(frag),
-			    frag->page_offset + offset, size, dir);
+			    skb_frag_off(frag) + offset, size, dir);
 }
 
 static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
@@ -3157,7 +3208,7 @@ static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
 		return page == skb_frag_page(frag) &&
-		       off == frag->page_offset + skb_frag_size(frag);
+		       off == skb_frag_off(frag) + skb_frag_size(frag);
 	}
 	return false;
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 3/3 net-next] linux: Remove bvec page_offset, use bv_offset
From: Jonathan Lemon @ 2019-07-30 14:40 UTC (permalink / raw)
  To: willy, davem, jakub.kicinski; +Cc: kernel-team, netdev
In-Reply-To: <20190730144034.444022-1-jonathan.lemon@gmail.com>

Now that page_offset is referenced through accessors, remove
the union, and use bv_offset.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/linux/bvec.h   |  5 +----
 include/linux/skbuff.h | 10 +++++-----
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 7f2b2ea9399c..a032f01e928c 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -18,10 +18,7 @@
 struct bio_vec {
 	struct page	*bv_page;
 	unsigned int	bv_len;
-	union {
-		__u32		page_offset;
-		unsigned int	bv_offset;
-	};
+	unsigned int	bv_offset;
 };
 
 struct bvec_iter {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2957cdd6c032..3aef8d82ea59 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2078,7 +2078,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
 	 * on page_is_pfmemalloc doing the right thing(tm).
 	 */
 	frag->bv_page		  = page;
-	frag->page_offset	  = off;
+	frag->bv_offset		  = off;
 	skb_frag_size_set(frag, size);
 
 	page = compound_head(page);
@@ -2863,7 +2863,7 @@ static inline void skb_propagate_pfmemalloc(struct page *page,
  */
 static inline unsigned int skb_frag_off(const skb_frag_t *frag)
 {
-	return frag->page_offset;
+	return frag->bv_offset;
 }
 
 /**
@@ -2873,7 +2873,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
  */
 static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
 {
-	frag->page_offset += delta;
+	frag->bv_offset += delta;
 }
 
 /**
@@ -2883,7 +2883,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
  */
 static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
 {
-	frag->page_offset = offset;
+	frag->bv_offset = offset;
 }
 
 /**
@@ -2894,7 +2894,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
 static inline void skb_frag_off_copy(skb_frag_t *fragto,
 				     const skb_frag_t *fragfrom)
 {
-	fragto->page_offset = fragfrom->page_offset;
+	fragto->bv_offset = fragfrom->bv_offset;
 }
 
 /**
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 0/3 net-next] Finish conversion of skb_frag_t to bio_vec
From: Jonathan Lemon @ 2019-07-30 14:40 UTC (permalink / raw)
  To: willy, davem, jakub.kicinski; +Cc: kernel-team, netdev

The recent conversion of skb_frag_t to bio_vec did not include
skb_frag's page_offset.  Add accessor functions for this field,
utilize them, and remove the union, restoring the original structure.

v2:
  - rename accessors
  - follow kdoc conventions

Jonathan Lemon (3):
  linux: Add page_offset accessors
  net: Use skb_frag_off accessors
  linux: Remove bvec page_offset, use bv_offset

 drivers/atm/eni.c                             |  2 +-
 drivers/hsi/clients/ssi_protocol.c            |  2 +-
 drivers/infiniband/hw/hfi1/vnic_sdma.c        |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c       |  3 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  2 +-
 .../ethernet/cavium/thunder/nicvf_queues.c    |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/sge.c      |  2 +-
 drivers/net/ethernet/emulex/benet/be_main.c   | 12 ++--
 .../ethernet/freescale/fs_enet/fs_enet-main.c |  2 +-
 drivers/net/ethernet/ibm/ibmvnic.c            |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   |  2 +-
 drivers/net/ethernet/intel/iavf/iavf_txrx.c   |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  4 +-
 drivers/net/ethernet/jme.c                    |  4 +-
 drivers/net/ethernet/marvell/mv643xx_eth.c    |  2 +-
 .../net/ethernet/myricom/myri10ge/myri10ge.c  |  6 +-
 drivers/net/ethernet/sfc/tx.c                 |  2 +-
 drivers/net/ethernet/sun/cassini.c            |  8 +--
 drivers/net/ethernet/sun/niu.c                |  2 +-
 drivers/net/ethernet/sun/sunvnet_common.c     |  4 +-
 drivers/net/ethernet/ti/netcp_core.c          |  2 +-
 drivers/net/hyperv/netvsc_drv.c               |  4 +-
 drivers/net/thunderbolt.c                     |  2 +-
 drivers/net/usb/usbnet.c                      |  2 +-
 drivers/net/vmxnet3/vmxnet3_drv.c             |  2 +-
 drivers/net/xen-netback/netback.c             |  6 +-
 drivers/net/xen-netfront.c                    |  8 +--
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c             |  2 +-
 drivers/scsi/fcoe/fcoe.c                      |  3 +-
 drivers/scsi/fcoe/fcoe_transport.c            |  2 +-
 drivers/scsi/qedf/qedf_main.c                 |  2 +-
 .../staging/unisys/visornic/visornic_main.c   |  2 +-
 drivers/target/iscsi/cxgbit/cxgbit_target.c   |  4 +-
 include/linux/bvec.h                          |  5 +-
 include/linux/skbuff.h                        | 69 ++++++++++++++++---
 net/appletalk/ddp.c                           |  4 +-
 net/core/datagram.c                           |  6 +-
 net/core/dev.c                                |  2 +-
 net/core/pktgen.c                             |  2 +-
 net/core/skbuff.c                             | 54 ++++++++-------
 net/ipv4/tcp.c                                |  6 +-
 net/ipv4/tcp_output.c                         |  2 +-
 net/kcm/kcmsock.c                             |  2 +-
 net/tls/tls_device.c                          |  8 +--
 net/tls/tls_device_fallback.c                 |  2 +-
 net/xfrm/xfrm_ipcomp.c                        |  2 +-
 46 files changed, 161 insertions(+), 111 deletions(-)

-- 
2.17.1


^ permalink raw reply

* Re: [RFC PATCH 1/2] gianfar: convert to phylink
From: Arseny Solokha @ 2019-07-30 14:40 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Claudiu Manoil, Russell King, Ioana Ciornei, Andrew Lunn, netdev,
	Florian Fainelli
In-Reply-To: <CA+h21hpacLmKzoeKrdE-frZSTsiYCi4rKCObJ4LfAmfrCJ6H9g@mail.gmail.com>

> Hi Arseny,
>
> Nice project!

Vladimir, Russell, thanks for your review. I'm on vacation now, so won't fully
address your comments in a few weeks: while I can build the code, I won't have
access to hardware to test.

So it seems this patch will turn into a series where we'll have some cleanup
patches preceding the actual conversion (and the latter will also contain a
documentation change in Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
which I've overlooked in the first submission). I'll try to post trivial
cleanups independently while still on vacation.


>> @@ -891,11 +912,21 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
>>
>>         err = of_property_read_string(np, "phy-connection-type", &ctype);
>>
>> -       /* We only care about rgmii-id.  The rest are autodetected */
>> -       if (err == 0 && !strcmp(ctype, "rgmii-id"))
>> -               priv->interface = PHY_INTERFACE_MODE_RGMII_ID;
>> -       else
>> +       /* We only care about rgmii-id and sgmii - the former
>> +        * is indistinguishable from rgmii in hardware, and phylink needs
>> +        * the latter to be set appropriately for correct phy configuration.
>> +        * The rest are autodetected
>> +        */
>> +       if (err == 0) {
>> +               if (!strcmp(ctype, "rgmii-id"))
>> +                       priv->interface = PHY_INTERFACE_MODE_RGMII_ID;
>> +               else if (!strcmp(ctype, "sgmii"))
>> +                       priv->interface = PHY_INTERFACE_MODE_SGMII;
>> +               else
>> +                       priv->interface = PHY_INTERFACE_MODE_MII;
>> +       } else {
>>                 priv->interface = PHY_INTERFACE_MODE_MII;
>> +       }
>>
>
> No. Don't do this. Just do:
>
>     err = of_get_phy_mode(np);
>     if (err < 0)
>         goto err_grp_init;
>
>     priv->interface = err;
>
>>         if (of_find_property(np, "fsl,magic-packet", NULL))
>>                 priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
>> @@ -903,19 +934,21 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
>>         if (of_get_property(np, "fsl,wake-on-filer", NULL))
>>                 priv->device_flags |= FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER;
>>
>> -       priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
>> +       priv->device_node = np;
>> +       priv->speed = SPEED_UNKNOWN;
>>
>> -       /* In the case of a fixed PHY, the DT node associated
>> -        * to the PHY is the Ethernet MAC DT node.
>> -        */
>> -       if (!priv->phy_node && of_phy_is_fixed_link(np)) {
>> -               err = of_phy_register_fixed_link(np);
>> -               if (err)
>> -                       goto err_grp_init;
>> +       priv->phylink_config.dev = &priv->ndev->dev;
>> +       priv->phylink_config.type = PHYLINK_NETDEV;
>>
>> -               priv->phy_node = of_node_get(np);
>> +       phylink = phylink_create(&priv->phylink_config, of_fwnode_handle(np),
>> +                                priv->interface, &gfar_phylink_ops);
>
> You introduced a bug here.
> of_phy_connect used to take the PHY interface type (for good or bad)
> from gfar_get_interface() (which is reconstructing it from the MAC
> registers).
> You are now passing the PHY interface type to phylink_create from the
> "phy-connection-type" DT property.
> At the very least, you are breaking LS1021A which uses phy-mode
> instead of phy-connection-type (hence my comment above to use the
> generic OF helper).
> Actually I think you just uncovered a latent bug, in that the DT
> bindings for phy-mode didn't mean much at all to the driver - it would
> rely on what the bootloader had set up.
> Actually DT bindings for phy-connection-type were most likely simply
> bolt on on top of gianfar when they figured they couldn't just
> auto-detect the various species of required RGMII delays.
> But gfar_get_interface is a piece of history that was introduced in
> the same commit as the enum phy_interface_t itself: e8a2b6a42073
> ("[PATCH] PHY: Add support for configuring the PHY connection
> interface"). Its time has come.

<…>

>>         }
>>
>> +       priv->tbi_phy = NULL;
>> +       interface = gfar_get_interface(dev);
>
> Be consistent and just go for priv->interface. Nobody's changing it anyway.

So if I get you right, I'm supposed to drop gfar_get_interface() and rely on DT
bindings entirely?


>> @@ -3387,23 +3384,6 @@ static irqreturn_t gfar_interrupt(int irq, void *grp_id)
>>         return IRQ_HANDLED;
>>  }
>>
>> -/* Called every time the controller might need to be made
>> - * aware of new link state.  The PHY code conveys this
>> - * information through variables in the phydev structure, and this
>> - * function converts those variables into the appropriate
>> - * register values, and can bring down the device if needed.
>> - */
>> -static void adjust_link(struct net_device *dev)
>> -{
>> -       struct gfar_private *priv = netdev_priv(dev);
>> -       struct phy_device *phydev = dev->phydev;
>> -
>> -       if (unlikely(phydev->link != priv->oldlink ||
>> -                    (phydev->link && (phydev->duplex != priv->oldduplex ||
>> -                                      phydev->speed != priv->oldspeed))))
>> -               gfar_update_link_state(priv);
>> -}
>
> Getting rid of the cruft from this function deserves its own patch.

How am I supposed to remove it without breaking the PHYLIB-based driver? Or do
you mean making it call gfar_update_link_state() just before the conversion
which will then remove adjust_link() altogether?


>>
>>         if (unlikely(test_bit(GFAR_RESETTING, &priv->state)))
>>                 return;
>>
>> -       if (phydev->link) {
>> -               u32 tempval1 = gfar_read(&regs->maccfg1);
>> -               u32 tempval = gfar_read(&regs->maccfg2);
>> -               u32 ecntrl = gfar_read(&regs->ecntrl);
>> -               u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
>> +       if (unlikely(phylink_autoneg_inband(mode)))
>> +               return;
>>
>> -               if (phydev->duplex != priv->oldduplex) {
>> -                       if (!(phydev->duplex))
>> -                               tempval &= ~(MACCFG2_FULL_DUPLEX);
>> -                       else
>> -                               tempval |= MACCFG2_FULL_DUPLEX;
>> +       maccfg1 = gfar_read(&regs->maccfg1);
>> +       maccfg2 = gfar_read(&regs->maccfg2);
>> +       ecntrl = gfar_read(&regs->ecntrl);
>>
>> -                       priv->oldduplex = phydev->duplex;
>> -               }
>> +       new_maccfg2 = maccfg2 & ~(MACCFG2_FULL_DUPLEX | MACCFG2_IF);
>> +       new_ecntrl = ecntrl & ~ECNTRL_R100;
>>
>> -               if (phydev->speed != priv->oldspeed) {
>> -                       switch (phydev->speed) {
>> -                       case 1000:
>> -                               tempval =
>> -                                   ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
>> +       if (state->duplex)
>> +               new_maccfg2 |= MACCFG2_FULL_DUPLEX;
>>
>> -                               ecntrl &= ~(ECNTRL_R100);
>> -                               break;
>> -                       case 100:
>> -                       case 10:
>> -                               tempval =
>> -                                   ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
>> -
>> -                               /* Reduced mode distinguishes
>> -                                * between 10 and 100
>> -                                */
>> -                               if (phydev->speed == SPEED_100)
>> -                                       ecntrl |= ECNTRL_R100;
>> -                               else
>> -                                       ecntrl &= ~(ECNTRL_R100);
>> -                               break;
>> -                       default:
>> -                               netif_warn(priv, link, priv->ndev,
>> -                                          "Ack!  Speed (%d) is not 10/100/1000!\n",
>> -                                          phydev->speed);
>
> Please 1. remove "Ack!" 2. treat SPEED_UNKNOWN here by setting the MAC
> into a low-power state (e.g. 10 Mbps - the power savings are real).
> Don't print that Speed -1 is not 10/100/1000, we know that.

In my first conversion attempt I see "Ack!" when changing link speed on when
shutting it down, so switching to 10 Mbps doesn't seem right for me—hence early
return in this case. Maybe I'm doing something wrong here.


>> diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
>> index 3433b46b90c1..146b30d07789 100644
>> --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
>> +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
>> @@ -35,7 +35,7 @@
>>  #include <asm/types.h>
>>  #include <linux/ethtool.h>
>>  #include <linux/mii.h>
>> -#include <linux/phy.h>
>> +#include <linux/phylink.h>
>>  #include <linux/sort.h>
>>  #include <linux/if_vlan.h>
>>  #include <linux/of_platform.h>
>> @@ -207,12 +207,10 @@ static void gfar_get_regs(struct net_device *dev, struct ethtool_regs *regs,
>>  static unsigned int gfar_usecs2ticks(struct gfar_private *priv,
>>                                      unsigned int usecs)
>>  {
>> -       struct net_device *ndev = priv->ndev;
>> -       struct phy_device *phydev = ndev->phydev;
>
> Are you sure this still works? You missed a ndev->phydev check from
> gfar_gcoalesce, where this is called from. Technically you can still
> check ndev->phydev, it's just that PHYLINK doesn't guarantee you'll
> have one (e.g. fixed-link interfaces).

It still works for RGMII PHYs, SGMII and 1000Base-X in my testing. I didn't
check it with fixed-link, though.


>> @@ -1519,6 +1472,24 @@ static int gfar_get_ts_info(struct net_device *dev,
>>         return 0;
>>  }
>>
>> +/* Set link ksettings (phy address, speed) for ethtools */
>
> ethtool, not ethtools. Also, I'm not quite sure what you mean by
> setting the "phy address" with ethtool.

Well, I know where I've copied it from… Thanks for pointing it out.

^ permalink raw reply

* [PATCH v2] net/socket: fix GCC8+ Wpacked-not-aligned warnings
From: Qian Cai @ 2019-07-30 14:38 UTC (permalink / raw)
  To: davem
  Cc: vyasevich, nhorman, marcelo.leitner, David.Laight, linux-sctp,
	netdev, linux-kernel, Qian Cai

In file included from ./include/linux/sctp.h:42,
                 from net/core/skbuff.c:47:
./include/uapi/linux/sctp.h:395:1: warning: alignment 4 of 'struct
sctp_paddr_change' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:728:1: warning: alignment 4 of 'struct
sctp_setpeerprim' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:727:26: warning: 'sspp_addr' offset 4 in
'struct sctp_setpeerprim' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage sspp_addr;
                          ^~~~~~~~~
./include/uapi/linux/sctp.h:741:1: warning: alignment 4 of 'struct
sctp_prim' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:740:26: warning: 'ssp_addr' offset 4 in
'struct sctp_prim' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage ssp_addr;
                          ^~~~~~~~
./include/uapi/linux/sctp.h:792:1: warning: alignment 4 of 'struct
sctp_paddrparams' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:784:26: warning: 'spp_address' offset 4 in
'struct sctp_paddrparams' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage spp_address;
                          ^~~~~~~~~~~
./include/uapi/linux/sctp.h:905:1: warning: alignment 4 of 'struct
sctp_paddrinfo' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:899:26: warning: 'spinfo_address' offset 4
in 'struct sctp_paddrinfo' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage spinfo_address;
                          ^~~~~~~~~~~~~~

This is because the commit 20c9c825b12f ("[SCTP] Fix SCTP socket options
to work with 32-bit apps on 64-bit kernels.") added "packed, aligned(4)"
GCC attributes to some structures but one of the members, i.e, "struct
sockaddr_storage" in those structures has the attribute,
"aligned(__alignof__ (struct sockaddr *)" which is 8-byte on 64-bit
systems, so the commit overwrites the designed alignments for
"sockaddr_storage".

To fix this, "struct sockaddr_storage" needs to be aligned to 4-byte as
it is only used in those packed sctp structure which is part of UAPI,
and "struct __kernel_sockaddr_storage" is used in some other
places of UAPI that need not to change alignments in order to not
breaking userspace.

Use an implicit alignment for "struct __kernel_sockaddr_storage" so it
can keep the same alignments as a member in both packed and un-packed
structures without breaking UAPI.

Suggested-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Qian Cai <cai@lca.pw>
---

v2: Use an implicit alignment for "struct __kernel_sockaddr_storage".

 include/uapi/linux/socket.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index 8eb96021709c..caf6c4d5f6ad 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -6,17 +6,20 @@
  * Desired design of maximum size and alignment (see RFC2553)
  */
 #define _K_SS_MAXSIZE	128	/* Implementation specific max size */
-#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
-				/* Implementation specific desired alignment */
 
 typedef unsigned short __kernel_sa_family_t;
 
 struct __kernel_sockaddr_storage {
-	__kernel_sa_family_t	ss_family;		/* address family */
-	/* Following field(s) are implementation specific */
-	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
+	union {
+		void *__align; /* implementation specific desired alignment */
+		struct {
+			__kernel_sa_family_t	ss_family; /* address family */
+			/* Following field(s) are implementation specific */
+			char __data[_K_SS_MAXSIZE - sizeof(unsigned short)];
 				/* space to achieve desired size, */
 				/* _SS_MAXSIZE value minus size of ss_family */
-} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
+			};
+		};
+};
 
 #endif /* _UAPI_LINUX_SOCKET_H */
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH] net: bridge: Allow bridge to joing multicast groups
From: Andrew Lunn @ 2019-07-30 14:34 UTC (permalink / raw)
  To: Allan W. Nielsen
  Cc: Ido Schimmel, Nikolay Aleksandrov, Horatiu Vultur, roopa, davem,
	bridge, netdev, linux-kernel
In-Reply-To: <20190730062721.p4vrxo5sxbtulkrx@lx-anielsen.microsemi.net>

Hi Allan

Just throwing out another idea....

The whole offloading story has been you use the hardware to accelerate
what the Linux stack can already do.

In this case, you want to accelerate Device Level Ring, DLR.  But i've
not yet seen a software implementation of DLR. Should we really be
considering first adding DLR to the SW bridge? Make it an alternative
to the STP code? Once we have a generic implementation we can then
look at how it can be accelerated using switchdev.

     Andrew

^ permalink raw reply

* [PATCH][net-next] mac80211: add missing null return check from call to ieee80211_get_sband
From: Colin King @ 2019-07-30 14:32 UTC (permalink / raw)
  To: John Crispin, Johannes Berg, David S . Miller, linux-wireless,
	netdev
  Cc: kernel-janitors, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

The return from ieee80211_get_sband can potentially be a null pointer, so
it seems prudent to add a null check to avoid a null pointer dereference
on sband.

Addresses-Coverity: ("Dereference null return")
Fixes: 2ab45876756f ("mac80211: add support for the ADDBA extension element")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 net/mac80211/agg-rx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 0e1bb43973b8..4d1c335e06e5 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -189,6 +189,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
 	u8 *pos;
 
 	sband = ieee80211_get_sband(sdata);
+	if (!sband)
+		return;
 	he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type);
 	if (!he_cap)
 		return;
-- 
2.20.1


^ permalink raw reply related

* [PATCH] enetc: Fix build error without PHYLIB
From: YueHaibing @ 2019-07-30 14:29 UTC (permalink / raw)
  To: claudiu.manoil, davem; +Cc: linux-kernel, netdev, YueHaibing

If PHYLIB is not set, build enetc will fails:

drivers/net/ethernet/freescale/enetc/enetc.o: In function `enetc_open':
enetc.c: undefined reference to `phy_disconnect'
enetc.c: undefined reference to `phy_start'
drivers/net/ethernet/freescale/enetc/enetc.o: In function `enetc_close':
enetc.c: undefined reference to `phy_stop'
enetc.c: undefined reference to `phy_disconnect'
drivers/net/ethernet/freescale/enetc/enetc_ethtool.o: undefined reference to `phy_ethtool_get_link_ksettings'
drivers/net/ethernet/freescale/enetc/enetc_ethtool.o: undefined reference to `phy_ethtool_set_link_ksettings'
drivers/net/ethernet/freescale/enetc/enetc_mdio.o: In function `enetc_mdio_probe':
enetc_mdio.c: undefined reference to `mdiobus_alloc_size'
enetc_mdio.c: undefined reference to `mdiobus_free'

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 drivers/net/ethernet/freescale/enetc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index ed0d010..46fdf36b 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -2,6 +2,7 @@
 config FSL_ENETC
 	tristate "ENETC PF driver"
 	depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST)
+	select PHYLIB
 	help
 	  This driver supports NXP ENETC gigabit ethernet controller PCIe
 	  physical function (PF) devices, managing ENETC Ports at a privileged
-- 
2.7.4



^ permalink raw reply related

* [net-next  1/1] tipc: reduce risk of wakeup queue starvation
From: Jon Maloy @ 2019-07-30 14:23 UTC (permalink / raw)
  To: davem, netdev
  Cc: gordan.mihaljevic, tung.q.nguyen, hoang.h.le, jon.maloy,
	canh.d.luu, ying.xue, tipc-discussion

In commit 365ad353c256 ("tipc: reduce risk of user starvation during
link congestion") we allowed senders to add exactly one list of extra
buffers to the link backlog queues during link congestion (aka
"oversubscription"). However, the criteria for when to stop adding
wakeup messages to the input queue when the overload abates is
inaccurate, and may cause starvation problems during very high load.

Currently, we stop adding wakeup messages after 10 total failed attempts
where we find that there is no space left in the backlog queue for a
certain importance level. The counter for this is accumulated across all
levels, which may lead the algorithm to leave the loop prematurely,
although there may still be plenty of space available at some levels.
The result is sometimes that messages near the wakeup queue tail are not
added to the input queue as they should be.

We now introduce a more exact algorithm, where we keep adding wakeup
messages to a level as long as the backlog queue has free slots for
the corresponding level, and stop at the moment there are no more such
slots or when there are no more wakeup messages to dequeue.

Fixes: 365ad35 ("tipc: reduce risk of user starvation during link congestion")
Reported-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
---
 net/tipc/link.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2c27477..dd3155b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -854,18 +854,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
  */
 static void link_prepare_wakeup(struct tipc_link *l)
 {
+	struct sk_buff_head *wakeupq = &l->wakeupq;
+	struct sk_buff_head *inputq = l->inputq;
 	struct sk_buff *skb, *tmp;
-	int imp, i = 0;
+	struct sk_buff_head tmpq;
+	int avail[5] = {0,};
+	int imp = 0;
+
+	__skb_queue_head_init(&tmpq);
 
-	skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
+	for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++)
+		avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
+
+	skb_queue_walk_safe(wakeupq, skb, tmp) {
 		imp = TIPC_SKB_CB(skb)->chain_imp;
-		if (l->backlog[imp].len < l->backlog[imp].limit) {
-			skb_unlink(skb, &l->wakeupq);
-			skb_queue_tail(l->inputq, skb);
-		} else if (i++ > 10) {
-			break;
-		}
+		if (avail[imp] <= 0)
+			continue;
+		avail[imp]--;
+		__skb_unlink(skb, wakeupq);
+		__skb_queue_tail(&tmpq, skb);
 	}
+
+	spin_lock_bh(&inputq->lock);
+	skb_queue_splice_tail(&tmpq, inputq);
+	spin_unlock_bh(&inputq->lock);
+
 }
 
 void tipc_link_reset(struct tipc_link *l)
-- 
2.1.4


^ permalink raw reply related

* Re: [PATCH net] net: stmmac: Sync RX Buffer upon allocation
From: Ezequiel Garcia @ 2019-07-30 14:17 UTC (permalink / raw)
  To: Jose Abreu
  Cc: Networking, Joao Pinto, Giuseppe Cavallaro, Alexandre Torgue,
	David S. Miller, Maxime Coquelin, linux-stm32, linux-arm-kernel,
	Linux Kernel Mailing List, Jon Hunter
In-Reply-To: <3601e3ae4357d48b3294f42781d0f19095d1b00e.1564479382.git.joabreu@synopsys.com>

On Tue, 30 Jul 2019 at 10:57, Jose Abreu <Jose.Abreu@synopsys.com> wrote:
>
> With recent changes that introduced support for Page Pool in stmmac, Jon
> reported that NFS boot was no longer working on an ARM64 based platform
> that had the IP behind an IOMMU.
>
> As Page Pool API does not guarantee DMA syncing because of the use of
> DMA_ATTR_SKIP_CPU_SYNC flag, we have to explicit sync the whole buffer upon
> re-allocation because we are always re-using same pages.
>
> In fact, ARM64 code invalidates the DMA area upon two situations [1]:
>         - sync_single_for_cpu(): Invalidates if direction != DMA_TO_DEVICE
>         - sync_single_for_device(): Invalidates if direction == DMA_FROM_DEVICE
>
> So, as we must invalidate both the current RX buffer and the newly allocated
> buffer we propose this fix.
>
> [1] arch/arm64/mm/cache.S
>
> Reported-by: Jon Hunter <jonathanh@nvidia.com>
> Tested-by: Jon Hunter <jonathanh@nvidia.com>
> Fixes: 2af6106ae949 ("net: stmmac: Introducing support for Page Pool")
> Signed-off-by: Jose Abreu <joabreu@synopsys.com>

Thanks a lot for the bug hunt and the fix. This fixes NFS mounting
on my RK3288 and RK3399 boards.

Tested-by: Ezequiel Garcia <ezequiel@collabora.com>

^ permalink raw reply

* Re: net: ipv6: Fix a bug in ndisc_send_ns when netdev only has a global address
From: David Ahern @ 2019-07-30 14:11 UTC (permalink / raw)
  To: Mark Smith, Su Yanjun; +Cc: netdev
In-Reply-To: <CAO42Z2x163LCaYrB2ZEm9i-A=Pw1xcudbGSua5TxxEHdc4=O2g@mail.gmail.com>

On 7/30/19 4:28 AM, Mark Smith wrote:
> Hi Su,
> 
> On Tue, 30 Jul 2019 at 19:41, Su Yanjun <suyj.fnst@cn.fujitsu.com> wrote:
>>
>>
>> 在 2019/7/30 16:15, Mark Smith 写道:
>>> Hi,
>>>
>>> I'm not subscribed to the Linux netdev mailing list, so I can't
>>> directly reply to the patch email.
>>>
>>> This patch is not the correct solution to this issue.
>>>
> 
> <snip>
> 
>> In linux implementation, one interface may have no link local address if
>> kernel config
>>
>> *addr_gen_mode* is set to IN6_ADDR_GEN_MODE_NONE. My patch is to fix
>> this problem.
>>
> 
> So this "IN6_ADDR_GEN_MODE_NONE" behaviour doesn't comply with RFC 4291.
> 
> As RFC 4291 says,
> 
> "All interfaces are *required* to have *at least one* Link-Local
> unicast address."
> 
> That's not an ambiguous requirement.

Interesting. Going back to the original commit:

commit bc91b0f07ada5535427373a4e2050877bcc12218
Author: Jiri Pirko <jiri@resnulli.us>
Date:   Fri Jul 11 21:10:18 2014 +0200

    ipv6: addrconf: implement address generation modes

    This patch introduces a possibility for userspace to set various (so far
    two) modes of generating addresses. This is useful for example for
    NetworkManager because it can set the mode to NONE and take care of link
    local addresses itself. That allow it to have the interface up,
    monitoring carrier but still don't have any addresses on it.

So the intention of IN6_ADDR_GEN_MODE_NONE was for userspace to control
it. If an LLA is required (4291 says yes, 4861 suggests no) then the
current behavior is correct and if IN6_ADDR_GEN_MODE_NONE is used by an
admin some userspace agent is required to add it for IPv6 to work on
that link.

> 
> This specific, explicit requirement goes as back as far as RFC 2373
> from 1998, the ancestor of RFC 4291. It is also heavily implied in RFC
> 1884s, 2.7 A Node's Required Addresses.
> 
>> And what you say is related to the lo interface.  I'm not sure whether
>> the lo interface needs a ll adreess.
>>
> 
> It is an IPv6 enabled interface, so it requires a link-local address,
> per RFC 4291. RFC 4291 doesn't exclude any interfaces types from the
> LL address requirement.

There is no 'link' for loopback, so really no point in generating an LLA
for it.


^ permalink raw reply

* Re: [PATCH 1/2] net: dsa: mv88e6xxx: add support to setup led-control register through device-tree
From: Andrew Lunn @ 2019-07-30 14:09 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller
In-Reply-To: <20190730101451.845-1-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:14:50PM +0200, Hubert Feurstein wrote:
> So it is possible to change the default behaviour of the switch LEDs.

Sorry, but this is not going to be accepted. There is an ongoing
discussion about PHY LEDs and how they should be configured. Switch
LEDs are no different from PHY LEDs. So they should use the same basic
concept.

Please take a look at the discussion around:

[RFC] dt-bindings: net: phy: Add subnode for LED configuration

Marvell designers have made this more difficult than it should be by
moving the registers out of the PHY address space and into the switch
address space. So we are going to have to implement this code twice
:-(

	Andrew

^ permalink raw reply

* Re: [PATCH 0/4] net: dsa: mv88e6xxx: add support for MV88E6220
From: Andrew Lunn @ 2019-07-30 14:03 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, Rasmus Villemoes
In-Reply-To: <20190730100429.32479-1-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:04:25PM +0200, Hubert Feurstein wrote:
> This patch series adds support for the MV88E6220 chip to the mv88e6xxx driver.
> The MV88E6220 is almost the same as MV88E6250 except that the ports 2-4 are
> not routed to pins.
> 
> Furthermore, PTP support is added to the MV88E6250 family.

Hi Hubert

In general, these are a nice series of patches.

FYI: Please indicate in the subject link which tree the patches are
for. These are all for net-next, so the subject would be

[PATCH net-next] net: dsa: mv88e6xxx: .....

Thanks
	Andrew

^ permalink raw reply

* Re: [PATCH] net: dsa: mv88e6xxx: use link-down-define instead of plain value
From: Andrew Lunn @ 2019-07-30 14:00 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller
In-Reply-To: <20190730101142.548-1-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:11:42PM +0200, Hubert Feurstein wrote:
> Using the define here makes the code more expressive.
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCH 1/4] net: dsa: mv88e6xxx: add support for MV88E6220
From: Hubert Feurstein @ 2019-07-30 14:00 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, Rasmus Villemoes
In-Reply-To: <20190730134924.GH28552@lunn.ch>

Hi Andrew,

[...]
> Do the registers for the ports exist?
Yes, they do and they return sane values.

> > +     [MV88E6220] = {
> > +             .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6220,
> > +             .family = MV88E6XXX_FAMILY_6250,
> > +             .name = "Marvell 88E6220",
> > +             .num_databases = 64,
> > +
> > +             /* Ports 2-4 are not routed to pins
> > +              * => usable ports 0, 1, 5, 6
> > +              */
> > +             .num_ports = 7,
>
> I'm wondering if we should add something like
>
>                 .invalid_port_mask = BIT(2) | BIT(3) | BIT(4)
>
Would make sense. I'll add it to the next series.

>
> and
>
>         /* Setup Switch Port Registers */
>         for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
> +               if (chip->info->invalid_port_mask & BIT(i) &&
> +                   !dsa_is_unused_port(ds, i))
> +                   return -EINVAL;
>                 if (dsa_is_unused_port(ds, i)) {
>                         err = mv88e6xxx_port_set_state(chip, i,
>                                                        BR_STATE_DISABLED);
>
>         Andrew

Hubert

^ permalink raw reply

* Re: [PATCH net] net: bridge: mcast: don't delete permanent entries when fast leave is enabled
From: Nikolay Aleksandrov @ 2019-07-30 14:00 UTC (permalink / raw)
  To: David Ahern, netdev; +Cc: davem, roopa, bridge
In-Reply-To: <4511701d-88c4-a937-2fbc-b557033a24ed@gmail.com>

On 30/07/2019 16:58, David Ahern wrote:
> On 7/30/19 5:21 AM, Nikolay Aleksandrov wrote:
>> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
>> index 3d8deac2353d..f8cac3702712 100644
>> --- a/net/bridge/br_multicast.c
>> +++ b/net/bridge/br_multicast.c
>> @@ -1388,6 +1388,9 @@ br_multicast_leave_group(struct net_bridge *br,
>>  			if (!br_port_group_equal(p, port, src))
>>  				continue;
>>  
>> +			if (p->flags & MDB_PG_FLAGS_PERMANENT)
>> +				break;
>> +
>>  			rcu_assign_pointer(*pp, p->next);
>>  			hlist_del_init(&p->mglist);
>>  			del_timer(&p->timer);
> 
> Why 'break' and not 'continue' like you have with
> 	if (!br_port_group_equal(p, port, src))
> 

Because we'll hit the goto out after this hunk always, no point in continuing
if we matched a group and it's permanent, the break might as well be a goto out.


^ permalink raw reply

* Re: [PATCH] net: dsa: mv88e6xxx: extend PTP gettime function to read system clock
From: Andrew Lunn @ 2019-07-30 14:00 UTC (permalink / raw)
  To: Hubert Feurstein
  Cc: netdev, linux-kernel, Vivien Didelot, Florian Fainelli,
	David S. Miller, richardcochran
In-Reply-To: <20190730101007.344-1-h.feurstein@gmail.com>

On Tue, Jul 30, 2019 at 12:10:07PM +0200, Hubert Feurstein wrote:
> This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl.
> 
> Signed-off-by: Hubert Feurstein <h.feurstein@gmail.com>

Please include the PTP maintainer for patches like this. Richard also
wrote this PTP code.

      Andrew

> ---
>  drivers/net/dsa/mv88e6xxx/ptp.c | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
> index 51cdf4712517..1ff983376f95 100644
> --- a/drivers/net/dsa/mv88e6xxx/ptp.c
> +++ b/drivers/net/dsa/mv88e6xxx/ptp.c
> @@ -230,14 +230,17 @@ static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
>  	return 0;
>  }
>  
> -static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
> -				 struct timespec64 *ts)
> +static int mv88e6xxx_ptp_gettimex(struct ptp_clock_info *ptp,
> +				  struct timespec64 *ts,
> +				  struct ptp_system_timestamp *sts)
>  {
>  	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
>  	u64 ns;
>  
>  	mv88e6xxx_reg_lock(chip);
> +	ptp_read_system_prets(sts);
>  	ns = timecounter_read(&chip->tstamp_tc);
> +	ptp_read_system_postts(sts);
>  	mv88e6xxx_reg_unlock(chip);
>  
>  	*ts = ns_to_timespec64(ns);
> @@ -386,7 +389,7 @@ static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
>  	struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
>  	struct timespec64 ts;
>  
> -	mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
> +	mv88e6xxx_ptp_gettimex(&chip->ptp_clock_info, &ts, NULL);
>  
>  	schedule_delayed_work(&chip->overflow_work,
>  			      MV88E6XXX_TAI_OVERFLOW_PERIOD);
> @@ -444,7 +447,7 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
>  	chip->ptp_clock_info.max_adj    = MV88E6XXX_MAX_ADJ_PPB;
>  	chip->ptp_clock_info.adjfine	= mv88e6xxx_ptp_adjfine;
>  	chip->ptp_clock_info.adjtime	= mv88e6xxx_ptp_adjtime;
> -	chip->ptp_clock_info.gettime64	= mv88e6xxx_ptp_gettime;
> +	chip->ptp_clock_info.gettimex64	= mv88e6xxx_ptp_gettimex;
>  	chip->ptp_clock_info.settime64	= mv88e6xxx_ptp_settime;
>  	chip->ptp_clock_info.enable	= ptp_ops->ptp_enable;
>  	chip->ptp_clock_info.verify	= ptp_ops->ptp_verify;
> -- 
> 2.22.0
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox