public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Mat Martineau <martineau@kernel.org>
To: Yunsheng Lin <linyunsheng@huawei.com>
Cc: davem@davemloft.net, kuba@kernel.org, pabeni@redhat.com,
	 netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	 Alexander Duyck <alexander.duyck@gmail.com>,
	 Ayush Sawal <ayush.sawal@chelsio.com>,
	Eric Dumazet <edumazet@google.com>,
	 Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
	 Jason Wang <jasowang@redhat.com>, Ingo Molnar <mingo@redhat.com>,
	 Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	 Vincent Guittot <vincent.guittot@linaro.org>,
	 Dietmar Eggemann <dietmar.eggemann@arm.com>,
	 Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>,  Mel Gorman <mgorman@suse.de>,
	 Daniel Bristot de Oliveira <bristot@redhat.com>,
	 Valentin Schneider <vschneid@redhat.com>,
	 John Fastabend <john.fastabend@gmail.com>,
	 Jakub Sitnicki <jakub@cloudflare.com>,
	David Ahern <dsahern@kernel.org>,
	 Matthieu Baerts <matttbe@kernel.org>,
	Geliang Tang <geliang@kernel.org>,
	 Jamal Hadi Salim <jhs@mojatatu.com>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	 Jiri Pirko <jiri@resnulli.us>,
	Boris Pismenny <borisp@nvidia.com>,
	 bpf@vger.kernel.org, mptcp@lists.linux.dev
Subject: Re: [PATCH net-next v3 11/13] net: replace page_frag with page_frag_cache
Date: Thu, 9 May 2024 09:22:05 -0700 (PDT)	[thread overview]
Message-ID: <334a8c67-87c8-a918-9517-0afbfae0d02b@kernel.org> (raw)
In-Reply-To: <20240508133408.54708-12-linyunsheng@huawei.com>

On Wed, 8 May 2024, Yunsheng Lin wrote:

> Use the newly introduced prepare/probe/commit API to
> replace page_frag with page_frag_cache for sk_page_frag().
>
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
> .../chelsio/inline_crypto/chtls/chtls.h       |   3 -
> .../chelsio/inline_crypto/chtls/chtls_io.c    | 100 ++++---------
> .../chelsio/inline_crypto/chtls/chtls_main.c  |   3 -
> drivers/net/tun.c                             |  28 ++--
> include/linux/sched.h                         |   4 +-
> include/net/sock.h                            |  14 +-
> kernel/exit.c                                 |   3 +-
> kernel/fork.c                                 |   3 +-
> net/core/skbuff.c                             |  32 ++--
> net/core/skmsg.c                              |  22 +--
> net/core/sock.c                               |  46 ++++--
> net/ipv4/ip_output.c                          |  33 +++--
> net/ipv4/tcp.c                                |  35 ++---
> net/ipv4/tcp_output.c                         |  28 ++--
> net/ipv6/ip6_output.c                         |  33 +++--
> net/kcm/kcmsock.c                             |  30 ++--
> net/mptcp/protocol.c                          |  70 +++++----
> net/sched/em_meta.c                           |   2 +-
> net/tls/tls_device.c                          | 139 ++++++++++--------
> 19 files changed, 331 insertions(+), 297 deletions(-)
>

<snip>

> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index bb8f96f2b86f..ab844011d442 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -960,17 +960,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
> }
>
> /* we can append data to the given data frag if:
> - * - there is space available in the backing page_frag
> - * - the data frag tail matches the current page_frag free offset
> + * - there is space available for the current page
> + * - the data frag tail matches the current page and offset
>  * - the data frag end sequence number matches the current write seq
>  */
> static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
> -				       const struct page_frag *pfrag,
> +				       const struct page *page,
> +				       const unsigned int offset,
> +				       const unsigned int size,

Hi Yunsheng -

Why add the 'size' parameter here? It's checked to be a nonzero value, but 
it can only be 0 if page is also NULL. In this case "page == df->page" 
will be false, so the function will return false even without checking 
'size'.

Thanks,

Mat

> 				       const struct mptcp_data_frag *df)
> {
> -	return df && pfrag->page == df->page &&
> -		pfrag->size - pfrag->offset > 0 &&
> -		pfrag->offset == (df->offset + df->data_len) &&
> +	return df && size && page == df->page &&
> +		offset == (df->offset + df->data_len) &&
> 		df->data_seq + df->data_len == msk->write_seq;
> }
>
> @@ -1085,30 +1086,36 @@ static void mptcp_enter_memory_pressure(struct sock *sk)
> /* ensure we get enough memory for the frag hdr, beyond some minimal amount of
>  * data
>  */
> -static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
> +static struct page *mptcp_page_frag_alloc_prepare(struct sock *sk,
> +						  struct page_frag_cache *pfrag,
> +						  unsigned int *offset,
> +						  unsigned int *size, void **va)
> {
> -	if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag),
> -					pfrag, sk->sk_allocation)))
> -		return true;
> +	struct page *page;
> +
> +	page = page_frag_alloc_prepare(pfrag, offset, size, va,
> +				       sk->sk_allocation);
> +	if (likely(page))
> +		return page;
>
> 	mptcp_enter_memory_pressure(sk);
> -	return false;
> +	return NULL;
> }
>
> static struct mptcp_data_frag *
> -mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
> -		      int orig_offset)
> +mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page *page,
> +		      unsigned int orig_offset)
> {
> 	int offset = ALIGN(orig_offset, sizeof(long));
> 	struct mptcp_data_frag *dfrag;
>
> -	dfrag = (struct mptcp_data_frag *)(page_to_virt(pfrag->page) + offset);
> +	dfrag = (struct mptcp_data_frag *)(page_to_virt(page) + offset);
> 	dfrag->data_len = 0;
> 	dfrag->data_seq = msk->write_seq;
> 	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
> 	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
> 	dfrag->already_sent = 0;
> -	dfrag->page = pfrag->page;
> +	dfrag->page = page;
>
> 	return dfrag;
> }
> @@ -1793,7 +1800,7 @@ static u32 mptcp_send_limit(const struct sock *sk)
> static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> -	struct page_frag *pfrag;
> +	struct page_frag_cache *pfrag;
> 	size_t copied = 0;
> 	int ret = 0;
> 	long timeo;
> @@ -1832,9 +1839,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 	while (msg_data_left(msg)) {
> 		int total_ts, frag_truesize = 0;
> 		struct mptcp_data_frag *dfrag;
> -		bool dfrag_collapsed;
> -		size_t psize, offset;
> +		bool dfrag_collapsed = false;
> +		unsigned int offset, size;
> +		struct page *page;
> +		size_t psize;
> 		u32 copy_limit;
> +		void *va;
>
> 		/* ensure fitting the notsent_lowat() constraint */
> 		copy_limit = mptcp_send_limit(sk);
> @@ -1845,21 +1855,26 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 		 * page allocator
> 		 */
> 		dfrag = mptcp_pending_tail(sk);
> -		dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
> +		page = page_frag_alloc_probe(pfrag, &offset, &size, &va);
> +		dfrag_collapsed = mptcp_frag_can_collapse_to(msk, page, offset,
> +							     size, dfrag);
> 		if (!dfrag_collapsed) {
> -			if (!mptcp_page_frag_refill(sk, pfrag))
> +			size = 32U + sizeof(struct mptcp_data_frag);
> +			page = mptcp_page_frag_alloc_prepare(sk, pfrag, &offset,
> +							     &size, &va);
> +			if (!page)
> 				goto wait_for_memory;
>
> -			dfrag = mptcp_carve_data_frag(msk, pfrag, pfrag->offset);
> +			dfrag = mptcp_carve_data_frag(msk, page, offset);
> 			frag_truesize = dfrag->overhead;
> +			va += dfrag->overhead;
> 		}
>
> 		/* we do not bound vs wspace, to allow a single packet.
> 		 * memory accounting will prevent execessive memory usage
> 		 * anyway
> 		 */
> -		offset = dfrag->offset + dfrag->data_len;
> -		psize = pfrag->size - offset;
> +		psize = size - frag_truesize;
> 		psize = min_t(size_t, psize, msg_data_left(msg));
> 		psize = min_t(size_t, psize, copy_limit);
> 		total_ts = psize + frag_truesize;
> @@ -1867,8 +1882,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 		if (!sk_wmem_schedule(sk, total_ts))
> 			goto wait_for_memory;
>
> -		ret = do_copy_data_nocache(sk, psize, &msg->msg_iter,
> -					   page_address(dfrag->page) + offset);
> +		ret = do_copy_data_nocache(sk, psize, &msg->msg_iter, va);
> 		if (ret)
> 			goto do_error;
>
> @@ -1877,7 +1891,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 		copied += psize;
> 		dfrag->data_len += psize;
> 		frag_truesize += psize;
> -		pfrag->offset += frag_truesize;
> 		WRITE_ONCE(msk->write_seq, msk->write_seq + psize);
>
> 		/* charge data on mptcp pending queue to the msk socket
> @@ -1885,11 +1898,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> 		 */
> 		sk_wmem_queued_add(sk, frag_truesize);
> 		if (!dfrag_collapsed) {
> -			get_page(dfrag->page);
> +			page_frag_alloc_commit(pfrag, frag_truesize);
> 			list_add_tail(&dfrag->list, &msk->rtx_queue);
> 			if (!msk->first_pending)
> 				WRITE_ONCE(msk->first_pending, dfrag);
> +		} else {
> +			page_frag_alloc_commit_noref(pfrag, frag_truesize);
> 		}
> +
> 		pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk,
> 			 dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
> 			 !dfrag_collapsed);


  reply	other threads:[~2024-05-09 16:22 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-08 13:33 [PATCH net-next v3 00/13] First try to replace page_frag with page_frag_cache Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 01/13] mm: page_frag: add a test module for page_frag Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 02/13] xtensa: remove the get_order() implementation Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 03/13] mm: page_frag: use free_unref_page() to free page fragment Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 04/13] mm: move the page fragment allocator from page_alloc into its own file Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 05/13] mm: page_frag: use initial zero offset for page_frag_alloc_align() Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 06/13] mm: page_frag: add '_va' suffix to page_frag API Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 07/13] mm: page_frag: avoid caller accessing 'page_frag_cache' directly Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 08/13] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc' Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 09/13] net: introduce the skb_copy_to_va_nocache() helper Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 10/13] mm: page_frag: introduce prepare/probe/commit API Yunsheng Lin
2024-05-10 17:38   ` Mat Martineau
2024-05-08 13:34 ` [PATCH net-next v3 11/13] net: replace page_frag with page_frag_cache Yunsheng Lin
2024-05-09 16:22   ` Mat Martineau [this message]
2024-05-10  9:48     ` Yunsheng Lin
2024-05-10 17:29       ` Mat Martineau
2024-05-13 11:53         ` Yunsheng Lin
2024-05-13 23:44           ` Mat Martineau
2024-05-08 13:34 ` [PATCH net-next v3 12/13] mm: page_frag: update documentation for page_frag Yunsheng Lin
2024-05-09  0:44   ` Randy Dunlap
2024-05-10  9:48     ` Yunsheng Lin
2024-05-10 12:32     ` Yunsheng Lin
2024-05-10 18:30       ` Randy Dunlap
2024-05-09 16:58   ` Mat Martineau
2024-05-10  9:48     ` Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 13/13] mm: page_frag: add a entry in MAINTAINERS " Yunsheng Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=334a8c67-87c8-a918-9517-0afbfae0d02b@kernel.org \
    --to=martineau@kernel.org \
    --cc=alexander.duyck@gmail.com \
    --cc=ayush.sawal@chelsio.com \
    --cc=borisp@nvidia.com \
    --cc=bpf@vger.kernel.org \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=davem@davemloft.net \
    --cc=dietmar.eggemann@arm.com \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=geliang@kernel.org \
    --cc=jakub@cloudflare.com \
    --cc=jasowang@redhat.com \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=john.fastabend@gmail.com \
    --cc=juri.lelli@redhat.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linyunsheng@huawei.com \
    --cc=matttbe@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=mptcp@lists.linux.dev \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=willemdebruijn.kernel@gmail.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox