From: Mat Martineau <martineau@kernel.org>
To: Yunsheng Lin <linyunsheng@huawei.com>
Cc: davem@davemloft.net, kuba@kernel.org, pabeni@redhat.com,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Alexander Duyck <alexander.duyck@gmail.com>,
Ayush Sawal <ayush.sawal@chelsio.com>,
Eric Dumazet <edumazet@google.com>,
Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
Jason Wang <jasowang@redhat.com>, Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Juri Lelli <juri.lelli@redhat.com>,
Vincent Guittot <vincent.guittot@linaro.org>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Steven Rostedt <rostedt@goodmis.org>,
Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
Daniel Bristot de Oliveira <bristot@redhat.com>,
Valentin Schneider <vschneid@redhat.com>,
John Fastabend <john.fastabend@gmail.com>,
Jakub Sitnicki <jakub@cloudflare.com>,
David Ahern <dsahern@kernel.org>,
Matthieu Baerts <matttbe@kernel.org>,
Geliang Tang <geliang@kernel.org>,
Jamal Hadi Salim <jhs@mojatatu.com>,
Cong Wang <xiyou.wangcong@gmail.com>,
Jiri Pirko <jiri@resnulli.us>,
Boris Pismenny <borisp@nvidia.com>,
bpf@vger.kernel.org, mptcp@lists.linux.dev
Subject: Re: [PATCH net-next v3 11/13] net: replace page_frag with page_frag_cache
Date: Thu, 9 May 2024 09:22:05 -0700 (PDT) [thread overview]
Message-ID: <334a8c67-87c8-a918-9517-0afbfae0d02b@kernel.org> (raw)
In-Reply-To: <20240508133408.54708-12-linyunsheng@huawei.com>
On Wed, 8 May 2024, Yunsheng Lin wrote:
> Use the newly introduced prepare/probe/commit API to
> replace page_frag with page_frag_cache for sk_page_frag().
>
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
> .../chelsio/inline_crypto/chtls/chtls.h | 3 -
> .../chelsio/inline_crypto/chtls/chtls_io.c | 100 ++++---------
> .../chelsio/inline_crypto/chtls/chtls_main.c | 3 -
> drivers/net/tun.c | 28 ++--
> include/linux/sched.h | 4 +-
> include/net/sock.h | 14 +-
> kernel/exit.c | 3 +-
> kernel/fork.c | 3 +-
> net/core/skbuff.c | 32 ++--
> net/core/skmsg.c | 22 +--
> net/core/sock.c | 46 ++++--
> net/ipv4/ip_output.c | 33 +++--
> net/ipv4/tcp.c | 35 ++---
> net/ipv4/tcp_output.c | 28 ++--
> net/ipv6/ip6_output.c | 33 +++--
> net/kcm/kcmsock.c | 30 ++--
> net/mptcp/protocol.c | 70 +++++----
> net/sched/em_meta.c | 2 +-
> net/tls/tls_device.c | 139 ++++++++++--------
> 19 files changed, 331 insertions(+), 297 deletions(-)
>
<snip>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index bb8f96f2b86f..ab844011d442 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -960,17 +960,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
> }
>
> /* we can append data to the given data frag if:
> - * - there is space available in the backing page_frag
> - * - the data frag tail matches the current page_frag free offset
> + * - there is space available for the current page
> + * - the data frag tail matches the current page and offset
> * - the data frag end sequence number matches the current write seq
> */
> static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
> - const struct page_frag *pfrag,
> + const struct page *page,
> + const unsigned int offset,
> + const unsigned int size,
Hi Yunsheng -
Why add the 'size' parameter here? It's checked to be nonzero, but it can
only be 0 when 'page' is also NULL. In that case "page == df->page" is
already false, so the function returns false even without checking
'size'.
Thanks,
Mat
> const struct mptcp_data_frag *df)
> {
> - return df && pfrag->page == df->page &&
> - pfrag->size - pfrag->offset > 0 &&
> - pfrag->offset == (df->offset + df->data_len) &&
> + return df && size && page == df->page &&
> + offset == (df->offset + df->data_len) &&
> df->data_seq + df->data_len == msk->write_seq;
> }
>
> @@ -1085,30 +1086,36 @@ static void mptcp_enter_memory_pressure(struct sock *sk)
> /* ensure we get enough memory for the frag hdr, beyond some minimal amount of
> * data
> */
> -static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
> +static struct page *mptcp_page_frag_alloc_prepare(struct sock *sk,
> + struct page_frag_cache *pfrag,
> + unsigned int *offset,
> + unsigned int *size, void **va)
> {
> - if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag),
> - pfrag, sk->sk_allocation)))
> - return true;
> + struct page *page;
> +
> + page = page_frag_alloc_prepare(pfrag, offset, size, va,
> + sk->sk_allocation);
> + if (likely(page))
> + return page;
>
> mptcp_enter_memory_pressure(sk);
> - return false;
> + return NULL;
> }
>
> static struct mptcp_data_frag *
> -mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
> - int orig_offset)
> +mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page *page,
> + unsigned int orig_offset)
> {
> int offset = ALIGN(orig_offset, sizeof(long));
> struct mptcp_data_frag *dfrag;
>
> - dfrag = (struct mptcp_data_frag *)(page_to_virt(pfrag->page) + offset);
> + dfrag = (struct mptcp_data_frag *)(page_to_virt(page) + offset);
> dfrag->data_len = 0;
> dfrag->data_seq = msk->write_seq;
> dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
> dfrag->offset = offset + sizeof(struct mptcp_data_frag);
> dfrag->already_sent = 0;
> - dfrag->page = pfrag->page;
> + dfrag->page = page;
>
> return dfrag;
> }
> @@ -1793,7 +1800,7 @@ static u32 mptcp_send_limit(const struct sock *sk)
> static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> {
> struct mptcp_sock *msk = mptcp_sk(sk);
> - struct page_frag *pfrag;
> + struct page_frag_cache *pfrag;
> size_t copied = 0;
> int ret = 0;
> long timeo;
> @@ -1832,9 +1839,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> while (msg_data_left(msg)) {
> int total_ts, frag_truesize = 0;
> struct mptcp_data_frag *dfrag;
> - bool dfrag_collapsed;
> - size_t psize, offset;
> + bool dfrag_collapsed = false;
> + unsigned int offset, size;
> + struct page *page;
> + size_t psize;
> u32 copy_limit;
> + void *va;
>
> /* ensure fitting the notsent_lowat() constraint */
> copy_limit = mptcp_send_limit(sk);
> @@ -1845,21 +1855,26 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> * page allocator
> */
> dfrag = mptcp_pending_tail(sk);
> - dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
> + page = page_frag_alloc_probe(pfrag, &offset, &size, &va);
> + dfrag_collapsed = mptcp_frag_can_collapse_to(msk, page, offset,
> + size, dfrag);
> if (!dfrag_collapsed) {
> - if (!mptcp_page_frag_refill(sk, pfrag))
> + size = 32U + sizeof(struct mptcp_data_frag);
> + page = mptcp_page_frag_alloc_prepare(sk, pfrag, &offset,
> + &size, &va);
> + if (!page)
> goto wait_for_memory;
>
> - dfrag = mptcp_carve_data_frag(msk, pfrag, pfrag->offset);
> + dfrag = mptcp_carve_data_frag(msk, page, offset);
> frag_truesize = dfrag->overhead;
> + va += dfrag->overhead;
> }
>
> /* we do not bound vs wspace, to allow a single packet.
> * memory accounting will prevent execessive memory usage
> * anyway
> */
> - offset = dfrag->offset + dfrag->data_len;
> - psize = pfrag->size - offset;
> + psize = size - frag_truesize;
> psize = min_t(size_t, psize, msg_data_left(msg));
> psize = min_t(size_t, psize, copy_limit);
> total_ts = psize + frag_truesize;
> @@ -1867,8 +1882,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> if (!sk_wmem_schedule(sk, total_ts))
> goto wait_for_memory;
>
> - ret = do_copy_data_nocache(sk, psize, &msg->msg_iter,
> - page_address(dfrag->page) + offset);
> + ret = do_copy_data_nocache(sk, psize, &msg->msg_iter, va);
> if (ret)
> goto do_error;
>
> @@ -1877,7 +1891,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> copied += psize;
> dfrag->data_len += psize;
> frag_truesize += psize;
> - pfrag->offset += frag_truesize;
> WRITE_ONCE(msk->write_seq, msk->write_seq + psize);
>
> /* charge data on mptcp pending queue to the msk socket
> @@ -1885,11 +1898,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> */
> sk_wmem_queued_add(sk, frag_truesize);
> if (!dfrag_collapsed) {
> - get_page(dfrag->page);
> + page_frag_alloc_commit(pfrag, frag_truesize);
> list_add_tail(&dfrag->list, &msk->rtx_queue);
> if (!msk->first_pending)
> WRITE_ONCE(msk->first_pending, dfrag);
> + } else {
> + page_frag_alloc_commit_noref(pfrag, frag_truesize);
> }
> +
> pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk,
> dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
> !dfrag_collapsed);
next prev parent reply other threads:[~2024-05-09 16:22 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-08 13:33 [PATCH net-next v3 00/13] First try to replace page_frag with page_frag_cache Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 01/13] mm: page_frag: add a test module for page_frag Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 02/13] xtensa: remove the get_order() implementation Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 03/13] mm: page_frag: use free_unref_page() to free page fragment Yunsheng Lin
2024-05-08 13:33 ` [PATCH net-next v3 04/13] mm: move the page fragment allocator from page_alloc into its own file Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 05/13] mm: page_frag: use initial zero offset for page_frag_alloc_align() Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 06/13] mm: page_frag: add '_va' suffix to page_frag API Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 07/13] mm: page_frag: avoid caller accessing 'page_frag_cache' directly Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 08/13] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc' Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 09/13] net: introduce the skb_copy_to_va_nocache() helper Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 10/13] mm: page_frag: introduce prepare/probe/commit API Yunsheng Lin
2024-05-10 17:38 ` Mat Martineau
2024-05-08 13:34 ` [PATCH net-next v3 11/13] net: replace page_frag with page_frag_cache Yunsheng Lin
2024-05-09 16:22 ` Mat Martineau [this message]
2024-05-10 9:48 ` Yunsheng Lin
2024-05-10 17:29 ` Mat Martineau
2024-05-13 11:53 ` Yunsheng Lin
2024-05-13 23:44 ` Mat Martineau
2024-05-08 13:34 ` [PATCH net-next v3 12/13] mm: page_frag: update documentation for page_frag Yunsheng Lin
2024-05-09 0:44 ` Randy Dunlap
2024-05-10 9:48 ` Yunsheng Lin
2024-05-10 12:32 ` Yunsheng Lin
2024-05-10 18:30 ` Randy Dunlap
2024-05-09 16:58 ` Mat Martineau
2024-05-10 9:48 ` Yunsheng Lin
2024-05-08 13:34 ` [PATCH net-next v3 13/13] mm: page_frag: add a entry in MAINTAINERS " Yunsheng Lin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=334a8c67-87c8-a918-9517-0afbfae0d02b@kernel.org \
--to=martineau@kernel.org \
--cc=alexander.duyck@gmail.com \
--cc=ayush.sawal@chelsio.com \
--cc=borisp@nvidia.com \
--cc=bpf@vger.kernel.org \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=davem@davemloft.net \
--cc=dietmar.eggemann@arm.com \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=geliang@kernel.org \
--cc=jakub@cloudflare.com \
--cc=jasowang@redhat.com \
--cc=jhs@mojatatu.com \
--cc=jiri@resnulli.us \
--cc=john.fastabend@gmail.com \
--cc=juri.lelli@redhat.com \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linyunsheng@huawei.com \
--cc=matttbe@kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=mptcp@lists.linux.dev \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=vincent.guittot@linaro.org \
--cc=vschneid@redhat.com \
--cc=willemdebruijn.kernel@gmail.com \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.