Re: [PATCH net-next v17 12/14] net: replace page_frag with page_frag_cache
From: Yunsheng Lin
Date: Tue Sep 03 2024 - 08:48:56 EST
On 2024/9/2 20:03, Yunsheng Lin wrote:
> Use the newly introduced prepare/probe/commit API to
> replace page_frag with page_frag_cache for sk_page_frag().
>
> CC: Alexander Duyck <alexander.duyck@xxxxxxxxx>
> Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx>
Hi, Mat
As new refactoring and new API naming is used since v14, the
replacing patch have bigger change to use the new API naming too,
so I dropped your 'Acked-by' tag when using the new API.
It would good to review it again so that this patch doesn't
break mptcp when you are not busy, thanks.
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 37ebcb7640eb..5fbddd9de53e 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -960,7 +960,6 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
> }
>
> /* we can append data to the given data frag if:
> - * - there is space available in the backing page_frag
> * - the data frag tail matches the current page_frag free offset
> * - the data frag end sequence number matches the current write seq
> */
> @@ -969,7 +968,6 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
> const struct mptcp_data_frag *df)
> {
> return df && pfrag->page == df->page &&
> - pfrag->size - pfrag->offset > 0 &&
> pfrag->offset == (df->offset + df->data_len) &&
> df->data_seq + df->data_len == msk->write_seq;
> }
> @@ -1085,14 +1083,20 @@ static void mptcp_enter_memory_pressure(struct sock *sk)
> /* ensure we get enough memory for the frag hdr, beyond some minimal amount of
> * data
> */
> -static bool mptcp_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
> +static void *mptcp_page_frag_alloc_refill_prepare(struct sock *sk,
> + struct page_frag_cache *nc,
> + struct page_frag *pfrag)
> {
> - if (likely(skb_page_frag_refill(32U + sizeof(struct mptcp_data_frag),
> - pfrag, sk->sk_allocation)))
> - return true;
> + unsigned int fragsz = 32U + sizeof(struct mptcp_data_frag);
> + void *va;
> +
> + va = page_frag_alloc_refill_prepare(nc, fragsz, pfrag,
> + sk->sk_allocation);
> + if (likely(va))
> + return va;
>
> mptcp_enter_memory_pressure(sk);
> - return false;
> + return NULL;
> }
>
> static struct mptcp_data_frag *
> @@ -1795,7 +1799,7 @@ static u32 mptcp_send_limit(const struct sock *sk)
> static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> {
> struct mptcp_sock *msk = mptcp_sk(sk);
> - struct page_frag *pfrag;
> + struct page_frag_cache *nc;
> size_t copied = 0;
> int ret = 0;
> long timeo;
> @@ -1829,14 +1833,16 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)))
> goto do_error;
>
> - pfrag = sk_page_frag(sk);
> + nc = sk_page_frag_cache(sk);
>
> while (msg_data_left(msg)) {
> + struct page_frag page_frag, *pfrag;
> int total_ts, frag_truesize = 0;
> struct mptcp_data_frag *dfrag;
> bool dfrag_collapsed;
> - size_t psize, offset;
> u32 copy_limit;
> + size_t psize;
> + void *va;
>
> /* ensure fitting the notsent_lowat() constraint */
> copy_limit = mptcp_send_limit(sk);
> @@ -1847,21 +1853,26 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> * page allocator
> */
> dfrag = mptcp_pending_tail(sk);
> - dfrag_collapsed = mptcp_frag_can_collapse_to(msk, pfrag, dfrag);
> + pfrag = &page_frag;
> + va = page_frag_alloc_refill_probe(nc, 1, pfrag);
> + dfrag_collapsed = va && mptcp_frag_can_collapse_to(msk, pfrag,
> + dfrag);
> if (!dfrag_collapsed) {
> - if (!mptcp_page_frag_refill(sk, pfrag))
> + va = mptcp_page_frag_alloc_refill_prepare(sk, nc,
> + pfrag);
> + if (!va)
> goto wait_for_memory;
>
> dfrag = mptcp_carve_data_frag(msk, pfrag, pfrag->offset);
> frag_truesize = dfrag->overhead;
> + va += dfrag->overhead;
> }
>
> /* we do not bound vs wspace, to allow a single packet.
> * memory accounting will prevent execessive memory usage
> * anyway
> */
> - offset = dfrag->offset + dfrag->data_len;
> - psize = pfrag->size - offset;
> + psize = pfrag->size - frag_truesize;
> psize = min_t(size_t, psize, msg_data_left(msg));
> psize = min_t(size_t, psize, copy_limit);
> total_ts = psize + frag_truesize;
> @@ -1869,8 +1880,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> if (!sk_wmem_schedule(sk, total_ts))
> goto wait_for_memory;
>
> - ret = do_copy_data_nocache(sk, psize, &msg->msg_iter,
> - page_address(dfrag->page) + offset);
> + ret = do_copy_data_nocache(sk, psize, &msg->msg_iter, va);
> if (ret)
> goto do_error;
>
> @@ -1879,7 +1889,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> copied += psize;
> dfrag->data_len += psize;
> frag_truesize += psize;
> - pfrag->offset += frag_truesize;
> WRITE_ONCE(msk->write_seq, msk->write_seq + psize);
>
> /* charge data on mptcp pending queue to the msk socket
> @@ -1887,10 +1896,12 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> */
> sk_wmem_queued_add(sk, frag_truesize);
> if (!dfrag_collapsed) {
> - get_page(dfrag->page);
> + page_frag_commit(nc, pfrag, frag_truesize);
> list_add_tail(&dfrag->list, &msk->rtx_queue);
> if (!msk->first_pending)
> WRITE_ONCE(msk->first_pending, dfrag);
> + } else {
> + page_frag_commit_noref(nc, pfrag, frag_truesize);
> }
> pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk,
> dfrag->data_seq, dfrag->data_len, dfrag->already_sent,