Paolo Abeni wrote: I think this looks good, thanks Paolo! Some comments below. > diff --git a/include/net/mptcp.h b/include/net/mptcp.h > index 43ddfdf9e4a3..8a702e7566e6 100644 > --- a/include/net/mptcp.h > +++ b/include/net/mptcp.h > @@ -27,17 +27,55 @@ struct mptcp_ext { > > #ifdef CONFIG_MPTCP > > +static inline void mptcp_skb_ext_move(struct sk_buff *to, > + const struct sk_buff *from) > +{ > + skb_ext_put(to); > + > + to->active_extensions = from->active_extensions; > + to->extensions = from->extensions; > + from->extensions = NULL; > + from->active_extensions = 0; > +} You can remove the '= NULL' assignment, @to is free'd and ext core doesn't look at ->extensions if active_extensions == 0. > static inline bool mptcp_skb_ext_exist(const struct sk_buff *skb) > { > return skb_ext_exist(skb, SKB_EXT_MPTCP); > } > > +static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext, > + const struct mptcp_ext *from_ext) > +{ > + return !memcmp(&from_ext->data_seq, &to_ext->data_seq); Is this supposed to be return !memcmp(from_ext, to_ext, sizeof(*to_ext)); or return from_ext->data_seq == to_ext->data_seq; ? I assume its the former? Maybe make this return to_ext == from_ext || (to_ext && from_ext && memcmp(...) == 0); > +static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, > + const struct sk_buff *from) > +{ > + const struct mptcp_ext *from_ext = skb_ext_find(from, SKB_EXT_MPTCP); > + const struct mptcp_ext *to_ext = skb_ext_find(to, SKB_EXT_MPTCP); > + > + return !from_ext || (to_ext && mptcp_ext_matches(to_ext, from_ext)); I would prefer to handle 'to == NULL' in mptcp_ext_matches, see below. > diff --git a/include/net/tcp.h b/include/net/tcp.h > index c483c73b8d41..f4cddd42d52a 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -983,7 +983,7 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, > const struct sk_buff *from) > { > return likely(tcp_skb_can_collapse_to(to) && > - !mptcp_skb_ext_exist(from)); > + mptcp_skb_can_collapse(to, from)); > } > > /* Events passed to congestion control interface */ > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c > index 55b460a2ece2..1c96c545a23a 100644 > --- a/net/ipv4/tcp_input.c > +++ b/net/ipv4/tcp_input.c > @@ -4420,7 +4420,7 @@ static bool tcp_try_coalesce(struct sock *sk, > if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) > return false; > > - if (mptcp_skb_ext_exist(from)) > + if (mptcp_skb_can_collapse(to, from)) > return false; > > #ifdef CONFIG_TLS_DEVICE > @@ -4931,12 +4931,12 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, > > /* The first skb to collapse is: > * - not SYN/FIN and > - * - does not include a MPTCP skb extension > + * - MPTCP allow collapsing with the next one, if any > * - bloated or contains data before "start" or > * overlaps to the next one. > */ > if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && > - !mptcp_skb_ext_exist(skb) && > + (!n || mptcp_skb_can_collapse(n, skb)) && mptcp_skb_can_collapse(skb, n)) ? (n is the next skb in the tcp sequence space). Also, there is a second check after this: if (end_of_skbs || mptcp_skb_ext_exist(skb) || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; I was worried about following scenario: A) 1. skb has no mapping 2. next skb has one B) or 1. skb has no mapping 2. next skb has no mapping 3. but next skb has one It took me some time to figure out that your mptcp_skb_can_collapse() will not allow 'end_of_skbs == false AND skb-has-no-mapping AND next-skb-has-one' to happen, and last check in copy loop will allow coalesce of 1 and 2 in B), but will stop when encountering #3. Maybe there should be a comment explaining mptcp coalesce rule/invariants here or in mptcp_skb_can_collapse()? Maybe something like: /* mptcp_skb_can_collapse - check if skbs can be collapsed * * mptcp collapse is allowed if neither @to or @from * carry an mptcp data mapping, or if the extension of @to * is the same as @from. * Collapsing is not possible if @to lacks an extension, but * @from carries one. */