* Re: [PATCH net] net/sched: act_ct: fix skb leak on fragment check failure
2026-04-16 15:31 ` Jamal Hadi Salim
@ 2026-04-17 3:56 ` phx
0 siblings, 0 replies; 3+ messages in thread
From: phx @ 2026-04-17 3:56 UTC (permalink / raw)
To: Jamal Hadi Salim; +Cc: netdev, jiri, horms
[-- Attachment #1.1: Type: text/plain, Size: 2826 bytes --]
Found it through code review. Reproduced it on a 7.0-rc6 kernel
using a veth pair with act_ct on ingress:
ip netns add ns_ct
ip link add veth0 type veth peer name veth1
ip link set veth1 netns ns_ct
ip link set veth0 up
ip netns exec ns_ct ip link set veth1 up
tc qdisc add dev veth0 clsact
tc filter add dev veth0 ingress protocol ip flower action ct zone 1
Then send truncated IP packets (10 bytes IP, need 20 minimum) from
the namespace via raw AF_PACKET socket on veth1. This hits
pskb_may_pull failure in tcf_ct_ipv4_is_fragment -> -EINVAL ->
out_frag -> TC_ACT_CONSUMED. net/core/dev.c handles TC_ACT_CONSUMED
by returning NULL without freeing the skb.
Result on unpatched kernel:
Sent: 222 packets
skbuff_head_cache: before=6439 after=6663 growth=224
FAIL: skb leak detected (224 objects leaked)
Attached a test script that automates this. With the fix applied
(TC_ACT_SHOT for non-EINPROGRESS errors), the skbs get freed and
the test passes. The test script is generated by AI.
On Thu, Apr 16, 2026 at 11:32 PM Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> On Thu, Apr 16, 2026 at 9:01 AM Dudu Lu <phx0fer@gmail.com> wrote:
> >
> > When tcf_ct_handle_fragments() returns an error other than -EINPROGRESS
> > (e.g. -EINVAL from malformed fragments), tcf_ct_act() jumps to out_frag
> > which unconditionally returns TC_ACT_CONSUMED. This tells the caller the
> > skb was consumed, but it was not freed, leaking one skb per malformed
> > fragment.
> >
> > TC_ACT_CONSUMED is only correct for -EINPROGRESS, where defragmentation
> > is genuinely in progress and the skb has been queued. For all other
> > errors the skb is still owned by the caller and must be freed via
> > TC_ACT_SHOT.
> >
> > Fixes: 3f14b377d01d ("net/sched: act_ct: fix skb leak and crash on ooo
> frags")
> > Signed-off-by: Dudu Lu <phx0fer@gmail.com>
>
> Do you have a reproducer? Always helps adding at least a tdc test.
> Also: How did you find this issue? was it AI? If yes, please add the
> tag "Assisted-by:<AI name here>"
>
> cheers,
> jamal
>
> > ---
> > net/sched/act_ct.c | 4 +++-
> > 1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
> > index 7d5e50c921a0..870655f682bd 100644
> > --- a/net/sched/act_ct.c
> > +++ b/net/sched/act_ct.c
> > @@ -1107,8 +1107,10 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff
> *skb, const struct tc_action *a,
> > return retval;
> >
> > out_frag:
> > - if (err != -EINPROGRESS)
> > + if (err != -EINPROGRESS) {
> > tcf_action_inc_drop_qstats(&c->common);
> > + return TC_ACT_SHOT;
> > + }
> > return TC_ACT_CONSUMED;
> >
> > drop:
> > --
> > 2.39.3 (Apple Git-145)
> >
>
[-- Attachment #1.2: Type: text/html, Size: 3592 bytes --]
[-- Attachment #2: act_ct_skb_leak.sh --]
[-- Type: text/x-sh, Size: 3349 bytes --]
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test for skb leak in act_ct when tcf_ct_handle_fragments() fails.
#
# When a truncated IP packet hits act_ct, tcf_ct_ipv4_is_fragment()
# returns -EINVAL. The out_frag path returns TC_ACT_CONSUMED, but the
# skb was never actually consumed — it leaks.
#
# This test sends truncated IP packets through a veth pair with act_ct
# on ingress and checks skbuff_head_cache slab growth.
set -e
readonly NS="ns-act-ct-leak-$(mktemp -u XXXXXX)"
readonly DEV="veth-ct0"
readonly DEV_PEER="veth-ct1"
readonly NUM_PKTS=500
# Minimum slab growth to consider a leak (allow some noise)
readonly LEAK_THRESHOLD=200
cleanup() {
ip link del $DEV 2>/dev/null || true
ip netns del $NS 2>/dev/null || true
}
trap cleanup EXIT
get_skb_active() {
awk '/skbuff_head_cache/ {print $2}' /proc/slabinfo
}
# Build the packet sender
build_sender() {
local prog="$1"
local src="$prog.c"
cat > "$src" << 'EOF'
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <arpa/inet.h>
#include <fcntl.h>
int main(int argc, char **argv) {
const char *ifname = argv[1];
int count = atoi(argv[2]);
int fd, i, sent = 0;
fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
if (fd < 0) { perror("socket"); return 1; }
fcntl(fd, F_SETFL, O_NONBLOCK);
struct sockaddr_ll sa = {};
sa.sll_family = AF_PACKET;
sa.sll_protocol = htons(ETH_P_IP);
sa.sll_ifindex = if_nametoindex(ifname);
sa.sll_halen = 6;
memset(sa.sll_addr, 0xff, 6);
/* Ethernet(14) + truncated IP(10) = 24 bytes.
* IP header needs 20 bytes minimum, so pskb_may_pull fails
* in tcf_ct_ipv4_is_fragment() -> -EINVAL.
*/
unsigned char pkt[24] = {};
memset(pkt, 0xff, 6); /* dst = broadcast */
pkt[12] = 0x08; pkt[13] = 0x00;/* ethertype = IPv4 */
pkt[14] = 0x45; /* ver=4, ihl=5 */
pkt[16] = 0x00; pkt[17] = 0x0a;/* total_len=10 */
pkt[23] = 0x06; /* proto=TCP */
for (i = 0; i < count; i++) {
if (sendto(fd, pkt, sizeof(pkt), 0,
(struct sockaddr *)&sa, sizeof(sa)) >= 0)
sent++;
}
printf("%d\n", sent);
close(fd);
return 0;
}
EOF
gcc -Wall -o "$prog" "$src"
}
echo "=== act_ct skb leak test ==="
# Setup veth pair with namespace
ip netns add $NS
ip link add $DEV type veth peer name $DEV_PEER
ip link set $DEV_PEER netns $NS
ip link set $DEV up
ip netns exec $NS ip link set $DEV_PEER up
# Add act_ct filter on ingress
tc qdisc add dev $DEV clsact
tc filter add dev $DEV ingress protocol ip flower action ct zone 1
# Build sender
SENDER=$(mktemp)
build_sender "$SENDER"
# Record slab state
BEFORE=$(get_skb_active)
# Send truncated packets from namespace
SENT=$(ip netns exec $NS "$SENDER" $DEV_PEER $NUM_PKTS)
sleep 1
# Record slab state again
AFTER=$(get_skb_active)
# Check tc stats
DROPPED=$(tc -s filter show dev $DEV ingress | \
awk '/dropped/ {for(i=1;i<=NF;i++) if($i=="dropped") print $(i+1)}' | \
tr -d ',')
GROWTH=$((AFTER - BEFORE))
echo "Sent: $SENT packets"
echo "TC dropped: $DROPPED"
echo "skbuff_head_cache: before=$BEFORE after=$AFTER growth=$GROWTH"
rm -f "$SENDER" "${SENDER}.c"
if [ "$GROWTH" -ge "$LEAK_THRESHOLD" ]; then
echo "FAIL: skb leak detected ($GROWTH objects leaked)"
exit 1
else
echo "PASS: no significant skb leak"
exit 0
fi
^ permalink raw reply [flat|nested] 3+ messages in thread