* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-10 9:06 [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap Feng Yang
@ 2026-02-10 18:34 ` kernel test robot
2026-02-10 19:08 ` kernel test robot
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2026-02-10 18:34 UTC (permalink / raw)
To: Feng Yang, davem, edumazet, kuba, pabeni, horms, ast, daniel,
andrii
Cc: oe-kbuild-all, bpf, netdev, linux-kernel, yangfeng59949
Hi Feng,
kernel test robot noticed the following build errors:
[auto build test ERROR on bpf-next/net]
[also build test ERROR on bpf-next/master bpf/master net-next/main net/main linus/master v6.19 next-20260209]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Feng-Yang/bpf-test_run-Fix-the-null-pointer-dereference-issue-in-bpf_lwt_xmit_push_encap/20260210-171138
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git net
patch link: https://lore.kernel.org/r/20260210090657.86977-1-yangfeng59949%40163.com
patch subject: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
config: x86_64-buildonly-randconfig-004-20260210 (https://download.01.org/0day-ci/archive/20260211/202602110246.gh5IIHrI-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260211/202602110246.gh5IIHrI-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602110246.gh5IIHrI-lkp@intel.com/
All error/warnings (new ones prefixed by >>):
net/bpf/test_run.c: In function 'bpf_prog_test_run_skb':
>> net/bpf/test_run.c:1162:31: warning: unused variable 'fl6' [-Wunused-variable]
1162 | struct flowi6 fl6 = {};
| ^~~
--
ld: vmlinux.o: in function `ip_route_output_key':
>> include/net/route.h:179:(.text+0x128a7e9): undefined reference to `ip_route_output_flow'
vim +/fl6 +1162 net/bpf/test_run.c
984
985 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
986 union bpf_attr __user *uattr)
987 {
988 bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
989 u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
990 struct net *net = current->nsproxy->net_ns;
991 struct net_device *dev = net->loopback_dev;
992 u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
993 u32 linear_sz = kattr->test.data_size_in;
994 u32 repeat = kattr->test.repeat;
995 struct dst_entry *dst = NULL;
996 struct __sk_buff *ctx = NULL;
997 struct sk_buff *skb = NULL;
998 struct sock *sk = NULL;
999 u32 retval, duration;
1000 int hh_len = ETH_HLEN;
1001 void *data = NULL;
1002 int ret;
1003
1004 if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
1005 kattr->test.cpu || kattr->test.batch_size)
1006 return -EINVAL;
1007
1008 if (kattr->test.data_size_in < ETH_HLEN)
1009 return -EINVAL;
1010
1011 switch (prog->type) {
1012 case BPF_PROG_TYPE_SCHED_CLS:
1013 case BPF_PROG_TYPE_SCHED_ACT:
1014 is_direct_pkt_access = true;
1015 is_l2 = true;
1016 break;
1017 case BPF_PROG_TYPE_LWT_IN:
1018 case BPF_PROG_TYPE_LWT_OUT:
1019 case BPF_PROG_TYPE_LWT_XMIT:
1020 is_lwt = true;
1021 fallthrough;
1022 case BPF_PROG_TYPE_CGROUP_SKB:
1023 is_direct_pkt_access = true;
1024 break;
1025 default:
1026 break;
1027 }
1028
1029 ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
1030 if (IS_ERR(ctx))
1031 return PTR_ERR(ctx);
1032
1033 if (ctx) {
1034 if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
1035 ret = -EINVAL;
1036 goto out;
1037 }
1038 if (ctx->data_end) {
1039 /* Non-linear LWT test_run is unsupported for now. */
1040 if (is_lwt) {
1041 ret = -EINVAL;
1042 goto out;
1043 }
1044 linear_sz = max(ETH_HLEN, ctx->data_end);
1045 }
1046 }
1047
1048 linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
1049
1050 data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
1051 if (IS_ERR(data)) {
1052 ret = PTR_ERR(data);
1053 data = NULL;
1054 goto out;
1055 }
1056
1057 sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
1058 if (!sk) {
1059 ret = -ENOMEM;
1060 goto out;
1061 }
1062 sock_init_data(NULL, sk);
1063
1064 skb = slab_build_skb(data);
1065 if (!skb) {
1066 ret = -ENOMEM;
1067 goto out;
1068 }
1069 skb->sk = sk;
1070
1071 data = NULL; /* data released via kfree_skb */
1072
1073 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1074 __skb_put(skb, linear_sz);
1075
1076 if (unlikely(kattr->test.data_size_in > linear_sz)) {
1077 void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
1078 struct skb_shared_info *sinfo = skb_shinfo(skb);
1079 u32 copied = linear_sz;
1080
1081 while (copied < kattr->test.data_size_in) {
1082 struct page *page;
1083 u32 data_len;
1084
1085 if (sinfo->nr_frags == MAX_SKB_FRAGS) {
1086 ret = -ENOMEM;
1087 goto out;
1088 }
1089
1090 page = alloc_page(GFP_KERNEL);
1091 if (!page) {
1092 ret = -ENOMEM;
1093 goto out;
1094 }
1095
1096 data_len = min_t(u32, kattr->test.data_size_in - copied,
1097 PAGE_SIZE);
1098 skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
1099
1100 if (copy_from_user(page_address(page), data_in + copied,
1101 data_len)) {
1102 ret = -EFAULT;
1103 goto out;
1104 }
1105 skb->data_len += data_len;
1106 skb->truesize += PAGE_SIZE;
1107 skb->len += data_len;
1108 copied += data_len;
1109 }
1110 }
1111
1112 if (ctx && ctx->ifindex > 1) {
1113 dev = dev_get_by_index(net, ctx->ifindex);
1114 if (!dev) {
1115 ret = -ENODEV;
1116 goto out;
1117 }
1118 }
1119 skb->protocol = eth_type_trans(skb, dev);
1120 skb_reset_network_header(skb);
1121
1122 switch (skb->protocol) {
1123 case htons(ETH_P_IP):
1124 sk->sk_family = AF_INET;
1125 if (sizeof(struct iphdr) <= skb_headlen(skb)) {
1126 sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
1127 sk->sk_daddr = ip_hdr(skb)->daddr;
1128 }
1129 break;
1130 #if IS_ENABLED(CONFIG_IPV6)
1131 case htons(ETH_P_IPV6):
1132 sk->sk_family = AF_INET6;
1133 if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
1134 sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
1135 sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
1136 }
1137 break;
1138 #endif
1139 default:
1140 break;
1141 }
1142
1143 if (is_l2)
1144 __skb_push(skb, hh_len);
1145 if (is_direct_pkt_access)
1146 bpf_compute_data_pointers(skb);
1147
1148 ret = convert___skb_to_skb(skb, ctx);
1149 if (ret)
1150 goto out;
1151
1152 if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
1153 const int off = skb_network_offset(skb);
1154 int len = skb->len - off;
1155
1156 skb->csum = skb_checksum(skb, off, len, 0);
1157 skb->ip_summed = CHECKSUM_COMPLETE;
1158 }
1159
1160 if (prog->type == BPF_PROG_TYPE_LWT_XMIT && !skb_dst(skb)) {
1161 struct flowi4 fl4 = {};
> 1162 struct flowi6 fl6 = {};
1163 struct rtable *rt;
1164
1165 switch (skb->protocol) {
1166 case htons(ETH_P_IP):
1167 if (sizeof(struct iphdr) <= skb_headlen(skb)) {
1168 fl4.saddr = ip_hdr(skb)->saddr;
1169 fl4.daddr = ip_hdr(skb)->daddr;
1170 }
1171
1172 rt = ip_route_output_key(net, &fl4);
1173 if (IS_ERR(rt)) {
1174 ret = PTR_ERR(rt);
1175 goto out;
1176 }
1177 dst = &rt->dst;
1178 break;
1179 #if IS_ENABLED(CONFIG_IPV6)
1180 case htons(ETH_P_IPV6):
1181 if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
1182 fl6.saddr = ipv6_hdr(skb)->saddr;
1183 fl6.daddr = ipv6_hdr(skb)->daddr;
1184 }
1185
1186 dst = ip6_route_output(net, NULL, &fl6);
1187 if (IS_ERR(dst)) {
1188 ret = PTR_ERR(dst);
1189 goto out;
1190 }
1191 break;
1192 #endif
1193 default:
1194 ret = -EINVAL;
1195 goto out;
1196 }
1197
1198 if (unlikely(dst->error)) {
1199 ret = dst->error;
1200 dst_release(dst);
1201 goto out;
1202 }
1203 skb_dst_set(skb, dst);
1204 }
1205 ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
1206 if (ret)
1207 goto out;
1208 if (!is_l2) {
1209 if (skb_headroom(skb) < hh_len) {
1210 int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
1211
1212 if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
1213 ret = -ENOMEM;
1214 goto out;
1215 }
1216 }
1217 memset(__skb_push(skb, hh_len), 0, hh_len);
1218 }
1219
1220 if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
1221 const int off = skb_network_offset(skb);
1222 int len = skb->len - off;
1223 __wsum csum;
1224
1225 csum = skb_checksum(skb, off, len, 0);
1226
1227 if (csum_fold(skb->csum) != csum_fold(csum)) {
1228 ret = -EBADMSG;
1229 goto out;
1230 }
1231 }
1232
1233 convert_skb_to___skb(skb, ctx);
1234
1235 if (skb_is_nonlinear(skb))
1236 /* bpf program can never convert linear skb to non-linear */
1237 WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
1238 ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
1239 skb->data_len, retval, duration);
1240 if (!ret)
1241 ret = bpf_ctx_finish(kattr, uattr, ctx,
1242 sizeof(struct __sk_buff));
1243 out:
1244 if (dev && dev != net->loopback_dev)
1245 dev_put(dev);
1246 kfree_skb(skb);
1247 kfree(data);
1248 if (sk)
1249 sk_free(sk);
1250 kfree(ctx);
1251 return ret;
1252 }
1253
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-10 9:06 [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap Feng Yang
2026-02-10 18:34 ` kernel test robot
@ 2026-02-10 19:08 ` kernel test robot
2026-02-10 19:10 ` Martin KaFai Lau
2026-02-10 20:26 ` kernel test robot
3 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2026-02-10 19:08 UTC (permalink / raw)
To: Feng Yang, davem, edumazet, kuba, pabeni, horms, ast, daniel,
andrii
Cc: oe-kbuild-all, bpf, netdev, linux-kernel, yangfeng59949
Hi Feng,
kernel test robot noticed the following build errors:
[auto build test ERROR on bpf-next/net]
[also build test ERROR on bpf-next/master bpf/master net-next/main net/main linus/master v6.19 next-20260209]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Feng-Yang/bpf-test_run-Fix-the-null-pointer-dereference-issue-in-bpf_lwt_xmit_push_encap/20260210-171138
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git net
patch link: https://lore.kernel.org/r/20260210090657.86977-1-yangfeng59949%40163.com
patch subject: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
config: sparc-randconfig-002-20260210 (https://download.01.org/0day-ci/archive/20260211/202602110336.LIXRMPDU-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 15.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260211/202602110336.LIXRMPDU-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602110336.LIXRMPDU-lkp@intel.com/
All errors (new ones prefixed by >>):
sparc64-linux-ld: net/bpf/test_run.o: in function `bpf_prog_test_run_skb':
>> test_run.c:(.text+0x46fc): undefined reference to `ip_route_output_flow'
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-10 9:06 [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap Feng Yang
2026-02-10 18:34 ` kernel test robot
2026-02-10 19:08 ` kernel test robot
@ 2026-02-10 19:10 ` Martin KaFai Lau
2026-02-11 7:52 ` Feng Yang
2026-02-10 20:26 ` kernel test robot
3 siblings, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2026-02-10 19:10 UTC (permalink / raw)
To: Feng Yang
Cc: davem, edumazet, kuba, pabeni, horms, ast, daniel, andrii, bpf,
netdev, linux-kernel
On 2/10/26 1:06 AM, Feng Yang wrote:
> From: Feng Yang <yangfeng@kylinos.cn>
>
> The bpf_lwt_xmit_push_encap helper needs to access skb_dst(skb)->dev to
> calculate the needed headroom:
>
> err = skb_cow_head(skb,
> len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
>
> But skb->_skb_refdst may not be initialized when the skb is set up by
> bpf_prog_test_run_skb function. Executing bpf_lwt_push_ip_encap function
> in this scenario will trigger null pointer dereference, causing a kernel
> crash as Yinhao reported:
>
> [ 105.186365] BUG: kernel NULL pointer dereference, address: 0000000000000000
> [ 105.186382] #PF: supervisor read access in kernel mode
> [ 105.186388] #PF: error_code(0x0000) - not-present page
> [ 105.186393] PGD 121d3d067 P4D 121d3d067 PUD 106c83067 PMD 0
> [ 105.186404] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [ 105.186412] CPU: 3 PID: 3250 Comm: poc Kdump: loaded Not tainted 6.19.0-rc5 #1
> [ 105.186423] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
> [ 105.186427] RIP: 0010:bpf_lwt_push_ip_encap+0x1eb/0x520
> [ 105.186443] Code: 0f 84 de 01 00 00 0f b7 4a 04 66 85 c9 0f 85 47 01 00 00 31 c0 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc 48 8b 73 58 48 83 e6 fe <48> 8b 36 0f b7 be ec 00 00 00 0f b7 b6 e6 00 00 00 01 fe 83 e6 f0
> [ 105.186449] RSP: 0018:ffffbb0e0387bc50 EFLAGS: 00010246
> [ 105.186455] RAX: 000000000000004e RBX: ffff94c74e036500 RCX: ffff94c74874da00
> [ 105.186460] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff94c74e036500
> [ 105.186463] RBP: 0000000000000001 R08: 0000000000000002 R09: 0000000000000000
> [ 105.186467] R10: ffffbb0e0387bd50 R11: 0000000000000000 R12: ffffbb0e0387bc98
> [ 105.186471] R13: 0000000000000014 R14: 0000000000000000 R15: 0000000000000002
> [ 105.186484] FS: 00007f166aa4d680(0000) GS:ffff94c8b7780000(0000) knlGS:0000000000000000
> [ 105.186490] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 105.186494] CR2: 0000000000000000 CR3: 000000015eade001 CR4: 0000000000770ee0
> [ 105.186499] PKRU: 55555554
> [ 105.186502] Call Trace:
> [ 105.186507] <TASK>
> [ 105.186513] bpf_lwt_xmit_push_encap+0x2b/0x40
> [ 105.186522] bpf_prog_a75eaad51e517912+0x41/0x49
> [ 105.186536] ? kvm_clock_get_cycles+0x18/0x30
> [ 105.186547] ? ktime_get+0x3c/0xa0
> [ 105.186554] bpf_test_run+0x195/0x320
> [ 105.186563] ? bpf_test_run+0x10f/0x320
> [ 105.186579] bpf_prog_test_run_skb+0x2f5/0x4f0
> [ 105.186590] __sys_bpf+0x69c/0xa40
> [ 105.186603] __x64_sys_bpf+0x1e/0x30
> [ 105.186611] do_syscall_64+0x59/0x110
> [ 105.186620] entry_SYSCALL_64_after_hwframe+0x76/0xe0
> [ 105.186649] RIP: 0033:0x7f166a97455d
>
> Temporarily add the setting of skb->_skb_refdst before bpf_test_run to resolve the issue.
>
> Fixes: 52f278774e79 ("bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap")
> Reported-by: Yinhao Hu <dddddd@hust.edu.cn>
> Reported-by: Kaiyan Mei <M202472210@hust.edu.cn>
> Closes: https://groups.google.com/g/hust-os-kernel-patches/c/8-a0kPpBW2s
> Signed-off-by: Yun Lu <luyun@kylinos.cn>
> Signed-off-by: Feng Yang <yangfeng@kylinos.cn>
> Tested-by: syzbot@syzkaller.appspotmail.com
> ---
> Changes in v5:
> - Refer to the bpf_lwt_xmit_reroute function to configure the dst parameter.
> - Link to v4: https://lore.kernel.org/all/20260209015111.28144-1-yangfeng59949@163.com/
> Changes in v4:
> - add rcu lock
> - Link to v3: https://lore.kernel.org/all/20260206055113.63476-1-yangfeng59949@163.com/
> Changes in v3:
> - use dst_init
> - Link to v2: https://lore.kernel.org/all/20260205092227.126665-1-yangfeng59949@163.com/
> Changes in v2:
> - Link to v1: https://lore.kernel.org/all/20260127084520.13890-1-luyun_611@163.com/
The earlier syzbot reports are crying for a selftest which is still
missing in v5.
The CI has also reported errors in the test_progs. Did you run any test
before posting ?
pw-bot: cr
> ---
> net/bpf/test_run.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 47 insertions(+)
>
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index 178c4738e63b..dbd2c44da7e5 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -24,6 +24,7 @@
> #include <net/netdev_rx_queue.h>
> #include <net/xdp.h>
> #include <net/netfilter/nf_bpf_link.h>
> +#include <net/ip6_route.h>
>
> #define CREATE_TRACE_POINTS
> #include <trace/events/bpf_test_run.h>
> @@ -992,6 +993,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
> u32 linear_sz = kattr->test.data_size_in;
> u32 repeat = kattr->test.repeat;
> + struct dst_entry *dst = NULL;
> struct __sk_buff *ctx = NULL;
> struct sk_buff *skb = NULL;
> struct sock *sk = NULL;
> @@ -1156,6 +1158,51 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> skb->ip_summed = CHECKSUM_COMPLETE;
> }
>
> + if (prog->type == BPF_PROG_TYPE_LWT_XMIT && !skb_dst(skb)) {
> + struct flowi4 fl4 = {};
> + struct flowi6 fl6 = {};
> + struct rtable *rt;
> +
> + switch (skb->protocol) {
> + case htons(ETH_P_IP):
> + if (sizeof(struct iphdr) <= skb_headlen(skb)) {
> + fl4.saddr = ip_hdr(skb)->saddr;
> + fl4.daddr = ip_hdr(skb)->daddr;
> + }
> +
> + rt = ip_route_output_key(net, &fl4);
What can be expected from the return value rt if fl4 is 0.
> + if (IS_ERR(rt)) {
> + ret = PTR_ERR(rt);
I suspect this is probably what failed in CI. Add a NULL check in
bpf_lwt_push_ip_encap instead. No change is needed in the
bpf_prog_test_run_skb.
> + goto out;
> + }
> + dst = &rt->dst;
> + break;
> +#if IS_ENABLED(CONFIG_IPV6)
> + case htons(ETH_P_IPV6):
> + if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
> + fl6.saddr = ipv6_hdr(skb)->saddr;
> + fl6.daddr = ipv6_hdr(skb)->daddr;
> + }
> +
> + dst = ip6_route_output(net, NULL, &fl6);
> + if (IS_ERR(dst)) {
> + ret = PTR_ERR(dst);
> + goto out;
> + }
> + break;
> +#endif
> + default:
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (unlikely(dst->error)) {
> + ret = dst->error;
> + dst_release(dst);
> + goto out;
> + }
> + skb_dst_set(skb, dst);
> + }
> ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
> if (ret)
> goto out;
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-10 19:10 ` Martin KaFai Lau
@ 2026-02-11 7:52 ` Feng Yang
2026-02-17 18:08 ` Martin KaFai Lau
0 siblings, 1 reply; 10+ messages in thread
From: Feng Yang @ 2026-02-11 7:52 UTC (permalink / raw)
To: martin.lau
Cc: andrii, ast, bpf, daniel, davem, edumazet, horms, kuba,
linux-kernel, netdev, pabeni, yangfeng59949
On Tue, 10 Feb 2026 11:10:03 -0800 Martin KaFai Lau <martin.lau@linux.dev> wrote:
> On 2/10/26 1:06 AM, Feng Yang wrote:
> > From: Feng Yang <yangfeng@kylinos.cn>
> >
> > The bpf_lwt_xmit_push_encap helper needs to access skb_dst(skb)->dev to
> > calculate the needed headroom:
> >
> > err = skb_cow_head(skb,
> > len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
> >
> > But skb->_skb_refdst may not be initialized when the skb is set up by
> > bpf_prog_test_run_skb function. Executing bpf_lwt_push_ip_encap function
> > in this scenario will trigger null pointer dereference, causing a kernel
> > crash as Yinhao reported:
> >
> > [ 105.186365] BUG: kernel NULL pointer dereference, address: 0000000000000000
> > [ 105.186382] #PF: supervisor read access in kernel mode
> > [ 105.186388] #PF: error_code(0x0000) - not-present page
> > [ 105.186393] PGD 121d3d067 P4D 121d3d067 PUD 106c83067 PMD 0
> > [ 105.186404] Oops: 0000 [#1] PREEMPT SMP NOPTI
> > [ 105.186412] CPU: 3 PID: 3250 Comm: poc Kdump: loaded Not tainted 6.19.0-rc5 #1
> > [ 105.186423] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
> > [ 105.186427] RIP: 0010:bpf_lwt_push_ip_encap+0x1eb/0x520
> > [ 105.186443] Code: 0f 84 de 01 00 00 0f b7 4a 04 66 85 c9 0f 85 47 01 00 00 31 c0 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc 48 8b 73 58 48 83 e6 fe <48> 8b 36 0f b7 be ec 00 00 00 0f b7 b6 e6 00 00 00 01 fe 83 e6 f0
> > [ 105.186449] RSP: 0018:ffffbb0e0387bc50 EFLAGS: 00010246
> > [ 105.186455] RAX: 000000000000004e RBX: ffff94c74e036500 RCX: ffff94c74874da00
> > [ 105.186460] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff94c74e036500
> > [ 105.186463] RBP: 0000000000000001 R08: 0000000000000002 R09: 0000000000000000
> > [ 105.186467] R10: ffffbb0e0387bd50 R11: 0000000000000000 R12: ffffbb0e0387bc98
> > [ 105.186471] R13: 0000000000000014 R14: 0000000000000000 R15: 0000000000000002
> > [ 105.186484] FS: 00007f166aa4d680(0000) GS:ffff94c8b7780000(0000) knlGS:0000000000000000
> > [ 105.186490] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 105.186494] CR2: 0000000000000000 CR3: 000000015eade001 CR4: 0000000000770ee0
> > [ 105.186499] PKRU: 55555554
> > [ 105.186502] Call Trace:
> > [ 105.186507] <TASK>
> > [ 105.186513] bpf_lwt_xmit_push_encap+0x2b/0x40
> > [ 105.186522] bpf_prog_a75eaad51e517912+0x41/0x49
> > [ 105.186536] ? kvm_clock_get_cycles+0x18/0x30
> > [ 105.186547] ? ktime_get+0x3c/0xa0
> > [ 105.186554] bpf_test_run+0x195/0x320
> > [ 105.186563] ? bpf_test_run+0x10f/0x320
> > [ 105.186579] bpf_prog_test_run_skb+0x2f5/0x4f0
> > [ 105.186590] __sys_bpf+0x69c/0xa40
> > [ 105.186603] __x64_sys_bpf+0x1e/0x30
> > [ 105.186611] do_syscall_64+0x59/0x110
> > [ 105.186620] entry_SYSCALL_64_after_hwframe+0x76/0xe0
> > [ 105.186649] RIP: 0033:0x7f166a97455d
> >
> > Temporarily add the setting of skb->_skb_refdst before bpf_test_run to resolve the issue.
> >
> > Fixes: 52f278774e79 ("bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap")
> > Reported-by: Yinhao Hu <dddddd@hust.edu.cn>
> > Reported-by: Kaiyan Mei <M202472210@hust.edu.cn>
> > Closes: https://groups.google.com/g/hust-os-kernel-patches/c/8-a0kPpBW2s
> > Signed-off-by: Yun Lu <luyun@kylinos.cn>
> > Signed-off-by: Feng Yang <yangfeng@kylinos.cn>
> > Tested-by: syzbot@syzkaller.appspotmail.com
> > ---
> > Changes in v5:
> > - Refer to the bpf_lwt_xmit_reroute function to configure the dst parameter.
> > - Link to v4: https://lore.kernel.org/all/20260209015111.28144-1-yangfeng59949@163.com/
> > Changes in v4:
> > - add rcu lock
> > - Link to v3: https://lore.kernel.org/all/20260206055113.63476-1-yangfeng59949@163.com/
> > Changes in v3:
> > - use dst_init
> > - Link to v2: https://lore.kernel.org/all/20260205092227.126665-1-yangfeng59949@163.com/
> > Changes in v2:
> > - Link to v1: https://lore.kernel.org/all/20260127084520.13890-1-luyun_611@163.com/
>
> The earlier syzbot reports are crying for a selftest which is still
> missing in v5.
>
> The CI has also reported errors in the test_progs. Did you run any test
> before posting ?
My apologies. I only tested whether a crash would occur
when using `bpf_prog_test_run_skb` to execute `bpf_lwt_push_ip_encap` without a dst entry.
I will include a selftest in my next submission.
> pw-bot: cr
>
> > ---
> > net/bpf/test_run.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 47 insertions(+)
> >
> > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> > index 178c4738e63b..dbd2c44da7e5 100644
> > --- a/net/bpf/test_run.c
> > +++ b/net/bpf/test_run.c
> > @@ -24,6 +24,7 @@
> > #include <net/netdev_rx_queue.h>
> > #include <net/xdp.h>
> > #include <net/netfilter/nf_bpf_link.h>
> > +#include <net/ip6_route.h>
> >
> > #define CREATE_TRACE_POINTS
> > #include <trace/events/bpf_test_run.h>
> > @@ -992,6 +993,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
> > u32 linear_sz = kattr->test.data_size_in;
> > u32 repeat = kattr->test.repeat;
> > + struct dst_entry *dst = NULL;
> > struct __sk_buff *ctx = NULL;
> > struct sk_buff *skb = NULL;
> > struct sock *sk = NULL;
> > @@ -1156,6 +1158,51 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
> > skb->ip_summed = CHECKSUM_COMPLETE;
> > }
> >
> > + if (prog->type == BPF_PROG_TYPE_LWT_XMIT && !skb_dst(skb)) {
> > + struct flowi4 fl4 = {};
> > + struct flowi6 fl6 = {};
> > + struct rtable *rt;
> > +
> > + switch (skb->protocol) {
> > + case htons(ETH_P_IP):
> > + if (sizeof(struct iphdr) <= skb_headlen(skb)) {
> > + fl4.saddr = ip_hdr(skb)->saddr;
> > + fl4.daddr = ip_hdr(skb)->daddr;
> > + }
> > +
> > + rt = ip_route_output_key(net, &fl4);
>
> What can be expected from the return value rt if fl4 is 0.
If it is empty, `ip_route_output_key_hash_rcu` will assign the loopback address,
and the returned `rt` is valid, not an error value.
> > + if (IS_ERR(rt)) {
> > + ret = PTR_ERR(rt);
>
> I suspect this is probably what failed in CI. Add a NULL check in
> bpf_lwt_push_ip_encap instead. No change is needed in the
> bpf_prog_test_run_skb.
The root cause of the CI failure is that the default path was taken, resulting in a return value of -EINVAL.
So, is your suggestion to modify the bpf_lwt_push_ip_encap function as in the v1(https://lore.kernel.org/all/20260127084520.13890-1-luyun_611@163.com/) version?
However, the previous suggestion was to make changes to the bpf_prog_test_run_skb function instead.
Thank you very much for your reply.
> > + goto out;
> > + }
> > + dst = &rt->dst;
> > + break;
> > +#if IS_ENABLED(CONFIG_IPV6)
> > + case htons(ETH_P_IPV6):
> > + if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
> > + fl6.saddr = ipv6_hdr(skb)->saddr;
> > + fl6.daddr = ipv6_hdr(skb)->daddr;
> > + }
> > +
> > + dst = ip6_route_output(net, NULL, &fl6);
> > + if (IS_ERR(dst)) {
> > + ret = PTR_ERR(dst);
> > + goto out;
> > + }
> > + break;
> > +#endif
> > + default:
> > + ret = -EINVAL;
The reason for the CI test failure.
> > + goto out;
> > + }
> > +
> > + if (unlikely(dst->error)) {
> > + ret = dst->error;
> > + dst_release(dst);
> > + goto out;
> > + }
> > + skb_dst_set(skb, dst);
> > + }
> > ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
> > if (ret)
> > goto out;
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-11 7:52 ` Feng Yang
@ 2026-02-17 18:08 ` Martin KaFai Lau
2026-02-17 22:08 ` Jakub Kicinski
0 siblings, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2026-02-17 18:08 UTC (permalink / raw)
To: Feng Yang
Cc: andrii, ast, bpf, daniel, davem, edumazet, horms, kuba,
linux-kernel, netdev, pabeni
On 2/10/26 11:52 PM, Feng Yang wrote:
> On Tue, 10 Feb 2026 11:10:03 -0800 Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
>> On 2/10/26 1:06 AM, Feng Yang wrote:
>>> From: Feng Yang <yangfeng@kylinos.cn>
>>>
>>> The bpf_lwt_xmit_push_encap helper needs to access skb_dst(skb)->dev to
>>> calculate the needed headroom:
>>>
>>> err = skb_cow_head(skb,
>>> len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
>>>
>>> But skb->_skb_refdst may not be initialized when the skb is set up by
>>> bpf_prog_test_run_skb function. Executing bpf_lwt_push_ip_encap function
>>> in this scenario will trigger null pointer dereference, causing a kernel
>>> crash as Yinhao reported:
>>>
>>> [ 105.186365] BUG: kernel NULL pointer dereference, address: 0000000000000000
>>> [ 105.186382] #PF: supervisor read access in kernel mode
>>> [ 105.186388] #PF: error_code(0x0000) - not-present page
>>> [ 105.186393] PGD 121d3d067 P4D 121d3d067 PUD 106c83067 PMD 0
>>> [ 105.186404] Oops: 0000 [#1] PREEMPT SMP NOPTI
>>> [ 105.186412] CPU: 3 PID: 3250 Comm: poc Kdump: loaded Not tainted 6.19.0-rc5 #1
>>> [ 105.186423] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
>>> [ 105.186427] RIP: 0010:bpf_lwt_push_ip_encap+0x1eb/0x520
>>> [ 105.186443] Code: 0f 84 de 01 00 00 0f b7 4a 04 66 85 c9 0f 85 47 01 00 00 31 c0 5b 5d 41 5c 41 5d 41 5e c3 cc cc cc cc 48 8b 73 58 48 83 e6 fe <48> 8b 36 0f b7 be ec 00 00 00 0f b7 b6 e6 00 00 00 01 fe 83 e6 f0
>>> [ 105.186449] RSP: 0018:ffffbb0e0387bc50 EFLAGS: 00010246
>>> [ 105.186455] RAX: 000000000000004e RBX: ffff94c74e036500 RCX: ffff94c74874da00
>>> [ 105.186460] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff94c74e036500
>>> [ 105.186463] RBP: 0000000000000001 R08: 0000000000000002 R09: 0000000000000000
>>> [ 105.186467] R10: ffffbb0e0387bd50 R11: 0000000000000000 R12: ffffbb0e0387bc98
>>> [ 105.186471] R13: 0000000000000014 R14: 0000000000000000 R15: 0000000000000002
>>> [ 105.186484] FS: 00007f166aa4d680(0000) GS:ffff94c8b7780000(0000) knlGS:0000000000000000
>>> [ 105.186490] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [ 105.186494] CR2: 0000000000000000 CR3: 000000015eade001 CR4: 0000000000770ee0
>>> [ 105.186499] PKRU: 55555554
>>> [ 105.186502] Call Trace:
>>> [ 105.186507] <TASK>
>>> [ 105.186513] bpf_lwt_xmit_push_encap+0x2b/0x40
>>> [ 105.186522] bpf_prog_a75eaad51e517912+0x41/0x49
>>> [ 105.186536] ? kvm_clock_get_cycles+0x18/0x30
>>> [ 105.186547] ? ktime_get+0x3c/0xa0
>>> [ 105.186554] bpf_test_run+0x195/0x320
>>> [ 105.186563] ? bpf_test_run+0x10f/0x320
>>> [ 105.186579] bpf_prog_test_run_skb+0x2f5/0x4f0
>>> [ 105.186590] __sys_bpf+0x69c/0xa40
>>> [ 105.186603] __x64_sys_bpf+0x1e/0x30
>>> [ 105.186611] do_syscall_64+0x59/0x110
>>> [ 105.186620] entry_SYSCALL_64_after_hwframe+0x76/0xe0
>>> [ 105.186649] RIP: 0033:0x7f166a97455d
>>>
>>> Temporarily add the setting of skb->_skb_refdst before bpf_test_run to resolve the issue.
>>>
>>> Fixes: 52f278774e79 ("bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap")
>>> Reported-by: Yinhao Hu <dddddd@hust.edu.cn>
>>> Reported-by: Kaiyan Mei <M202472210@hust.edu.cn>
>>> Closes: https://groups.google.com/g/hust-os-kernel-patches/c/8-a0kPpBW2s
>>> Signed-off-by: Yun Lu <luyun@kylinos.cn>
>>> Signed-off-by: Feng Yang <yangfeng@kylinos.cn>
>>> Tested-by: syzbot@syzkaller.appspotmail.com
>>> ---
>>> Changes in v5:
>>> - Refer to the bpf_lwt_xmit_reroute function to configure the dst parameter.
>>> - Link to v4: https://lore.kernel.org/all/20260209015111.28144-1-yangfeng59949@163.com/
>>> Changes in v4:
>>> - add rcu lock
>>> - Link to v3: https://lore.kernel.org/all/20260206055113.63476-1-yangfeng59949@163.com/
>>> Changes in v3:
>>> - use dst_init
>>> - Link to v2: https://lore.kernel.org/all/20260205092227.126665-1-yangfeng59949@163.com/
>>> Changes in v2:
>>> - Link to v1: https://lore.kernel.org/all/20260127084520.13890-1-luyun_611@163.com/
>>
>> The earlier syzbot reports are crying for a selftest which is still
>> missing in v5.
>>
>> The CI has also reported errors in the test_progs. Did you run any test
>> before posting ?
>
> My apologies. I only tested whether a crash would occur
> when using `bpf_prog_test_run_skb` to execute `bpf_lwt_push_ip_encap` without a dst entry.
> I will include a selftest in my next submission.
>
>> pw-bot: cr
>>
>>> ---
>>> net/bpf/test_run.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 47 insertions(+)
>>>
>>> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
>>> index 178c4738e63b..dbd2c44da7e5 100644
>>> --- a/net/bpf/test_run.c
>>> +++ b/net/bpf/test_run.c
>>> @@ -24,6 +24,7 @@
>>> #include <net/netdev_rx_queue.h>
>>> #include <net/xdp.h>
>>> #include <net/netfilter/nf_bpf_link.h>
>>> +#include <net/ip6_route.h>
>>>
>>> #define CREATE_TRACE_POINTS
>>> #include <trace/events/bpf_test_run.h>
>>> @@ -992,6 +993,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>>> u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
>>> u32 linear_sz = kattr->test.data_size_in;
>>> u32 repeat = kattr->test.repeat;
>>> + struct dst_entry *dst = NULL;
>>> struct __sk_buff *ctx = NULL;
>>> struct sk_buff *skb = NULL;
>>> struct sock *sk = NULL;
>>> @@ -1156,6 +1158,51 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
>>> skb->ip_summed = CHECKSUM_COMPLETE;
>>> }
>>>
>>> + if (prog->type == BPF_PROG_TYPE_LWT_XMIT && !skb_dst(skb)) {
>>> + struct flowi4 fl4 = {};
>>> + struct flowi6 fl6 = {};
>>> + struct rtable *rt;
>>> +
>>> + switch (skb->protocol) {
>>> + case htons(ETH_P_IP):
>>> + if (sizeof(struct iphdr) <= skb_headlen(skb)) {
>>> + fl4.saddr = ip_hdr(skb)->saddr;
>>> + fl4.daddr = ip_hdr(skb)->daddr;
>>> + }
>>> +
>>> + rt = ip_route_output_key(net, &fl4);
>>
>> What can be expected from the return value rt if fl4 is 0.
>
> If it is empty, `ip_route_output_key_hash_rcu` will assign the loopback address,
> and the returned `rt` is valid, not an error value.
>
>>> + if (IS_ERR(rt)) {
>>> + ret = PTR_ERR(rt);
>>
>> I suspect this is probably what failed in CI. Add a NULL check in
>> bpf_lwt_push_ip_encap instead. No change is needed in the
>> bpf_prog_test_run_skb.
>
> The root cause of the CI failure is that the default path was taken, resulting in a return value of -EINVAL.
> So, is your suggestion to modify the bpf_lwt_push_ip_encap function as in the v1(https://lore.kernel.org/all/20260127084520.13890-1-luyun_611@163.com/) version?
> However, the previous suggestion was to make changes to the bpf_prog_test_run_skb function instead.
>
> Thank you very much for your reply.
>
>>> + goto out;
>>> + }
>>> + dst = &rt->dst;
>>> + break;
>>> +#if IS_ENABLED(CONFIG_IPV6)
>>> + case htons(ETH_P_IPV6):
>>> + if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
>>> + fl6.saddr = ipv6_hdr(skb)->saddr;
>>> + fl6.daddr = ipv6_hdr(skb)->daddr;
>>> + }
>>> +
>>> + dst = ip6_route_output(net, NULL, &fl6);
>>> + if (IS_ERR(dst)) {
>>> + ret = PTR_ERR(dst);
>>> + goto out;
>>> + }
>>> + break;
>>> +#endif
>>> + default:
>>> + ret = -EINVAL;
>
> The reason for the CI test failure.
Makes sense, but the earlier point stays the same: the user-provided skb
can have unexpected data. Either an skb->protocol is not handled here,
or the earlier dst lookup has an error. I don't know what the current
active users left in lwt are. Unless there is an issue with missing
skb_dst() in other mainstream program types (e.g., tc), I would prefer
to add a check in bpf_lwt_push_ip_encap() instead of complicating
bpf_prog_test_run_skb() further.
>
>>> + goto out;
>>> + }
>>> +
>>> + if (unlikely(dst->error)) {
>>> + ret = dst->error;
>>> + dst_release(dst);
>>> + goto out;
>>> + }
>>> + skb_dst_set(skb, dst);
>>> + }
>>> ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
>>> if (ret)
>>> goto out;
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-17 18:08 ` Martin KaFai Lau
@ 2026-02-17 22:08 ` Jakub Kicinski
2026-02-17 23:26 ` Martin KaFai Lau
0 siblings, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2026-02-17 22:08 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: Feng Yang, andrii, ast, bpf, daniel, davem, edumazet, horms,
linux-kernel, netdev, pabeni
On Tue, 17 Feb 2026 10:08:29 -0800 Martin KaFai Lau wrote:
> > The reason for the CI test failure.
>
> Makes sense, but the earlier point stays the same: the user-provided skb
> can have unexpected data. Either an skb->protocol is not handled here,
> or the earlier dst lookup has an error. I don't know what the current
> active users left in lwt are. Unless there is an issue with missing
> skb_dst() in other mainstream program types (e.g., tc), I would prefer
> to add a check in bpf_lwt_push_ip_encap() instead of complicating
> bpf_prog_test_run_skb() further.
Can bpf_prog_test_run_skb() simply not support LWT then?
Having fastpath checks for test harness really feels like duct tape
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-17 22:08 ` Jakub Kicinski
@ 2026-02-17 23:26 ` Martin KaFai Lau
2026-02-17 23:39 ` Jakub Kicinski
0 siblings, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2026-02-17 23:26 UTC (permalink / raw)
To: Jakub Kicinski
Cc: Feng Yang, andrii, ast, bpf, daniel, davem, edumazet, horms,
linux-kernel, netdev, pabeni
On 2/17/26 2:08 PM, Jakub Kicinski wrote:
> On Tue, 17 Feb 2026 10:08:29 -0800 Martin KaFai Lau wrote:
>>> The reason for the CI test failure.
>>
>> Makes sense, but the earlier point stays the same: the user-provided skb
>> can have unexpected data. Either an skb->protocol is not handled here,
>> or the earlier dst lookup has an error. I don't know what the current
>> active users left in lwt are. Unless there is an issue with missing
>> skb_dst() in other mainstream program types (e.g., tc), I would prefer
>> to add a check in bpf_lwt_push_ip_encap() instead of complicating
>> bpf_prog_test_run_skb() further.
>
> Can bpf_prog_test_run_skb() simply not support LWT then?
It is a bold move. If we are open to this idea, we can consider
retiring the lwt support completely instead of only retiring lwt in the
test_run_skb alone. I think it will still take time to announce and
deprecate it?
> Having fastpath checks for test harness really feels like duct tape
Adding route lookup on ip[v6]_hdr does not work well either and is a
larger duct tape on the test_run_skb side.
An option is to always set skb to 'some' dst (loopback or
ipv6.fib6_null_entry) on all is_lwt cases without doing the lookup. It
is only to get it going. It is a duct tape also imo but maybe a middle
ground.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-17 23:26 ` Martin KaFai Lau
@ 2026-02-17 23:39 ` Jakub Kicinski
0 siblings, 0 replies; 10+ messages in thread
From: Jakub Kicinski @ 2026-02-17 23:39 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: Feng Yang, andrii, ast, bpf, daniel, davem, edumazet, horms,
linux-kernel, netdev, pabeni
On Tue, 17 Feb 2026 15:26:33 -0800 Martin KaFai Lau wrote:
> > Having fastpath checks for test harness really feels like duct tape
>
> Adding route lookup on ip[v6]_hdr does not work well either and is a
> larger duct tape on the test_run_skb side.
Well, maybe to clarify, the responsibility of the test harness is to
generate valid inputs. Whether it's hard or not to address this issue
in test_run_skb is less fundamental than whether it is the correct
place to address it.
> An option is to always set skb to 'some' dst (loopback or
> ipv6.fib6_null_entry) on all is_lwt cases without doing the lookup. It
> is only to get it going. It is a duct tape also imo but maybe a middle
> ground.
Sounds reasonable to me, FWIW.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
2026-02-10 9:06 [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap Feng Yang
` (2 preceding siblings ...)
2026-02-10 19:10 ` Martin KaFai Lau
@ 2026-02-10 20:26 ` kernel test robot
3 siblings, 0 replies; 10+ messages in thread
From: kernel test robot @ 2026-02-10 20:26 UTC (permalink / raw)
To: Feng Yang, davem, edumazet, kuba, pabeni, horms, ast, daniel,
andrii
Cc: llvm, oe-kbuild-all, bpf, netdev, linux-kernel, yangfeng59949
Hi Feng,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf-next/net]
[also build test WARNING on bpf-next/master bpf/master net-next/main net/main linus/master v6.19 next-20260209]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Feng-Yang/bpf-test_run-Fix-the-null-pointer-dereference-issue-in-bpf_lwt_xmit_push_encap/20260210-171138
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git net
patch link: https://lore.kernel.org/r/20260210090657.86977-1-yangfeng59949%40163.com
patch subject: [PATCH v5] bpf: test_run: Fix the null pointer dereference issue in bpf_lwt_xmit_push_encap
config: x86_64-kexec (https://download.01.org/0day-ci/archive/20260210/202602102144.JapMKlzC-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260210/202602102144.JapMKlzC-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602102144.JapMKlzC-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> net/bpf/test_run.c:1162:17: warning: unused variable 'fl6' [-Wunused-variable]
1162 | struct flowi6 fl6 = {};
| ^~~
1 warning generated.
vim +/fl6 +1162 net/bpf/test_run.c
984
985 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
986 union bpf_attr __user *uattr)
987 {
988 bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
989 u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
990 struct net *net = current->nsproxy->net_ns;
991 struct net_device *dev = net->loopback_dev;
992 u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
993 u32 linear_sz = kattr->test.data_size_in;
994 u32 repeat = kattr->test.repeat;
995 struct dst_entry *dst = NULL;
996 struct __sk_buff *ctx = NULL;
997 struct sk_buff *skb = NULL;
998 struct sock *sk = NULL;
999 u32 retval, duration;
1000 int hh_len = ETH_HLEN;
1001 void *data = NULL;
1002 int ret;
1003
1004 if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
1005 kattr->test.cpu || kattr->test.batch_size)
1006 return -EINVAL;
1007
1008 if (kattr->test.data_size_in < ETH_HLEN)
1009 return -EINVAL;
1010
1011 switch (prog->type) {
1012 case BPF_PROG_TYPE_SCHED_CLS:
1013 case BPF_PROG_TYPE_SCHED_ACT:
1014 is_direct_pkt_access = true;
1015 is_l2 = true;
1016 break;
1017 case BPF_PROG_TYPE_LWT_IN:
1018 case BPF_PROG_TYPE_LWT_OUT:
1019 case BPF_PROG_TYPE_LWT_XMIT:
1020 is_lwt = true;
1021 fallthrough;
1022 case BPF_PROG_TYPE_CGROUP_SKB:
1023 is_direct_pkt_access = true;
1024 break;
1025 default:
1026 break;
1027 }
1028
1029 ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
1030 if (IS_ERR(ctx))
1031 return PTR_ERR(ctx);
1032
1033 if (ctx) {
1034 if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
1035 ret = -EINVAL;
1036 goto out;
1037 }
1038 if (ctx->data_end) {
1039 /* Non-linear LWT test_run is unsupported for now. */
1040 if (is_lwt) {
1041 ret = -EINVAL;
1042 goto out;
1043 }
1044 linear_sz = max(ETH_HLEN, ctx->data_end);
1045 }
1046 }
1047
1048 linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
1049
1050 data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
1051 if (IS_ERR(data)) {
1052 ret = PTR_ERR(data);
1053 data = NULL;
1054 goto out;
1055 }
1056
1057 sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
1058 if (!sk) {
1059 ret = -ENOMEM;
1060 goto out;
1061 }
1062 sock_init_data(NULL, sk);
1063
1064 skb = slab_build_skb(data);
1065 if (!skb) {
1066 ret = -ENOMEM;
1067 goto out;
1068 }
1069 skb->sk = sk;
1070
1071 data = NULL; /* data released via kfree_skb */
1072
1073 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1074 __skb_put(skb, linear_sz);
1075
1076 if (unlikely(kattr->test.data_size_in > linear_sz)) {
1077 void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
1078 struct skb_shared_info *sinfo = skb_shinfo(skb);
1079 u32 copied = linear_sz;
1080
1081 while (copied < kattr->test.data_size_in) {
1082 struct page *page;
1083 u32 data_len;
1084
1085 if (sinfo->nr_frags == MAX_SKB_FRAGS) {
1086 ret = -ENOMEM;
1087 goto out;
1088 }
1089
1090 page = alloc_page(GFP_KERNEL);
1091 if (!page) {
1092 ret = -ENOMEM;
1093 goto out;
1094 }
1095
1096 data_len = min_t(u32, kattr->test.data_size_in - copied,
1097 PAGE_SIZE);
1098 skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
1099
1100 if (copy_from_user(page_address(page), data_in + copied,
1101 data_len)) {
1102 ret = -EFAULT;
1103 goto out;
1104 }
1105 skb->data_len += data_len;
1106 skb->truesize += PAGE_SIZE;
1107 skb->len += data_len;
1108 copied += data_len;
1109 }
1110 }
1111
1112 if (ctx && ctx->ifindex > 1) {
1113 dev = dev_get_by_index(net, ctx->ifindex);
1114 if (!dev) {
1115 ret = -ENODEV;
1116 goto out;
1117 }
1118 }
1119 skb->protocol = eth_type_trans(skb, dev);
1120 skb_reset_network_header(skb);
1121
1122 switch (skb->protocol) {
1123 case htons(ETH_P_IP):
1124 sk->sk_family = AF_INET;
1125 if (sizeof(struct iphdr) <= skb_headlen(skb)) {
1126 sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
1127 sk->sk_daddr = ip_hdr(skb)->daddr;
1128 }
1129 break;
1130 #if IS_ENABLED(CONFIG_IPV6)
1131 case htons(ETH_P_IPV6):
1132 sk->sk_family = AF_INET6;
1133 if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
1134 sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
1135 sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
1136 }
1137 break;
1138 #endif
1139 default:
1140 break;
1141 }
1142
1143 if (is_l2)
1144 __skb_push(skb, hh_len);
1145 if (is_direct_pkt_access)
1146 bpf_compute_data_pointers(skb);
1147
1148 ret = convert___skb_to_skb(skb, ctx);
1149 if (ret)
1150 goto out;
1151
1152 if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
1153 const int off = skb_network_offset(skb);
1154 int len = skb->len - off;
1155
1156 skb->csum = skb_checksum(skb, off, len, 0);
1157 skb->ip_summed = CHECKSUM_COMPLETE;
1158 }
1159
1160 if (prog->type == BPF_PROG_TYPE_LWT_XMIT && !skb_dst(skb)) {
1161 struct flowi4 fl4 = {};
> 1162 struct flowi6 fl6 = {};
1163 struct rtable *rt;
1164
1165 switch (skb->protocol) {
1166 case htons(ETH_P_IP):
1167 if (sizeof(struct iphdr) <= skb_headlen(skb)) {
1168 fl4.saddr = ip_hdr(skb)->saddr;
1169 fl4.daddr = ip_hdr(skb)->daddr;
1170 }
1171
1172 rt = ip_route_output_key(net, &fl4);
1173 if (IS_ERR(rt)) {
1174 ret = PTR_ERR(rt);
1175 goto out;
1176 }
1177 dst = &rt->dst;
1178 break;
1179 #if IS_ENABLED(CONFIG_IPV6)
1180 case htons(ETH_P_IPV6):
1181 if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
1182 fl6.saddr = ipv6_hdr(skb)->saddr;
1183 fl6.daddr = ipv6_hdr(skb)->daddr;
1184 }
1185
1186 dst = ip6_route_output(net, NULL, &fl6);
1187 if (IS_ERR(dst)) {
1188 ret = PTR_ERR(dst);
1189 goto out;
1190 }
1191 break;
1192 #endif
1193 default:
1194 ret = -EINVAL;
1195 goto out;
1196 }
1197
1198 if (unlikely(dst->error)) {
1199 ret = dst->error;
1200 dst_release(dst);
1201 goto out;
1202 }
1203 skb_dst_set(skb, dst);
1204 }
1205 ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
1206 if (ret)
1207 goto out;
1208 if (!is_l2) {
1209 if (skb_headroom(skb) < hh_len) {
1210 int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
1211
1212 if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
1213 ret = -ENOMEM;
1214 goto out;
1215 }
1216 }
1217 memset(__skb_push(skb, hh_len), 0, hh_len);
1218 }
1219
1220 if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) {
1221 const int off = skb_network_offset(skb);
1222 int len = skb->len - off;
1223 __wsum csum;
1224
1225 csum = skb_checksum(skb, off, len, 0);
1226
1227 if (csum_fold(skb->csum) != csum_fold(csum)) {
1228 ret = -EBADMSG;
1229 goto out;
1230 }
1231 }
1232
1233 convert_skb_to___skb(skb, ctx);
1234
1235 if (skb_is_nonlinear(skb))
1236 /* bpf program can never convert linear skb to non-linear */
1237 WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
1238 ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
1239 skb->data_len, retval, duration);
1240 if (!ret)
1241 ret = bpf_ctx_finish(kattr, uattr, ctx,
1242 sizeof(struct __sk_buff));
1243 out:
1244 if (dev && dev != net->loopback_dev)
1245 dev_put(dev);
1246 kfree_skb(skb);
1247 kfree(data);
1248 if (sk)
1249 sk_free(sk);
1250 kfree(ctx);
1251 return ret;
1252 }
1253
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 10+ messages in thread