* [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer
@ 2024-09-29 17:02 Jordan Rife
2024-09-30 5:15 ` kernel test robot
2024-09-30 7:36 ` Daniel Borkmann
0 siblings, 2 replies; 4+ messages in thread
From: Jordan Rife @ 2024-09-29 17:02 UTC (permalink / raw)
To: bpf
Cc: Jordan Rife, netdev, Daniel Borkmann, Alexei Starovoitov,
Andrii Nakryiko, Martin KaFai Lau, Kui-Feng Lee, Eric Dumazet,
Jakub Kicinski, David S. Miller, Paolo Abeni, stable
It is possible to create cycles using bpf_redirect_peer which lead to
an infinite loop inside __netif_receive_skb_core. The simplest way to
illustrate this is by attaching a TC program to the ingress hook on both
sides of a veth or netkit device pair, each redirecting to its own peer,
although other cycles are possible. This patch places an upper limit on
the number of iterations allowed inside __netif_receive_skb_core to
prevent this.
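
For illustration, the smallest program that creates such a cycle has
the same shape as the selftest programs added below. Attached to the
ingress hook on both sides of the pair (PEER_IFINDEX here is a
placeholder for each device's peer ifindex), every packet bounces
between the two devices forever:

  SEC("tc")
  int tc_self(struct __sk_buff *skb)
  {
  	/* Redirect each packet straight back to this device's peer. */
  	return bpf_redirect_peer(PEER_IFINDEX, 0);
  }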
Signed-off-by: Jordan Rife <jrife@google.com>
Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper")
Cc: stable@vger.kernel.org
---
net/core/dev.c | 11 +++-
net/core/dev.h | 1 +
.../selftests/bpf/prog_tests/tc_redirect.c | 51 +++++++++++++++++++
.../selftests/bpf/progs/test_tc_peer.c | 13 +++++
4 files changed, 75 insertions(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index cd479f5f22f6..753f8d27f47c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5455,6 +5455,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 	struct net_device *orig_dev;
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
+	int loops = 0;
 	__be16 type;
 
 	net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
@@ -5521,8 +5522,16 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 		nf_skip_egress(skb, true);
 		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
 					 &another);
-		if (another)
+		if (another) {
+			loops++;
+			if (unlikely(loops == RX_LOOP_LIMIT)) {
+				ret = NET_RX_DROP;
+				net_crit_ratelimited("bpf: loop limit reached on datapath, buggy bpf program?\n");
+				goto out;
+			}
+
 			goto another_round;
+		}
 		if (!skb)
 			goto out;
 
diff --git a/net/core/dev.h b/net/core/dev.h
index 5654325c5b71..28d1cf2f9069 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -150,6 +150,7 @@ struct napi_struct *napi_by_id(unsigned int napi_id);
 void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
 
 #define XMIT_RECURSION_LIMIT	8
+#define RX_LOOP_LIMIT		8
 
 #ifndef CONFIG_PREEMPT_RT
 static inline bool dev_xmit_recursion(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c85798966aec..db1b36090d6c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -1081,6 +1081,55 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 	close_netns(nstoken);
 }
 
+static void test_tc_redirect_peer_loop(struct netns_setup_result *setup_result)
+{
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+	struct test_tc_peer *skel;
+	struct nstoken *nstoken;
+	int err;
+
+	/* Set up an infinite redirect loop using bpf_redirect_peer with ingress
+	 * hooks on either side of a veth/netkit pair redirecting to its own
+	 * peer. This should not lock up the kernel.
+	 */
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
+		return;
+
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		goto done;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src;
+	skel->rodata->IFINDEX_SRC_FWD = setup_result->ifindex_src_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load"))
+		goto done;
+
+	QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.tc_src_self, 0);
+	close_netns(nstoken);
+
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		return;
+	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.tc_src_fwd_self, 0);
+
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto done;
+
+	SYS_NOFAIL("ip netns exec " NS_SRC " %s -c 1 -W 1 -q %s > /dev/null",
+		   ping_command(AF_INET), IP4_DST);
+fail:
+done:
+	if (skel)
+		test_tc_peer__destroy(skel);
+	close_netns(nstoken);
+}
+
 static int tun_open(char *name)
 {
 	struct ifreq ifr;
@@ -1280,6 +1329,8 @@ static void *test_tc_redirect_run_tests(void *arg)
 	RUN_TEST(tc_redirect_peer, MODE_VETH);
 	RUN_TEST(tc_redirect_peer, MODE_NETKIT);
 	RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+	RUN_TEST(tc_redirect_peer_loop, MODE_VETH);
+	RUN_TEST(tc_redirect_peer_loop, MODE_NETKIT);
 	RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
 	RUN_TEST(tc_redirect_neigh, MODE_VETH);
 	RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 365eacb5dc34..9b8a00ccad42 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -10,6 +10,7 @@
 
 #include <bpf/bpf_helpers.h>
 
+volatile const __u32 IFINDEX_SRC_FWD;
 volatile const __u32 IFINDEX_SRC;
 volatile const __u32 IFINDEX_DST;
 
@@ -34,6 +35,18 @@ int tc_src(struct __sk_buff *skb)
 	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
+SEC("tc")
+int tc_src_self(struct __sk_buff *skb)
+{
+	return bpf_redirect_peer(IFINDEX_SRC, 0);
+}
+
+SEC("tc")
+int tc_src_fwd_self(struct __sk_buff *skb)
+{
+	return bpf_redirect_peer(IFINDEX_SRC_FWD, 0);
+}
+
 SEC("tc")
 int tc_dst_l3(struct __sk_buff *skb)
 {
--
2.46.1.824.gd892dcdcdd-goog
* Re: [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer
2024-09-29 17:02 [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer Jordan Rife
@ 2024-09-30 5:15 ` kernel test robot
2024-09-30 7:36 ` Daniel Borkmann
1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2024-09-30 5:15 UTC (permalink / raw)
To: Jordan Rife, bpf
Cc: oe-kbuild-all, Jordan Rife, netdev, Daniel Borkmann,
Alexei Starovoitov, Andrii Nakryiko, Martin KaFai Lau,
Kui-Feng Lee, Eric Dumazet, Jakub Kicinski, Paolo Abeni, stable
Hi Jordan,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf-next/master]
[also build test WARNING on bpf/master net/main net-next/main linus/master v6.12-rc1 next-20240927]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Jordan-Rife/bpf-Prevent-infinite-loops-with-bpf_redirect_peer/20240930-010356
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/20240929170219.1881536-1-jrife%40google.com
patch subject: [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer
config: openrisc-defconfig (https://download.01.org/0day-ci/archive/20240930/202409301255.h6vAvBWG-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240930/202409301255.h6vAvBWG-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409301255.h6vAvBWG-lkp@intel.com/
All warnings (new ones prefixed by >>):
net/core/dev.c: In function '__netif_receive_skb_core':
>> net/core/dev.c:5458:13: warning: unused variable 'loops' [-Wunused-variable]
5458 | int loops = 0;
| ^~~~~
vim +/loops +5458 net/core/dev.c
5448
5449 static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
5450 struct packet_type **ppt_prev)
5451 {
5452 struct packet_type *ptype, *pt_prev;
5453 rx_handler_func_t *rx_handler;
5454 struct sk_buff *skb = *pskb;
5455 struct net_device *orig_dev;
5456 bool deliver_exact = false;
5457 int ret = NET_RX_DROP;
> 5458 int loops = 0;
5459 __be16 type;
5460
5461 net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
5462
5463 trace_netif_receive_skb(skb);
5464
5465 orig_dev = skb->dev;
5466
5467 skb_reset_network_header(skb);
5468 if (!skb_transport_header_was_set(skb))
5469 skb_reset_transport_header(skb);
5470 skb_reset_mac_len(skb);
5471
5472 pt_prev = NULL;
5473
5474 another_round:
5475 skb->skb_iif = skb->dev->ifindex;
5476
5477 __this_cpu_inc(softnet_data.processed);
5478
5479 if (static_branch_unlikely(&generic_xdp_needed_key)) {
5480 int ret2;
5481
5482 migrate_disable();
5483 ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
5484 &skb);
5485 migrate_enable();
5486
5487 if (ret2 != XDP_PASS) {
5488 ret = NET_RX_DROP;
5489 goto out;
5490 }
5491 }
5492
5493 if (eth_type_vlan(skb->protocol)) {
5494 skb = skb_vlan_untag(skb);
5495 if (unlikely(!skb))
5496 goto out;
5497 }
5498
5499 if (skb_skip_tc_classify(skb))
5500 goto skip_classify;
5501
5502 if (pfmemalloc)
5503 goto skip_taps;
5504
5505 list_for_each_entry_rcu(ptype, &net_hotdata.ptype_all, list) {
5506 if (pt_prev)
5507 ret = deliver_skb(skb, pt_prev, orig_dev);
5508 pt_prev = ptype;
5509 }
5510
5511 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
5512 if (pt_prev)
5513 ret = deliver_skb(skb, pt_prev, orig_dev);
5514 pt_prev = ptype;
5515 }
5516
5517 skip_taps:
5518 #ifdef CONFIG_NET_INGRESS
5519 if (static_branch_unlikely(&ingress_needed_key)) {
5520 bool another = false;
5521
5522 nf_skip_egress(skb, true);
5523 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
5524 &another);
5525 if (another) {
5526 loops++;
5527 if (unlikely(loops == RX_LOOP_LIMIT)) {
5528 ret = NET_RX_DROP;
5529 net_crit_ratelimited("bpf: loop limit reached on datapath, buggy bpf program?\n");
5530 goto out;
5531 }
5532
5533 goto another_round;
5534 }
5535 if (!skb)
5536 goto out;
5537
5538 nf_skip_egress(skb, false);
5539 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
5540 goto out;
5541 }
5542 #endif
5543 skb_reset_redirect(skb);
5544 skip_classify:
5545 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
5546 goto drop;
5547
5548 if (skb_vlan_tag_present(skb)) {
5549 if (pt_prev) {
5550 ret = deliver_skb(skb, pt_prev, orig_dev);
5551 pt_prev = NULL;
5552 }
5553 if (vlan_do_receive(&skb))
5554 goto another_round;
5555 else if (unlikely(!skb))
5556 goto out;
5557 }
5558
5559 rx_handler = rcu_dereference(skb->dev->rx_handler);
5560 if (rx_handler) {
5561 if (pt_prev) {
5562 ret = deliver_skb(skb, pt_prev, orig_dev);
5563 pt_prev = NULL;
5564 }
5565 switch (rx_handler(&skb)) {
5566 case RX_HANDLER_CONSUMED:
5567 ret = NET_RX_SUCCESS;
5568 goto out;
5569 case RX_HANDLER_ANOTHER:
5570 goto another_round;
5571 case RX_HANDLER_EXACT:
5572 deliver_exact = true;
5573 break;
5574 case RX_HANDLER_PASS:
5575 break;
5576 default:
5577 BUG();
5578 }
5579 }
5580
5581 if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(skb->dev)) {
5582 check_vlan_id:
5583 if (skb_vlan_tag_get_id(skb)) {
5584 /* Vlan id is non 0 and vlan_do_receive() above couldn't
5585 * find vlan device.
5586 */
5587 skb->pkt_type = PACKET_OTHERHOST;
5588 } else if (eth_type_vlan(skb->protocol)) {
5589 /* Outer header is 802.1P with vlan 0, inner header is
5590 * 802.1Q or 802.1AD and vlan_do_receive() above could
5591 * not find vlan dev for vlan id 0.
5592 */
5593 __vlan_hwaccel_clear_tag(skb);
5594 skb = skb_vlan_untag(skb);
5595 if (unlikely(!skb))
5596 goto out;
5597 if (vlan_do_receive(&skb))
5598 /* After stripping off 802.1P header with vlan 0
5599 * vlan dev is found for inner header.
5600 */
5601 goto another_round;
5602 else if (unlikely(!skb))
5603 goto out;
5604 else
5605 /* We have stripped outer 802.1P vlan 0 header.
5606 * But could not find vlan dev.
5607 * check again for vlan id to set OTHERHOST.
5608 */
5609 goto check_vlan_id;
5610 }
5611 /* Note: we might in the future use prio bits
5612 * and set skb->priority like in vlan_do_receive()
5613 * For the time being, just ignore Priority Code Point
5614 */
5615 __vlan_hwaccel_clear_tag(skb);
5616 }
5617
5618 type = skb->protocol;
5619
5620 /* deliver only exact match when indicated */
5621 if (likely(!deliver_exact)) {
5622 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5623 &ptype_base[ntohs(type) &
5624 PTYPE_HASH_MASK]);
5625 }
5626
5627 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5628 &orig_dev->ptype_specific);
5629
5630 if (unlikely(skb->dev != orig_dev)) {
5631 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5632 &skb->dev->ptype_specific);
5633 }
5634
5635 if (pt_prev) {
5636 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
5637 goto drop;
5638 *ppt_prev = pt_prev;
5639 } else {
5640 drop:
5641 if (!deliver_exact)
5642 dev_core_stats_rx_dropped_inc(skb->dev);
5643 else
5644 dev_core_stats_rx_nohandler_inc(skb->dev);
5645 kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
5646 /* Jamal, now you will not able to escape explaining
5647 * me how you were going to use this. :-)
5648 */
5649 ret = NET_RX_DROP;
5650 }
5651
5652 out:
5653 /* The invariant here is that if *ppt_prev is not NULL
5654 * then skb should also be non-NULL.
5655 *
5656 * Apparently *ppt_prev assignment above holds this invariant due to
5657 * skb dereferencing near it.
5658 */
5659 *pskb = skb;
5660 return ret;
5661 }
5662
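
A minimal way to silence this warning would be to guard the declaration
the same way as its only user (an untested sketch; the increment only
exists under CONFIG_NET_INGRESS):

	int ret = NET_RX_DROP;
#ifdef CONFIG_NET_INGRESS
	int loops = 0;	/* only referenced by the ingress hook below */
#endif
	__be16 type;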
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
* Re: [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer
2024-09-29 17:02 [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer Jordan Rife
2024-09-30 5:15 ` kernel test robot
@ 2024-09-30 7:36 ` Daniel Borkmann
2024-09-30 14:09 ` Jordan Rife
1 sibling, 1 reply; 4+ messages in thread
From: Daniel Borkmann @ 2024-09-30 7:36 UTC (permalink / raw)
To: Jordan Rife, bpf
Cc: netdev, Alexei Starovoitov, Andrii Nakryiko, Martin KaFai Lau,
Kui-Feng Lee, Eric Dumazet, Jakub Kicinski, David S. Miller,
Paolo Abeni, stable, John Fastabend
On 9/29/24 7:02 PM, Jordan Rife wrote:
> It is possible to create cycles using bpf_redirect_peer which lead to
> an infinite loop inside __netif_receive_skb_core. The simplest way to
> illustrate this is by attaching a TC program to the ingress hook on both
> sides of a veth or netkit device pair, each redirecting to its own peer,
> although other cycles are possible. This patch places an upper limit on
> the number of iterations allowed inside __netif_receive_skb_core to
> prevent this.
>
> Signed-off-by: Jordan Rife <jrife@google.com>
> Fixes: 9aa1206e8f48 ("bpf: Add redirect_peer helper")
> Cc: stable@vger.kernel.org
> ---
> net/core/dev.c | 11 +++-
> net/core/dev.h | 1 +
> .../selftests/bpf/prog_tests/tc_redirect.c | 51 +++++++++++++++++++
> .../selftests/bpf/progs/test_tc_peer.c | 13 +++++
> 4 files changed, 75 insertions(+), 1 deletion(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index cd479f5f22f6..753f8d27f47c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5455,6 +5455,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
>  	struct net_device *orig_dev;
>  	bool deliver_exact = false;
>  	int ret = NET_RX_DROP;
> +	int loops = 0;
>  	__be16 type;
>  
>  	net_timestamp_check(!READ_ONCE(net_hotdata.tstamp_prequeue), skb);
> @@ -5521,8 +5522,16 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
>  		nf_skip_egress(skb, true);
>  		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
>  					 &another);
> -		if (another)
> +		if (another) {
> +			loops++;
No, as you mentioned, there are plenty of other misconfiguration
possibilities in and outside bpf where something can loop in the stack
(or where you can lock yourself out e.g. drop-all).
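
For example, a drop-all lockout needs nothing more than the following
sketch attached to the ingress hook of the interface you manage the
box over; the verifier is perfectly happy with it:

  SEC("tc")
  int drop_all(struct __sk_buff *skb)
  {
  	/* Drop every packet, including your own ssh session. */
  	return TC_ACT_SHOT;
  }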
* Re: [PATCH v1] bpf: Prevent infinite loops with bpf_redirect_peer
2024-09-30 7:36 ` Daniel Borkmann
@ 2024-09-30 14:09 ` Jordan Rife
0 siblings, 0 replies; 4+ messages in thread
From: Jordan Rife @ 2024-09-30 14:09 UTC (permalink / raw)
To: Daniel Borkmann
Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko,
Martin KaFai Lau, Kui-Feng Lee, Eric Dumazet, Jakub Kicinski,
David S. Miller, Paolo Abeni, stable, John Fastabend
> No, as you mentioned, there are plenty of other misconfiguration
> possibilities in and
> outside bpf where something can loop in the stack (or where you can lock
> yourself
> out e.g. drop-all).
I wasn't sure if it should be possible to lock up the kernel with such
a combination of BPF programs. If this is the view generally, then
fair enough I suppose. Maybe this is my ignorance showing, but my
understanding is that with BPF generally we go to great lengths to
make sure things don't block (e.g. making sure a BPF program
terminates eventually) to avoid locking up the kernel. By extension,
should it not also be impossible to block indefinitely in
__netif_receive_skb_core with a combination of two BPF programs that
create a cycle with bpf_redirect_peer? It seems like there are other
provisions in place to prevent misconfigurations or buggy combinations
of programs from breaking things too badly, such as the
dev_xmit_recursion() check in __bpf_tx_skb().
>	if (dev_xmit_recursion()) {
>		net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
>		kfree_skb(skb);
>		return -ENETDOWN;
>	}
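
For context, that guard is just a per-CPU depth counter around the
transmit path; roughly (paraphrasing the non-PREEMPT_RT helpers in
net/core/dev.h, not a verbatim copy):

  static inline bool dev_xmit_recursion(void)
  {
  	return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
  			XMIT_RECURSION_LIMIT);
  }

  static inline void dev_xmit_recursion_inc(void)
  {
  	__this_cpu_inc(softnet_data.xmit.recursion);
  }

  static inline void dev_xmit_recursion_dec(void)
  {
  	__this_cpu_dec(softnet_data.xmit.recursion);
  }

__bpf_tx_skb() increments the counter around dev_queue_xmit() and drops
the packet once the depth passes XMIT_RECURSION_LIMIT, the same limit of
8 that RX_LOOP_LIMIT reuses in this patch.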
-Jordan