public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [rdma] "rdma link del" operation hangs at wait_for_completion() when a file descriptor is in use.
@ 2025-12-04  8:26 Tetsuo Handa
  2026-02-28  6:07 ` Tetsuo Handa
  0 siblings, 1 reply; 6+ messages in thread
From: Tetsuo Handa @ 2025-12-04  8:26 UTC (permalink / raw)
  To: OFED mailing list

[-- Attachment #1: Type: text/plain, Size: 6352 bytes --]

I found that running the attached example program causes a khungtaskd hung-task message. What is wrong?



INFO: task rdma:1387 blocked for more than 122 seconds.
      Not tainted 6.18.0 #231
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:rdma            state:D stack:0     pid:1387  tgid:1387  ppid:1347   task_flags:0x400100 flags:0x00080001
Call Trace:
 <TASK>
 __schedule+0x369/0x8a0
 schedule+0x3a/0xe0
 schedule_timeout+0xca/0x110
 wait_for_completion+0x8a/0x140
 ib_uverbs_remove_one+0x1b0/0x210 [ib_uverbs]
 remove_client_context+0x8d/0xd0 [ib_core]
 disable_device+0x8b/0x170 [ib_core]
 __ib_unregister_device+0x110/0x180 [ib_core]
 ib_unregister_device_and_put+0x37/0x50 [ib_core]
 nldev_dellink+0xa4/0x100 [ib_core]
 rdma_nl_rcv_msg+0x12f/0x2f0 [ib_core]
 ? __lock_acquire+0x55d/0xbf0
 rdma_nl_rcv_skb.constprop.0.isra.0+0xb2/0x100 [ib_core]
 netlink_unicast+0x203/0x2e0
 netlink_sendmsg+0x1f8/0x420
 __sys_sendto+0x1e1/0x1f0
 __x64_sys_sendto+0x24/0x30
 do_syscall_64+0x94/0x320
 ? _copy_to_user+0x22/0x70
 ? move_addr_to_user+0xd6/0x120
 ? __sys_getsockname+0x9a/0xf0
 ? do_syscall_64+0x137/0x320
 ? do_sock_setsockopt+0x85/0x160
 ? do_sock_setsockopt+0x85/0x160
 ? __sys_setsockopt+0x7b/0xc0
 ? do_syscall_64+0x137/0x320
 ? do_syscall_64+0x137/0x320
 ? do_syscall_64+0x137/0x320
 ? lockdep_hardirqs_on_prepare.part.0+0x9b/0x150
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f877077b77e
RSP: 002b:00007ffda335da70 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 000056038051c3c0 RCX: 00007f877077b77e
RDX: 0000000000000018 RSI: 000056038051b2a0 RDI: 0000000000000004
RBP: 00007ffda335da80 R08: 00007f877090f9a0 R09: 000000000000000c
R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffda335dce0
R13: 00007ffda335dd38 R14: 00007ffda335dce0 R15: 0000000069314344
 </TASK>

Showing all locks held in the system:
4 locks held by kworker/u512:0/11:
 #0: ffff8c82003d3148 ((wq_completion)netns){+.+.}-{0:0}, at: process_one_work+0x509/0x590
 #1: ffffcea98008fe38 (net_cleanup_work){+.+.}-{0:0}, at: process_one_work+0x1e2/0x590
 #2: ffffffff9807a310 (pernet_ops_rwsem){++++}-{4:4}, at: cleanup_net+0x51/0x390
 #3: ffff8c8224164700 (&device->unregistration_lock){+.+.}-{4:4}, at: rdma_dev_change_netns+0x28/0x120 [ib_core]
1 lock held by khungtaskd/99:
 #0: ffffffff97d9f4e0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire.constprop.0+0x7/0x30
2 locks held by kworker/10:1/127:
1 lock held by systemd-journal/662:
2 locks held by rdma/1387:
 #0: ffffffffc0b88c18 (&rdma_nl_types[idx].sem){.+.+}-{4:4}, at: rdma_nl_rcv_msg+0x9e/0x2f0 [ib_core]
 #1: ffff8c8224164700 (&device->unregistration_lock){+.+.}-{4:4}, at: __ib_unregister_device+0xe4/0x180 [ib_core]

=============================================

INFO: task kworker/u512:0:11 blocked for more than 122 seconds.
      Not tainted 6.18.0 #231
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kworker/u512:0  state:D stack:0     pid:11    tgid:11    ppid:2      task_flags:0x4208060 flags:0x00080000
Workqueue: netns cleanup_net
Call Trace:
 <TASK>
 __schedule+0x369/0x8a0
 schedule+0x3a/0xe0
 schedule_preempt_disabled+0x15/0x30
 __mutex_lock+0x568/0x1170
 ? rdma_dev_change_netns+0x28/0x120 [ib_core]
 ? rdma_dev_change_netns+0x28/0x120 [ib_core]
 rdma_dev_change_netns+0x28/0x120 [ib_core]
 rdma_dev_exit_net+0x1a4/0x320 [ib_core]
 ops_undo_list+0xea/0x3b0
 cleanup_net+0x20b/0x390
 process_one_work+0x223/0x590
 worker_thread+0x1cb/0x3a0
 ? __pfx_worker_thread+0x10/0x10
 kthread+0xff/0x240
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x182/0x1e0
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1a/0x30
 </TASK>
INFO: task kworker/u512:0:11 is blocked on a mutex likely owned by task rdma:1387.
INFO: task rdma:1387 blocked for more than 245 seconds.
      Not tainted 6.18.0 #231
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:rdma            state:D stack:0     pid:1387  tgid:1387  ppid:1347   task_flags:0x400100 flags:0x00080001
Call Trace:
 <TASK>
 __schedule+0x369/0x8a0
 schedule+0x3a/0xe0
 schedule_timeout+0xca/0x110
 wait_for_completion+0x8a/0x140
 ib_uverbs_remove_one+0x1b0/0x210 [ib_uverbs]
 remove_client_context+0x8d/0xd0 [ib_core]
 disable_device+0x8b/0x170 [ib_core]
 __ib_unregister_device+0x110/0x180 [ib_core]
 ib_unregister_device_and_put+0x37/0x50 [ib_core]
 nldev_dellink+0xa4/0x100 [ib_core]
 rdma_nl_rcv_msg+0x12f/0x2f0 [ib_core]
 ? __lock_acquire+0x55d/0xbf0
 rdma_nl_rcv_skb.constprop.0.isra.0+0xb2/0x100 [ib_core]
 netlink_unicast+0x203/0x2e0
 netlink_sendmsg+0x1f8/0x420
 __sys_sendto+0x1e1/0x1f0
 __x64_sys_sendto+0x24/0x30
 do_syscall_64+0x94/0x320
 ? _copy_to_user+0x22/0x70
 ? move_addr_to_user+0xd6/0x120
 ? __sys_getsockname+0x9a/0xf0
 ? do_syscall_64+0x137/0x320
 ? do_sock_setsockopt+0x85/0x160
 ? do_sock_setsockopt+0x85/0x160
 ? __sys_setsockopt+0x7b/0xc0
 ? do_syscall_64+0x137/0x320
 ? do_syscall_64+0x137/0x320
 ? do_syscall_64+0x137/0x320
 ? lockdep_hardirqs_on_prepare.part.0+0x9b/0x150
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f877077b77e
RSP: 002b:00007ffda335da70 EFLAGS: 00000202 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 000056038051c3c0 RCX: 00007f877077b77e
RDX: 0000000000000018 RSI: 000056038051b2a0 RDI: 0000000000000004
RBP: 00007ffda335da80 R08: 00007f877090f9a0 R09: 000000000000000c
R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffda335dce0
R13: 00007ffda335dd38 R14: 00007ffda335dce0 R15: 0000000069314344
 </TASK>

Showing all locks held in the system:
4 locks held by kworker/u512:0/11:
 #0: ffff8c82003d3148 ((wq_completion)netns){+.+.}-{0:0}, at: process_one_work+0x509/0x590
 #1: ffffcea98008fe38 (net_cleanup_work){+.+.}-{0:0}, at: process_one_work+0x1e2/0x590
 #2: ffffffff9807a310 (pernet_ops_rwsem){++++}-{4:4}, at: cleanup_net+0x51/0x390
 #3: ffff8c8224164700 (&device->unregistration_lock){+.+.}-{4:4}, at: rdma_dev_change_netns+0x28/0x120 [ib_core]
1 lock held by khungtaskd/99:
 #0: ffffffff97d9f4e0 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire.constprop.0+0x7/0x30
2 locks held by rdma/1387:
 #0: ffffffffc0b88c18 (&rdma_nl_types[idx].sem){.+.+}-{4:4}, at: rdma_nl_rcv_msg+0x9e/0x2f0 [ib_core]
 #1: ffff8c8224164700 (&device->unregistration_lock){+.+.}-{4:4}, at: __ib_unregister_device+0xe4/0x180 [ib_core]

=============================================

[-- Attachment #2: rdma_example.c --]
[-- Type: text/plain, Size: 5958 bytes --]

// gcc -Wall -O2 rdma_example.c -lrdmacm -libverbs
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <getopt.h>
#include <stdlib.h>
#include <unistd.h>
#include <rdma/rdma_cma.h>

/*
 * Reproducer for the reported hang: inside a fresh network namespace it
 * creates soft-iWARP (siw) devices over lo and a veth pair, establishes a
 * loopback RDMA RC connection between a "server" and a "client" half of the
 * same process, exchanges one message, then runs "rdma link del" while the
 * uverbs file descriptors (event channels, PDs, CQs, QPs, MRs) are still
 * open.  Per the report above, the final "rdma link del siw0" then blocks
 * in wait_for_completion().
 *
 * NOTE(review): error paths use exit(-1) and nothing is ever destroyed or
 * freed on purpose — the open uverbs FDs at exit are the point of the test.
 */
int main(int argc, char *argv[])
{
	const char *remote_addr = "10.0.0.1";
	const int port = 21234;
	struct rdma_event_channel *evch1, *evch2;	/* evch1 = server, evch2 = client */
	struct rdma_cm_id *server_id;
	struct rdma_cm_id *client_id;
	struct rdma_cm_event *event = NULL;
	struct rdma_conn_param conn_param;
	struct ibv_pd *pd1, *pd2;	/* 1 = server side, 2 = client side */
	struct ibv_cq *cq1, *cq2;
	struct ibv_mr *mr1, *mr2;
	struct ibv_send_wr snd_wr;
	struct ibv_recv_wr rcv_wr;
	struct ibv_send_wr *bad_wr = NULL;
	struct ibv_sge sge;
	struct ibv_wc wc;
	/* Reliable-connected QP; both sides reuse this template, only
	 * send_cq/recv_cq are filled in before each rdma_create_qp(). */
	struct ibv_qp_init_attr attr = {
		.cap = {
			.max_send_wr = 32,
			.max_recv_wr = 32,
			.max_send_sge = 1,
			.max_recv_sge = 1,
			.max_inline_data = 64
		},
		.qp_type = IBV_QPT_RC
	};
	/* msg doubles as send buffer (server) and receive buffer (client). */
	char msg[256] = "Hello World";
	const int msg_len = strlen(msg) + 1;	/* include the NUL */
	struct sockaddr_in sin;

	/* Isolate everything in a private netns so the siw/veth setup and
	 * the later namespace teardown don't disturb the host. */
	if (unshare(CLONE_NEWNET)) {
		perror("unshare");
		exit(-1);
	}
	/* Build the topology with the system tools: one siw device on lo,
	 * a veth pair 10.0.0.1 <-> 10.0.0.2, and one siw device on each
	 * veth end.  Return values deliberately unchecked (best effort). */
	system("ip link set lo up");
	system("rdma link add siw0 type siw netdev lo");
	system("rdma link list");
	system("ip link add veth1 type veth peer name veth2");
	system("ip addr add 10.0.0.1/24 dev veth1");
	system("ip link set veth1 up");
	system("ip addr add 10.0.0.2/24 dev veth2");
	system("ip link set veth2 up");
	system("ping -c 1 10.0.0.1");
	system("ping -c 1 10.0.0.2");
	system("rdma link show");
	system("rdma link add siw_dev1 type siw netdev veth1");
	system("rdma link add siw_dev2 type siw netdev veth2");

	/* --- Server half: bind to ANY:21234 and listen. --- */
	if (!(evch1 = rdma_create_event_channel())) {
		perror("server: rdma_create_event_channel");
		exit(-1);
	}
	if (rdma_create_id(evch1, &server_id, NULL, RDMA_PS_TCP)) {
		perror("server: rdma_create_id");
		exit(-1);
	}
	sin.sin_family = AF_INET;
	sin.sin_port = htons(port);
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (rdma_bind_addr(server_id, (struct sockaddr *)&sin)) {
		perror("server: rdma_bind_addr");
		exit(-1);
	}
	if (rdma_listen(server_id, 6)) {
		perror("server: rdma_listen");
		exit(-1);
	}
	/* --- Client half: resolve address and route to 10.0.0.1. --- */
	if (!(evch2 = rdma_create_event_channel())) {
		perror("client: rdma_create_event_channel");
		exit(-1);
	}
	if (rdma_create_id(evch2, &client_id, NULL, RDMA_PS_TCP)) {
		perror("client: rdma_create_id");
		exit(-1);
	}
	sin.sin_family = AF_INET;
	sin.sin_port = htons(port);
	sin.sin_addr.s_addr = inet_addr(remote_addr);
	if (rdma_resolve_addr
	    (client_id, NULL, (struct sockaddr *)&sin, 2000)) {
		perror("client: rdma_resolve_addr");
		exit(-1);
	}
	if (rdma_get_cm_event(evch2, &event)
	    || event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
		perror("client: rdma_get_cm_event");
		exit(-1);
	}
	rdma_ack_cm_event(event);
	if (rdma_resolve_route(client_id, 2000)) {
		perror("client: rdma_resolve_route");
		exit(-1);
	}
	if (rdma_get_cm_event(evch2, &event)
	    || event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
		perror("client: rdma_get_cm_event");
		exit(-1);
	}
	rdma_ack_cm_event(event);
	/* Client verbs resources: PD, MR over msg[], CQ, QP. */
	if (!(pd2 = ibv_alloc_pd(client_id->verbs))) {
		perror("client: ibv_alloc_pd");
		exit(-1);
	}
	if (!(mr2 = ibv_reg_mr(pd2, msg, 256,
			       IBV_ACCESS_REMOTE_WRITE |
			       IBV_ACCESS_LOCAL_WRITE |
			       IBV_ACCESS_REMOTE_READ))) {
		perror("client: ibv_reg_mr");
		exit(-1);
	}
	if (!(cq2 = ibv_create_cq(client_id->verbs, 32, 0, 0, 0))) {
		perror("client: ibv_create_cq");
		exit(-1);
	}
	attr.send_cq = attr.recv_cq = cq2;
	if (rdma_create_qp(client_id, pd2, &attr)) {
		perror("client: rdma_create_qp");
		exit(-1);
	}
	/* Post the receive before connecting so the server's send cannot
	 * arrive with no receive buffer posted.
	 * NOTE(review): (uint64_t)msg — strictly (uintptr_t) is the portable
	 * pointer-to-integer cast, though they match on x86-64. */
	sge.addr = (uint64_t)msg;
	sge.length = msg_len;
	sge.lkey = mr2->lkey;
	rcv_wr.sg_list = &sge;
	rcv_wr.num_sge = 1;
	rcv_wr.next = NULL;
	if (ibv_post_recv(client_id->qp, &rcv_wr, NULL)) {
		perror("client: ibv_post_recv");
		exit(-1);
	}
	memset(&conn_param, 0, sizeof conn_param);
	if (rdma_connect(client_id, &conn_param)) {
		perror("client: rdma_connect");
		exit(-1);
	}
	/* --- Back on the server: accept the incoming connection. --- */
	if (rdma_get_cm_event(evch1, &event)
	    || event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
		perror("server: rdma_get_cm_event");
		exit(-1);
	}
	/* NOTE(review): client_id is reused here for the server-side
	 * connection id delivered with CONNECT_REQUEST; the client-side id
	 * handle is dropped (never destroyed) — presumably intentional for
	 * this leak-everything reproducer, but worth confirming. */
	client_id = (struct rdma_cm_id *)event->id;
	if (!(pd1 = ibv_alloc_pd(client_id->verbs))) {
		perror("server: ibv_alloc_pd");
		exit(-1);
	}
	if (!(mr1 = ibv_reg_mr(pd1, msg, 256,
			       IBV_ACCESS_REMOTE_WRITE |
			       IBV_ACCESS_LOCAL_WRITE |
			       IBV_ACCESS_REMOTE_READ))) {
		perror("server: ibv_reg_mr");
		exit(-1);
	}
	if (!(cq1 = ibv_create_cq(client_id->verbs, 32, 0, 0, 0))) {
		perror("server: ibv_create_cq");
		exit(-1);
	}
	attr.send_cq = attr.recv_cq = cq1;
	if (rdma_create_qp(client_id, pd1, &attr)) {
		perror("server: rdma_create_qp");
		exit(-1);
	}
	memset(&conn_param, 0, sizeof conn_param);
	if (rdma_accept(client_id, &conn_param)) {
		perror("server: rdma_accept");
		exit(-1);
	}
	rdma_ack_cm_event(event);	/* ack frees the CONNECT_REQUEST event */
	if (rdma_get_cm_event(evch1, &event)
	    || event->event != RDMA_CM_EVENT_ESTABLISHED) {
		perror("server: rdma_get_cm_event");
		exit(-1);
	}
	rdma_ack_cm_event(event);
	/* Server sends msg[] (signaled) and busy-polls its CQ for the
	 * send completion. */
	sge.addr = (uint64_t)msg;
	sge.length = msg_len;
	sge.lkey = mr1->lkey;
	snd_wr.sg_list = &sge;
	snd_wr.num_sge = 1;
	snd_wr.opcode = IBV_WR_SEND;
	snd_wr.send_flags = IBV_SEND_SIGNALED;
	snd_wr.next = NULL;
	if (ibv_post_send(client_id->qp, &snd_wr, &bad_wr)) {
		perror("server: ibv_post_send");
		exit(-1);
	}
	while (!ibv_poll_cq(cq1, 1, &wc))
		;
	if (wc.status != IBV_WC_SUCCESS) {
		perror("server: ibv_poll_cq");
		exit(-1);
	}
	/* Client: wait for ESTABLISHED, then busy-poll for the receive
	 * completion of the message posted earlier. */
	if (rdma_get_cm_event(evch2, &event)
	    || event->event != RDMA_CM_EVENT_ESTABLISHED) {
		perror("client: rdma_get_cm_event");
		exit(-1);
	}
	rdma_ack_cm_event(event);
	while (!ibv_poll_cq(cq2, 1, &wc))
		;
	if (wc.status != IBV_WC_SUCCESS) {
		perror("client: ibv_poll_cq");
		exit(-1);
	}
	printf("Received: %s\n", msg);
	fflush(stdout);
	/* Tear down the rdma links with all verbs/cm FDs still open.
	 * Per the report above, the last command ("rdma link del siw0")
	 * hangs in the kernel at wait_for_completion(). */
	system("rdma link del siw_dev1");
	system("rdma link del siw_dev2");
	system("ip link del veth1 type veth peer name veth2");
	return system("rdma link del siw0");
}

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-03-01 17:47 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-04  8:26 [rdma] "rdma link del" operation hangs at wait_for_completion() when a file descriptor is in use Tetsuo Handa
2026-02-28  6:07 ` Tetsuo Handa
2026-02-28 16:43   ` Jason Gunthorpe
2026-02-28 22:35     ` Tetsuo Handa
2026-03-01  7:43       ` Tetsuo Handa
2026-03-01 17:47         ` Jason Gunthorpe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox