* [BUG] KFENCE: use-after-free read in udp_tunnel_nic_device_sync_work
From: Yue Sun @ 2026-06-24 9:01 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: netdev, linux-kernel, syzkaller
Hello,
I hit a reproducible use-after-free in the UDP tunnel NIC offload work item.
The original local crash was reported by KFENCE as:
KFENCE: use-after-free read in udp_tunnel_nic_device_sync_work
On current mainline, the C reproducer below triggers the same lifetime bug,
reported by KASAN before KFENCE samples the object:
BUG: KASAN: slab-use-after-free in __mutex_lock
Workqueue: udp_tunnel_nic udp_tunnel_nic_device_sync_work
Tested kernel:
840ef6c78e6a ("Merge tag 'nfs-for-7.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs")
Linux 7.1.0-11240-g840ef6c78e6a #31 SMP PREEMPT_DYNAMIC
Related syzbot reports:
https://syzkaller.appspot.com/bug?id=8d8fd27ae4339074afe3f456be73b2030d795256
The dashboard currently marks it as invalid, but the same issue is still
reproducible for me on current mainline with the C reproducer pasted below.
Crash stack from the latest C reproducer run:
[ 131.757233][ T12] BUG: KASAN: slab-use-after-free in __mutex_lock+0x16d0/0x1d80
[ 131.759738][ T12] Read of size 8 at addr ff11000065dcd2a8 by task kworker/u16:0/12
[ 131.762204][ T12]
[ 131.762956][ T12] CPU: 2 UID: 0 PID: 12 Comm: kworker/u16:0 Not tainted 7.1.0-11240-g840ef6c78e6a #31 PREEMPT(full)
[ 131.762997][ T12] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
[ 131.763020][ T12] Workqueue: udp_tunnel_nic udp_tunnel_nic_device_sync_work
[ 131.763064][ T12] Call Trace:
[ 131.763076][ T12] <TASK>
[ 131.763089][ T12] dump_stack_lvl+0x116/0x1b0
[ 131.763136][ T12] print_report+0xf1/0x5c0
[ 131.763178][ T12] ? __virt_addr_valid+0x238/0x420
[ 131.763214][ T12] ? __mutex_lock+0x16d0/0x1d80
[ 131.763264][ T12] kasan_report+0xca/0x100
[ 131.763301][ T12] ? __mutex_lock+0x16d0/0x1d80
[ 131.763345][ T12] __mutex_lock+0x16d0/0x1d80
[ 131.763383][ T12] ? udp_tunnel_nic_device_sync_work+0x32/0x9c0
[ 131.763438][ T12] ? __pfx___mutex_lock+0x10/0x10
[ 131.763487][ T12] ? debug_object_deactivate+0x213/0x390
[ 131.763551][ T12] ? udp_tunnel_nic_device_sync_work+0x32/0x9c0
[ 131.763587][ T12] udp_tunnel_nic_device_sync_work+0x32/0x9c0
[ 131.763633][ T12] process_one_work+0x9de/0x1bf0
[ 131.763670][ T12] ? __pfx_udp_tunnel_nic_device_sync_work+0x10/0x10
[ 131.763710][ T12] ? __pfx_process_one_work+0x10/0x10
[ 131.763747][ T12] ? __pfx_udp_tunnel_nic_device_sync_work+0x10/0x10
[ 131.763787][ T12] worker_thread+0x693/0xeb0
[ 131.763823][ T12] ? __pfx_worker_thread+0x10/0x10
[ 131.763855][ T12] kthread+0x38d/0x4a0
[ 131.763903][ T12] ? __pfx_kthread+0x10/0x10
[ 131.763953][ T12] ret_from_fork+0xb09/0xdb0
[ 131.763997][ T12] ? __pfx_ret_from_fork+0x10/0x10
[ 131.764037][ T12] ? __pfx_kthread+0x10/0x10
[ 131.764082][ T12] ? kthread_affine_node+0x210/0x230
[ 131.764131][ T12] ? __switch_to+0x7a7/0x10e0
[ 131.764174][ T12] ? __pfx_kthread+0x10/0x10
[ 131.764221][ T12] ret_from_fork_asm+0x1a/0x30
[ 131.764269][ T12] </TASK>
[ 131.764280][ T12]
[ 131.810468][ T12] Allocated by task 10356:
[ 131.811305][ T12] kasan_save_stack+0x24/0x50
[ 131.812187][ T12] kasan_save_track+0x14/0x30
[ 131.813069][ T12] __kasan_kmalloc+0xaa/0xb0
[ 131.813959][ T12] __kmalloc_noprof+0x345/0x7f0
[ 131.814882][ T12] udp_tunnel_nic_netdevice_event+0x1285/0x1de0
[ 131.816061][ T12] notifier_call_chain+0xbd/0x430
[ 131.817014][ T12] call_netdevice_notifiers_info+0xbe/0x110
[ 131.819862][ T12] register_netdevice+0x197c/0x2440
[ 131.820868][ T12] nsim_create+0xcd5/0x14b0
[ 131.821733][ T12] __nsim_dev_port_add+0x3c2/0x900
[ 131.822701][ T12] nsim_dev_reload_up+0x42c/0x6a0
[ 131.823584][ T12] devlink_reload+0x2e3/0x7b0
[ 131.824421][ T12] devlink_nl_reload_doit+0x541/0x1160
[ 131.825385][ T12] genl_family_rcv_msg_doit+0x1ff/0x2f0
[ 131.826358][ T12] genl_rcv_msg+0x532/0x7e0
[ 131.827159][ T12] netlink_rcv_skb+0x147/0x430
[ 131.828017][ T12] genl_rcv+0x28/0x40
[ 131.828724][ T12] netlink_unicast+0x58d/0x850
[ 131.829582][ T12] netlink_sendmsg+0x88d/0xd90
[ 131.830439][ T12] ____sys_sendmsg+0xa27/0xb90
[ 131.831286][ T12] ___sys_sendmsg+0x11c/0x1b0
[ 131.832106][ T12] __sys_sendmsg+0x142/0x1f0
[ 131.832927][ T12] do_syscall_64+0x11f/0x860
[ 131.833745][ T12] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 131.834791][ T12]
[ 131.835206][ T12] Freed by task 10421:
[ 131.835916][ T12] kasan_save_stack+0x24/0x50
[ 131.836742][ T12] kasan_save_track+0x14/0x30
[ 131.837564][ T12] kasan_save_free_info+0x3b/0x60
[ 131.838465][ T12] __kasan_slab_free+0x61/0x80
[ 131.839314][ T12] kfree+0x2ca/0x6d0
[ 131.840008][ T12] udp_tunnel_nic_netdevice_event+0xc24/0x1de0
[ 131.841080][ T12] notifier_call_chain+0xbd/0x430
[ 131.841981][ T12] call_netdevice_notifiers_info+0xbe/0x110
[ 131.843022][ T12] unregister_netdevice_many_notify+0xbab/0x2130
[ 131.844129][ T12] unregister_netdevice_queue+0x305/0x3c0
[ 131.845132][ T12] nsim_destroy+0x231/0x980
[ 131.845960][ T12] __nsim_dev_port_del+0x197/0x2c0
[ 131.846860][ T12] nsim_dev_reload_destroy+0x105/0x490
[ 131.847823][ T12] nsim_dev_reload_down+0x67/0xd0
[ 131.848707][ T12] devlink_reload+0x197/0x7b0
[ 131.849545][ T12] devlink_nl_reload_doit+0x541/0x1160
[ 131.850517][ T12] genl_family_rcv_msg_doit+0x1ff/0x2f0
[ 131.851489][ T12] genl_rcv_msg+0x532/0x7e0
[ 131.852282][ T12] netlink_rcv_skb+0x147/0x430
[ 131.853130][ T12] genl_rcv+0x28/0x40
[ 131.853836][ T12] netlink_unicast+0x58d/0x850
[ 131.854689][ T12] netlink_sendmsg+0x88d/0xd90
[ 131.855497][ T12] ____sys_sendmsg+0xa27/0xb90
[ 131.856293][ T12] ___sys_sendmsg+0x11c/0x1b0
[ 131.857067][ T12] __sys_sendmsg+0x142/0x1f0
[ 131.857826][ T12] do_syscall_64+0x11f/0x860
[ 131.858605][ T12] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 131.859579][ T12]
[ 131.859971][ T12] Last potentially related work creation:
[ 131.860910][ T12] kasan_save_stack+0x24/0x50
[ 131.861694][ T12] kasan_record_aux_stack+0xa7/0xc0
[ 131.862563][ T12] insert_work+0x36/0x230
[ 131.863276][ T12] __queue_work+0x474/0x12a0
[ 131.864038][ T12] queue_work_on+0x11c/0x140
[ 131.864800][ T12] __udp_tunnel_nic_del_port+0x2a5/0x350
[ 131.865733][ T12] udp_tunnel_notify_del_rx_port+0x228/0x410
[ 131.866698][ T12] __geneve_sock_release.part.0+0x13b/0x1d0
[ 131.867583][ T12] geneve_sock_release+0x165/0x2c0
[ 131.868346][ T12] geneve_stop+0x19c/0x200
[ 131.869001][ T12] __dev_close_many+0x350/0x720
[ 131.869733][ T12] __dev_change_flags+0x301/0x860
[ 131.870474][ T12] netif_change_flags+0x8e/0x170
[ 131.871208][ T12] do_setlink.constprop.0+0xac0/0x3f80
[ 131.872011][ T12] rtnl_newlink+0x17e7/0x1f30
[ 131.872711][ T12] rtnetlink_rcv_msg+0x9e8/0xfa0
[ 131.873455][ T12] netlink_rcv_skb+0x147/0x430
[ 131.874207][ T12] netlink_unicast+0x58d/0x850
[ 131.874959][ T12] netlink_sendmsg+0x88d/0xd90
[ 131.875685][ T12] ____sys_sendmsg+0xa27/0xb90
[ 131.876406][ T12] ___sys_sendmsg+0x11c/0x1b0
[ 131.877102][ T12] __sys_sendmsg+0x142/0x1f0
[ 131.877803][ T12] do_syscall_64+0x11f/0x860
[ 131.878505][ T12] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 131.879297][ T12]
[ 131.879611][ T12] Second to last potentially related work creation:
[ 131.880480][ T12] kasan_save_stack+0x24/0x50
[ 131.881112][ T12] kasan_record_aux_stack+0xa7/0xc0
[ 131.881818][ T12] insert_work+0x36/0x230
[ 131.882403][ T12] __queue_work+0x474/0x12a0
[ 131.883019][ T12] queue_work_on+0x11c/0x140
[ 131.883634][ T12] __udp_tunnel_nic_add_port+0x6e4/0xd90
[ 131.884385][ T12] udp_tunnel_notify_add_rx_port+0x228/0x410
[ 131.885199][ T12] geneve_sock_add+0x7a3/0xb60
[ 131.885844][ T12] geneve_open+0xde/0x1d0
[ 131.886435][ T12] __dev_open+0x3b8/0x900
[ 131.887017][ T12] __dev_change_flags+0x58f/0x860
[ 131.887692][ T12] netif_change_flags+0x8e/0x170
[ 131.888389][ T12] do_setlink.constprop.0+0xac0/0x3f80
[ 131.889130][ T12] rtnl_newlink+0x17e7/0x1f30
[ 131.889774][ T12] rtnetlink_rcv_msg+0x9e8/0xfa0
[ 131.890455][ T12] netlink_rcv_skb+0x147/0x430
[ 131.891065][ T12] netlink_unicast+0x58d/0x850
[ 131.891654][ T12] netlink_sendmsg+0x88d/0xd90
[ 131.892251][ T12] ____sys_sendmsg+0xa27/0xb90
[ 131.892837][ T12] ___sys_sendmsg+0x11c/0x1b0
[ 131.893412][ T12] __sys_sendmsg+0x142/0x1f0
[ 131.893988][ T12] do_syscall_64+0x11f/0x860
[ 131.894559][ T12] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 131.895275][ T12]
[ 131.895564][ T12] The buggy address belongs to the object at ff11000065dcd200
[ 131.895564][ T12] which belongs to the cache kmalloc-256 of size 256
[ 131.897253][ T12] The buggy address is located 168 bytes inside of
[ 131.897253][ T12] freed 256-byte region [ff11000065dcd200, ff11000065dcd300)
[ 131.898932][ T12]
[ 131.899228][ T12] The buggy address belongs to the physical page:
[ 131.900013][ T12] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x65dcc
[ 131.901089][ T12] head: order:1 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
[ 131.902125][ T12] flags: 0xfff00000000040(head|node=0|zone=1|lastcpupid=0x7ff)
[ 131.903017][ T12] page_type: f5(slab)
[ 131.903477][ T12] raw: 00fff00000000040 ff11000100038b40 dead000000000100 dead000000000122
[ 131.904457][ T12] raw: 0000000000000000 0000000800100010 00000000f5000000 0000000000000000
[ 131.905422][ T12] head: 00fff00000000040 ff11000100038b40 dead000000000100 dead000000000122
[ 131.906405][ T12] head: 0000000000000000 0000000800100010 00000000f5000000 0000000000000000
[ 131.907388][ T12] head: 00fff00000000001 ffffffffffffff81 00000000ffffffff 00000000ffffffff
[ 131.908365][ T12] head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000002
[ 131.909334][ T12] page dumped because: kasan: bad access detected
[ 131.910076][ T12] page_owner tracks the page as allocated
[ 131.910720][ T12] page last allocated via order 1, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 9905, tgid 9901 (repro), ts 81743543441, free_ts 81726980842
[ 131.913040][ T12] post_alloc_hook+0xff/0x130
[ 131.913579][ T12] get_page_from_freelist+0xe82/0x2bf0
[ 131.914216][ T12] __alloc_frozen_pages_noprof+0x27b/0x2a00
[ 131.914849][ T12] new_slab+0xad/0x610
[ 131.915190][ T12] refill_objects+0x10e/0x3d0
[ 131.915587][ T12] __pcs_replace_empty_main+0x352/0x670
[ 131.916050][ T12] __kmalloc_noprof+0x65e/0x7f0
[ 131.916458][ T12] ops_init+0x77/0x5f0
[ 131.916799][ T12] setup_net+0x11a/0x3a0
[ 131.917155][ T12] copy_net_ns+0x351/0x7c0
[ 131.917530][ T12] create_new_namespaces+0x3f6/0xac0
[ 131.917984][ T12] copy_namespaces+0x45c/0x580
[ 131.918389][ T12] copy_process+0x3687/0x7d40
[ 131.918788][ T12] kernel_clone+0x1f6/0x910
[ 131.919183][ T12] __do_sys_clone+0xce/0x120
[ 131.919575][ T12] do_syscall_64+0x11f/0x860
[ 131.919969][ T12] page last free pid 9905 tgid 9901 stack trace:
[ 131.920496][ T12] __free_frozen_pages+0x74d/0x1110
[ 131.920930][ T12] qlist_free_all+0x4c/0xf0
[ 131.921327][ T12] kasan_quarantine_reduce+0x195/0x1e0
[ 131.921791][ T12] __kasan_slab_alloc+0x67/0x90
[ 131.922202][ T12] kmem_cache_alloc_noprof+0x244/0x690
[ 131.922665][ T12] security_inode_alloc+0x3e/0x2d0
[ 131.923099][ T12] inode_init_always_gfp+0xc77/0xfb0
[ 131.923562][ T12] alloc_inode+0x8e/0x250
[ 131.923930][ T12] new_inode+0x22/0x1d0
[ 131.924284][ T12] __debugfs_create_file+0x10b/0x540
[ 131.924737][ T12] debugfs_create_file_full+0x41/0x60
[ 131.925189][ T12] ref_tracker_dir_debugfs+0x18c/0x2e0
[ 131.925658][ T12] alloc_netdev_mqs+0x31f/0x1600
[ 131.926078][ T12] ip6_tnl_init_net+0x12f/0x4c0
[ 131.926491][ T12] ops_init+0x1e2/0x5f0
[ 131.926843][ T12] setup_net+0x11a/0x3a0
[ 131.927201][ T12]
[ 131.927399][ T12] Memory state around the buggy address:
[ 131.927873][ T12] ff11000065dcd180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 131.928542][ T12] ff11000065dcd200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 131.929231][ T12] >ff11000065dcd280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 131.929902][ T12] ^
[ 131.930375][ T12] ff11000065dcd300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 131.931050][ T12] ff11000065dcd380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 131.931723][ T12] ==================================================================
[ 131.932651][ T12] Kernel panic - not syncing: KASAN: panic_on_warn set ...
[ 131.933267][ T12] CPU: 2 UID: 0 PID: 12 Comm: kworker/u16:0 Not tainted 7.1.0-11240-g840ef6c78e6a #31 PREEMPT(full)
[ 131.934181][ T12] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
[ 131.934965][ T12] Workqueue: udp_tunnel_nic udp_tunnel_nic_device_sync_work
[ 131.935585][ T12] Call Trace:
[ 131.935865][ T12] <TASK>
The reproducer creates netdevsim/geneve devices and drives a devlink reload
while UDP tunnel offload notifications are being queued. A queued
udp_tunnel_nic_device_sync_work item later runs with its embedded work_struct
inside struct udp_tunnel_nic, but that struct has already been freed from the
NETDEV_UNREGISTER path. The first dereference in the worker is the mutex lock:
udp_tunnel_nic_device_sync_work()
rtnl_lock();
mutex_lock(&utn->lock);
KASAN reports the read from the freed object at that mutex access.
Possible root cause analysis:
The lifetime of struct udp_tunnel_nic appears to be guarded only by the
utn->work_pending flag, but that flag is not a safe lifetime barrier for the
queued work:
udp_tunnel_nic_device_sync()
queue_work(udp_tunnel_nic_workqueue, &utn->work);
utn->work_pending = 1;
udp_tunnel_nic_unregister()
udp_tunnel_nic_flush(dev, utn);
udp_tunnel_nic_unlock(dev);
if (utn->work_pending)
return;
udp_tunnel_nic_free(utn);
The work is queued before work_pending is set, so unregister can observe
work_pending == 0 and free utn while the work is already queued or starting.
Also, work_pending is only a flag stored inside the object it is supposed to
protect; it is neither a reference count nor a cancel_work_sync()/flush_work()
style guarantee that the worker can no longer dereference the containing
object. Once udp_tunnel_nic_free() runs, the still-queued worker function
recovers the pointer with container_of(work, struct udp_tunnel_nic, work) and
dereferences the freed utn, which matches the alloc/free/use stacks above.
A fix should make unregister synchronize with the embedded work or otherwise
hold a real lifetime reference while the work is queued/running. In particular,
freeing utn based solely on work_pending looks racy.
If you have any questions, please let me know.
Best regards,
Yue
Build/run:
gcc -pthread -static -O2 repro.c -o repro
./repro
C reproducer:
// autogenerated by syzkaller (https://github.com/google/syzkaller)
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/genetlink.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>
// Per-process index of this reproducer instance.
// NOTE(review): presumably assigned when worker processes are forked; it is not
// referenced in the part of the file reviewed here — confirm against the rest.
static unsigned long long procid;
// Suspends the calling thread for `ms` milliseconds by handing the equivalent
// number of microseconds to usleep().
static void sleep_ms(uint64_t ms)
{
    const uint64_t usec_per_msec = 1000;
    usleep(ms * usec_per_msec);
}
// Returns the CLOCK_MONOTONIC time in whole milliseconds.
// Terminates the process with status 1 if the clock cannot be read.
static uint64_t current_time_ms(void)
{
    struct timespec now;
    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
        exit(1);
    const uint64_t from_seconds = (uint64_t)now.tv_sec * 1000;
    const uint64_t from_nanoseconds = (uint64_t)now.tv_nsec / 1000000;
    return from_seconds + from_nanoseconds;
}
// Starts a thread running fn(arg) on a 128 KiB stack.
//
// Creation is attempted up to 100 times; an EAGAIN failure is retried after a
// 50 microsecond pause, any other failure aborts the retry loop. If no thread
// could be created the process exits with status 1.
//
// pthread_create() reports failure through its return value rather than
// through errno, so the retry decision is taken on the returned error number.
static void thread_start(void* (*fn)(void*), void* arg)
{
    pthread_t th;
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setstacksize(&attr, 128 << 10);
    for (int attempt = 0; attempt < 100; attempt++) {
        const int rc = pthread_create(&th, &attr, fn, arg);
        if (rc == 0) {
            pthread_attr_destroy(&attr);
            return;
        }
        if (rc != EAGAIN)
            break;
        usleep(50);
    }
    exit(1);
}
// Mask with `bf_len` consecutive one-bits starting at bit `bf_off` (64-bit).
// A field length of 64 or more yields all ones before the offset shift, which
// avoids the undefined `1ull << 64`. `bf_len` is evaluated twice.
#define BITMASK(bf_off,bf_len) ((((bf_len) >= 64) ? ~0ull : ((1ull << (bf_len)) - 1)) << (bf_off))
// Read-modify-write of a bitfield inside the `type`-sized word at `addr`:
// the word is converted with `htobe`, bits [bf_off, bf_off + bf_len) are
// replaced by `val`, and the result is converted back with `htobe` and stored.
// The whole expansion is parenthesised so it can be used as an expression.
// `addr`, `bf_off` and `bf_len` are evaluated more than once.
#define STORE_BY_BITMASK(type,htobe,addr,val,bf_off,bf_len) (*(type*)(addr) = htobe((htobe(*(type*)(addr)) & ~BITMASK((bf_off), (bf_len))) | (((type)(val) << (bf_off)) & BITMASK((bf_off), (bf_len)))))
// One-shot event flag shared between threads.
// `state` is 0 while the event is clear and 1 once event_set() has fired; the
// same word is used as the futex word that event_wait()/event_timedwait()
// block on.
typedef struct {
int state;
} event_t;
// Puts `ev` into the clear (unset) state with a plain, non-atomic store.
static void event_init(event_t* ev)
{
    int* const flag = &ev->state;
    *flag = 0;
}
// Returns `ev` to the clear (unset) state so it can be signalled again.
// The store is plain (non-atomic), exactly as in event_init().
static void event_reset(event_t* ev)
{
    int* const flag = &ev->state;
    *flag = 0;
}
// Signals `ev` and wakes threads blocked on it.
// Signalling an event that is already set is treated as fatal: the process
// exits with status 1.
static void event_set(event_t* ev)
{
    if (ev->state != 0)
        exit(1);
    // Publish the new state with release ordering before waking the waiters.
    __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
    const int wake_op = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
    // Wake up to 1000000 waiters sleeping on the state word.
    syscall(SYS_futex, &ev->state, wake_op, 1000000);
}
// Blocks until `ev` has been set.
// The state is re-checked with acquire ordering after every futex return, so
// a wakeup that arrives while the state is still 0 simply waits again.
static void event_wait(event_t* ev)
{
    for (;;) {
        if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
            return;
        // Sleep only while the futex word still holds 0; no timeout.
        syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
    }
}
// Returns the current state of `ev` (non-zero once set), read with acquire
// ordering. Never blocks.
static int event_isset(event_t* ev)
{
    const int current = __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
    return current;
}
// Waits up to `timeout` milliseconds for `ev` to be set.
// Returns 1 if the event was observed set, 0 once more than `timeout`
// milliseconds have elapsed without it being set.
static int event_timedwait(event_t* ev, uint64_t timeout)
{
    const uint64_t begin = current_time_ms();
    uint64_t elapsed = 0;
    do {
        // Sleep for whatever is left of the budget, split into sec/nsec.
        const uint64_t left = timeout - elapsed;
        struct timespec ts;
        ts.tv_sec = left / 1000;
        ts.tv_nsec = (left % 1000) * 1000 * 1000;
        syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
        if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
            return 1;
        elapsed = current_time_ms() - begin;
    } while (elapsed <= timeout);
    return 0;
}
// Formats `what` (printf-style, at most 1023 characters kept) and writes the
// result to the existing file `file` in a single write() call.
// Returns true only if the file could be opened for writing and the whole
// text was written; on a failed or short write, errno holds the value left by
// write() rather than by the subsequent close().
static bool write_file(const char* file, const char* what, ...)
{
    char text[1024];
    va_list ap;
    va_start(ap, what);
    vsnprintf(text, sizeof(text), what, ap);
    va_end(ap);
    text[sizeof(text) - 1] = 0;
    const int text_len = strlen(text);

    const int fd = open(file, O_WRONLY | O_CLOEXEC);
    if (fd == -1)
        return false;

    const bool complete = write(fd, text, text_len) == text_len;
    // Capture errno before close() gets a chance to overwrite it.
    const int write_errno = errno;
    close(fd);
    if (!complete)
        errno = write_errno;
    return complete;
}
// Builder/receive buffer for a single netlink message.
struct nlmsg {
// Write cursor: next free byte inside `buf`.
char* pos;
// Number of currently open nested attributes (entries used in `nested`).
int nesting;
// Stack of open nested attributes whose nla_len is filled in by netlink_done().
struct nlattr* nested[8];
// Message bytes; starts with the struct nlmsghdr. Also reused for the reply.
char buf[4096];
};
// Resets `nlmsg` and starts a new request of type `typ`.
// The whole builder is zeroed, NLM_F_REQUEST | NLM_F_ACK are always set in
// addition to `flags`, and the `size` bytes at `data` (the family-specific
// header) are copied directly after the netlink header. The write cursor is
// left at the aligned end of that header.
static void netlink_init(struct nlmsg* nlmsg, int typ, int flags,
                         const void* data, int size)
{
    memset(nlmsg, 0, sizeof(*nlmsg));
    struct nlmsghdr* const hdr = (struct nlmsghdr*)nlmsg->buf;
    char* const payload = (char*)(hdr + 1);
    hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
    hdr->nlmsg_type = typ;
    memcpy(payload, data, size);
    nlmsg->pos = payload + NLMSG_ALIGN(size);
}
// Appends one attribute of type `typ` carrying `size` payload bytes from
// `data` at the write cursor and advances the cursor to the next aligned
// position.
//
// The attribute must fit into the remaining space of nlmsg->buf. If it does
// not (or `size` is negative, or the cursor is already outside the buffer) the
// process exits with status 1 before anything is written, instead of
// overflowing the buffer and only being caught later by netlink_send_ext().
static void netlink_attr(struct nlmsg* nlmsg, int typ,
                         const void* data, int size)
{
    const char* const buf_end = nlmsg->buf + sizeof(nlmsg->buf);
    if (size < 0 || nlmsg->pos < nlmsg->buf || nlmsg->pos > buf_end)
        exit(1);
    const size_t room = (size_t)(buf_end - nlmsg->pos);
    const size_t need = NLMSG_ALIGN(sizeof(struct nlattr) + (size_t)size);
    if (need > room)
        exit(1);

    struct nlattr* attr = (struct nlattr*)nlmsg->pos;
    // nla_len covers header plus payload, without the alignment padding.
    attr->nla_len = sizeof(*attr) + size;
    attr->nla_type = typ;
    if (size > 0)
        memcpy(attr + 1, data, size);
    nlmsg->pos += NLMSG_ALIGN(attr->nla_len);
}
// Opens a nested attribute of type `typ` at the write cursor.
// Only the attribute header is emitted; its nla_len is filled in by the
// matching netlink_done(). Exits with status 1 if the nesting stack is full
// or the header does not fit into the remaining buffer, rather than writing
// out of bounds.
static void netlink_nest(struct nlmsg* nlmsg, int typ)
{
    const int max_depth = (int)(sizeof(nlmsg->nested) / sizeof(nlmsg->nested[0]));
    const char* const buf_end = nlmsg->buf + sizeof(nlmsg->buf);
    if (nlmsg->nesting < 0 || nlmsg->nesting >= max_depth)
        exit(1);
    if (nlmsg->pos < nlmsg->buf || nlmsg->pos > buf_end ||
        (size_t)(buf_end - nlmsg->pos) < sizeof(struct nlattr))
        exit(1);

    struct nlattr* attr = (struct nlattr*)nlmsg->pos;
    attr->nla_type = typ;
    nlmsg->pos += sizeof(*attr);
    nlmsg->nested[nlmsg->nesting++] = attr;
}
// Closes the most recently opened nested attribute: pops it from the nesting
// stack and sets its nla_len to the number of bytes written since it was
// opened (header included).
// Exits with status 1 when there is no open nest to close, instead of
// indexing the stack with a negative value.
static void netlink_done(struct nlmsg* nlmsg)
{
    const int max_depth = (int)(sizeof(nlmsg->nested) / sizeof(nlmsg->nested[0]));
    if (nlmsg->nesting <= 0 || nlmsg->nesting > max_depth)
        exit(1);
    struct nlattr* attr = nlmsg->nested[--nlmsg->nesting];
    attr->nla_len = nlmsg->pos - (char*)attr;
}
// Sends the message built in `nlmsg` over `sock` and reads one reply datagram
// back into the same buffer.
//
// Returns 0 when the reply is NLMSG_DONE, when it has type `reply_type` and
// `reply_len` is non-NULL (the received length is then stored in *reply_len),
// or when it is an NLMSG_ERROR carrying error 0. For an NLMSG_ERROR with a
// non-zero code, errno is set to the positive code and its negation returned.
// Transport failures and malformed replies return -1, or exit the process
// with status 1 when `dofail` is true. *reply_len, if given, is reset to 0 as
// soon as the receive has been attempted.
//
// A builder that overran its buffer or still has open nests always exits.
static int netlink_send_ext(struct nlmsg* nlmsg, int sock,
                            uint16_t reply_type, int* reply_len, bool dofail)
{
    if (nlmsg->nesting || nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf))
        exit(1);

    struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
    hdr->nlmsg_len = nlmsg->pos - nlmsg->buf;

    // Destination address: AF_NETLINK with every other field left zero.
    struct sockaddr_nl addr;
    memset(&addr, 0, sizeof(addr));
    addr.nl_family = AF_NETLINK;

    ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr));
    if (n != (ssize_t)hdr->nlmsg_len)
        goto fail;

    // The reply overwrites the request in place.
    n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0);
    if (reply_len)
        *reply_len = 0;
    if (n < 0)
        goto fail;
    if (n < (ssize_t)sizeof(struct nlmsghdr))
        goto fail_invalid;

    if (hdr->nlmsg_type == NLMSG_DONE)
        return 0;
    if (reply_len && hdr->nlmsg_type == reply_type) {
        *reply_len = n;
        return 0;
    }

    // Anything else has to be a complete NLMSG_ERROR (which doubles as the ACK).
    if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr)) ||
        hdr->nlmsg_type != NLMSG_ERROR)
        goto fail_invalid;

    errno = -((struct nlmsgerr*)(hdr + 1))->error;
    return -errno;

fail_invalid:
    errno = EINVAL;
fail:
    if (dofail)
        exit(1);
    return -1;
}
// Convenience wrapper around netlink_send_ext() for requests whose only
// expected answer is the ACK: no reply type, no reply length, and transport
// or framing failures terminate the process.
static int netlink_send(struct nlmsg* nlmsg, int sock)
{
    const uint16_t no_reply_type = 0;
    const bool exit_on_failure = true;
    return netlink_send_ext(nlmsg, sock, no_reply_type, NULL, exit_on_failure);
}
// Resolves the numeric id of the generic-netlink family `family_name` with a
// CTRL_CMD_GETFAMILY request on `sock`.
//
// Returns the family id, or -1 if the request failed or the reply carried no
// usable CTRL_ATTR_FAMILY_ID (errno is EINVAL in the latter case). `dofail` is
// forwarded to netlink_send_ext().
//
// The attribute walk stops at the first attribute whose header or declared
// length does not fit into the received bytes, so a zero or truncated nla_len
// can neither stall the loop nor cause reads past the reply.
static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, const char* family_name, bool dofail)
{
    struct genlmsghdr genlhdr;
    memset(&genlhdr, 0, sizeof(genlhdr));
    genlhdr.cmd = CTRL_CMD_GETFAMILY;
    netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr));
    netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, strnlen(family_name, GENL_NAMSIZ - 1) + 1);
    int n = 0;
    int err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail);
    if (err < 0) {
        return -1;
    }
    uint16_t id = 0;
    const char* const end = nlmsg->buf + n;
    char* cursor = nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr));
    while (cursor < end && (size_t)(end - cursor) >= sizeof(struct nlattr)) {
        const struct nlattr* attr = (const struct nlattr*)cursor;
        if (attr->nla_len < sizeof(*attr) || attr->nla_len > (size_t)(end - cursor))
            break;
        if (attr->nla_type == CTRL_ATTR_FAMILY_ID) {
            if (attr->nla_len >= sizeof(*attr) + sizeof(id))
                memcpy(&id, attr + 1, sizeof(id));
            break;
        }
        cursor += NLMSG_ALIGN(attr->nla_len);
    }
    if (!id) {
        errno = EINVAL;
        return -1;
    }
    // Read one more datagram off the socket and discard it.
    // NOTE(review): presumably the trailing ACK for the request — confirm.
    recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0);
    return id;
}
// Returns the length of the netlink message that starts `offset` bytes into
// nlmsg->buf, or -1 when there is no further complete message within the
// first `total_len` bytes.
//
// A message counts as complete only if its header fits into the remaining
// bytes and its nlmsg_len is at least one header and at most the remaining
// bytes. The returned length is therefore never 0, so a caller advancing
// `offset` by it always makes progress.
static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset,
                            unsigned int total_len)
{
    if (offset >= total_len)
        return -1;
    const unsigned int remaining = total_len - offset;
    if (remaining < sizeof(struct nlmsghdr))
        return -1;
    const struct nlmsghdr* hdr = (const struct nlmsghdr*)(nlmsg->buf + offset);
    if (hdr->nlmsg_len < sizeof(*hdr) || hdr->nlmsg_len > remaining)
        return -1;
    return hdr->nlmsg_len;
}
// Value sent as both IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES for every
// device created through netlink_add_device_impl() (and for the veth peer).
static unsigned int queue_count = 2;
// Starts an RTM_NEWLINK (create, exclusive) request for a link of kind `type`.
// Adds the optional interface name, the TX/RX queue counts and opens
// IFLA_LINKINFO with IFLA_INFO_KIND inside it. The IFLA_LINKINFO nest is left
// open: the caller appends kind-specific data and must close it with
// netlink_done(). When `up` is true the link is requested with IFF_UP set.
static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type,
                                    const char* name, bool up)
{
    struct ifinfomsg info;
    memset(&info, 0, sizeof(info));
    if (up) {
        info.ifi_change = IFF_UP;
        info.ifi_flags = IFF_UP;
    }
    netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &info, sizeof(info));

    // The name is sent without its terminating NUL.
    if (name != NULL)
        netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name));
    netlink_attr(nlmsg, IFLA_NUM_TX_QUEUES, &queue_count, sizeof(queue_count));
    netlink_attr(nlmsg, IFLA_NUM_RX_QUEUES, &queue_count, sizeof(queue_count));

    netlink_nest(nlmsg, IFLA_LINKINFO);
    netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type));
}
// Creates a link of kind `type` named `name` with no kind-specific data.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type,
                               const char* name)
{
    netlink_add_device_impl(nlmsg, type, name, false);
    // Close the IFLA_LINKINFO nest opened by netlink_add_device_impl().
    netlink_done(nlmsg);
    (void)netlink_send(nlmsg, sock);
}
// Creates a veth pair: `name` on one side and `peer` on the other.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name,
                             const char* peer)
{
    const size_t peer_len = strlen(peer);

    netlink_add_device_impl(nlmsg, "veth", name, false);
    netlink_nest(nlmsg, IFLA_INFO_DATA);
    netlink_nest(nlmsg, VETH_INFO_PEER);
    // Reserve room for the peer's struct ifinfomsg; the bytes stay zero
    // because netlink_init() cleared the whole buffer.
    nlmsg->pos += sizeof(struct ifinfomsg);
    netlink_attr(nlmsg, IFLA_IFNAME, peer, peer_len);
    netlink_attr(nlmsg, IFLA_NUM_TX_QUEUES, &queue_count, sizeof(queue_count));
    netlink_attr(nlmsg, IFLA_NUM_RX_QUEUES, &queue_count, sizeof(queue_count));

    // Close VETH_INFO_PEER, IFLA_INFO_DATA and IFLA_LINKINFO, innermost first.
    for (int open_nests = 3; open_nests > 0; open_nests--)
        netlink_done(nlmsg);

    (void)netlink_send(nlmsg, sock);
}
// Creates an "xfrm" link named `name`, requested up, with interface id 1.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_xfrm(struct nlmsg* nlmsg, int sock, const char* name)
{
    // NOTE(review): attribute type 2 is presumably IFLA_XFRM_IF_ID from
    // linux/if_link.h, spelled numerically — confirm.
    enum { xfrm_if_id_attr = 2 };
    const int if_id = 1;

    netlink_add_device_impl(nlmsg, "xfrm", name, true);
    netlink_nest(nlmsg, IFLA_INFO_DATA);
    netlink_attr(nlmsg, xfrm_if_id_attr, &if_id, sizeof(if_id));
    netlink_done(nlmsg); // IFLA_INFO_DATA
    netlink_done(nlmsg); // IFLA_LINKINFO
    (void)netlink_send(nlmsg, sock);
}
// Creates an "hsr" link named `name` on top of the two existing interfaces
// `slave1` and `slave2`, identified to the kernel by interface index.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name,
                            const char* slave1, const char* slave2)
{
    netlink_add_device_impl(nlmsg, "hsr", name, false);
    netlink_nest(nlmsg, IFLA_INFO_DATA);

    const int first_index = if_nametoindex(slave1);
    netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &first_index, sizeof(first_index));
    const int second_index = if_nametoindex(slave2);
    netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &second_index, sizeof(second_index));

    netlink_done(nlmsg); // IFLA_INFO_DATA
    netlink_done(nlmsg); // IFLA_LINKINFO
    (void)netlink_send(nlmsg, sock);
}
// Creates a link of kind `type` named `name` stacked on the existing
// interface `link`. IFLA_LINK is a top-level attribute, so it is added only
// after the IFLA_LINKINFO nest has been closed.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_linked(struct nlmsg* nlmsg, int sock, const char* type, const char* name, const char* link)
{
    netlink_add_device_impl(nlmsg, type, name, false);
    netlink_done(nlmsg); // IFLA_LINKINFO

    const int lower_index = if_nametoindex(link);
    netlink_attr(nlmsg, IFLA_LINK, &lower_index, sizeof(lower_index));
    (void)netlink_send(nlmsg, sock);
}
// Creates a "vlan" link named `name` on top of `link` with VLAN id `id` and
// protocol `proto`; both values are copied into the message exactly as
// passed in.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_vlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16_t id, uint16_t proto)
{
    netlink_add_device_impl(nlmsg, "vlan", name, false);

    netlink_nest(nlmsg, IFLA_INFO_DATA);
    netlink_attr(nlmsg, IFLA_VLAN_ID, &id, sizeof(id));
    netlink_attr(nlmsg, IFLA_VLAN_PROTOCOL, &proto, sizeof(proto));
    netlink_done(nlmsg); // IFLA_INFO_DATA
    netlink_done(nlmsg); // IFLA_LINKINFO

    // IFLA_LINK belongs at the top level, after all nests are closed.
    const int lower_index = if_nametoindex(link);
    netlink_attr(nlmsg, IFLA_LINK, &lower_index, sizeof(lower_index));
    (void)netlink_send(nlmsg, sock);
}
// Creates a "macvlan" link named `name` in bridge mode on top of `link`.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_macvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link)
{
    const uint32_t bridge_mode = MACVLAN_MODE_BRIDGE;

    netlink_add_device_impl(nlmsg, "macvlan", name, false);

    netlink_nest(nlmsg, IFLA_INFO_DATA);
    netlink_attr(nlmsg, IFLA_MACVLAN_MODE, &bridge_mode, sizeof(bridge_mode));
    netlink_done(nlmsg); // IFLA_INFO_DATA
    netlink_done(nlmsg); // IFLA_LINKINFO

    // IFLA_LINK belongs at the top level, after all nests are closed.
    const int lower_index = if_nametoindex(link);
    netlink_attr(nlmsg, IFLA_LINK, &lower_index, sizeof(lower_index));
    (void)netlink_send(nlmsg, sock);
}
// Creates a "geneve" link named `name` with VNI `vni`.
// `addr4` and `addr6` are optional remote endpoints; each one that is
// non-NULL is added as IFLA_GENEVE_REMOTE / IFLA_GENEVE_REMOTE6 respectively.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_geneve(struct nlmsg* nlmsg, int sock, const char* name, uint32_t vni, struct in_addr* addr4, struct in6_addr* addr6)
{
    netlink_add_device_impl(nlmsg, "geneve", name, false);

    netlink_nest(nlmsg, IFLA_INFO_DATA);
    netlink_attr(nlmsg, IFLA_GENEVE_ID, &vni, sizeof(vni));
    if (addr4 != NULL)
        netlink_attr(nlmsg, IFLA_GENEVE_REMOTE, addr4, sizeof(*addr4));
    if (addr6 != NULL)
        netlink_attr(nlmsg, IFLA_GENEVE_REMOTE6, addr6, sizeof(*addr6));
    netlink_done(nlmsg); // IFLA_INFO_DATA
    netlink_done(nlmsg); // IFLA_LINKINFO

    (void)netlink_send(nlmsg, sock);
}
// ipvlan netlink constants pinned to fixed numeric values for this reproducer.
// NOTE(review): presumably defined locally because some linux/if_link.h
// versions do not provide them — confirm.
#define IFLA_IPVLAN_FLAGS 2
#define IPVLAN_MODE_L3S 2
// Drop any definition a system header may have supplied before pinning the value.
#undef IPVLAN_F_VEPA
#define IPVLAN_F_VEPA 2
// Creates an "ipvlan" link named `name` on top of `link` with the given
// `mode` and `flags`, both copied into the message exactly as passed in.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_add_ipvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16_t mode, uint16_t flags)
{
    netlink_add_device_impl(nlmsg, "ipvlan", name, false);

    netlink_nest(nlmsg, IFLA_INFO_DATA);
    netlink_attr(nlmsg, IFLA_IPVLAN_MODE, &mode, sizeof(mode));
    netlink_attr(nlmsg, IFLA_IPVLAN_FLAGS, &flags, sizeof(flags));
    netlink_done(nlmsg); // IFLA_INFO_DATA
    netlink_done(nlmsg); // IFLA_LINKINFO

    // IFLA_LINK belongs at the top level, after all nests are closed.
    const int lower_index = if_nametoindex(link);
    netlink_attr(nlmsg, IFLA_LINK, &lower_index, sizeof(lower_index));
    (void)netlink_send(nlmsg, sock);
}
// Modifies the existing interface `name` with a single RTM_NEWLINK request.
// Each change is optional: `up` requests IFF_UP, `new_name` renames the
// interface, `master` enslaves it to that interface, and a non-zero `macsize`
// sets the hardware address from `mac`.
// The kernel's verdict is deliberately ignored; transport failures still
// terminate the process inside netlink_send().
static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up,
                                  const char* master, const void* mac, int macsize,
                                  const char* new_name)
{
    struct ifinfomsg info;
    memset(&info, 0, sizeof(info));
    if (up) {
        info.ifi_change = IFF_UP;
        info.ifi_flags = IFF_UP;
    }
    info.ifi_index = if_nametoindex(name);
    netlink_init(nlmsg, RTM_NEWLINK, 0, &info, sizeof(info));

    if (new_name != NULL)
        netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name));
    if (master != NULL) {
        const int master_index = if_nametoindex(master);
        netlink_attr(nlmsg, IFLA_MASTER, &master_index, sizeof(master_index));
    }
    if (macsize != 0)
        netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize);

    (void)netlink_send(nlmsg, sock);
}
// Adds (or replaces) the address `addr` on interface `dev`.
// An `addrsize` of 4 selects AF_INET with a /24 prefix; any other size is
// treated as AF_INET6 with a /120 prefix. The same bytes are sent as both
// IFA_LOCAL and IFA_ADDRESS. Returns the result of netlink_send().
static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev,
                            const void* addr, int addrsize)
{
    const bool is_ipv4 = addrsize == 4;

    struct ifaddrmsg req;
    memset(&req, 0, sizeof(req));
    req.ifa_family = is_ipv4 ? AF_INET : AF_INET6;
    req.ifa_prefixlen = is_ipv4 ? 24 : 120;
    req.ifa_scope = RT_SCOPE_UNIVERSE;
    req.ifa_index = if_nametoindex(dev);

    netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &req, sizeof(req));
    netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize);
    netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize);
    return netlink_send(nlmsg, sock);
}
// Parses the dotted-quad string `addr` and assigns it to interface `dev`.
// If `addr` is not a valid IPv4 address nothing is sent: inet_pton() leaves
// the destination untouched in that case, and the previous behaviour of
// forwarding the uninitialised struct to the kernel is avoided.
// The kernel's verdict on the request itself is deliberately ignored.
static void netlink_add_addr4(struct nlmsg* nlmsg, int sock,
                              const char* dev, const char* addr)
{
    struct in_addr in_addr;
    if (inet_pton(AF_INET, addr, &in_addr) != 1)
        return;
    (void)netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr));
}
// Parses the textual IPv6 address `addr` and assigns it to interface `dev`.
// If `addr` is not a valid IPv6 address nothing is sent: inet_pton() leaves
// the destination untouched in that case, and the previous behaviour of
// forwarding the uninitialised struct to the kernel is avoided.
// The kernel's verdict on the request itself is deliberately ignored.
static void netlink_add_addr6(struct nlmsg* nlmsg, int sock,
                              const char* dev, const char* addr)
{
    struct in6_addr in6_addr;
    if (inet_pton(AF_INET6, addr, &in6_addr) != 1)
        return;
    (void)netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr));
}
// Shared scratch message; initialize_devlink_ports() and
// netlink_wireguard_setup() build generic-netlink requests in it, and the
// reply is received into the same buffer.
static struct nlmsg nlmsg;
// devlink generic-netlink family name plus the command and attribute numbers
// used by initialize_devlink_ports().
// NOTE(review): the numeric values presumably mirror linux/devlink.h — confirm.
#define DEVLINK_FAMILY_NAME "devlink"
#define DEVLINK_CMD_PORT_GET 5
#define DEVLINK_ATTR_BUS_NAME 1
#define DEVLINK_ATTR_DEV_NAME 2
#define DEVLINK_ATTR_NETDEV_NAME 7
// Second scratch message, used for rtnetlink requests that are issued while
// `nlmsg` still holds a devlink reply being parsed.
static struct nlmsg nlmsg2;
// Renames the netdevs behind the devlink ports of device
// `bus_name`/`dev_name` to "<netdev_prefix><index>" and brings them up.
//
// A DEVLINK_CMD_PORT_GET dump is requested and the first reply datagram is
// walked message by message; the index counts messages, starting at 0. For
// each message the first DEVLINK_ATTR_NETDEV_NAME attribute names the
// interface to change. Failure to open either socket exits the process; a
// failed family lookup or dump request just returns after closing the sockets.
//
// Malformed replies cannot stall the walk: a message of length 0 ends the
// outer loop, and an attribute whose header does not fit or whose nla_len is
// smaller than a header ends the inner loop.
static void initialize_devlink_ports(const char* bus_name, const char* dev_name,
                                     const char* netdev_prefix)
{
    struct genlmsghdr genlhdr;
    int len, total_len, id, err, offset;
    uint16_t netdev_index;
    int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
    if (sock == -1)
        exit(1);
    int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (rtsock == -1)
        exit(1);
    id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true);
    if (id == -1)
        goto error;
    memset(&genlhdr, 0, sizeof(genlhdr));
    genlhdr.cmd = DEVLINK_CMD_PORT_GET;
    netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr));
    netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
    netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);
    err = netlink_send_ext(&nlmsg, sock, id, &total_len, true);
    if (err < 0) {
        goto error;
    }
    offset = 0;
    netdev_index = 0;
    while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) {
        // A non-positive length would never advance `offset`.
        if (len <= 0)
            break;
        const char* const msg_end = nlmsg.buf + offset + len;
        char* cursor = nlmsg.buf + offset + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr));
        while (cursor < msg_end && (size_t)(msg_end - cursor) >= sizeof(struct nlattr)) {
            struct nlattr* attr = (struct nlattr*)cursor;
            if (attr->nla_len < sizeof(*attr))
                break;
            if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) {
                char* port_name;
                char netdev_name[IFNAMSIZ];
                port_name = (char*)(attr + 1);
                snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, netdev_index);
                // The rename goes through nlmsg2 so the reply in nlmsg stays intact.
                netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, netdev_name);
                break;
            }
            cursor += NLMSG_ALIGN(attr->nla_len);
        }
        offset += len;
        netdev_index++;
    }
error:
    close(rtsock);
    close(sock);
}
// printf-style templates for per-device IPv4/IPv6 addresses and a base MAC
// value.
// NOTE(review): none of these is referenced in the part of the file reviewed
// here; presumably they are used when addressing the test devices — confirm.
#define DEV_IPV4 "172.20.20.%d"
#define DEV_IPV6 "fe80::%02x"
#define DEV_MAC 0x00aaaaaaaaaa
// (Re)creates netdevsim device `addr` with `port_count` ports through sysfs
// and, if creation succeeded, renames and brings up its port netdevs via
// initialize_devlink_ports().
//
// The devlink device name is formatted with "%u" so it matches the unsigned
// id that was written to new_device; the previous "%d" did not match the
// argument type and would print a different (negative) name for large ids.
static void netdevsim_add(unsigned int addr, unsigned int port_count)
{
    // Remove any existing device with this id first; the result is ignored.
    write_file("/sys/bus/netdevsim/del_device", "%u", addr);
    if (!write_file("/sys/bus/netdevsim/new_device", "%u %u", addr, port_count))
        return;
    char buf[32];
    snprintf(buf, sizeof(buf), "netdevsim%u", addr);
    initialize_devlink_ports("netdevsim", buf, "netdevsim");
}
// Generic-netlink family name resolved by netlink_wireguard_setup().
#define WG_GENL_NAME "wireguard"
// WireGuard generic-netlink commands; values are implicit (0 and 1).
// NOTE(review): presumably mirrors the kernel's linux/wireguard.h — confirm.
enum wg_cmd {
WG_CMD_GET_DEVICE,
WG_CMD_SET_DEVICE,
};
// Top-level attribute types of a WireGuard device message; values are
// implicit, counting up from 0 (WGDEVICE_A_UNSPEC).
// NOTE(review): presumably mirrors the kernel's linux/wireguard.h — confirm.
enum wgdevice_attribute {
WGDEVICE_A_UNSPEC,
WGDEVICE_A_IFINDEX,
WGDEVICE_A_IFNAME,
WGDEVICE_A_PRIVATE_KEY,
WGDEVICE_A_PUBLIC_KEY,
WGDEVICE_A_FLAGS,
WGDEVICE_A_LISTEN_PORT,
WGDEVICE_A_FWMARK,
WGDEVICE_A_PEERS,
};
// Attribute types used inside each peer entry nested under WGDEVICE_A_PEERS;
// values are implicit, counting up from 0 (WGPEER_A_UNSPEC).
// NOTE(review): presumably mirrors the kernel's linux/wireguard.h — confirm.
enum wgpeer_attribute {
WGPEER_A_UNSPEC,
WGPEER_A_PUBLIC_KEY,
WGPEER_A_PRESHARED_KEY,
WGPEER_A_FLAGS,
WGPEER_A_ENDPOINT,
WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
WGPEER_A_LAST_HANDSHAKE_TIME,
WGPEER_A_RX_BYTES,
WGPEER_A_TX_BYTES,
WGPEER_A_ALLOWEDIPS,
WGPEER_A_PROTOCOL_VERSION,
};
// Attribute types used inside each allowed-IP entry nested under
// WGPEER_A_ALLOWEDIPS; values are implicit, counting up from 0
// (WGALLOWEDIP_A_UNSPEC).
// NOTE(review): presumably mirrors the kernel's linux/wireguard.h — confirm.
enum wgallowedip_attribute {
WGALLOWEDIP_A_UNSPEC,
WGALLOWEDIP_A_FAMILY,
WGALLOWEDIP_A_IPADDR,
WGALLOWEDIP_A_CIDR_MASK,
};
/*
 * Configure the WireGuard devices wg0, wg1 and wg2 over generic netlink.
 *
 * One WG_CMD_SET_DEVICE message is sent per device. Each message carries the
 * device's private key and listen port plus two peers (the other two devices).
 * In every message the first peer is allowed the lower half of the IPv4 and
 * IPv6 address space (0.0.0.0/1, ::/1) and the second peer the upper half
 * (128.0.0.0/1, 8000::/1).
 *
 * Best effort: returns silently if the socket cannot be opened or the family
 * id cannot be resolved, and the result of each send is ignored.
 */
static void netlink_wireguard_setup(void)
{
  const char ifname_a[] = "wg0";
  const char ifname_b[] = "wg1";
  const char ifname_c[] = "wg2";
  const char private_a[] = "\xa0\x5c\xa8\x4f\x6c\x9c\x8e\x38\x53\xe2\xfd\x7a\x70\xae\x0f\xb2\x0f\xa1\x52\x60\x0c\xb0\x08\x45\x17\x4f\x08\x07\x6f\x8d\x78\x43";
  const char private_b[] = "\xb0\x80\x73\xe8\xd4\x4e\x91\xe3\xda\x92\x2c\x22\x43\x82\x44\xbb\x88\x5c\x69\xe2\x69\xc8\xe9\xd8\x35\xb1\x14\x29\x3a\x4d\xdc\x6e";
  const char private_c[] = "\xa0\xcb\x87\x9a\x47\xf5\xbc\x64\x4c\x0e\x69\x3f\xa6\xd0\x31\xc7\x4a\x15\x53\xb6\xe9\x01\xb9\xff\x2f\x51\x8c\x78\x04\x2f\xb5\x42";
  const char public_a[] = "\x97\x5c\x9d\x81\xc9\x83\xc8\x20\x9e\xe7\x81\x25\x4b\x89\x9f\x8e\xd9\x25\xae\x9f\x09\x23\xc2\x3c\x62\xf5\x3c\x57\xcd\xbf\x69\x1c";
  const char public_b[] = "\xd1\x73\x28\x99\xf6\x11\xcd\x89\x94\x03\x4d\x7f\x41\x3d\xc9\x57\x63\x0e\x54\x93\xc2\x85\xac\xa4\x00\x65\xcb\x63\x11\xbe\x69\x6b";
  const char public_c[] = "\xf4\x4d\xa3\x67\xa8\x8e\xe6\x56\x4f\x02\x02\x11\x45\x67\x27\x08\x2f\x5c\xeb\xee\x8b\x1b\xf5\xeb\x73\x37\x34\x1b\x45\x9b\x39\x22";
  const uint16_t listen_a = 20001;
  const uint16_t listen_b = 20002;
  const uint16_t listen_c = 20003;
  const uint16_t af_inet = AF_INET;
  const uint16_t af_inet6 = AF_INET6;

  /* Loopback endpoints; only the combinations actually used are defined. */
  const struct sockaddr_in endpoint_b_v4 = {
      .sin_family = AF_INET,
      .sin_port = htons(listen_b),
      .sin_addr = {htonl(INADDR_LOOPBACK)}};
  const struct sockaddr_in endpoint_c_v4 = {
      .sin_family = AF_INET,
      .sin_port = htons(listen_c),
      .sin_addr = {htonl(INADDR_LOOPBACK)}};
  struct sockaddr_in6 endpoint_a_v6 = {
      .sin6_family = AF_INET6,
      .sin6_port = htons(listen_a)};
  endpoint_a_v6.sin6_addr = in6addr_loopback;
  struct sockaddr_in6 endpoint_c_v6 = {
      .sin6_family = AF_INET6,
      .sin6_port = htons(listen_c)};
  endpoint_c_v6.sin6_addr = in6addr_loopback;

  const struct in_addr first_half_v4 = {0};
  const struct in_addr second_half_v4 = {(uint32_t)htonl(128 << 24)};
  const struct in6_addr first_half_v6 = {{{0}}};
  const struct in6_addr second_half_v6 = {{{0x80}}};
  const uint8_t half_cidr = 1;
  /* Consumed in order: two keepalive values per device. */
  const uint16_t persistent_keepalives[] = {1, 3, 7, 9, 14, 19};
  struct genlmsghdr genlhdr = {
      .cmd = WG_CMD_SET_DEVICE,
      .version = 1};

  /* Per-peer and per-device parameters that differ between the messages. */
  struct wg_peer_desc {
    const char* public_key;
    const void* endpoint;
    int endpoint_len;
  };
  struct wg_dev_desc {
    const char* ifname;
    const char* private_key;
    const uint16_t* listen_port;
    struct wg_peer_desc peers[2];
  };
  const struct wg_dev_desc devs[] = {
      {ifname_a, private_a, &listen_a, {{public_b, &endpoint_b_v4, sizeof(endpoint_b_v4)}, {public_c, &endpoint_c_v6, sizeof(endpoint_c_v6)}}},
      {ifname_b, private_b, &listen_b, {{public_a, &endpoint_a_v6, sizeof(endpoint_a_v6)}, {public_c, &endpoint_c_v4, sizeof(endpoint_c_v4)}}},
      {ifname_c, private_c, &listen_c, {{public_a, &endpoint_a_v6, sizeof(endpoint_a_v6)}, {public_b, &endpoint_b_v4, sizeof(endpoint_b_v4)}}},
  };
  /* Indexed by peer position: peer 0 gets the lower half, peer 1 the upper. */
  const struct in_addr* const halves_v4[2] = {&first_half_v4, &second_half_v4};
  const struct in6_addr* const halves_v6[2] = {&first_half_v6, &second_half_v6};

  int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  if (sock == -1)
    return;

  int id = netlink_query_family_id(&nlmsg, sock, WG_GENL_NAME, true);
  if (id != -1) {
    for (unsigned d = 0; d < sizeof(devs) / sizeof(devs[0]); d++) {
      const struct wg_dev_desc* dev = &devs[d];

      netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
      netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, dev->ifname, strlen(dev->ifname) + 1);
      netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, dev->private_key, 32);
      netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, dev->listen_port, 2);
      netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS);

      for (unsigned p = 0; p < 2; p++) {
        const struct wg_peer_desc* peer = &dev->peers[p];

        netlink_nest(&nlmsg, NLA_F_NESTED | 0);
        netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, peer->public_key, 32);
        netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, peer->endpoint, peer->endpoint_len);
        netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[2 * d + p], 2);
        netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);

        /* IPv4 half. */
        netlink_nest(&nlmsg, NLA_F_NESTED | 0);
        netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
        netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, halves_v4[p], sizeof(*halves_v4[p]));
        netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
        netlink_done(&nlmsg);

        /* IPv6 half. */
        netlink_nest(&nlmsg, NLA_F_NESTED | 0);
        netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
        netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, halves_v6[p], sizeof(*halves_v6[p]));
        netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
        netlink_done(&nlmsg);

        netlink_done(&nlmsg); /* closes WGPEER_A_ALLOWEDIPS */
        netlink_done(&nlmsg); /* closes this peer entry */
      }

      netlink_done(&nlmsg); /* closes WGDEVICE_A_PEERS */

      /* A failed send is deliberately ignored; the next device is still tried. */
      (void)netlink_send(&nlmsg, sock);
    }
  }
  close(sock);
}
/*
 * Create the fixed set of virtual network devices used by the test and give
 * each one an IPv4 address, an IPv6 address (unless flagged noipv6), a MAC
 * address and the "up" state.
 *
 * Exits with status 1 if the rtnetlink socket cannot be created or one of the
 * geneve addresses fails to parse. Failures of the individual netlink helper
 * calls are not checked here.
 *
 * All string formatting is bounded with snprintf(): "netdevsim%d" with a
 * full-width int does not fit the 16-byte name buffer, so the unbounded
 * sprintf() used previously could write past it.
 */
static void initialize_netdevices(void)
{
  char netdevsim[16];
  snprintf(netdevsim, sizeof(netdevsim), "netdevsim%d", (int)procid);
  /* Devices created directly by type; a null name leaves naming to the helper. */
  struct {
    const char* type;
    const char* dev;
  } devtypes[] = {
      {"ip6gretap", "ip6gretap0"},
      {"bridge", "bridge0"},
      {"vcan", "vcan0"},
      {"bond", "bond0"},
      {"team", "team0"},
      {"dummy", "dummy0"},
      {"nlmon", "nlmon0"},
      {"caif", "caif0"},
      {"batadv", "batadv0"},
      {"vxcan", "vxcan1"},
      {"veth", 0},
      {"wireguard", "wg0"},
      {"wireguard", "wg1"},
      {"wireguard", "wg2"},
  };
  /* Master device types that each get two veth-backed slave ports. */
  const char* devmasters[] = {"bridge", "bond", "team", "batadv"};
  /* Every device that receives addresses; macsize 0 means no MAC is set. */
  struct {
    const char* name;
    int macsize;
    bool noipv6;
  } devices[] = {
      {"lo", ETH_ALEN},
      {"sit0", 0},
      {"bridge0", ETH_ALEN},
      {"vcan0", 0, true},
      {"tunl0", 0},
      {"gre0", 0},
      {"gretap0", ETH_ALEN},
      {"ip_vti0", 0},
      {"ip6_vti0", 0},
      {"ip6tnl0", 0},
      {"ip6gre0", 0},
      {"ip6gretap0", ETH_ALEN},
      {"erspan0", ETH_ALEN},
      {"bond0", ETH_ALEN},
      {"veth0", ETH_ALEN},
      {"veth1", ETH_ALEN},
      {"team0", ETH_ALEN},
      {"veth0_to_bridge", ETH_ALEN},
      {"veth1_to_bridge", ETH_ALEN},
      {"veth0_to_bond", ETH_ALEN},
      {"veth1_to_bond", ETH_ALEN},
      {"veth0_to_team", ETH_ALEN},
      {"veth1_to_team", ETH_ALEN},
      {"veth0_to_hsr", ETH_ALEN},
      {"veth1_to_hsr", ETH_ALEN},
      {"hsr0", 0},
      {"dummy0", ETH_ALEN},
      {"nlmon0", 0},
      {"vxcan0", 0, true},
      {"vxcan1", 0, true},
      {"caif0", ETH_ALEN},
      {"batadv0", ETH_ALEN},
      {netdevsim, ETH_ALEN},
      {"xfrm0", ETH_ALEN},
      {"veth0_virt_wifi", ETH_ALEN},
      {"veth1_virt_wifi", ETH_ALEN},
      {"virt_wifi0", ETH_ALEN},
      {"veth0_vlan", ETH_ALEN},
      {"veth1_vlan", ETH_ALEN},
      {"vlan0", ETH_ALEN},
      {"vlan1", ETH_ALEN},
      {"macvlan0", ETH_ALEN},
      {"macvlan1", ETH_ALEN},
      {"ipvlan0", ETH_ALEN},
      {"ipvlan1", ETH_ALEN},
      {"veth0_macvtap", ETH_ALEN},
      {"veth1_macvtap", ETH_ALEN},
      {"macvtap0", ETH_ALEN},
      {"macsec0", ETH_ALEN},
      {"veth0_to_batadv", ETH_ALEN},
      {"veth1_to_batadv", ETH_ALEN},
      {"batadv_slave_0", ETH_ALEN},
      {"batadv_slave_1", ETH_ALEN},
      {"geneve0", ETH_ALEN},
      {"geneve1", ETH_ALEN},
      {"wg0", 0},
      {"wg1", 0},
      {"wg2", 0},
  };
  int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (sock == -1)
    exit(1);
  unsigned i;
  for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++)
    netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev);
  /* For each master "<m>0": create two veth pairs and enslave one end of each. */
  for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
    char master[32], slave0[32], veth0[32], slave1[32], veth1[32];
    snprintf(slave0, sizeof(slave0), "%s_slave_0", devmasters[i]);
    snprintf(veth0, sizeof(veth0), "veth0_to_%s", devmasters[i]);
    netlink_add_veth(&nlmsg, sock, slave0, veth0);
    snprintf(slave1, sizeof(slave1), "%s_slave_1", devmasters[i]);
    snprintf(veth1, sizeof(veth1), "veth1_to_%s", devmasters[i]);
    netlink_add_veth(&nlmsg, sock, slave1, veth1);
    snprintf(master, sizeof(master), "%s0", devmasters[i]);
    netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL);
    netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL);
  }
  netlink_add_xfrm(&nlmsg, sock, "xfrm0");
  netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL);
  netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL);
  netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr");
  netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr");
  netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1");
  netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL);
  netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL);
  netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi");
  netlink_add_linked(&nlmsg, sock, "virt_wifi", "virt_wifi0", "veth1_virt_wifi");
  netlink_add_veth(&nlmsg, sock, "veth0_vlan", "veth1_vlan");
  netlink_add_vlan(&nlmsg, sock, "vlan0", "veth0_vlan", 0, htons(ETH_P_8021Q));
  netlink_add_vlan(&nlmsg, sock, "vlan1", "veth0_vlan", 1, htons(ETH_P_8021AD));
  netlink_add_macvlan(&nlmsg, sock, "macvlan0", "veth1_vlan");
  netlink_add_macvlan(&nlmsg, sock, "macvlan1", "veth1_vlan");
  netlink_add_ipvlan(&nlmsg, sock, "ipvlan0", "veth0_vlan", IPVLAN_MODE_L2, 0);
  netlink_add_ipvlan(&nlmsg, sock, "ipvlan1", "veth0_vlan", IPVLAN_MODE_L3S, IPVLAN_F_VEPA);
  netlink_add_veth(&nlmsg, sock, "veth0_macvtap", "veth1_macvtap");
  netlink_add_linked(&nlmsg, sock, "macvtap", "macvtap0", "veth0_macvtap");
  netlink_add_linked(&nlmsg, sock, "macsec", "macsec0", "veth1_macvtap");
  /* Uses the same address template with index 14 + 10; named separately so the
     per-device buffer in the loop below no longer shadows it. */
  char geneve_ip4[32];
  snprintf(geneve_ip4, sizeof(geneve_ip4), DEV_IPV4, 14 + 10);
  struct in_addr geneve_addr4;
  if (inet_pton(AF_INET, geneve_ip4, &geneve_addr4) <= 0)
    exit(1);
  struct in6_addr geneve_addr6;
  if (inet_pton(AF_INET6, "fc00::01", &geneve_addr6) <= 0)
    exit(1);
  netlink_add_geneve(&nlmsg, sock, "geneve0", 0, &geneve_addr4, 0);
  netlink_add_geneve(&nlmsg, sock, "geneve1", 1, 0, &geneve_addr6);
  netdevsim_add((int)procid, 4);
  netlink_wireguard_setup();
  /* Address, MAC and link state are derived from the table index + 10. */
  for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) {
    char addr[32];
    /* DEV_IPV4 / DEV_IPV6 are format templates defined elsewhere in the file. */
    snprintf(addr, sizeof(addr), DEV_IPV4, i + 10);
    netlink_add_addr4(&nlmsg, sock, devices[i].name, addr);
    if (!devices[i].noipv6) {
      snprintf(addr, sizeof(addr), DEV_IPV6, i + 10);
      netlink_add_addr6(&nlmsg, sock, devices[i].name, addr);
    }
    uint64_t macaddr = DEV_MAC + ((i + 10ull) << 40);
    netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, devices[i].macsize, NULL);
  }
  close(sock);
}
/*
 * Configure the per-process "nr<procid>" and "rose<procid>" devices: assign an
 * IPv4 address (and an IPv6 address unless noipv6 is set), set a MAC address
 * of the listed size, and bring the device up unless noup is set.
 *
 * Exits with status 1 if the rtnetlink socket cannot be created.
 *
 * Formatting is bounded with snprintf() and every argument is cast to the
 * type its conversion specifier requires (%d takes int, %x takes unsigned).
 */
static void initialize_netdevices_init(void)
{
  int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (sock == -1)
    exit(1);
  struct {
    const char* type;
    int macsize;
    bool noipv6;
    bool noup;
  } devtypes[] = {
      {"nr", 7, true},
      {"rose", 5, true, true},
  };
  unsigned i;
  for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) {
    char dev[32], addr[32];
    snprintf(dev, sizeof(dev), "%s%d", devtypes[i].type, (int)procid);
    snprintf(addr, sizeof(addr), "172.30.%d.%d", (int)i, (int)procid + 1);
    netlink_add_addr4(&nlmsg, sock, dev, addr);
    if (!devtypes[i].noipv6) {
      snprintf(addr, sizeof(addr), "fe88::%02x:%02x", i, (unsigned)((int)procid + 1));
      netlink_add_addr6(&nlmsg, sock, dev, addr);
    }
    int macsize = devtypes[i].macsize;
    /* The table index goes in byte (macsize - 2), procid in byte (macsize - 1). */
    uint64_t macaddr = 0xbbbbbb + ((unsigned long long)i << (8 * (macsize - 2))) +
                       (procid << (8 * (macsize - 1)));
    netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, macsize, NULL);
  }
  close(sock);
}
/* Exclusive upper bound of the descriptor range that close_fds() closes (3 .. MAX_FDS - 1). */
#define MAX_FDS 30
static long syz_genetlink_get_family_id(volatile long name, volatile long sock_arg)
{
int fd = sock_arg;
if (fd < 0) {
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
if (fd == -1) {
return -1;
}
}
struct nlmsg nlmsg_tmp;
int ret = netlink_query_family_id(&nlmsg_tmp, fd, (char*)name, false);
if ((int)sock_arg < 0)
close(fd);
if (ret < 0) {
return -1;
}
return ret;
}
/* Forward declarations: sandbox_common_mount_tmpfs() calls these helpers,
   which are defined after it. */
static void setup_gadgetfs();
static void setup_binderfs();
static void setup_fusectl();
/* Exit with status 1 when a mandatory sandbox setup step reported failure. */
static void tmpfs_setup_require_ok(int failed)
{
  if (failed)
    exit(1);
}

/*
 * Build a private root under ./syz-tmp (tmpfs) and switch into it.
 *
 * The new root gets /dev, /proc, /sys, a selinux mount point and /syz-inputs.
 * Mandatory steps terminate the process with status 1 on failure; optional
 * bind mounts tolerate ENOENT (source missing on this system). Finally the
 * process pivots or chdirs into the tree, chroots into ./newroot and mounts
 * gadgetfs, binderfs and fusectl on a best-effort basis.
 */
static void sandbox_common_mount_tmpfs(void)
{
  const unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
  const char* selinux_path = "./syz-tmp/newroot/selinux";

  write_file("/proc/sys/fs/mount-max", "100000");

  tmpfs_setup_require_ok(mkdir("./syz-tmp", 0777) != 0);
  tmpfs_setup_require_ok(mount("", "./syz-tmp", "tmpfs", 0, NULL) != 0);
  tmpfs_setup_require_ok(mkdir("./syz-tmp/newroot", 0777) != 0);

  tmpfs_setup_require_ok(mkdir("./syz-tmp/newroot/dev", 0700) != 0);
  tmpfs_setup_require_ok(mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL) != 0);

  tmpfs_setup_require_ok(mkdir("./syz-tmp/newroot/proc", 0700) != 0);
  tmpfs_setup_require_ok(mount("syz-proc", "./syz-tmp/newroot/proc", "proc", 0, NULL) != 0);

  /* selinuxfs may live at /selinux or /sys/fs/selinux, or be absent. */
  tmpfs_setup_require_ok(mkdir("./syz-tmp/newroot/selinux", 0700) != 0);
  if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL) != 0) {
    tmpfs_setup_require_ok(errno != ENOENT);
    tmpfs_setup_require_ok(mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) != 0 && errno != ENOENT);
  }

  tmpfs_setup_require_ok(mkdir("./syz-tmp/newroot/sys", 0700) != 0);
  tmpfs_setup_require_ok(mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL) != 0);

  /* Optional mounts: a missing source (ENOENT) is not an error. */
  tmpfs_setup_require_ok(mount("/sys/kernel/debug", "./syz-tmp/newroot/sys/kernel/debug", NULL, bind_mount_flags, NULL) != 0 && errno != ENOENT);
  tmpfs_setup_require_ok(mount("/sys/fs/smackfs", "./syz-tmp/newroot/sys/fs/smackfs", NULL, bind_mount_flags, NULL) != 0 && errno != ENOENT);
  tmpfs_setup_require_ok(mount("/proc/sys/fs/binfmt_misc", "./syz-tmp/newroot/proc/sys/fs/binfmt_misc", NULL, bind_mount_flags, NULL) != 0 && errno != ENOENT);

  tmpfs_setup_require_ok(mkdir("./syz-tmp/newroot/syz-inputs", 0700) != 0);
  tmpfs_setup_require_ok(mount("/syz-inputs", "./syz-tmp/newroot/syz-inputs", NULL, bind_mount_flags | MS_RDONLY, NULL) != 0 && errno != ENOENT);

  tmpfs_setup_require_ok(mkdir("./syz-tmp/pivot", 0777) != 0);
  if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot") == 0) {
    /* Pivot succeeded: move to the new root and drop the old one. */
    tmpfs_setup_require_ok(chdir("/") != 0);
    tmpfs_setup_require_ok(umount2("./pivot", MNT_DETACH) != 0);
  } else {
    /* Pivot failed: fall back to entering the tree by path. */
    tmpfs_setup_require_ok(chdir("./syz-tmp") != 0);
  }

  tmpfs_setup_require_ok(chroot("./newroot") != 0);
  tmpfs_setup_require_ok(chdir("/") != 0);

  setup_gadgetfs();
  setup_binderfs();
  setup_fusectl();
}
/*
 * Best-effort creation and mount of /dev/gadgetfs.
 * Both results are deliberately ignored.
 */
static void setup_gadgetfs()
{
  int rc = mkdir("/dev/gadgetfs", 0777);
  (void)rc;
  rc = mount("gadgetfs", "/dev/gadgetfs", "gadgetfs", 0, NULL);
  (void)rc;
}
/*
 * Best-effort mount of the fusectl filesystem at /sys/fs/fuse/connections.
 * The result is deliberately ignored.
 */
static void setup_fusectl()
{
  int rc = mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0);
  (void)rc;
}
/*
 * Best-effort creation and mount of /dev/binderfs, plus a ./binderfs symlink
 * pointing at it. All three results are deliberately ignored.
 */
static void setup_binderfs()
{
  int rc = mkdir("/dev/binderfs", 0777);
  (void)rc;
  rc = mount("binder", "/dev/binderfs", "binder", 0, NULL);
  (void)rc;
  rc = symlink("/dev/binderfs", "./binderfs");
  (void)rc;
}
/* Forward declaration: loop() is defined further down and is called by do_sandbox_none(). */
static void loop();
/*
 * Sandbox setup shared by the sandbox modes.
 *
 * Requests SIGKILL on parent death, exits if the parent pid is already 1,
 * applies resource limits, unshares the mount, IPC, cgroup (0x02000000), UTS
 * and SysV-semaphore contexts, makes the mount tree private, and writes a set
 * of SysV IPC sysctls. Apart from the parent check, every step is best
 * effort: results are not inspected.
 */
static void sandbox_common()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  if (getppid() == 1)
    exit(1);

  /* Soft and hard limit are set to the same value, applied in table order. */
  static const struct {
    int resource;
    rlim_t limit;
  } limits[] = {
      {RLIMIT_AS, (200 << 20)},
      {RLIMIT_MEMLOCK, 32 << 20},
      {RLIMIT_FSIZE, 136 << 20},
      {RLIMIT_STACK, 1 << 20},
      {RLIMIT_CORE, 128 << 20},
      {RLIMIT_NOFILE, 256},
  };
  for (unsigned k = 0; k < sizeof(limits) / sizeof(limits[0]); k++) {
    struct rlimit rlim = {.rlim_cur = limits[k].limit, .rlim_max = limits[k].limit};
    setrlimit(limits[k].resource, &rlim);
  }

  int rc = unshare(CLONE_NEWNS);
  (void)rc;
  rc = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
  (void)rc;

  /* Remaining contexts to unshare, in order. */
  static const int unshare_flags[] = {
      CLONE_NEWIPC,
      0x02000000,
      CLONE_NEWUTS,
      CLONE_SYSVSEM,
  };
  for (unsigned k = 0; k < sizeof(unshare_flags) / sizeof(unshare_flags[0]); k++) {
    rc = unshare(unshare_flags[k]);
    (void)rc;
  }

  static const struct {
    const char* path;
    const char* data;
  } ipc_sysctls[] = {
      {"/proc/sys/kernel/shmmax", "16777216"},
      {"/proc/sys/kernel/shmall", "536870912"},
      {"/proc/sys/kernel/shmmni", "1024"},
      {"/proc/sys/kernel/msgmax", "8192"},
      {"/proc/sys/kernel/msgmni", "1024"},
      {"/proc/sys/kernel/msgmnb", "1024"},
      {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
  };
  for (unsigned k = 0; k < sizeof(ipc_sysctls) / sizeof(ipc_sysctls[0]); k++)
    write_file(ipc_sysctls[k].path, ipc_sysctls[k].data);
}
/*
 * Block until the child `pid` has been reaped and return its exit status.
 *
 * Exits with status 1 when `pid` is negative. Other children reaped while
 * waiting are discarded.
 */
static int wait_for_loop(int pid)
{
  if (pid < 0)
    exit(1);

  int status = 0;
  for (;;) {
    if (waitpid(-1, &status, __WALL) == pid)
      break;
  }
  return WEXITSTATUS(status);
}
/*
 * Remove CAP_SYS_PTRACE and CAP_SYS_NICE from the effective, permitted and
 * inheritable sets held in the first capability data word of this process.
 *
 * Exits with status 1 if reading or writing the capability sets fails.
 */
static void drop_caps(void)
{
  struct __user_cap_header_struct cap_hdr = {
      .version = _LINUX_CAPABILITY_VERSION_3,
      .pid = getpid(),
  };
  struct __user_cap_data_struct cap_data[2] = {{0}};

  if (syscall(SYS_capget, &cap_hdr, cap_data) != 0)
    exit(1);

  const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
  cap_data[0].inheritable &= ~drop;
  cap_data[0].permitted &= ~drop;
  cap_data[0].effective &= ~drop;

  if (syscall(SYS_capset, &cap_hdr, cap_data) != 0)
    exit(1);
}
/*
 * Run the test loop in a forked child with only the common sandbox applied.
 *
 * The parent waits for the child and returns its exit status. The child sets
 * up the sandbox, drops capabilities, configures network devices before and
 * after entering a new network namespace, builds the tmpfs root and enters
 * loop(); if loop() ever returns, the child exits with status 1.
 */
static int do_sandbox_none(void)
{
  /* Best effort: a failure to create a new PID namespace is ignored. */
  int rc = unshare(CLONE_NEWPID);
  (void)rc;

  const int child = fork();
  if (child == 0) {
    sandbox_common();
    drop_caps();
    initialize_netdevices_init();
    /* Best effort as well; the devices below are then created in the current netns. */
    rc = unshare(CLONE_NEWNET);
    (void)rc;
    write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
    initialize_netdevices();
    sandbox_common_mount_tmpfs();
    loop();
    exit(1);
  }
  /* Parent, or fork failure (wait_for_loop() exits on a negative pid). */
  return wait_for_loop(child);
}
/*
 * Kill the test process `pid` (and its process group) and reap it.
 *
 * pid    - child to terminate.
 * status - receives the wait status of the reaped child.
 *
 * After sending SIGKILL the child is polled for up to 100 rounds of 1 ms.
 * If it still has not exited, every entry under /sys/fs/fuse/connections
 * gets one byte written to its "abort" file, and then the function blocks
 * until the child is reaped.
 */
static void kill_and_wait(int pid, int* status)
{
  kill(-pid, SIGKILL);
  kill(pid, SIGKILL);

  int attempt = 0;
  while (attempt < 100) {
    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
      return;
    usleep(1000);
    attempt++;
  }

  DIR* conns = opendir("/sys/fs/fuse/connections");
  if (conns != NULL) {
    struct dirent* entry;
    while ((entry = readdir(conns)) != NULL) {
      if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
        continue;
      char abort_path[300];
      snprintf(abort_path, sizeof(abort_path), "/sys/fs/fuse/connections/%s/abort", entry->d_name);
      int fd = open(abort_path, O_WRONLY);
      if (fd == -1)
        continue;
      /* The byte written is simply the first character of the path;
         a failed write is ignored. */
      ssize_t written = write(fd, abort_path, 1);
      (void)written;
      close(fd);
    }
    closedir(conns);
  }

  for (;;) {
    if (waitpid(-1, status, __WALL) == pid)
      break;
  }
}
/*
 * Per-iteration child setup: request SIGKILL when the parent dies, become a
 * process-group leader, and write an oom_score_adj of 1000 for this process.
 */
static void setup_test()
{
  static const char oom_adj_path[] = "/proc/self/oom_score_adj";

  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  write_file(oom_adj_path, "1000");
}
/* Close descriptors 3 through MAX_FDS - 1; errors from close() are ignored. */
static void close_fds()
{
  int fd = 3;
  while (fd < MAX_FDS) {
    close(fd);
    fd++;
  }
}
/* Time in microseconds that a cloned child sleeps before it exits. */
#define USLEEP_FORKED_CHILD (3 * 50 * 1000)

/*
 * Post-process the return value of a clone-style syscall.
 *
 * A non-zero `ret` (parent side, or an error) is returned unchanged. When
 * `ret` is 0 the caller is the new child: it sleeps for USLEEP_FORKED_CHILD
 * microseconds, issues the raw exit syscall and never returns.
 */
static long handle_clone_ret(long ret)
{
  if (ret == 0) {
    usleep(USLEEP_FORKED_CHILD);
    syscall(__NR_exit, 0);
    /* Not expected to be reached; spin instead of returning into the caller. */
    for (;;) {
    }
  }
  return ret;
}
/*
 * Wrapper around the raw clone syscall.
 *
 * CLONE_VM is always masked out of `flags`, and the stack pointer passed to
 * the kernel is the end of the [stack, stack + stack_len) range rounded down
 * to 16 bytes. The result is passed to handle_clone_ret(), so the child never
 * returns from this function.
 */
static long syz_clone(volatile long flags, volatile long stack, volatile long stack_len,
                      volatile long ptid, volatile long ctid, volatile long tls)
{
  const long stack_top = (stack + stack_len) & ~15;
  const long child_flags = flags & ~CLONE_VM;
  const long clone_ret = (long)syscall(__NR_clone, child_flags, stack_top, ptid, ctid, tls);
  return handle_clone_ret(clone_ret);
}
/* State of one worker thread used by execute_one() to run a single call. */
struct thread_t {
  int created; /* set to 1 once the worker has been started */
  int call; /* index handed to execute_call() on the next wake-up */
  event_t ready; /* set by execute_one() to hand a call to the worker */
  event_t done; /* set by the worker when the call has finished */
};

/* Worker pool; slots are initialised on first use by execute_one(). */
static struct thread_t threads[16];

static void execute_call(int call);

/* Number of dispatched calls that have not finished yet (updated atomically). */
static int running;
/*
 * Worker thread body. `arg` is the worker's struct thread_t.
 *
 * Loops forever: waits for `ready`, clears it, runs the requested call,
 * decrements the global `running` counter and signals `done`.
 */
static void* thr(void* arg)
{
  struct thread_t* self = (struct thread_t*)arg;

  while (1) {
    event_wait(&self->ready);
    event_reset(&self->ready);
    execute_call(self->call);
    __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
    event_set(&self->done);
  }
  return NULL;
}
static void execute_one(void)
{
if (write(1, "executing program\n", sizeof("executing program\n") - 1)) {
}
int i, call, thread;
for (call = 0; call < 9; call++) {
for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0])); thread++) {
struct thread_t* th = &threads[thread];
if (!th->created) {
th->created = 1;
event_init(&th->ready);
event_init(&th->done);
event_set(&th->done);
thread_start(thr, th);
}
if (!event_isset(&th->done))
continue;
event_reset(&th->done);
th->call = call;
__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
event_set(&th->ready);
event_timedwait(&th->done, 50);
break;
}
}
for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
sleep_ms(1);
close_fds();
}
static void execute_one(void);

/* Extra waitpid() flags used when polling for the test child. */
#define WAIT_FLAGS __WALL

/*
 * Main driver: forever fork a child that runs the program once.
 *
 * The parent polls every 10 ms for the child to exit. A child that is still
 * running after 5000 ms is terminated with kill_and_wait(). Exits with
 * status 1 if fork() fails.
 */
static void loop(void)
{
  int iter = 0;
  while (1) {
    int pid = fork();
    if (pid < 0)
      exit(1);
    if (pid == 0) {
      setup_test();
      execute_one();
      exit(0);
    }

    int status = 0;
    uint64_t start = current_time_ms();
    for (;;) {
      sleep_ms(10);
      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
        break;
      if (current_time_ms() - start >= 5000) {
        kill_and_wait(pid, &status);
        break;
      }
    }
    iter++;
  }
}
// Resource slots shared between the calls below:
//   r[0] - generic-netlink socket (set by call 0, used by call 3)
//   r[1] - devlink family id      (set by call 1, used by call 3)
//   r[2] - thread id from gettid  (set by call 2, used by call 3)
//   r[3] - netlink socket         (set by call 5, used by call 6)
//   r[4] - netlink socket         (set by call 7, used by call 8)
uint64_t r[5] = {0xffffffffffffffff, 0x0, 0x0, 0xffffffffffffffff, 0xffffffffffffffff};
// Execute step `call` (0..8) of the generated program; any other value is a
// no-op. Arguments are built in place at fixed addresses in the 0x200000000000
// range, which is presumably mapped by main() before this runs (the mapping is
// not visible in this part of the file). The long comment blocks are the
// generator's own description of each argument and are kept verbatim.
void execute_call(int call)
{
intptr_t res = 0;
switch (call) {
case 0:
// socket$nl_generic arguments: [
// domain: const = 0x10 (8 bytes)
// type: const = 0x3 (8 bytes)
// proto: const = 0x10 (4 bytes)
// ]
// returns sock_nl_generic
res = syscall(__NR_socket, /*domain=*/0x10ul, /*type=*/3ul, /*proto=*/0x10);
if (res != -1)
r[0] = res;
break;
case 1:
// syz_genetlink_get_family_id$devlink arguments: [
// name: ptr[in, buffer] {
// buffer: {64 65 76 6c 69 6e 6b 00} (length 0x8)
// }
// fd: sock_nl_generic (resource)
// ]
// returns genl_devlink_family_id
memcpy((void*)0x200000000080, "devlink\000", 8);
res = -1;
// fd is passed as -1, so the helper opens and closes its own socket.
res = syz_genetlink_get_family_id(/*name=*/0x200000000080, /*fd=*/-1);
if (res != -1)
r[1] = res;
break;
case 2:
// gettid arguments: [
// ]
// returns pid
res = syscall(__NR_gettid);
if (res != -1)
r[2] = res;
break;
case 3:
// sendmsg$DEVLINK_CMD_RELOAD arguments: [
// fd: sock_nl_generic (resource)
// msg: ptr[in, msghdr_netlink[netlink_msg_t[genl_devlink_family_id, genlmsghdr_t[DEVLINK_CMD_RELOAD], devlink_reload_policy]]] {
// msghdr_netlink[netlink_msg_t[genl_devlink_family_id, genlmsghdr_t[DEVLINK_CMD_RELOAD], devlink_reload_policy]] {
// addr: nil
// addrlen: len = 0x0 (4 bytes)
// pad = 0x0 (4 bytes)
// vec: ptr[in, iovec[in, netlink_msg_t[genl_devlink_family_id, genlmsghdr_t[DEVLINK_CMD_RELOAD], devlink_reload_policy]]] {
// iovec[in, netlink_msg_t[genl_devlink_family_id, genlmsghdr_t[DEVLINK_CMD_RELOAD], devlink_reload_policy]] {
// addr: ptr[in, netlink_msg_t[genl_devlink_family_id, genlmsghdr_t[DEVLINK_CMD_RELOAD], devlink_reload_policy]] {
// netlink_msg_t[genl_devlink_family_id, genlmsghdr_t[DEVLINK_CMD_RELOAD], devlink_reload_policy] {
// len: len = 0x3c (4 bytes)
// type: genl_devlink_family_id (resource)
// flags: netlink_msg_flags = 0x536ae464467e3e0b (2 bytes)
// seq: int32 = 0x0 (4 bytes)
// pid: int32 = 0x0 (4 bytes)
// payload: genlmsghdr_t[DEVLINK_CMD_RELOAD] {
// cmd: const = 0x25 (1 bytes)
// version: const = 0x0 (1 bytes)
// reserved: const = 0x0 (2 bytes)
// }
// attrs: array[devlink_reload_policy] {
// devlink_reload_policy {
// handle: union devlink_handle {
// nsim: devlink_nl_policy$nsim {
// DEVLINK_ATTR_BUS_NAME: nlattr_t[const[DEVLINK_ATTR_BUS_NAME, int16], string["netdevsim"]] {
// nla_len: offsetof = 0xe (2 bytes)
// nla_type: const = 0x1 (2 bytes)
// payload: buffer: {6e 65 74 64 65 76 73 69 6d 00} (length 0xa)
// size: buffer: {} (length 0x0)
// pad = 0x0 (2 bytes)
// }
// DEVLINK_ATTR_DEV_NAME: nlattr_t[const[DEVLINK_ATTR_DEV_NAME, int16], devlink_devname] {
// nla_len: offsetof = 0xf (2 bytes)
// nla_type: const = 0x2 (2 bytes)
// payload: devlink_devname {
// prefix: buffer: {6e 65 74 64 65 76 73 69 6d} (length 0x9)
// id: proc = 0x0 (1 bytes)
// z: const = 0x0 (1 bytes)
// }
// size: buffer: {} (length 0x0)
// pad = 0x0 (1 bytes)
// }
// }
// }
// arg: union devlink_nl_policy$reload {
// DEVLINK_ATTR_NETNS_PID: nlattr_t[const[DEVLINK_ATTR_NETNS_PID, int16], pid] {
// nla_len: offsetof = 0x8 (2 bytes)
// nla_type: const = 0x8b (2 bytes)
// payload: pid (resource)
// size: buffer: {} (length 0x0)
// }
// }
// }
// }
// }
// }
// len: len = 0x3c (8 bytes)
// }
// }
// vlen: const = 0x1 (8 bytes)
// ctrl: const = 0x0 (8 bytes)
// ctrllen: const = 0x0 (8 bytes)
// f: send_flags = 0x0 (4 bytes)
// pad = 0x0 (4 bytes)
// }
// }
// f: send_flags = 0x0 (8 bytes)
// ]
// Layout: msghdr at 0x200000000800, its single iovec at 0x200000000380,
// and the 0x3c-byte netlink message at 0x200000000000.
*(uint64_t*)0x200000000800 = 0;
*(uint32_t*)0x200000000808 = 0;
*(uint64_t*)0x200000000810 = 0x200000000380;
*(uint64_t*)0x200000000380 = 0x200000000000;
*(uint32_t*)0x200000000000 = 0x3c;
// Message type is the devlink family id resolved by call 1.
*(uint16_t*)0x200000000004 = r[1];
// Only the low 16 bits of the described flags value are stored.
*(uint16_t*)0x200000000006 = 0x3e0b;
*(uint32_t*)0x200000000008 = 0;
*(uint32_t*)0x20000000000c = 0;
*(uint8_t*)0x200000000010 = 0x25;
*(uint8_t*)0x200000000011 = 0;
*(uint16_t*)0x200000000012 = 0;
*(uint16_t*)0x200000000014 = 0xe;
*(uint16_t*)0x200000000016 = 1;
memcpy((void*)0x200000000018, "netdevsim\000", 10);
*(uint16_t*)0x200000000024 = 0xf;
*(uint16_t*)0x200000000026 = 2;
memcpy((void*)0x200000000028, "netdevsim", 9);
// 0x30 is ASCII '0', so the device name reads "netdevsim0".
*(uint8_t*)0x200000000031 = 0x30;
*(uint8_t*)0x200000000032 = 0;
*(uint16_t*)0x200000000034 = 8;
*(uint16_t*)0x200000000036 = 0x8b;
// DEVLINK_ATTR_NETNS_PID payload: the thread id recorded by call 2.
*(uint32_t*)0x200000000038 = r[2];
*(uint64_t*)0x200000000388 = 0x3c;
*(uint64_t*)0x200000000818 = 1;
*(uint64_t*)0x200000000820 = 0;
*(uint64_t*)0x200000000828 = 0;
*(uint32_t*)0x200000000830 = 0;
syscall(__NR_sendmsg, /*fd=*/r[0], /*msg=*/0x200000000800ul, /*f=*/0ul);
break;
case 4:
// syz_clone arguments: [
// flags: clone_flags = 0x40b04000 (8 bytes)
// stack: nil
// stack_len: bytesize = 0x0 (8 bytes)
// parentid: nil
// childtid: nil
// tls: nil
// ]
// returns pid
// syz_clone() masks out CLONE_VM; the child sleeps briefly and then exits.
syz_clone(/*flags=CLONE_NEWNET|CLONE_UNTRACED|CLONE_CHILD_CLEARTID|CLONE_PARENT_SETTID|CLONE_VFORK*/0x40b04000, /*stack=*/0, /*stack_len=*/0, /*parentid=*/0, /*childtid=*/0, /*tls=*/0);
break;
case 5:
// socket$netlink arguments: [
// domain: const = 0x10 (8 bytes)
// type: const = 0x3 (8 bytes)
// proto: netlink_proto = 0x0 (4 bytes)
// ]
// returns sock_netlink
res = syscall(__NR_socket, /*domain=*/0x10ul, /*type=*/3ul, /*proto=*/0);
if (res != -1)
r[3] = res;
break;
case 6:
// sendmsg$nl_route arguments: [
// fd: sock_nl_route (resource)
// msg: ptr[in, msghdr_netlink[netlink_msg_route]] {
// msghdr_netlink[netlink_msg_route] {
// addr: nil
// addrlen: len = 0x0 (4 bytes)
// pad = 0x0 (4 bytes)
// vec: ptr[in, iovec[in, netlink_msg_route]] {
// iovec[in, netlink_msg_route] {
// addr: ptr[in, netlink_msg_route] {
// union netlink_msg_route {
// newlink: netlink_msg_t[const[RTM_NEWLINK, int16], ifinfomsg[AF_UNSPEC], ifla_policy] {
// len: len = 0x28 (4 bytes)
// type: const = 0x10 (2 bytes)
// flags: netlink_msg_flags = 0x1 (2 bytes)
// seq: int32 = 0x0 (4 bytes)
// pid: int32 = 0x0 (4 bytes)
// payload: ifinfomsg[AF_UNSPEC] {
// ifi_family: const = 0x0 (1 bytes)
// __ifi_pad: const = 0x0 (1 bytes)
// ifi_type: const = 0x0 (2 bytes)
// ifi_index: ifindex (resource)
// ifi_flags: net_device_flags = 0xf0ffff (4 bytes)
// ifi_change: net_device_flags = 0x0 (4 bytes)
// }
// attrs: array[ifla_policy] {
// union ifla_policy {
// IFLA_GROUP: nlattr_t[const[IFLA_GROUP, int16], int32] {
// nla_len: offsetof = 0x8 (2 bytes)
// nla_type: const = 0x1b (2 bytes)
// payload: int32 = 0x0 (4 bytes)
// size: buffer: {} (length 0x0)
// }
// }
// }
// }
// }
// }
// len: len = 0x28 (8 bytes)
// }
// }
// vlen: const = 0x1 (8 bytes)
// ctrl: const = 0x0 (8 bytes)
// ctrllen: const = 0x0 (8 bytes)
// f: send_flags = 0x0 (4 bytes)
// pad = 0x0 (4 bytes)
// }
// }
// f: send_flags = 0x0 (8 bytes)
// ]
// Layout: msghdr at 0x2000000000c0, its single iovec at 0x2000000013c0,
// and the 0x28-byte netlink message at 0x200000000040.
*(uint64_t*)0x2000000000c0 = 0;
*(uint32_t*)0x2000000000c8 = 0;
*(uint64_t*)0x2000000000d0 = 0x2000000013c0;
*(uint64_t*)0x2000000013c0 = 0x200000000040;
*(uint32_t*)0x200000000040 = 0x28;
*(uint16_t*)0x200000000044 = 0x10;
*(uint16_t*)0x200000000046 = 1;
*(uint32_t*)0x200000000048 = 0;
*(uint32_t*)0x20000000004c = 0;
*(uint8_t*)0x200000000050 = 0;
*(uint8_t*)0x200000000051 = 0;
*(uint16_t*)0x200000000052 = 0;
// ifi_index is written as 0; the ifindex resource is not filled from r[].
*(uint32_t*)0x200000000054 = 0;
*(uint32_t*)0x200000000058 = 0xf0ffff;
*(uint32_t*)0x20000000005c = 0;
*(uint16_t*)0x200000000060 = 8;
*(uint16_t*)0x200000000062 = 0x1b;
*(uint32_t*)0x200000000064 = 0;
*(uint64_t*)0x2000000013c8 = 0x28;
*(uint64_t*)0x2000000000d8 = 1;
*(uint64_t*)0x2000000000e0 = 0;
*(uint64_t*)0x2000000000e8 = 0;
*(uint32_t*)0x2000000000f0 = 0;
syscall(__NR_sendmsg, /*fd=*/r[3], /*msg=*/0x2000000000c0ul, /*f=*/0ul);
break;
case 7:
// socket$nl_route arguments: [
// domain: const = 0x10 (8 bytes)
// type: const = 0x3 (8 bytes)
// proto: const = 0x0 (4 bytes)
// ]
// returns sock_nl_route
res = syscall(__NR_socket, /*domain=*/0x10ul, /*type=*/3ul, /*proto=*/0);
if (res != -1)
r[4] = res;
break;
case 8:
// sendmsg$nl_route_sched arguments: [
// fd: sock_nl_route (resource)
// msg: ptr[in, msghdr_netlink[netlink_msg_route_sched]] {
// msghdr_netlink[netlink_msg_route_sched] {
// addr: nil
// addrlen: len = 0x0 (4 bytes)
// pad = 0x0 (4 bytes)
// vec: ptr[in, iovec[in, netlink_msg_route_sched]] {
// iovec[in, netlink_msg_route_sched] {
// addr: ptr[in, netlink_msg_route_sched] {
// union netlink_msg_route_sched {
// newqdisc: netlink_msg_t[const[RTM_NEWQDISC, int16], tcmsg[AF_UNSPEC], rtm_tca_policy] {
// len: len = 0x54 (4 bytes)
// type: const = 0x10 (2 bytes)
// flags: netlink_msg_flags = 0x1 (2 bytes)
// seq: int32 = 0x0 (4 bytes)
// pid: int32 = 0x0 (4 bytes)
// payload: tcmsg[AF_UNSPEC] {
// family: const = 0x6 (1 bytes)
// tcm__pad1: const = 0x0 (1 bytes)
// tcm__pad2: const = 0x8100 (2 bytes)
// ifindex: ifindex (resource)
// tcm_handle: tcm_handle {
// minor: tcm_handle_offsets = 0x0 (2 bytes)
// major: tcm_handle_offsets = 0x0 (2 bytes)
// }
// tcm_parent: tcm_handle {
// minor: tcm_handle_offsets = 0xfff1 (2 bytes)
// major: tcm_handle_offsets = 0x0 (2 bytes)
// }
// tcm_info: tcm_handle {
// minor: tcm_handle_offsets = 0xe (2 bytes)
// major: tcm_handle_offsets = 0x10 (2 bytes)
// }
// }
// attrs: array[rtm_tca_policy] {
// union rtm_tca_policy {
// TCA_EGRESS_BLOCK: nlattr_t[const[TCA_EGRESS_BLOCK, int16], int32] {
// nla_len: offsetof = 0x8 (2 bytes)
// nla_type: const = 0xe (2 bytes)
// payload: int32 = 0x7 (4 bytes)
// size: buffer: {} (length 0x0)
// }
// }
// union rtm_tca_policy {
// TCA_STAB: nlattr_tt[const[TCA_STAB, int16:14], 0, 1, array[stab_policy]] {
// nla_len: offsetof = 0x28 (2 bytes)
// nla_type: const = 0x8 (1 bytes)
// NLA_F_NET_BYTEORDER: const = 0x0 (0 bytes)
// NLA_F_NESTED: const = 0x1 (1 bytes)
// payload: array[stab_policy] {
// stab_policy {
// TCA_STAB_BASE: nlattr_t[const[TCA_STAB_BASE, int16], tc_sizespec] {
// nla_len: offsetof = 0x1c (2 bytes)
// nla_type: const = 0x11 (2 bytes)
// payload: tc_sizespec {
// cell_log: int8 = 0x0 (1 bytes)
// size_log: int8 = 0x0 (1 bytes)
// cell_align: int16 = 0x0 (2 bytes)
// overhead: int32 = 0x0 (4 bytes)
// linklayer: linklayer = 0x0 (4 bytes)
// mpu: int32 = 0x5e06209c (4 bytes)
// mtu: int32 = 0x0 (4 bytes)
// tsize: len = 0x2 (4 bytes)
// }
// size: buffer: {} (length 0x0)
// }
// TCA_STAB_DATA: nlattr_t[const[TCA_STAB_DATA, int16], array[int16]] {
// nla_len: offsetof = 0x8 (2 bytes)
// nla_type: const = 0x1b (2 bytes)
// payload: array[int16] {
// int16 = 0x0 (2 bytes)
// int16 = 0x0 (2 bytes)
// }
// size: buffer: {} (length 0x0)
// }
// }
// }
// size: buffer: {} (length 0x0)
// }
// }
// }
// }
// }
// }
// len: len = 0x54 (8 bytes)
// }
// }
// vlen: const = 0x1 (8 bytes)
// ctrl: const = 0x0 (8 bytes)
// ctrllen: const = 0x0 (8 bytes)
// f: send_flags = 0x0 (4 bytes)
// pad = 0x0 (4 bytes)
// }
// }
// f: send_flags = 0x0 (8 bytes)
// ]
// Layout: msghdr at 0x200000000180, its single iovec at 0x200000000280,
// and the 0x54-byte netlink message at 0x2000000001c0.
*(uint64_t*)0x200000000180 = 0;
*(uint32_t*)0x200000000188 = 0;
*(uint64_t*)0x200000000190 = 0x200000000280;
*(uint64_t*)0x200000000280 = 0x2000000001c0;
*(uint32_t*)0x2000000001c0 = 0x54;
// NOTE(review): the description above labels this message RTM_NEWQDISC, yet
// the type stored is 0x10, the same value call 6 stores for RTM_NEWLINK.
// This is how the generator emitted it; left untouched.
*(uint16_t*)0x2000000001c4 = 0x10;
*(uint16_t*)0x2000000001c6 = 1;
*(uint32_t*)0x2000000001c8 = 0;
*(uint32_t*)0x2000000001cc = 0;
*(uint8_t*)0x2000000001d0 = 6;
*(uint8_t*)0x2000000001d1 = 0;
*(uint16_t*)0x2000000001d2 = 0x8100;
*(uint32_t*)0x2000000001d4 = 0;
*(uint16_t*)0x2000000001d8 = 0;
*(uint16_t*)0x2000000001da = 0;
*(uint16_t*)0x2000000001dc = 0xfff1;
*(uint16_t*)0x2000000001de = 0;
*(uint16_t*)0x2000000001e0 = 0xe;
*(uint16_t*)0x2000000001e2 = 0x10;
*(uint16_t*)0x2000000001e4 = 8;
*(uint16_t*)0x2000000001e6 = 0xe;
*(uint32_t*)0x2000000001e8 = 7;
*(uint16_t*)0x2000000001ec = 0x28;
// STORE_BY_BITMASK is defined elsewhere in the file; presumably it writes the
// given value into a bitfield at (offset, width) — here the 14-bit attribute
// type plus the two flag bits. Verify against the macro definition.
STORE_BY_BITMASK(uint16_t, , 0x2000000001ee, 8, 0, 14);
STORE_BY_BITMASK(uint16_t, , 0x2000000001ef, 0, 6, 1);
STORE_BY_BITMASK(uint16_t, , 0x2000000001ef, 1, 7, 1);
*(uint16_t*)0x2000000001f0 = 0x1c;
*(uint16_t*)0x2000000001f2 = 0x11;
*(uint8_t*)0x2000000001f4 = 0;
*(uint8_t*)0x2000000001f5 = 0;
*(uint16_t*)0x2000000001f6 = 0;
*(uint32_t*)0x2000000001f8 = 0;
*(uint32_t*)0x2000000001fc = 0;
*(uint32_t*)0x200000000200 = 0x5e06209c;
*(uint32_t*)0x200000000204 = 0;
*(uint32_t*)0x200000000208 = 2;
*(uint16_t*)0x20000000020c = 8;
*(uint16_t*)0x20000000020e = 0x1b;
*(uint16_t*)0x200000000210 = 0;
*(uint16_t*)0x200000000212 = 0;
*(uint64_t*)0x200000000288 = 0x54;
*(uint64_t*)0x200000000198 = 1;
*(uint64_t*)0x2000000001a0 = 0;
*(uint64_t*)0x2000000001a8 = 0;
*(uint32_t*)0x2000000001b0 = 0;
syscall(__NR_sendmsg, /*fd=*/r[4], /*msg=*/0x200000000180ul, /*f=*/0ul);
break;
}
}
/*
 * Reproducer entry point.
 *
 * Maps the fixed-address regions that the generated syscall arguments are
 * written into, then hands control to do_sandbox_none() (defined elsewhere
 * in this reproducer). mmap results are intentionally not checked, exactly
 * as in the generated original.
 */
int main(void)
{
	/*
	 * Fixed mappings, in the order they are created:
	 *   - one prot=0 (inaccessible) page immediately below the data area,
	 *   - the 16 MiB data area itself, PROT_READ|PROT_WRITE|PROT_EXEC (7),
	 *   - one prot=0 (inaccessible) page immediately above the data area.
	 */
	static const struct {
		unsigned long addr;
		unsigned long len;
		unsigned long prot;
	} regions[] = {
		{ 0x1ffffffff000ul, 0x1000ul, 0ul },
		{ 0x200000000000ul, 0x1000000ul, 7ul },
		{ 0x200001000000ul, 0x1000ul, 0ul },
	};
	unsigned long idx;

	for (idx = 0; idx < sizeof(regions) / sizeof(regions[0]); idx++) {
		/* flags = MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE (0x32), fd = -1, offset = 0 */
		syscall(__NR_mmap, /*addr=*/regions[idx].addr, /*len=*/regions[idx].len,
			/*prot=*/regions[idx].prot, /*flags=*/0x32ul,
			/*fd=*/(intptr_t)-1, /*offset=*/0ul);
	}

	do_sandbox_none();
	return 0;
}
^ permalink raw reply
* Re: Please apply 736b380e28d0 and eca856950f7c down to 6.1.y
From: Greg Kroah-Hartman @ 2026-06-24 9:00 UTC (permalink / raw)
To: Wongi Lee
Cc: stable, Sasha Levin, netdev, David Ahern, Ido Schimmel,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Jungwoo Lee
In-Reply-To: <ajuR7rZYU943EG6p@DESKTOP-19IMU7U.localdomain>
On Wed, Jun 24, 2026 at 05:14:38PM +0900, Wongi Lee wrote:
> Hi,
>
> Could the following upstream commits be queued for the active stable
> trees?
>
> commit 736b380e28d0480c7bc3e022f1950f31fe53a7c5
> ("ipv6: account for fraggap on the paged allocation path")
I do not see that commit id in Linus's tree, are you sure it is correct?
> commit eca856950f7cb1a221e02b99d758409f2c5cec42
> ("ipv4: account for fraggap on the paged allocation path")
Same here, no id of that one in Linus's tree that I can see.
thanks,
greg k-h
^ permalink raw reply
* [PATCH 7/7] ARM: dts: rockchip: Add Alientek DLRV1126
From: Yanan He @ 2026-06-24 9:02 UTC (permalink / raw)
To: robh, krzk+dt, conor+dt, heiko, andrew+netdev, davem, edumazet,
kuba, pabeni, david.wu, mcoquelin.stm32, alexandre.torgue
Cc: devicetree, linux-kernel, linux-arm-kernel, linux-rockchip,
netdev, linux-stm32, grumpycat921013
In-Reply-To: <20260624-rv1126-alientek-dlrv1126-v1-0-dc42d99f75a7@gmail.com>
The board consists of a CLRV1126F core module and a DLRV1126 carrier
board. The core module contains the RV1126 SoC, eMMC and RK809 PMIC,
while the carrier board provides Ethernet, SD card, AP6212 WiFi and
Bluetooth, PCF8563 RTC, ADC keys, GPIO LEDs and audio connectors.
The board has been tested with Ethernet/NFS boot, eMMC, SD card, SDIO
WiFi enumeration, Bluetooth LE scanning, RTC, ADC keys, GPIO LEDs and
RK809 audio card registration.
Signed-off-by: Yanan He <grumpycat921013@gmail.com>
---
arch/arm/boot/dts/rockchip/Makefile | 1 +
.../dts/rockchip/rv1126-alientek-clrv1126f.dtsi | 277 +++++++++++++++++++++
.../boot/dts/rockchip/rv1126-alientek-dlrv1126.dts | 258 +++++++++++++++++++
3 files changed, 536 insertions(+)
diff --git a/arch/arm/boot/dts/rockchip/Makefile b/arch/arm/boot/dts/rockchip/Makefile
index d0154fd7ff24..e9f9e0ac3bfd 100644
--- a/arch/arm/boot/dts/rockchip/Makefile
+++ b/arch/arm/boot/dts/rockchip/Makefile
@@ -5,6 +5,7 @@ dtb-$(CONFIG_ARCH_ROCKCHIP) += \
rv1108-evb.dtb \
rv1109-relfor-saib.dtb \
rv1109-sonoff-ihost.dtb \
+ rv1126-alientek-dlrv1126.dtb \
rv1126-edgeble-neu2-io.dtb \
rv1126-sonoff-ihost.dtb \
rk3036-evb.dtb \
diff --git a/arch/arm/boot/dts/rockchip/rv1126-alientek-clrv1126f.dtsi b/arch/arm/boot/dts/rockchip/rv1126-alientek-clrv1126f.dtsi
new file mode 100644
index 000000000000..9bee424b1797
--- /dev/null
+++ b/arch/arm/boot/dts/rockchip/rv1126-alientek-clrv1126f.dtsi
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2026 Yanan He <grumpycat921013@gmail.com>
+ */
+
+#include "rv1126.dtsi"
+
+/ {
+ compatible = "alientek,clrv1126f", "rockchip,rv1126";
+
+ aliases {
+ mmc0 = &emmc;
+ };
+};
+
+&cpu0 {
+ cpu-supply = <&vdd_arm>;
+};
+
+&cpu1 {
+ cpu-supply = <&vdd_arm>;
+};
+
+&cpu2 {
+ cpu-supply = <&vdd_arm>;
+};
+
+&cpu3 {
+ cpu-supply = <&vdd_arm>;
+};
+
+&emmc {
+ bus-width = <8>;
+ cap-mmc-highspeed;
+ mmc-hs200-1_8v;
+ non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk &emmc_rstnout>;
+ rockchip,default-sample-phase = <90>;
+ vmmc-supply = <&vcc_3v3>;
+ vqmmc-supply = <&vcc_1v8>;
+ status = "okay";
+};
+
+&i2c0 {
+ clock-frequency = <400000>;
+ status = "okay";
+
+ rk809: pmic@20 {
+ compatible = "rockchip,rk809";
+ reg = <0x20>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PB1 IRQ_TYPE_LEVEL_LOW>;
+ #clock-cells = <1>;
+ #sound-dai-cells = <0>;
+ clock-output-names = "rk808-clkout1", "rk808-clkout2";
+ clock-names = "mclk";
+ clocks = <&cru MCLK_I2S0_TX_OUT2IO>;
+ assigned-clocks = <&cru MCLK_I2S0_TX_OUT2IO>;
+ assigned-clock-parents = <&cru MCLK_I2S0_TX>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_int_l>;
+ rockchip,system-power-controller;
+ wakeup-source;
+
+ vcc1-supply = <&vcc5v0_sys>;
+ vcc2-supply = <&vcc5v0_sys>;
+ vcc3-supply = <&vcc5v0_sys>;
+ vcc4-supply = <&vcc5v0_sys>;
+ vcc5-supply = <&vcc_buck5>;
+ vcc6-supply = <&vcc_buck5>;
+ vcc7-supply = <&vcc5v0_sys>;
+ vcc8-supply = <&vcc3v3_sys>;
+ vcc9-supply = <&vcc5v0_sys>;
+
+ regulators {
+ vdd_npu_vepu: DCDC_REG1 {
+ regulator-name = "vdd_npu_vepu";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-initial-mode = <0x2>;
+ regulator-min-microvolt = <650000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <6001>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_arm: DCDC_REG2 {
+ regulator-name = "vdd_arm";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-initial-mode = <0x2>;
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <6001>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_ddr: DCDC_REG3 {
+ regulator-name = "vcc_ddr";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-initial-mode = <0x2>;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc3v3_sys: DCDC_REG4 {
+ regulator-name = "vcc3v3_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-initial-mode = <0x2>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <3300000>;
+ };
+ };
+
+ vcc_buck5: DCDC_REG5 {
+ regulator-name = "vcc_buck5";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <2200000>;
+ regulator-max-microvolt = <2200000>;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <2200000>;
+ };
+ };
+
+ vcc_0v8: LDO_REG1 {
+ regulator-name = "vcc_0v8";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <800000>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc1v8_pmu: LDO_REG2 {
+ regulator-name = "vcc1v8_pmu";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vdd0v8_pmu: LDO_REG3 {
+ regulator-name = "vcc0v8_pmu";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <800000>;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <800000>;
+ };
+ };
+
+ vcc_1v8: LDO_REG4 {
+ regulator-name = "vcc_1v8";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vcc_dovdd: LDO_REG5 {
+ regulator-name = "vcc_dovdd";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_dvdd: LDO_REG6 {
+ regulator-name = "vcc_dvdd";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_avdd: LDO_REG7 {
+ regulator-name = "vcc_avdd";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vccio_sd: LDO_REG8 {
+ regulator-name = "vccio_sd";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc3v3_sd: LDO_REG9 {
+ regulator-name = "vcc3v3_sd";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_5v0: SWITCH_REG1 {
+ regulator-name = "vcc_5v0";
+ };
+
+ vcc_3v3: SWITCH_REG2 {
+ regulator-name = "vcc_3v3";
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ };
+ };
+};
+
+&pinctrl {
+ pmic {
+ pmic_int_l: pmic-int-l {
+ rockchip,pins = <0 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+};
+
+&pmu_io_domains {
+ pmuio0-supply = <&vcc3v3_sys>;
+ pmuio1-supply = <&vcc3v3_sys>;
+ vccio1-supply = <&vcc_1v8>;
+ vccio2-supply = <&vccio_sd>;
+ vccio3-supply = <&vcc_1v8>;
+ vccio4-supply = <&vcc_3v3>;
+ vccio5-supply = <&vcc_3v3>;
+ vccio6-supply = <&vcc_3v3>;
+ vccio7-supply = <&vcc_1v8>;
+ status = "okay";
+};
+
+&saradc {
+ vref-supply = <&vcc_1v8>;
+ status = "okay";
+};
+
+&wdt {
+ status = "okay";
+};
diff --git a/arch/arm/boot/dts/rockchip/rv1126-alientek-dlrv1126.dts b/arch/arm/boot/dts/rockchip/rv1126-alientek-dlrv1126.dts
new file mode 100644
index 000000000000..c8123a3c4746
--- /dev/null
+++ b/arch/arm/boot/dts/rockchip/rv1126-alientek-dlrv1126.dts
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2026 Yanan He <grumpycat921013@gmail.com>
+ */
+
+/dts-v1/;
+#include <dt-bindings/input/input.h>
+#include "rv1126-alientek-clrv1126f.dtsi"
+
+/ {
+ model = "Alientek ATK-DLRV1126";
+ compatible = "alientek,dlrv1126", "alientek,clrv1126f", "rockchip,rv1126";
+
+ aliases {
+ ethernet0 = &gmac;
+ mmc1 = &sdio;
+ mmc2 = &sdmmc;
+ };
+
+ chosen {
+ stdout-path = "serial2:1500000n8";
+ };
+
+ adc-keys {
+ compatible = "adc-keys";
+ io-channels = <&saradc 0>;
+ io-channel-names = "buttons";
+ keyup-threshold-microvolt = <1800000>;
+ poll-interval = <100>;
+
+ button-esc {
+ label = "esc";
+ linux,code = <KEY_ESC>;
+ press-threshold-microvolt = <0>;
+ };
+
+ button-right {
+ label = "right";
+ linux,code = <KEY_RIGHT>;
+ press-threshold-microvolt = <400781>;
+ };
+
+ button-left {
+ label = "left";
+ linux,code = <KEY_LEFT>;
+ press-threshold-microvolt = <801562>;
+ };
+
+ button-menu {
+ label = "menu";
+ linux,code = <KEY_MENU>;
+ press-threshold-microvolt = <1198828>;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ label = "sys-led";
+ gpios = <&gpio3 RK_PD4 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ default-state = "on";
+ };
+
+ led-1 {
+ label = "user-led";
+ gpios = <&gpio3 RK_PD6 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "none";
+ default-state = "on";
+ };
+ };
+
+ sound {
+ compatible = "simple-audio-card";
+ simple-audio-card,format = "i2s";
+ simple-audio-card,name = "Analog RK809";
+ simple-audio-card,mclk-fs = <256>;
+ simple-audio-card,widgets =
+ "Speaker", "Speaker",
+ "Headphone", "Headphones",
+ "Microphone", "Mic Jack";
+ simple-audio-card,routing =
+ "Speaker", "SPKO",
+ "Headphones", "HPOL",
+ "Headphones", "HPOR",
+ "MICL", "Mic Jack";
+
+ simple-audio-card,cpu {
+ sound-dai = <&i2s0>;
+ };
+
+ simple-audio-card,codec {
+ sound-dai = <&rk809>;
+ };
+ };
+
+ vcc5v0_sys: regulator-vcc5v0-sys {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc5v0_sys";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+
+ sdio_pwrseq: pwrseq-sdio {
+ compatible = "mmc-pwrseq-simple";
+ pinctrl-names = "default";
+ pinctrl-0 = <&wifi_enable_h>;
+ reset-gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_LOW>;
+ post-power-on-delay-ms = <200>;
+ power-off-delay-us = <20000>;
+ };
+};
+
+&i2c5 {
+ status = "okay";
+ clock-frequency = <400000>;
+
+ pcf8563: rtc@51 {
+ compatible = "nxp,pcf8563";
+ reg = <0x51>;
+ #clock-cells = <0>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <RK_PD0 IRQ_TYPE_LEVEL_LOW>;
+ clock-output-names = "xin32k";
+ };
+};
+
+&gmac {
+ phy-mode = "rgmii";
+ clock_in_out = "input";
+ assigned-clocks = <&cru CLK_GMAC_SRC>, <&cru CLK_GMAC_TX_RX>,
+ <&cru CLK_GMAC_ETHERNET_OUT>;
+ assigned-clock-parents = <&cru CLK_GMAC_SRC_M1>,
+ <&cru RGMII_MODE_CLK>;
+ assigned-clock-rates = <125000000>, <0>, <25000000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&rgmiim1_miim &rgmiim1_bus2 &rgmiim1_bus4
+ &clk_out_ethernetm1_pins>;
+ tx_delay = <0x2a>;
+ rx_delay = <0x1a>;
+ phy-handle = <&phy>;
+ status = "okay";
+};
+
+&mdio {
+ phy: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0x1>;
+ clocks = <&cru CLK_GMAC_ETHERNET_OUT>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&eth_phy_rst>;
+ reset-gpios = <&gpio3 RK_PA0 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <20000>;
+ reset-deassert-us = <100000>;
+ };
+};
+
+&pinctrl {
+ ethernet {
+ eth_phy_rst: eth-phy-rst {
+ rockchip,pins = <3 RK_PA0 RK_FUNC_GPIO &pcfg_pull_down>;
+ };
+ };
+
+ bt {
+ bt_enable: bt-enable {
+ rockchip,pins = <0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ bt_wake_dev: bt-wake-dev {
+ rockchip,pins = <1 RK_PD1 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+
+ bt_wake_host: bt-wake-host {
+ rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+
+ wifi {
+ wifi_enable_h: wifi-enable-h {
+ rockchip,pins = <0 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>;
+ };
+ };
+};
+
+&sdio {
+ bus-width = <4>;
+ cap-sdio-irq;
+ keep-power-in-suspend;
+ max-frequency = <25000000>;
+ mmc-pwrseq = <&sdio_pwrseq>;
+ non-removable;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc1_clk &sdmmc1_cmd &sdmmc1_bus4>;
+ rockchip,default-sample-phase = <90>;
+ vmmc-supply = <&vcc3v3_sd>;
+ vqmmc-supply = <&vcc_1v8>;
+ status = "okay";
+};
+
+&sdmmc {
+ bus-width = <4>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
+ card-detect-delay = <200>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc0_clk &sdmmc0_cmd &sdmmc0_bus4 &sdmmc0_det>;
+ rockchip,default-sample-phase = <90>;
+ sd-uhs-sdr12;
+ sd-uhs-sdr25;
+ sd-uhs-sdr104;
+ vmmc-supply = <&vcc3v3_sd>;
+ vqmmc-supply = <&vccio_sd>;
+ status = "okay";
+};
+
+&uart0 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart0_xfer &uart0_ctsn &uart0_rtsn>;
+ uart-has-rtscts;
+ status = "okay";
+
+ bluetooth {
+ compatible = "brcm,bcm43430a1-bt";
+ shutdown-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_HIGH>;
+ device-wakeup-gpios = <&gpio1 RK_PD1 GPIO_ACTIVE_HIGH>;
+ clocks = <&rk809 1>;
+ clock-names = "lpo";
+ interrupt-parent = <&gpio0>;
+ interrupts = <RK_PA5 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "host-wakeup";
+ max-speed = <115200>;
+ vbat-supply = <&vcc_3v3>;
+ vddio-supply = <&vcc_1v8>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&bt_enable>, <&bt_wake_dev>, <&bt_wake_host>;
+ };
+};
+
+&uart2 {
+ status = "okay";
+};
+
+&i2s0 {
+ rockchip,trcm-sync-tx-only;
+ rockchip,i2s-rx-route = <3 1 2 0>;
+ rockchip,i2s-tx-route = <0 1 2 3>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2s0m0_sclk_tx>,
+ <&i2s0m0_mclk>,
+ <&i2s0m0_lrck_tx>,
+ <&i2s0m0_sdo0>,
+ <&i2s0m0_sdo1_sdi3>;
+ status = "okay";
+};
--
2.54.0
^ permalink raw reply related
* Re: [Intel-wired-lan] [PATCH net] igc: Fix RX HW timestamp reporting when NET_RX_BUSY_POLL is disabled
From: Bezdeka, Florian @ 2026-06-24 9:12 UTC (permalink / raw)
To: andrew+netdev@lunn.ch, davem@davemloft.net,
przemyslaw.kitszel@intel.com, aleksandr.loktionov@intel.com,
Ding, Meng, kuba@kernel.org, edumazet@google.com,
anthony.l.nguyen@intel.com, Kiszka, Jan, pabeni@redhat.com
Cc: linux-kernel@vger.kernel.org, intel-wired-lan@lists.osuosl.org,
Wang, Qi, netdev@vger.kernel.org
In-Reply-To: <IA3PR11MB89860AC4A5FBB899A502B2EAE5EF2@IA3PR11MB8986.namprd11.prod.outlook.com>
On Mon, 2026-06-22 at 15:26 +0000, Loktionov, Aleksandr wrote:
> > -----Original Message-----
> > From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf
> > Of Ding Meng via Intel-wired-lan
> > Sent: Monday, June 22, 2026 6:13 AM
> > To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel,
> > Przemyslaw <przemyslaw.kitszel@intel.com>; andrew+netdev@lunn.ch;
> > davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> > pabeni@redhat.com; Kiszka, Jan <jan.kiszka@siemens.com>; Bezdeka,
> > Florian <florian.bezdeka@siemens.com>
> > Cc: intel-wired-lan@lists.osuosl.org; linux-kernel@vger.kernel.org;
> > netdev@vger.kernel.org; meng.ding@siemens.com; wq.wang@siemens.com
> > Subject: [Intel-wired-lan] [PATCH net] igc: Fix RX HW timestamp
> > reporting when NET_RX_BUSY_POLL is disabled
> >
> > When CONFIG_NET_RX_BUSY_POLL is deactivated, fetching RX HW timestamps
> > from the NIC no longer works as expected.
> >
> > This occurs because disabling CONFIG_NET_RX_BUSY_POLL disables the SKB
> > NAPI mapping in __skb_mark_napi_id(). Consequently, get_timestamp()
> > fails to perform its driver lookup, and the igc driver's struct
> > net_device_ops::ndo_get_tstamp is never invoked.
> >
> > Instead, get_timestamp() falls back to use shhwtstamps(skb)->hwtstamp,
> > a field that the driver has not populated.
> >
> > Fix this by populating the hwtstamp field with the correct timestamp
> > in the default timer when CONFIG_NET_RX_BUSY_POLL is disabled.
> >
> > Fixes: 069b142f5819 ("igc: Add support for PTP .getcyclesx64()")
> I think, because it's a fix, it needs Cc: stable@vger.kernel.org
Once we hit mainline the stable machinery will pick it up by following
the Fixes: tag. If that fails for some reason we can ping stable
manually.
Florian
>
> Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
>
> > Co-developed-by: Florian Bezdeka <florian.bezdeka@siemens.com>
> > Signed-off-by: Florian Bezdeka <florian.bezdeka@siemens.com>
> > Signed-off-by: Ding Meng <meng.ding@siemens.com>
> > ---
> >
[snip]
^ permalink raw reply
* Re: [Intel-wired-lan] [PATCH net] igc: Fix RX HW timestamp reporting when NET_RX_BUSY_POLL is disabled
From: Florian Bezdeka @ 2026-06-24 9:05 UTC (permalink / raw)
To: Kwapulinski, Piotr, Ding Meng, Nguyen, Anthony L,
Kitszel, Przemyslaw, andrew+netdev@lunn.ch, davem@davemloft.net,
edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
Kiszka, Jan
Cc: intel-wired-lan@lists.osuosl.org, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org, wq.wang@siemens.com
In-Reply-To: <BL1PR11MB59796B99C5A7709B07000D68F3EE2@BL1PR11MB5979.namprd11.prod.outlook.com>
On Tue, 2026-06-23 at 09:46 +0000, Kwapulinski, Piotr wrote:
> > -----Original Message-----
> > From: Intel-wired-lan <intel-wired-lan-bounces@osuosl.org> On Behalf Of Ding Meng via Intel-wired-lan
> > Sent: Monday, June 22, 2026 6:13 AM
> > To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>; Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>; andrew+netdev@lunn.ch; davem@davemloft.net; edumazet@google.com; kuba@kernel.org; pabeni@redhat.com; Kiszka, Jan <jan.kiszka@siemens.com>; Bezdeka, Florian <florian.bezdeka@siemens.com>
> > Cc: intel-wired-lan@lists.osuosl.org; linux-kernel@vger.kernel.org; netdev@vger.kernel.org; meng.ding@siemens.com; wq.wang@siemens.com
> > Subject: [Intel-wired-lan] [PATCH net] igc: Fix RX HW timestamp reporting when NET_RX_BUSY_POLL is disabled
> >
> > When CONFIG_NET_RX_BUSY_POLL is deactivated, fetching RX HW timestamps from the NIC no longer works as expected.
> >
> > This occurs because disabling CONFIG_NET_RX_BUSY_POLL disables the SKB NAPI mapping in __skb_mark_napi_id(). Consequently, get_timestamp() fails to perform its driver lookup, and the igc driver's struct net_device_ops::ndo_get_tstamp is never invoked.
> >
> > Instead, get_timestamp() falls back to use shhwtstamps(skb)->hwtstamp, a field that the driver has not populated.
> >
> > Fix this by populating the hwtstamp field with the correct timestamp in the default timer when CONFIG_NET_RX_BUSY_POLL is disabled.
> >
> > Fixes: 069b142f5819 ("igc: Add support for PTP .getcyclesx64()")
> > Co-developed-by: Florian Bezdeka <florian.bezdeka@siemens.com>
> > Signed-off-by: Florian Bezdeka <florian.bezdeka@siemens.com>
> > Signed-off-by: Ding Meng <meng.ding@siemens.com>
> > ---
> > drivers/net/ethernet/intel/igc/igc_main.c | 38 ++++++++++++++++-------
> > 1 file changed, 26 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> > index 8ac16808023..1da8d7aa76d 100644
> > --- a/drivers/net/ethernet/intel/igc/igc_main.c
> > +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> > @@ -1992,7 +1992,26 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
> > return skb;
> > }
> >
> > -static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
> > +static void igc_construct_skb_timestamps(struct igc_adapter *adapter,
> > + struct sk_buff *skb,
> > + struct igc_xdp_buff *ctx)
> > +{
> > + if (!ctx->rx_ts)
> > + return;
> > +#ifdef CONFIG_NET_RX_BUSY_POLL
> > + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV;
> > + skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; #else
> > + struct igc_inline_rx_tstamps *tstamps;
> Please move at the top of the function and add:
That would trigger an "unused variable" warning in the
CONFIG_NET_RX_BUSY_POLL case.
Btw: I was really confused that the #else statement moved to the end of
the previous line. Might someone be using a wrongly configured mail
client here?
Florian
> Reviewed-by: Piotr Kwapulinski <piotr.kwapulinski@intel.com>
>
> > +
> > + tstamps = ctx->rx_ts;
> > + skb_hwtstamps(skb)->hwtstamp = igc_ptp_rx_pktstamp(adapter,
> > + tstamps->timer0);
> > +#endif
> > +}
> > +
[snip]
^ permalink raw reply
* Re: [PATCH 1/2] bug: Provide WARN_ON.*DEFERRED() macros for console deferred output
From: Petr Mladek @ 2026-06-24 9:17 UTC (permalink / raw)
To: Sebastian Andrzej Siewior
Cc: K Prateek Nayak, linux-arch, linux-kernel, sched-ext, netdev,
David S . Miller, Andrea Righi, Andrew Morton, Arnd Bergmann,
Ben Segall, Breno Leitao, Changwoo Min, David Vernet,
Dietmar Eggemann, Eric Dumazet, Ingo Molnar, Jakub Kicinski,
John Ogness, Juri Lelli, Paolo Abeni, Peter Zijlstra,
Sergey Senozhatsky, Simon Horman, Steven Rostedt, Tejun Heo,
Vincent Guittot, Vlad Poenaru
In-Reply-To: <20260624062642.5DER6vrP@linutronix.de>
On Wed 2026-06-24 08:26:42, Sebastian Andrzej Siewior wrote:
> On 2026-06-23 20:24:02 [+0530], K Prateek Nayak wrote:
> > Hello Sebastian,
> Hi Prateek,
>
> > nit.
> >
> > Instead of replicating these bits, can we replace that return with a
> > "goto out" ...
>
> sure
>
> …
> > ... and replace this return with a:
> >
> > return (warning) ? BUG_TRAP_TYPE_WARN : BUG_TRAP_TYPE_BUG;
> >
> > Looks a tad bit cleaner to my eyes. Thoughts?
>
> It sure does.
> I wait for PeterZ' executive order to either do this and sprinkle sched/
> _or_ make legacy consoles deferred as it is done on RT.
>
> Petr, was there a big push back doing it unconditionally?
For Linus, it was a no-go, definitely.
The problem is situations where the system gets stuck and panic()
is not called. This is why nbcon consoles switch to the atomic
mode in some emergency situations; see the nbcon_cpu_emergency_enter()
calls in, for example, __warn(), oops_enter(), RCU stall, and lockdep
code.
Moving legacy consoles to a kthread would prevent stalls in situations
where printk() is called from the scheduler code. But it would cause
some other stalls to become silent.
In my opinion, we should not move the legacy consoles to a kthread
by default. I believe that the rest of the kernel is a bigger
source of possible stalls than the scheduler. So, the overall
experience will be better if we keep the status quo.
I would vote for adding the WARN_*DEFERRED() macros into the scheduler code
at least until the majority of console drivers are converted to the nbcon API.
Best Regards,
Petr
^ permalink raw reply
* RE: [External Mail] [PATCH v2 4/7] net: wwan: t9xx: Add control port
From: Wu. JackBB (GSM) @ 2026-06-24 9:19 UTC (permalink / raw)
To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng, Matthias Brugger,
AngeloGioacchino Del Regno, Simon Horman, Jonathan Corbet,
Shuah Khan, Wu. JackBB (GSM)
Cc: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-mediatek@lists.infradead.org, linux-doc@vger.kernel.org
In-Reply-To: <20260610-t9xx_driver_v1-v2-4-c65addf23b3f@compal.com>
Hi Jakub,
Addressing sashiko AI code review comments for this patch, as
requested by you in the patch 3/7 review:
https://patchwork.kernel.org/project/netdevbpf/patch/20260610-t9xx_driver_v1-v2-3-c65addf23b3f@compal.com/#27006088
Q1: Does this unconditionally free the internal port memory? If an internal
user or an active TRB holds a reference to the port during device teardown,
it seems this bypasses kref_put and directly calls the release function.
This could lead to a use-after-free when they attempt to access the port
or drop their reference.
Internal ports are exclusively used by the FSM layer. During
teardown, mtk_fsm_ctrl_ch_stop calls mtk_port_internal_close,
which calls kref_put to release the user's reference before
mtk_port_free_or_backup is reached. By the time
mtk_port_free_or_backup runs, the kref count is 1 (the initial
allocation reference only). Calling mtk_port_release directly at
this point is equivalent to kref_put decrementing from 1 to 0.
Internal ports do not use the stale list backup mechanism, so
bypassing kref_put avoids the unnecessary stale list check in the
kref_put path.
Q2: Is it safe to traverse the port_tbl radix tree without holding
rcu_read_lock or the port_mngr_grp_mtx mutex? Concurrent port deletions
could free radix tree nodes or mtk_port structures during traversal,
potentially leading to a use-after-free when the un-refcounted pointers
are dereferenced.
mtk_port_search_by_name is only called from mtk_port_internal_open,
which is exclusively invoked by the FSM kthread during
mtk_fsm_ctrl_ch_start. Port creation and destruction are also
driven by FSM state transitions on the same kthread. Since the FSM
serializes these operations, no concurrent port removal can occur
while search_by_name is iterating the radix tree.
Q3: If this array allocation fails during teardown, does the function skip
deleting items from the radix tree and freeing the dynamically allocated
mtk_port objects? This appears to bypass the teardown phase entirely and
leak memory.
Valid. Fixed in v3 by replacing kcalloc + radix_tree_gang_lookup
with radix_tree_for_each_slot() and single-entry gang_lookup,
eliminating the allocation in the teardown path.
Q4: Should we increment the port's reference count when assigning it to
trb->priv? If a writer thread is interrupted by a signal while waiting for
hardware completion, it returns and drops the port. If the port is then
closed and freed, the asynchronous callback mtk_port_tx_complete will
dereference the freed port, resulting in a use-after-free.
The TRB lifecycle is protected by kref on the TRB itself (kref_get
before submit, kref_put in both the caller and the completion
callback). When a blocking operation times out, the caller releases
its kref reference, but the TRB remains alive in the transaction
layer until the completion callback fires. The port is not freed at
this point — port destruction goes through kref_put(&port->kref,
mtk_port_release), and the port's reference count is not decremented
until the close path completes. Additionally, TRB_CMD_DISABLE
triggers mtk_ctrl_ch_flush in the transaction layer, which forces
all pending TRBs to complete before the port teardown proceeds.
Q5: Can concurrent packet transmissions on the same port race here? There
does not appear to be any synchronization protecting the read and increment
of port->tx_seq.
tx_seq is a per-port 8-bit sequence number. Each port has a single
writer in practice: external ports are written from user space via
cdev write() (serialized by the file descriptor), and internal
ports are written exclusively by the FSM kthread. Concurrent
transmissions on the same port do not occur in normal operation.
Q6: Does this safely handle fragmented SKBs? Without using pskb_may_pull
or similar, the header might span across multiple DMA fragments. If
skb->data does not contain the entire 16-byte header, this cast could
result in reading out-of-bounds memory.
The RX SKBs are allocated internally by the CLDMA driver using
__dev_alloc_skb with linear buffers. The CCCI header always resides
in the contiguous linear data region. mtk_port_strip_header already
validates skb->len >= sizeof(*ccci_h) before accessing the header.
Since these SKBs are not received from the network stack and are
always linear, pskb_may_pull is not needed here.
Q7: Is there a bounds check to ensure that the packet buffer is large
enough to hold the number of ports specified by msg->port_cnt? If the
modem provides an arbitrarily large count, this loop could read past the
end of the packet data.
The message is already validated by head_pattern, tail_pattern, and
version checks before the loop. The port enumeration protocol
between host and modem guarantees that the buffer contains exactly
port_cnt entries. The modem firmware is a trusted source — this is
a control plane interface, not an untrusted network protocol.
Q8: If a signal is pending, wait_event_interruptible_timeout will return
-ERESTARTSYS immediately. Jumping back to start_wait without returning to
userspace to handle the signal creates an infinite tight loop that will
stall the CPU at 100% until the hardware completes.
This is intentional. Channel enable/disable are control plane
operations that must complete or timeout — aborting mid-operation
due to a signal would leave the channel in an inconsistent state.
The loop does not spin: each iteration sleeps in
wait_event_interruptible_timeout until either the signal re-fires,
the condition is met, or the timeout expires. The timeout
(MTK_DFLT_TRB_TIMEOUT) guarantees bounded execution.
Q9: Is there a race condition between checking PORT_S_OPEN and setting it?
If two threads concurrently open the same port, they might both read the
bit as clear and proceed with initialization. Could test_and_set_bit be
used?
All callers of mtk_port_common_open acquire port_mngr_grp_mtx via
mtk_port_get_locked before calling this function, which serializes
concurrent open attempts on the same port. The test_bit/set_bit
sequence is protected by the mutex.
Q10: If mtk_port_common_open fails, the port reference is dropped via
mtk_port_put_locked, but the pointer itself is not set to NULL before
jumping to out. This causes the function to return an un-refcounted,
invalid port pointer.
Valid. Fixed in v3 by adding port = NULL after mtk_port_put_locked()
in the error path.
Q11: If mtk_cldma_submit_tx fails with a fatal error (like -EINVAL), the
error path breaks out of the switch without unlinking the SKB from
skb_list and without signaling completion. Will the next processing loop
retrieve the exact same failing SKB with skb_peek and infinitely repeat
the failure?
Valid. Fixed in v3 by restoring skb_unlink + trb_complete for
non-EAGAIN errors in the TX path.
Thanks.
================================================================================================================================================================
This message may contain information which is private, privileged or confidential of Compal Electronics, Inc. If you are not the intended recipient of this message, please notify the sender and destroy/delete the message. Any review, retransmission, dissemination or other use of, or taking of any action in reliance upon this information, by persons or entities other than the intended recipient is prohibited.
================================================================================================================================================================
^ permalink raw reply
* RE: [External Mail] [PATCH v2 5/7] net: wwan: t9xx: Add FSM thread
From: Wu. JackBB (GSM) @ 2026-06-24 9:23 UTC (permalink / raw)
To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng, Matthias Brugger,
AngeloGioacchino Del Regno, Simon Horman, Jonathan Corbet,
Shuah Khan, Wu. JackBB (GSM)
Cc: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-mediatek@lists.infradead.org, linux-doc@vger.kernel.org
In-Reply-To: <20260610-t9xx_driver_v1-v2-5-c65addf23b3f@compal.com>
Hi Jakub,
Addressing sashiko AI code review comments for this patch, as
requested by you in the patch 3/7 review:
https://patchwork.kernel.org/project/netdevbpf/patch/20260610-t9xx_driver_v1-v2-3-c65addf23b3f@compal.com/#27006088
Q1: Will this compile on standard compilers? The container_of macro relies
on offsetof, which requires an integer constant expression. Evaluating
hs_info->id dynamically at runtime will cause a compilation error.
This compiles correctly. The Linux kernel's container_of uses
__builtin_offsetof (GCC extension), which supports variable array
indices. GCC and Clang both generate runtime offset calculation
code for container_of(ptr, type, member[variable_index]). This
pattern is used in multiple places in the kernel.
Q2: Could this result in a NULL pointer dereference? If the device sends
multiple CTRL_MSG_HS2 messages, the second message overwrites the skb
pointer. When the first event is processed, it frees the skb and sets
hs_info->rt_data to NULL, causing a deref on the second event.
This cannot happen in practice. The handshake follows a strict
HS1 -> HS2 -> HS3 sequence. The MHCCIF channel interrupt is masked
in mtk_fsm_hs1_handler() before the FSM event is submitted,
preventing duplicate HS2 notifications. Only one HS2 message is
expected per handshake cycle.
Q3: Can this read past the end of the packet? We pass rtft_entry->data to
the action callback before checking if rtft_entry->data_len fits within
the remaining packet length.
The bounds check at the loop start ensures the rtft_entry struct
header is within bounds. The action functions only read fixed-size
fields (e.g., a single __le32 for packet padding mode). The modem
firmware is a trusted source — the feature query protocol uses
head_pattern/tail_pattern integrity checks, and the message format
is guaranteed by the modem firmware.
Q4: Could this result in unaligned memory accesses on the next loop
iteration? Since data_len is not verified to be a multiple of 4, the
next rtft_entry could be misaligned.
On x86 (the target platform for this PCIe WWAN modem), unaligned
memory access is handled natively without traps. The feature query
entries from the modem firmware are always naturally aligned in
practice (data_len is always a multiple of 4 for the defined
feature types).
Q5: Does this code ensure the packet is long enough before parsing the
feature query? It casts and reads ft_query fields without verifying if
hs_info->rt_data_len is large enough to hold a feature_query structure.
The SKB is received from the CLDMA RX path with validated length.
After skb_pull, the remaining data is the feature query payload.
The head/tail pattern check immediately following this cast
validates the message integrity. A truncated message would fail the
pattern check and take the error path.
Q6: Is there a risk of accessing uninitialized memory here? If the skb is
fragmented, checking skb->len is not sufficient. We should use
pskb_may_pull() before casting and accessing skb->data.
The RX SKBs are allocated internally by the CLDMA driver using
__dev_alloc_skb with linear buffers. The CCCI header and control
message data always reside in the contiguous linear data region.
These SKBs are not received from the network stack and are always
linear, so pskb_may_pull is not needed.
Q7: Does this introduce a leak if the device sends multiple messages
rapidly? Overwriting hs_info->rt_data discards the previous skb pointer
without freeing it.
Same as Q2. The handshake is a strict HS1 -> HS2 -> HS3 sequence
with the MHCCIF channel masked after HS1. Only one HS2 message
arrives per handshake cycle. No overwrite can occur.
Q8: Does this permanently hang the device initialization? The interrupt is
masked and cleared at the beginning. If the device stage is not
DEV_STAGE_IDLE, we return without unmasking.
The early_bootup_handler is registered for the BOOT_FLOW_SYNC
MHCCIF event. The MTK modem firmware sends this event once, only
when the modem reaches IDLE stage. The non-IDLE code path handles
unexpected states defensively — masking prevents spurious interrupts
from an error state. The normal boot flow always reaches IDLE
before sending BOOT_FLOW_SYNC.
Q9: Can concurrent modifications corrupt the list traversal? External
modules can call mtk_fsm_notifier_register() to modify these lists
concurrently with FSM thread iteration.
The notifier registration and unregistration are only called during
driver probe and remove paths, which are serialized by the driver
model. The FSM kthread does not start processing events until probe
completes, so there is no concurrent access to the notifier lists
between registration and traversal.
Q10: These lists are modified without any locking mechanism, meaning
concurrent registrations or iterations could crash the kernel.
Same as Q9. mtk_fsm_notifier_register/unregister are only called
from probe/remove paths, serialized by the driver model.
Q11: Is it safe to guess the caller's context like this? The checks do not
catch contexts where preemption is disabled by a standard spin_lock().
The heuristic is sufficient for all current callers.
mtk_fsm_evt_submit is called from FSM kthread, MHCCIF workqueue
callback, probe/remove paths, and CLDMA workqueue callbacks — all
process context, none holding spinlocks. The in_hardirq/in_softirq/
irqs_disabled checks cover the actual calling contexts.
Q12: Could this lead to a NULL pointer dereference? wake_up_process() is
performed outside the lock, so mtk_fsm_exit() could stop the thread and set
fsm->fsm_handler to NULL concurrently.
This race cannot occur. mtk_fsm_exit() is called during the device
remove sequence, which first submits a FSM_EVT_DEV_RM event. The
FSM kthread processes this event via mtk_fsm_dev_rm_act(), which
sets EVT_TF_GATECLOSED under evtq_lock. Only after the kthread
finishes does mtk_fsm_exit() proceed to call kthread_stop() and
set fsm_handler to NULL. The EVT_TF_GATECLOSED check inside the
spinlock provides the ordering guarantee.
Q13: Can this loop endlessly if a signal is continuously delivered? Since
this resets the timer to the full MTK_DFLT_TRB_TIMEOUT every time, a
stream of signals might trap the task in an infinite loop.
This is intentional. Channel enable/disable are control plane
operations that must complete or timeout. Each iteration sleeps in
wait_event_interruptible_timeout — not a busy loop. The timeout
guarantees bounded per-iteration execution. The total wait time may
exceed a single timeout with continuous signals, but the operation
eventually completes or times out.
Q14: Does this code silently ignore interrupt registration failures? The
return value of mtk_pci_register_irq() is not checked, and the cleanup
operations below the return statement are unreachable dead code.
Valid. Fixed in v3 by checking the return value and adding an
err_destroy_wq error label. Also propagated specific error codes
from all error paths.
Q15: Does this leak the head SKB when operating in scatter-gather mode? If
rxq->nr_bds > 0, the frag_list is detached but the head SKB itself is
never freed because dev_kfree_skb_any(req->skb) is inside the else block.
Valid. Fixed in v3 by adding dev_kfree_skb_any(req->skb) after
detaching frag_list in SG mode.
Thanks.
================================================================================================================================================================
This message may contain information which is private, privileged or confidential of Compal Electronics, Inc. If you are not the intended recipient of this message, please notify the sender and destroy/delete the message. Any review, retransmission, dissemination or other use of, or taking of any action in reliance upon this information, by persons or entities other than the intended recipient is prohibited.
================================================================================================================================================================
^ permalink raw reply
* RE: [External Mail] [PATCH v2 6/7] net: wwan: t9xx: Add AT & MBIM WWAN ports
From: Wu. JackBB (GSM) @ 2026-06-24 9:24 UTC (permalink / raw)
To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng, Matthias Brugger,
AngeloGioacchino Del Regno, Simon Horman, Jonathan Corbet,
Shuah Khan, Wu. JackBB (GSM)
Cc: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-mediatek@lists.infradead.org, linux-doc@vger.kernel.org
In-Reply-To: <20260610-t9xx_driver_v1-v2-6-c65addf23b3f@compal.com>
Hi Jakub,
Addressing sashiko AI code review comments for this patch, as
requested by you in the patch 3/7 review:
https://patchwork.kernel.org/project/netdevbpf/patch/20260610-t9xx_driver_v1-v2-3-c65addf23b3f@compal.com/#27006088
Q1: Is this mutex ever acquired? It is initialized during port setup, but
it does not appear to be used to serialize operations in
mtk_port_common_write() or when sending data.
Valid. Fixed in v3 by removing the unused write_lock mutex from
struct mtk_port and its mutex_init call.
Q2: Will concurrent writes safely execute here without holding write_lock?
Could this lack of serialization lead to sequence number corruption?
The WWAN core framework holds port->ops_lock (a mutex) around the
tx/tx_blocking callback invocations, serializing write operations
on the same WWAN port. For internal ports, writes are exclusively
performed by the FSM kthread, which is single-threaded. Concurrent
writes to the same port do not occur, and port->tx_seq is always
modified by a single thread.
Q3: What happens if a blocking write is interrupted by a port teardown?
If PORT_S_WR is cleared, trb->status remains at MTK_DFLT_TRB_STATUS (1).
ret = (!trb->status) ? len : trb->status evaluates to 1, causing an
incorrect byte count to be returned.
This only occurs during port teardown (PORT_S_WR cleared by
mtk_port_common_close or disable). At this point the port is being
shut down and the return value is largely irrelevant — the caller
cannot meaningfully use the port afterward. The submitted data may
or may not have been transmitted by DMA, depending on timing. This
is a teardown-only scenario with no practical impact on data
integrity.
Q4: Is it safe to mutate the flags directly here? This is a non-atomic
read-modify-write on the shared port structure.
The WWAN core holds port->ops_lock (a mutex) around tx/tx_blocking
callbacks, serializing flag modifications. mtk_port_wwan_write()
and mtk_port_wwan_write_blocking() are never called concurrently
for the same port.
Q5: Does this silently drop data on partial writes? If
mtk_port_common_write() returns a positive value (partial success),
consume_skb is called unconditionally and 0 (success) is returned.
This is a design limitation of the WWAN port API: wwan_port_op_tx
returns 0 for success or negative for error — there is no mechanism
to report partial writes back to the WWAN core. The already-submitted
fragments cannot be recalled from the DMA engine. For the AT/MBIM
control ports in this driver, messages are small (typically under
1KB, within a single MTU). The multi-fragment path is rarely
exercised for control plane traffic.
Q6: As with mtk_port_wwan_write(), mutating the shared blocking flag
without atomics could race, and ignoring positive return values could
lead to silent data loss.
Same as Q4 (flags serialized by WWAN core ops_lock) and Q5
(partial write is a WWAN API limitation, rare for control messages).
Q7: Is there a race condition here if wwan_create_port() fails? The return
value is directly assigned to w_port without checking IS_ERR() first.
Could concurrent RX pass the error pointer to wwan_port_rx()?
No race. mtk_port_wwan_enable() is called from the FSM thread
during the handshake sequence, before the port starts receiving
data. The CLDMA RX queue for this port has not been opened at this
point — RX data only arrives after the modem completes its
handshake. The RX path cannot observe the error pointer because no
data arrives until after the port is fully enabled.
Q8: Is the WWAN port exposed to userspace before its state is fully
initialized? wwan_create_port() registers the character device and
triggers a uevent. If userspace opens immediately, PORT_S_ENABLE is
not set yet so open returns -ENODEV.
The window between wwan_create_port() returning and
set_bit(PORT_S_ENABLE) is a few instructions (nanoseconds). If
userspace opens in that window, the open returns -ENODEV and the
application retries. In practice, userspace WWAN managers (e.g.,
ModemManager) wait for udev events to settle before opening ports.
Reordering to set PORT_S_ENABLE before wwan_create_port is not
correct either — the port should not be marked enabled before the
WWAN port object exists.
Thanks.
================================================================================================================================================================
This message may contain information which is private, privileged or confidential of Compal Electronics, Inc. If you are not the intended recipient of this message, please notify the sender and destroy/delete the message. Any review, retransmission, dissemination or other use of, or taking of any action in reliance upon this information, by persons or entities other than the intended recipient is prohibited.
================================================================================================================================================================
^ permalink raw reply
* Re: Please apply 736b380e28d0 and eca856950f7c down to 6.1.y
From: Wongi Lee @ 2026-06-24 9:30 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: stable, Sasha Levin, netdev, David Ahern, Ido Schimmel,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Jungwoo Lee
In-Reply-To: <2026062417-conceal-driving-0ebd@gregkh>
On Wed, Jun 24, 2026 at 11:00:45AM +0200, Greg Kroah-Hartman wrote:
> On Wed, Jun 24, 2026 at 05:14:38PM +0900, Wongi Lee wrote:
> > Hi,
> >
> > Could the following upstream commits be queued for the active stable
> > trees?
> >
> > commit 736b380e28d0480c7bc3e022f1950f31fe53a7c5
> > ("ipv6: account for fraggap on the paged allocation path")
>
> I do not see that commit id in Linus's tree, are you sure it is correct?
>
> > commit eca856950f7cb1a221e02b99d758409f2c5cec42
> > ("ipv4: account for fraggap on the paged allocation path")
>
> Same here, no id of that one in Linus's tree that I can see.
>
> thanks,
>
> greg k-h
Hi Greg,
First, sorry for confusing you.
The commit IDs are from netdev/net.git:
736b380e28d0480c7bc3e022f1950f31fe53a7c5
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=736b380e28d0
eca856950f7cb1a221e02b99d758409f2c5cec42
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=eca856950f7c
They were applied to netdev without Cc: stable@vger.kernel.org, so I
wanted to flag them for stable handling, but I sent the request too early
(before the merge).
I will resend the request with the Linus tree commit ID.
Thanks,
Wongi
^ permalink raw reply
* RE: [External Mail] [PATCH v2 1/7] net: wwan: t9xx: Add PCIe core
From: Wu. JackBB (GSM) @ 2026-06-24 9:15 UTC (permalink / raw)
To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng, Matthias Brugger,
AngeloGioacchino Del Regno, Simon Horman, Jonathan Corbet,
Shuah Khan, Wu. JackBB (GSM)
Cc: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-mediatek@lists.infradead.org, linux-doc@vger.kernel.org
In-Reply-To: <20260610-t9xx_driver_v1-v2-1-c65addf23b3f@compal.com>
Hi Jakub,
Addressing sashiko AI code review comments for this patch, as
requested by you in the patch 3/7 review:
https://patchwork.kernel.org/project/netdevbpf/patch/20260610-t9xx_driver_v1-v2-3-c65addf23b3f@compal.com/#27006088
Q1: Does this code perform an incorrect double byte-swap on big-endian
architectures? The hardware bits are manually swapped using cpu_to_le32()
and cast back to u32. This value is later passed to mtk_pci_write32(),
which utilizes iowrite32(). Since iowrite32() internally handles
host-to-little-endian conversion, swapping the value beforehand will cause
a double swap on big-endian platforms.
This driver targets MediaTek T9xx PCIe WWAN modems, which exist
exclusively on x86 platforms (little-endian). On little-endian,
cpu_to_le32() is a no-op and LE32_TO_U32() is a simple cast, so
no byte-swap occurs. The hardware register layout assumes LE host
ordering. While the pattern is technically incorrect for big-endian,
this hardware is not available on BE platforms and the Kconfig
dependency (depends on PCI) combined with the device's PCIe-only
nature effectively restricts this to x86/ARM64-LE.
Q2: Does this call to ffs() yield the wrong channel index on big-endian
systems? hw_bits has already been endian-swapped in
mtk_pci_ext_h2d_evt_hw_bits(). Using ffs() on an endian-swapped value
produces a completely incorrect bit index.
Same as Q1 — on little-endian platforms, no swap occurs, so ffs()
operates on the correct value. Additionally, all callers of
mtk_pci_send_ext_evt() pass known-valid channel values through the
is_power_of_2(ch) check, and the SET_HW_BITS mapping covers all
valid channels.
Q3: Does clearing the top-level interrupt status at the end of the handler
without a subsequent read loop lead to permanently lost hardware events?
If a new hardware event triggers during the worker's execution, clearing
the write-1-to-clear (W1C) interrupt status after the hardware event
statuses were read at the start will discard the newly asserted events.
No events are lost. mtk_pci_clear_irq() clears the MSI-X interrupt
status register (BIT(irq_id) in REG_MSIX_ISTATUS_HOST_GRP0_0), NOT
the MHCCIF event registers. The MHCCIF events are level-triggered
from the modem's EP2RC registers — the modem sets event bits and
they remain set until the modem clears them or the host acknowledges.
The flow is:
1. Hardware: modem sets EP2RC event bits -> MSI-X interrupt fires
2. ISR: masks MSI-X bit, schedules work
3. Worker: reads EP2RC event status, dispatches callbacks
4. Worker: clears MSI-X status (BIT(irq_id)), unmasks MSI-X
If a new MHCCIF event arrives during step 3, the EP2RC register
gets the new bit set. The MSI-X clear in step 4 only clears the
MSI-X pending bit, not the MHCCIF source. After unmask, the MHCCIF
event source re-asserts the MSI-X interrupt because the EP2RC bits
are still set, triggering a new ISR -> new work -> new read of the
updated EP2RC register.
Q4: Does this code incorrectly attempt to unmap a bitmask instead of a
single BAR index? MTK_REQUESTED_BARS is defined as a bitmask, but
pcim_iounmap_region() expects a specific BAR index. Furthermore, is
this manual unmapping redundant since the memory was mapped with
pcim_iomap_regions() and will automatically be cleaned up by devres?
Valid on both points. Fixed in v3 by removing mtk_pci_bar_exit()
entirely. pcim_iomap_regions() is devres-managed so manual unmap
is redundant.
Q5: Does this logic mistakenly drop hardware interrupts when fewer than 32
MSI-X vectors are allocated? If fewer than 32 vectors are allocated
(for example, irq_cnt = 1), GENMASK(0, 0) evaluates to BIT(0). A
hardware interrupt source like MHCCIF (bit 28) will result in
BIT(28) & BIT(0) == 0. Could this lead to a complete loss of critical
device events?
No issue. This is the MSI-X merged mode design. When fewer vectors
are allocated than interrupt sources, mtk_pci_set_msix_merged()
configures the hardware to merge multiple interrupt sources into
the available vectors. With irq_cnt=1, the hardware maps ALL
interrupt sources to vector 0. The GENMASK filters the irq_state
to only the allocated vector bits, then checks if the current
handler's vector bit is set. This correctly handles both full
allocation (32 vectors, 1:1 mapping) and merged mode (fewer
vectors, N:1 mapping).
Q6: Will this manual pci_disable_device() call trigger an atomic underflow?
Since the device was initialized using pcim_enable_device(), devres will
automatically disable the device during cleanup. Manually calling
pci_disable_device() drops the enable_cnt to zero. When devres executes
its cleanup, it will call pci_disable_device() again, leading to an
underflow and a kernel warning.
Valid. Fixed in v3 by removing pci_disable_device() from both the
probe error path and mtk_pci_remove(). pcim_enable_device() handles
cleanup via devres.
Q7: Does performing a PCIe Link Down Reset (PLDR) before freeing IRQs cause
PCIe Unsupported Request (UR) faults? mtk_pci_pldr() power-cycles the
device and clears its BARs. Directly after this, mtk_pci_free_irq() is
called, which attempts to access the device's MSI-X MMIO table.
No UR faults. mtk_pci_pldr() calls ACPI PXP._OFF then PXP._ON.
The _ON call restores power to the device, bringing the PCIe link
back up with BARs accessible. pci_free_irq_vectors() writes to PCI
config space (MSI-X capability enable bit) which is always
accessible while the device is on the bus. The MSI-X table MMIO
access for masking individual entries goes through the
re-established BAR mapping.
Q8: Will this manual pci_disable_device() call in mtk_pci_remove() trigger
an atomic underflow for the same reason as in mtk_pci_probe()?
Is a call to mtk_pci_dev_exit() missing from the remove path?
Two issues, both valid and fixed in v3:
1. pci_disable_device() underflow: same as Q6, removed.
2. Missing mtk_pci_dev_exit(): added in patch 3/7 v3 (where
mtk_pci_dev_init is introduced).
Thanks.
================================================================================================================================================================
This message may contain information which is private, privileged or confidential of Compal Electronics, Inc. If you are not the intended recipient of this message, please notify the sender and destroy/delete the message. Any review, retransmission, dissemination or other use of, or taking of any action in reliance upon this information, by persons or entities other than the intended recipient is prohibited.
================================================================================================================================================================
^ permalink raw reply
* Re: [PATCH 1/2] bug: Provide WARN_ON.*DEFERRED() macros for console deferred output
From: Peter Zijlstra @ 2026-06-24 9:31 UTC (permalink / raw)
To: Sebastian Andrzej Siewior
Cc: linux-arch, linux-kernel, sched-ext, netdev, David S . Miller,
Andrea Righi, Andrew Morton, Arnd Bergmann, Ben Segall,
Breno Leitao, Changwoo Min, David Vernet, Dietmar Eggemann,
Eric Dumazet, Ingo Molnar, Jakub Kicinski, John Ogness,
Juri Lelli, K Prateek Nayak, Paolo Abeni, Petr Mladek,
Sergey Senozhatsky, Simon Horman, Steven Rostedt, Tejun Heo,
Vincent Guittot, Vlad Poenaru
In-Reply-To: <20260623142650.265721-2-bigeasy@linutronix.de>
On Tue, Jun 23, 2026 at 04:26:49PM +0200, Sebastian Andrzej Siewior wrote:
> +#ifndef WARN_ON_DEFERRED
> +#define WARN_ON_DEFERRED(condition) ({ \
> + int __ret_warn_on = !!(condition); \
> + if (unlikely(__ret_warn_on)) { \
> + guard(preempt)(); \
> + printk_deferred_enter() \
> + __WARN(); \
> + printk_deferred_exit() \
> + } \
> + unlikely(__ret_warn_on); \
> +})
> +#endif
This will generate atrocious shite at the WARN sites.
^ permalink raw reply
* Re: [PATCH v3] net: mvneta: re-enable percpu interrupt on resume
From: Sebastian Andrzej Siewior @ 2026-06-24 9:32 UTC (permalink / raw)
To: Zhou, Yun
Cc: marcin.s.wojtas, andrew+netdev, davem, edumazet, kuba, pabeni,
maxime.chevallier, netdev, linux-kernel
In-Reply-To: <52e2dc9b-45ce-4779-8ca3-b4e022380db6@windriver.com>
On 2026-06-19 09:15:05 [+0800], Zhou, Yun wrote:
> This is a hardware constraint, not a software design choice.
>
…
> This means request_percpu_irq() is the only valid registration
> method — a plain request_irq() would fail with -EINVAL because the
> irq descriptor requires IRQ_PER_CPU_DEVID semantics.
…
> The multi-interrupt approach (like MSI-X NICs) would require the
> hardware to provide multiple distinct interrupt lines per port,
> which this SoC does not have.
I don't question that. There might have been an option to route that
interrupt source differently. If that is not the case, then there is not
much you can do. Thank you for the explanation.
> BR,
> Yun
Sebastian
^ permalink raw reply
* Re: [PATCH bpf-next v5 1/3] bpf: Add BPF_FIB_LOOKUP_VLAN flag to bpf_fib_lookup() helper
From: Toke Høiland-Jørgensen @ 2026-06-24 9:33 UTC (permalink / raw)
To: Avinash Duduskar, ast, daniel, andrii
Cc: eddyz87, memxor, martin.lau, song, yonghong.song, jolsa, emil,
john.fastabend, sdf, davem, edumazet, kuba, pabeni, horms, shuah,
hawk, yatsenko, leon.hwang, kpsingh, a.s.protopopov, ameryhung,
rongtao, eyal.birger, bpf, netdev, linux-kernel, linux-kselftest,
dsahern
In-Reply-To: <20260624030530.3342884-2-avinash.duduskar@gmail.com>
Avinash Duduskar <avinash.duduskar@gmail.com> writes:
> bpf_fib_lookup() returns the FIB-resolved egress ifindex straight
> from the fib result. When the egress is a VLAN device, the returned
> ifindex is the VLAN netdev's, which has no XDP xmit handler; XDP
> programs that want to forward the frame (e.g. xdp-forward) must
> instead target the underlying physical device and push the VLAN tag
> themselves. Today the program has no way to learn either the
> underlying ifindex or the VLAN tag without maintaining its own
> VLAN-to-ifindex map in userspace and refreshing it on netlink
> events.
>
> Add BPF_FIB_LOOKUP_VLAN. When the caller sets this flag and the fib
> result is a VLAN device whose immediate parent is a real (non-VLAN)
> device in the same network namespace, populate the existing output
> fields params->h_vlan_proto and params->h_vlan_TCI from the VLAN
> device and replace params->ifindex with the parent's ifindex.
> params->h_vlan_TCI carries the VID only, with PCP and DEI bits zero; a
> consumer wanting to set egress priority writes PCP itself.
> params->smac is the VLAN device's own address, which can differ from
> the parent's.
>
> Only the immediate parent is resolved, via vlan_dev_priv(dev)->real_dev
> and not vlan_dev_real_dev(), which walks to the bottom of a stack. When
> the immediate parent is not a real device in the same namespace, the
> lookup returns BPF_FIB_LKUP_RET_VLAN_FAILURE and leaves params->ifindex
> at the input. This covers a stacked VLAN (QinQ), where the immediate
> parent is itself a VLAN device and one h_vlan_proto/h_vlan_TCI pair
> cannot describe two tags, and a parent in another network namespace (a
> VLAN device can be moved while its parent stays), whose ifindex would
> be meaningless in the caller's namespace. A program that wants the VLAN
> device's own ifindex re-issues the lookup without BPF_FIB_LOOKUP_VLAN,
> so the unreducible case stays distinct from a physical egress. That
> distinction matters for XDP: a program cannot xmit on a VLAN device, so
> a success carrying the VLAN ifindex would make it redirect to a device
> with no ndo_xdp_xmit and drop the frame at xdp_do_flush(). The swap and
> the vlan fields are written only on the reduce path; other output
> fields keep their existing behaviour, so a frag-needed result still
> reports the route mtu in params->mtu_result.
>
> BPF_FIB_LOOKUP_VLAN is only useful to XDP, which cannot redirect to a
> VLAN device. A tc program can redirect to the VLAN device directly, so
> bpf_skb_fib_lookup() rejects the flag with -EINVAL; bpf_xdp_fib_lookup()
> accepts it. When the flag is not set, behaviour is unchanged:
> h_vlan_proto and h_vlan_TCI are zeroed and ifindex is left at the FIB
> result.
>
> The new block is compiled only under CONFIG_VLAN_8021Q since
> vlan_dev_priv() is not defined otherwise; without that config
> is_vlan_dev() is constant false and the flag is accepted but never
> acts. That is safe because no VLAN device can exist there, so every
> egress is already physical.
>
> This lets an XDP redirect target the physical device and learn the
> tag to push in a single lookup, which xdp-forward's optional VLAN
> mode (xdp-project/xdp-tools#504) wants from the kernel side.
>
> The helper's input semantics are unchanged; the reverse direction
> (supplying a tag as lookup input) is added in the following patch.
>
> Suggested-by: Toke Høiland-Jørgensen <toke@redhat.com>
> Signed-off-by: Avinash Duduskar <avinash.duduskar@gmail.com>
Yes, this is way nicer - thanks! One nit below, otherwise LGTM:
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
[..]
> + if (flags & BPF_FIB_LOOKUP_VLAN)
> + return -EINVAL;
> +
This is fine, but we should probably reject the input flag as well in
the next patch (for symmetry).
-Toke
^ permalink raw reply
* Re: [PATCH 0/2] sched: Introduce and use deferred WARNs in sched
From: Peter Zijlstra @ 2026-06-24 9:33 UTC (permalink / raw)
To: Sebastian Andrzej Siewior
Cc: linux-arch, linux-kernel, sched-ext, netdev, David S . Miller,
Andrea Righi, Andrew Morton, Arnd Bergmann, Ben Segall,
Breno Leitao, Changwoo Min, David Vernet, Dietmar Eggemann,
Eric Dumazet, Ingo Molnar, Jakub Kicinski, John Ogness,
Juri Lelli, K Prateek Nayak, Paolo Abeni, Petr Mladek,
Sergey Senozhatsky, Simon Horman, Steven Rostedt, Tejun Heo,
Vincent Guittot, Vlad Poenaru
In-Reply-To: <20260623142650.265721-1-bigeasy@linutronix.de>
On Tue, Jun 23, 2026 at 04:26:48PM +0200, Sebastian Andrzej Siewior wrote:
> This is a follow-up to the netconsole lockup reported
> https://lore.kernel.org/all/20260610183621.3915271-1-vlad.wing@gmail.com/
>
> The idea is to use deferred printing for WARNs and use them in sched. I
> tried to use only where it looks that the rq lock acquired instead a
> plain s/WARN_ON/WARN_ON_DEFFERED which would be simpler.
>
> This unholy deferred mess can be removed once we don't have legacy
> consoles anymore _or_ force force_legacy_kthread=true.
So I really don't see why we should do this. This has been a 'problem'
forever, and printk() is actually being fixed.
^ permalink raw reply
* Re: Please apply 736b380e28d0 and eca856950f7c down to 6.1.y
From: Greg Kroah-Hartman @ 2026-06-24 9:37 UTC (permalink / raw)
To: Wongi Lee
Cc: stable, Sasha Levin, netdev, David Ahern, Ido Schimmel,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Jungwoo Lee
In-Reply-To: <ajujm9+82N1g/HgF@DESKTOP-19IMU7U.localdomain>
On Wed, Jun 24, 2026 at 06:30:03PM +0900, Wongi Lee wrote:
> On Wed, Jun 24, 2026 at 11:00:45AM +0200, Greg Kroah-Hartman wrote:
> > On Wed, Jun 24, 2026 at 05:14:38PM +0900, Wongi Lee wrote:
> > > Hi,
> > >
> > > Could the following upstream commits be queued for the active stable
> > > trees?
> > >
> > > commit 736b380e28d0480c7bc3e022f1950f31fe53a7c5
> > > ("ipv6: account for fraggap on the paged allocation path")
> >
> > I do not see that commit id in Linus's tree, are you sure it is correct?
> >
> > > commit eca856950f7cb1a221e02b99d758409f2c5cec42
> > > ("ipv4: account for fraggap on the paged allocation path")
> >
> > Same here, no id of that one in Linus's tree that I can see.
> >
> > thanks,
> >
> > greg k-h
>
>
> Hi Greg,
>
> First, sorry for confusing you.
>
> The commit IDs are from netdev/net.git:
>
> 736b380e28d0480c7bc3e022f1950f31fe53a7c5
> https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=736b380e28d0
>
> eca856950f7cb1a221e02b99d758409f2c5cec42
> https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=eca856950f7c
>
> They were applied to netdev without Cc: stable@vger.kernel.org, so I
> wanted to flag them for stable handling but I send it too fast (before
> merge).
>
> I will resend the request with the Linus tree commit ID.
They have to be in Linus's tree, before we can take them in a stable
release, right?
And why were they not originally tagged with the cc: stable? That would
save you time in the future as it would all just happen automatically.
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCH v4] net: mvneta: re-enable percpu interrupt on resume
From: Sebastian Andrzej Siewior @ 2026-06-24 9:39 UTC (permalink / raw)
To: Yun Zhou
Cc: marcin.s.wojtas, andrew+netdev, davem, edumazet, kuba, pabeni,
clrkwllms, rostedt, netdev, linux-kernel, linux-rt-devel
In-Reply-To: <20260622074350.1666290-1-yun.zhou@windriver.com>
On 2026-06-22 15:43:50 [+0800], Yun Zhou wrote:
> On Marvell MPIC platforms (Armada 370/XP/38x), mvneta uses a percpu
…
> Fix by calling on_each_cpu(mvneta_percpu_enable) in the resume path
> to unconditionally unmask the MPIC per-CPU interrupt regardless of
> pre-suspend state.
>
> Fixes: 12bb03b436da ("net: mvneta: Handle per-cpu interrupts")
> Signed-off-by: Yun Zhou <yun.zhou@windriver.com>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Sebastian
^ permalink raw reply
* Re: Please apply 736b380e28d0 and eca856950f7c down to 6.1.y
From: Wongi Lee @ 2026-06-24 9:44 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: stable, Sasha Levin, netdev, David Ahern, Ido Schimmel,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, Jungwoo Lee
In-Reply-To: <2026062416-amulet-paradox-cf7c@gregkh>
On Wed, Jun 24, 2026 at 11:37:29AM +0200, Greg Kroah-Hartman wrote:
> On Wed, Jun 24, 2026 at 06:30:03PM +0900, Wongi Lee wrote:
> > On Wed, Jun 24, 2026 at 11:00:45AM +0200, Greg Kroah-Hartman wrote:
> > > On Wed, Jun 24, 2026 at 05:14:38PM +0900, Wongi Lee wrote:
> > > > Hi,
> > > >
> > > > Could the following upstream commits be queued for the active stable
> > > > trees?
> > > >
> > > > commit 736b380e28d0480c7bc3e022f1950f31fe53a7c5
> > > > ("ipv6: account for fraggap on the paged allocation path")
> > >
> > > I do not see that commit id in Linus's tree, are you sure it is correct?
> > >
> > > > commit eca856950f7cb1a221e02b99d758409f2c5cec42
> > > > ("ipv4: account for fraggap on the paged allocation path")
> > >
> > > Same here, no id of that one in Linus's tree that I can see.
> > >
> > > thanks,
> > >
> > > greg k-h
> >
> >
> > Hi Greg,
> >
> > First, sorry for confusing you.
> >
> > The commit IDs are from netdev/net.git:
> >
> > 736b380e28d0480c7bc3e022f1950f31fe53a7c5
> > https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=736b380e28d0
> >
> > eca856950f7cb1a221e02b99d758409f2c5cec42
> > https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=eca856950f7c
> >
> > They were applied to netdev without Cc: stable@vger.kernel.org, so I
> > wanted to flag them for stable handling but I send it too fast (before
> > merge).
> >
> > I will resend the request with the Linus tree commit ID.
>
> They have to be in Linus's tree, before we can take them in a stable
> release, right?
>
> And why were they not originally tagged with the cc: stable? That would
> save you time in the future as it would all just happen automatically.
>
> thanks,
>
> greg k-h
Right, my fault.
Also I just forgot cc'ing stable when sending it. I'll apply it next time.
thanks,
Wongi
^ permalink raw reply
* Re: [PATCH v2 2/2] net: fman: use devm_kzalloc() for fman and rely on devres
From: ZhaoJinming @ 2026-06-24 9:49 UTC (permalink / raw)
To: andrew
Cc: andrew+netdev, davem, edumazet, horms, kuba, linux-kernel,
madalin.bucur, netdev, pabeni, sean.anderson, zhaojinming
In-Reply-To: <f0a6fd09-2fab-4bb4-85ae-82f8d383452c@lunn.ch>
Thank you for the feedback. I fully understand the concern about
devm_ conversions. I have reworked the patch to avoid the devm_kzalloc()
approach entirely. The new version explicitly calls devm_free_irq() on
the error paths before kfree(fman), keeping the original kzalloc_obj()
allocation unchanged. The fix targets the same UAF problem but without
introducing any devm_ allocation conversion.
v3:
- Drop devm_kzalloc() approach
- Fix by explicitly calling devm_free_irq() before kfree(fman)
on all post-IRQ-registration error paths
- Add conditional check for err_irq before devm_free_irq() in
read_dts_node() to handle the case where err_irq is not registered
Best regards,
ZhaoJinming
^ permalink raw reply
* [PATCH v3] net: fman: fix use-after-free on IRQF_SHARED handler after probe failure
From: ZhaoJinming @ 2026-06-24 9:49 UTC (permalink / raw)
To: andrew
Cc: andrew+netdev, davem, edumazet, horms, kuba, linux-kernel,
madalin.bucur, netdev, pabeni, sean.anderson, zhaojinming
In-Reply-To: <20260624094922.2971930-1-zhaojinming@uniontech.com>
In read_dts_node(), the fman structure is allocated with kzalloc_obj()
and then passed as dev_id to devm_request_irq() when registering
shared interrupt handlers:
devm_request_irq(&of_dev->dev, irq, fman_irq, IRQF_SHARED, fman, fman);
devm_request_irq(&of_dev->dev, err_irq, fman_err_irq, IRQF_SHARED,
fman-err, fman);
On error paths after IRQ registration (err_irq request failure,
ioremap failure, of_platform_populate failure), the error handling
jumps to the fman_free label which calls kfree(fman) and returns
ERR_PTR(err) from read_dts_node().
fman_probe() then returns the error to the driver core, which invokes
device_unbind_cleanup() -> devres_release_all() to clean up devres
resources in LIFO order. Since fman was allocated with kzalloc_obj()
(not devm), it was already freed at this point. However, the devm IRQ
handlers are still registered and will only be released by the
subsequent devres_release_all() call:
kfree(fman) <- fman freed, dev_id points to freed memory
fman_probe() returns error
devres_release_all():
- free ioremap
- devm_free_irq(err_irq) <- handler still registered
- devm_free_irq(main_irq) <- handler still registered
During the window between kfree(fman) and devm_free_irq(main_irq),
the still-registered IRQF_SHARED handler may fire on behalf of another
device sharing the same IRQ line. The handler will dereference the
already-freed fman pointer:
static irqreturn_t fman_irq(int irq, void *handle)
{
struct fman *fman = (struct fman *)handle;
if (!is_init_done(fman->cfg)) <- accesses freed memory
return IRQ_NONE;
The same problem exists in fman_config(). When fman_config() fails
at the err_fm_state error path, it calls kfree(fman) and returns
-EINVAL. fman_probe() returns this error without any cleanup, and
the driver core releases the IRQ handlers after fman has already
been freed.
Fix by explicitly calling devm_free_irq() before kfree(fman) on
all post-IRQ-registration error paths. devm_free_irq() removes the
IRQ from both the interrupt subsystem and the devres list, so
devres_release_all() will not attempt to free it again. This ensures
the IRQ handlers are fully unregistered before fman is freed,
eliminating the UAF window.
Store the main IRQ number in struct fman_dts_params so that
fman_config() can also access it for cleanup.
Signed-off-by: ZhaoJinming <zhaojinming@uniontech.com>
---
drivers/net/ethernet/freescale/fman/fman.c | 17 ++++++++++++++---
drivers/net/ethernet/freescale/fman/fman.h | 1 +
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 013273a2de32..ba2338da0cea 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -1794,6 +1794,9 @@ static int fman_config(struct fman *fman)
err_fm_drv:
kfree(fman->state);
err_fm_state:
+ if (fman->dts_params.err_irq != 0)
+ devm_free_irq(fman->dev, fman->dts_params.err_irq, fman);
+ devm_free_irq(fman->dev, fman->dts_params.irq, fman);
kfree(fman);
return -EINVAL;
}
@@ -2716,6 +2719,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
if (err < 0)
goto fman_node_put;
irq = err;
+ fman->dts_params.irq = irq;
/* Get the FM error interrupt */
err = platform_get_irq(of_dev, 1);
@@ -2786,7 +2790,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
if (err < 0) {
dev_err(&of_dev->dev, "%s: irq %d allocation failed (error = %d)\n",
__func__, fman->dts_params.err_irq, err);
- goto fman_free;
+ goto free_main_irq;
}
}
@@ -2794,7 +2798,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
if (IS_ERR(base_addr)) {
err = PTR_ERR(base_addr);
dev_err(&of_dev->dev, "%s: devm_ioremap() failed\n", __func__);
- goto fman_free;
+ goto free_irqs;
}
fman->dts_params.base_addr = base_addr;
@@ -2806,7 +2810,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
if (err) {
dev_err(&of_dev->dev, "%s: of_platform_populate() failed\n",
__func__);
- goto fman_free;
+ goto free_irqs;
}
#ifdef CONFIG_DPAA_ERRATUM_A050385
@@ -2816,6 +2820,13 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
return fman;
+free_irqs:
+ if (fman->dts_params.err_irq != 0)
+ devm_free_irq(&of_dev->dev, fman->dts_params.err_irq, fman);
+free_main_irq:
+ devm_free_irq(&of_dev->dev, irq, fman);
+ goto fman_free;
+
fman_node_put:
of_node_put(fm_node);
fman_free:
diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h
index 74eb62eba0d7..d05f857c1c16 100644
--- a/drivers/net/ethernet/freescale/fman/fman.h
+++ b/drivers/net/ethernet/freescale/fman/fman.h
@@ -286,6 +286,7 @@ struct fman_dts_params {
struct resource *res; /* FMan memory resource */
u8 id; /* FMan ID */
+ int irq; /* FMan main IRQ */
int err_irq; /* FMan Error IRQ */
u16 clk_freq; /* FMan clock freq (In Mhz) */
--
2.20.1
^ permalink raw reply related
* Re: [PATCH bpf-next v8 3/7] bpf: add bpf_icmp_send kfunc
From: Mahe Tardy @ 2026-06-24 9:59 UTC (permalink / raw)
To: Emil Tsalapatis
Cc: bpf, andrii, ast, daniel, edumazet, john.fastabend, jordan, kuba,
martin.lau, netdev, netfilter-devel, pabeni, yonghong.song
In-Reply-To: <DJGWWQQD3B0P.2O1D9MO17YRK4@etsalapatis.com>
On Tue, Jun 23, 2026 at 10:09:20PM -0400, Emil Tsalapatis wrote:
> On Mon Jun 22, 2026 at 8:05 AM EDT, Mahe Tardy wrote:
[...]
> > +#if IS_ENABLED(CONFIG_IPV6)
> > + case htons(ETH_P_IPV6):
> > + if (type != ICMPV6_DEST_UNREACH)
> > + return -EOPNOTSUPP;
> > + if (code < 0 || code > ICMPV6_REJECT_ROUTE)
> > + return -EINVAL;
> > +
> > + nskb = skb_clone(skb, GFP_ATOMIC);
> > + if (!nskb)
> > + return -ENOMEM;
> > +
> > + if (!pskb_network_may_pull(nskb, sizeof(struct ipv6hdr))) {
>
> Minor nit, but this may also fail with SKB_DROP_REASON_NOMEM. Now this is only
> possible if the IP header is not in the linear space which may well be
> impossible (?), but do we want to differentiate with
> pskb_network_may_pull_reason()?
Indeed, I think for the IP header it should be fine, but I replaced it
with the reason variant. Thanks!
> > + kfree_skb(nskb);
> > + return -EBADMSG;
> > + }
> > +
[...]
> > static int __init bpf_kfunc_init(void)
> > {
> > int ret;
> > @@ -12639,6 +12745,9 @@ static int __init bpf_kfunc_init(void)
> > ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
> > &bpf_kfunc_set_sock_addr);
> > ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
> > + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_icmp_send);
> > + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_icmp_send);
> > + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_icmp_send);
>
> Based on Sashiko's feedback, since we mostly care about cgroup_skb
> should we just make it exclusive to them and drop CLS_ACT?
This would indeed simplify this patchset, I could drop most of the
complication induced by tc ingress routing. But I think having both
cgroup_skb and tc support would be nice as a first implem. I'll try
again in a new version as I added a test for ingress tc and could
actually fix the routing based on sashiko's feedback (this also drops the
first two patches that were partially wrong).
> > return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops);
> > }
> > late_initcall(bpf_kfunc_init);
> > --
> > 2.34.1
>
^ permalink raw reply
* Re: [BUG] KFENCE: use-after-free read in udp_tunnel_nic_device_sync_work
From: Eric Dumazet @ 2026-06-24 10:01 UTC (permalink / raw)
To: Yue Sun
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, netdev,
linux-kernel, syzkaller
In-Reply-To: <20260624090135.95763-1-samsun1006219@gmail.com>
On Wed, Jun 24, 2026 at 2:01 AM Yue Sun <samsun1006219@gmail.com> wrote:
>
> Hello,
>
> I hit a reproducible use-after-free in the UDP tunnel NIC offload work item.
> The original local crash was reported by KFENCE as:
>
> KFENCE: use-after-free read in udp_tunnel_nic_device_sync_work
>
> On current mainline, the C reproducer below triggers the same lifetime bug,
> reported by KASAN before KFENCE samples the object:
>
> BUG: KASAN: slab-use-after-free in __mutex_lock
> Workqueue: udp_tunnel_nic udp_tunnel_nic_device_sync_work
>
> Tested kernel:
>
> 840ef6c78e6a ("Merge tag 'nfs-for-7.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs")
> Linux 7.1.0-11240-g840ef6c78e6a #31 SMP PREEMPT_DYNAMIC
>
Thanks for the report.
Can you test the following patch?
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 9944ed923ddfd10f9adf6ad788c0740daeaf2adb..c5f8d2f9d325de8f4d2247ddaa52e33378851857
100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -304,8 +304,8 @@ udp_tunnel_nic_device_sync(struct net_device *dev,
struct udp_tunnel_nic *utn)
if (!utn->need_sync)
return;
- queue_work(udp_tunnel_nic_workqueue, &utn->work);
utn->work_pending = 1;
+ queue_work(udp_tunnel_nic_workqueue, &utn->work);
}
static bool
@@ -866,6 +866,11 @@ udp_tunnel_nic_unregister(struct net_device *dev,
struct udp_tunnel_nic *utn)
udp_tunnel_nic_lock(dev);
+ if (utn->work_pending) {
+ udp_tunnel_nic_unlock(dev);
+ return;
+ }
+
/* For a shared table remove this dev from the list of sharing devices
* and if there are other devices just detach.
*/
@@ -901,12 +906,6 @@ udp_tunnel_nic_unregister(struct net_device *dev,
struct udp_tunnel_nic *utn)
udp_tunnel_nic_flush(dev, utn);
udp_tunnel_nic_unlock(dev);
- /* Wait for the work to be done using the state, netdev core will
- * retry unregister until we give up our reference on this device.
- */
- if (utn->work_pending)
- return;
-
udp_tunnel_nic_free(utn);
release_dev:
dev->udp_tunnel_nic = NULL;
^ permalink raw reply
* [PATCH v3 0/7] net: wwan: t9xx: Add MediaTek T9XX WWAN driver
From: Jack Wu via B4 Relay @ 2026-06-24 10:04 UTC (permalink / raw)
To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Jack Wu, Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng,
Matthias Brugger, AngeloGioacchino Del Regno, Simon Horman,
Jonathan Corbet, Shuah Khan
Cc: linux-kernel, netdev, linux-arm-kernel, linux-mediatek, linux-doc
T9XX is the PCIe host device driver for MediaTek's
t900 modem. The driver uses the WWAN framework
infrastructure to create the following control ports
and network interfaces for data transactions.
* /dev/wwan0at0 - Interface that supports AT commands.
* /dev/wwan0mbim0 - Interface conforming to the MBIM
protocol.
* wwan0-X - Primary network interface for IP traffic.
The main blocks in the T9XX driver are:
* HW layer - Abstracts the hardware bus operations for
the device, and provides generic interfaces for the
transaction layer to get the device's information and
control the device's behavior. It includes:
* PCIe - Implements probe, removal and interrupt
handling.
* MHCCIF (Modem Host Cross-Core Interface) - Provides
interrupt channels for bidirectional event
notification such as handshake and port enumeration.
* Transaction layer - Implements data transactions for
the control plane and the data plane. It includes:
* DPMAIF (Data Plane Modem AP Interface) - Controls
the hardware that provides uplink and downlink
queues for the data path. The data exchange takes
place using circular buffers to share data buffer
addresses and metadata to describe the packets.
* CLDMA (Cross Layer DMA) - Manages the hardware
used by the port layer to send control messages to
the device using MediaTek's CCCI (Cross-Core
Communication Interface) protocol.
* TX Services - Dispatch packets from the port layer
to the device.
* RX Services - Dispatch packets to the port layer
when receiving packets from the device.
* Port layer - Provides control plane and data plane
interfaces to userspace. It includes:
* Control Plane - Provides device node interfaces
for controlling data transactions.
* Data Plane - Provides network link interfaces
wwanX (0, 1, 2...) for IP data transactions.
* Core logic - Contains the core logic to keep the
device working. It includes:
* FSM (Finite State Machine) - Monitors the state
of the device, and notifies each module when the
state changes.
The compilation of the T9XX driver is enabled by the
CONFIG_MTK_T9XX and CONFIG_MTK_T9XX_PCI config options,
which depend on CONFIG_WWAN.
This v3 submission covers the control plane only
(patches 1-6). The data plane will follow in a
separate series once the control plane is accepted.
---
Changes in v3:
- Address sashiko AI code review comments and fix sparse warnings
- Patch 1 (Add PCIe core):
- Move extern declaration of mtk_dev_cfg_0900 from mtk_pci.c to mtk_pci.h to fix sparse warning
- Remove mtk_pci_bar_exit(): pcim_iounmap_region() was called with bitmask instead of BAR index, and pcim_iomap_regions() is devres-managed so manual unmap is redundant [sashiko]
- Remove pci_disable_device() from probe error path and remove path: pcim_enable_device() registers devres cleanup, manual disable causes enable_cnt underflow [sashiko]
- Patch 3 (Add control DMA interface):
- Add #include "mtk_cldma.h" in mtk_cldma_drv_m9xx.c to fix sparse undeclared symbol warnings for mtk_cldma_regs_m9xx and cldma_drv_ops_m9xx
- Move extern declaration of mtk_ctrl_info_m9xx from mtk_trans_ctrl.c to mtk_trans_ctrl.h to fix sparse undeclared symbol warning
- Replace kcalloc + radix_tree_gang_lookup with radix_tree_for_each_slot() in mtk_ctrl_remove_radix_tree() to eliminate allocation in teardown path [sashiko]
- Add missing mtk_pci_dev_exit() call in mtk_pci_remove() to properly clean up FSM and trans_ctrl resources before device removal [sashiko]
- Patch 4 (Add control port):
- Replace kcalloc + radix_tree_gang_lookup with radix_tree_for_each_slot() and single-entry gang_lookup in mtk_port_tbl_destroy() to eliminate allocation failure in teardown path [sashiko]
- Add port = NULL after mtk_port_put_locked() in mtk_port_internal_open() error path to prevent returning un-refcounted pointer [sashiko]
- Restore skb_unlink + trb_complete for non-EAGAIN errors in mtk_ctrl_trb_handler() TX path to prevent infinite retry loop [sashiko]
- Patch 5 (Add FSM thread):
- Check mtk_pci_register_irq() return value in mtk_cldma_dev_init() and add err_destroy_wq error label to fix unreachable dead code [sashiko]
- Propagate specific error codes (ENOMEM/EIO/EINVAL) from mtk_cldma_dev_init() error paths instead of generic -EIO [sashiko]
- Free head SKB after detaching frag_list in mtk_cldma_rxq_free() scatter-gather mode to fix memory leak [sashiko]
- Patch 6 (Add AT & MBIM WWAN ports):
- Remove unused write_lock mutex from struct mtk_port and its mutex_init call [sashiko]
- Link to v2: https://patch.msgid.link/20260610-t9xx_driver_v1-v2-0-c65addf23b3f@compal.com
Changes in v2:
- Split series into control plane (this v2) and data plane (follow-up)
- Patch 1 (Add PCIe core):
- Rename BAR_NUM to MTK_PCI_BAR_NUM for driver prefix consistency
- Replace magic numbers in mtk_pci_setup_atr() with named defines
- Remove redundant ATR register comments, use blank line separators
- Add kernel-doc comments to all non-static functions
- Convert 4 MMIO wrapper functions to static inline in header [sashiko]
- Remove unnecessary unlikely() from IRQ validation paths
- Add irq_cnt == 0 and irq_id < 0 guards in mtk_pci_get_virq_id() [sashiko]
- Initialize hw_bits at declaration for consistency
- Merge same-type variable declarations into single lines
- Add #else/#endif comments for CONFIG_ACPI blocks
- Add newlines in mtk_pci_pldr() for readability
- Move return into default case in mtk_pci_dev_reset()
- Simplify mtk_mhccif_init() error path to use direct returns
- Change -EFAULT to -ENOLINK for PCIe link check failure
- Rename goto label "out" to "log_err" in mtk_pci_probe()
- Wrap long lines to stay within 80 columns
- Fix IRQ vector leak: add pci_free_irq_vectors() on error path [sashiko]
- Fix mtk_pci_remove() ordering: free IRQ before cancel_work_sync [sashiko]
- Fix mtk_pci_pldr() ACPI buffer leak: free first result before second call [sashiko]
- Replace msleep(500) with MTK_PLDR_POWER_OFF_DELAY_MS define
- Remove unused EXT_EVT_H2D_DRM_DISABLE_AP and related register define [sashiko]
- Increase MTK_IRQ_NAME_LEN from 20 to 32 to fix W=1 format-truncation warning [sashiko]
- Patch 2 (Add control plane transaction layer):
- Add kernel-doc comments to mtk_ctrl_init() and mtk_ctrl_exit()
- Change mtk_ctrl_exit() return type from int to void
- Set mdev->ctrl_blk to NULL after freeing in mtk_ctrl_exit() [sashiko]
- Change ctrl_blk from void* to typed struct mtk_ctrl_blk* [sashiko]
- Remove redundant "depends on MTK_T9XX" from MTK_T9XX_PCI Kconfig [sashiko]
- Use mtk_dev_free() instead of devm_kfree() in mtk_pci_probe() error path [sashiko]
- Patch 3 (Add control DMA interface):
- Add @ops kernel-doc parameter for mtk_ctrl_init()
- Rename 'err' to 'ret' consistently throughout the patch
- Reorder variable declarations to follow reverse Christmas tree style
- Change mtk_cldma_txq_free() return type from int to void
- Change mtk_cldma_rxq_free() return type from int to void
- Change mtk_cldma_exit() return type from int to void
- Remove unnecessary zero-initialization of ret in mtk_cldma_start_xfer()
- Remove unnecessary zero-initialization of ret in mtk_cldma_tx()
- Use direct return instead of goto out in mtk_cldma_submit_tx() error paths
- Move software state before HWO flag in mtk_cldma_submit_tx()
- Squash variable declarations in mtk_cldma_check_intr_status()
- Remove unlikely() from validation paths in mtk_cldma_check_ch_cfg()
- Clamp data_recv_len with min_t to prevent skb_over_panic in mtk_cldma_rx_skb_adjust() [sashiko]
- Use READ_ONCE() for HWO flag polling in mtk_cldma_check_rx_req() [sashiko]
- Fix mtk_cldma_rx_done_work() to always unmask interrupt on error path [sashiko]
- Add DMA address guard in mtk_cldma_txq_free() teardown loop [sashiko]
- Add IS_ERR() check for kthread_run() in mtk_ctrl_trb_srv_init() [sashiko]
- Fix queue_info memory leak on validation failure in mtk_pcie_hif_init() [sashiko]
- Handle non-EAGAIN errors in mtk_ctrl_trb_handler() TX path [sashiko]
- Fix 'err' typo to 'ret' in mtk_cldma_txbuf_set() error message
- Remove unused variable mdev in mtk_cldma_rx_check_again() [sashiko]
- Remove unused variables trans and ctrl_blk in mtk_cldma_txq_free() and mtk_cldma_rxq_free() [sashiko]
- Patch 4 (Add control port):
- Add @cfg kernel-doc parameter for mtk_ctrl_init()
- Update mtk_ctrl_init() return description to cover additional error codes
- Fix double list_del in mtk_port_stale_list_grp_cleanup() [sashiko]
- Fix direct mtk_port_trb_free() call to use kref_put() in mtk_port_ch_enable() error path [sashiko]
- Fix direct mtk_port_trb_free() call to use kref_put() in mtk_port_ch_disable() error path [sashiko]
- Add mtk_port_tbl_destroy() in mtk_port_mngr_init() error path to prevent port memory leak [sashiko]
- Change port_ops exit/reset/enable/disable callbacks from int to void
- Move -EIO dispatch comment to where the code was introduced
- Patch 5 (Add FSM thread):
- Add bounds check for rtft_entry in mtk_fsm_parse_hs2_msg() [sashiko]
- Add skb length validation before accessing ctrl_msg_header in mtk_fsm_sap_ctrl_msg_handler() [sashiko]
- Fix skb leak on CTRL_MSG_HS2 mismatch return in mtk_fsm_sap_ctrl_msg_handler() [sashiko]
- Add skb length validation before accessing ctrl_msg_header in mtk_fsm_md_ctrl_msg_handler() [sashiko]
- Replace devm_kzalloc/devm_kfree with kzalloc/kfree for FSM events [sashiko]
- Fix mtk_fsm_evt_submit() to return -ETIMEDOUT on blocking event timeout [sashiko]
- Change FSM kthread from TASK_INTERRUPTIBLE to TASK_UNINTERRUPTIBLE [sashiko]
- Remove unused variable hw_id in mtk_cldma_dev_exit() [sashiko]
- Patch 6 (Add AT & MBIM WWAN ports):
- Use imperative mode in commit message
- Remove unnecessary zero-initialization of ret in mtk_port_copy_data_from()
- Change copy_from_user() error code from -EFAULT to -EINVAL in mtk_port_copy_data_from()
- Return -EINVAL for zero-length write in mtk_port_common_write()
- Change mtk_port_wwan_exit/enable/disable() return type from int to void
- Fix packet_size to account for CCCI header reservation in mtk_port_common_write() [sashiko]
- Fix WWAN tx callbacks to consume skb and return 0 per wwan_port_ops contract [sashiko]
- Fix wwan_create_port() error path: clear ERR_PTR to NULL and call mtk_port_ch_disable() [sashiko]
- Patch 7 (Add maintainers entry): new patch
- Link to v1: https://patch.msgid.link/20260529-t9xx_driver_v1-v1-0-bdbfe2c01e57@compal.com
---
Jack Wu (7):
net: wwan: t9xx: Add PCIe core
net: wwan: t9xx: Add control plane transaction layer
net: wwan: t9xx: Add control DMA interface
net: wwan: t9xx: Add control port
net: wwan: t9xx: Add FSM thread
net: wwan: t9xx: Add AT & MBIM WWAN ports
net: wwan: t9xx: Add maintainers entry
MAINTAINERS | 9 +
drivers/net/wwan/Kconfig | 17 +
drivers/net/wwan/Makefile | 1 +
drivers/net/wwan/t9xx/Makefile | 14 +
drivers/net/wwan/t9xx/mtk_ctrl_plane.c | 111 ++
drivers/net/wwan/t9xx/mtk_ctrl_plane.h | 88 ++
drivers/net/wwan/t9xx/mtk_dev.c | 55 +
drivers/net/wwan/t9xx/mtk_dev.h | 114 ++
drivers/net/wwan/t9xx/mtk_fsm.c | 948 +++++++++++++++
drivers/net/wwan/t9xx/mtk_fsm.h | 140 +++
drivers/net/wwan/t9xx/mtk_port.c | 968 +++++++++++++++
drivers/net/wwan/t9xx/mtk_port.h | 174 +++
drivers/net/wwan/t9xx/mtk_port_io.c | 573 +++++++++
drivers/net/wwan/t9xx/mtk_port_io.h | 41 +
drivers/net/wwan/t9xx/mtk_utility.h | 33 +
drivers/net/wwan/t9xx/pcie/Makefile | 15 +
drivers/net/wwan/t9xx/pcie/mtk_cldma.c | 1420 +++++++++++++++++++++++
drivers/net/wwan/t9xx/pcie/mtk_cldma.h | 173 +++
drivers/net/wwan/t9xx/pcie/mtk_cldma_drv.c | 371 ++++++
drivers/net/wwan/t9xx/pcie/mtk_cldma_drv.h | 174 +++
drivers/net/wwan/t9xx/pcie/mtk_cldma_drv_m9xx.c | 178 +++
drivers/net/wwan/t9xx/pcie/mtk_cldma_drv_m9xx.h | 101 ++
drivers/net/wwan/t9xx/pcie/mtk_ctrl_cfg_m9xx.c | 55 +
drivers/net/wwan/t9xx/pcie/mtk_pci.c | 1102 ++++++++++++++++++
drivers/net/wwan/t9xx/pcie/mtk_pci.h | 234 ++++
drivers/net/wwan/t9xx/pcie/mtk_pci_drv_m9xx.c | 69 ++
drivers/net/wwan/t9xx/pcie/mtk_pci_reg.h | 71 ++
drivers/net/wwan/t9xx/pcie/mtk_trans_ctrl.c | 603 ++++++++++
drivers/net/wwan/t9xx/pcie/mtk_trans_ctrl.h | 107 ++
29 files changed, 7959 insertions(+)
---
base-commit: eb3f4b7426cfd2b79d65b7d37155480b32259a11
change-id: 20260529-t9xx_driver_v1-1744f8af7739
Best regards,
--
Jack Wu <jackbb_wu@compal.com>
^ permalink raw reply
* [PATCH v3 1/7] net: wwan: t9xx: Add PCIe core
From: Jack Wu via B4 Relay @ 2026-06-24 10:04 UTC (permalink / raw)
To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Jack Wu, Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng,
Matthias Brugger, AngeloGioacchino Del Regno, Simon Horman,
Jonathan Corbet, Shuah Khan
Cc: linux-kernel, netdev, linux-arm-kernel, linux-mediatek, linux-doc
In-Reply-To: <20260624-t9xx_driver_v1-v3-0-73ff03f60c48@compal.com>
From: Jack Wu <jackbb_wu@compal.com>
Registers the T900 device driver with the kernel. Set up all
the fundamental configurations for the device: PCIe layer,
Modem Host Cross Core Interface (MHCCIF), Reset Generation
Unit (RGU), modem common control operations and build
infrastructure.
* PCIe layer code implements driver probe and removal, MSI-X
interrupt initialization and de-initialization, and the way
of resetting the device.
* MHCCIF provides interrupt channels to communicate events
such as handshake, PM and port enumeration.
* RGU provides interrupt channels to generate notifications
from the device so that the T900 driver could get the
device reset.
* Modem common control operations provide the basic read/write
functions of the device's hardware registers,
mask/unmask/get/clear functions of the device's interrupt
registers and inquiry functions of the device's status.
Signed-off-by: Jack Wu <jackbb_wu@compal.com>
---
drivers/net/wwan/Kconfig | 12 +
drivers/net/wwan/Makefile | 1 +
drivers/net/wwan/t9xx/Makefile | 10 +
drivers/net/wwan/t9xx/mtk_dev.h | 108 +++
drivers/net/wwan/t9xx/pcie/mtk_pci.c | 1049 +++++++++++++++++++++++++
drivers/net/wwan/t9xx/pcie/mtk_pci.h | 234 ++++++
drivers/net/wwan/t9xx/pcie/mtk_pci_drv_m9xx.c | 69 ++
drivers/net/wwan/t9xx/pcie/mtk_pci_reg.h | 70 ++
8 files changed, 1553 insertions(+)
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index 88df55d78d90..4cee537c739f 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -121,6 +121,18 @@ config MTK_T7XX
If unsure, say N.
+config MTK_T9XX
+ tristate "MediaTek PCIe 5G WWAN modem T9xx device"
+ depends on PCI
+ select NET_DEVLINK
+ help
+ Enables MediaTek PCIe based 5G WWAN modem (T9xx series) device.
+
+ To compile this driver as a module, choose M here: the module will be
+ called mtk_t9xx.
+
+ If unsure, say N.
+
endif # WWAN
endmenu
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
index 3960c0ae2445..7361eef4c472 100644
--- a/drivers/net/wwan/Makefile
+++ b/drivers/net/wwan/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_QCOM_BAM_DMUX) += qcom_bam_dmux.o
obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
obj-$(CONFIG_IOSM) += iosm/
obj-$(CONFIG_MTK_T7XX) += t7xx/
+obj-$(CONFIG_MTK_T9XX) += t9xx/
diff --git a/drivers/net/wwan/t9xx/Makefile b/drivers/net/wwan/t9xx/Makefile
new file mode 100644
index 000000000000..6f2dd3f91454
--- /dev/null
+++ b/drivers/net/wwan/t9xx/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+ccflags-y += -I$(src)/pcie
+ccflags-y += -I$(src)
+
+obj-$(CONFIG_MTK_T9XX) += mtk_t9xx.o
+
+mtk_t9xx-y := \
+ pcie/mtk_pci.o \
+ pcie/mtk_pci_drv_m9xx.o
diff --git a/drivers/net/wwan/t9xx/mtk_dev.h b/drivers/net/wwan/t9xx/mtk_dev.h
new file mode 100644
index 000000000000..8278a0e2875e
--- /dev/null
+++ b/drivers/net/wwan/t9xx/mtk_dev.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+
+#ifndef __MTK_DEV_H__
+#define __MTK_DEV_H__
+
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define MTK_DEV_STR_LEN 16
+
+enum mtk_user_id {
+ MTK_USER_MIN,
+ MTK_USER_CTRL,
+ MTK_USER_DATA,
+ MTK_USER_MAX
+};
+
+enum mtk_dev_evt_h2d {
+ DEV_EVT_H2D_DEVICE_RESET = BIT(2),
+ DEV_EVT_H2D_MAX = BIT(5)
+};
+
+enum mtk_dev_evt_d2h {
+ DEV_EVT_D2H_BOOT_FLOW_SYNC = BIT(4),
+ DEV_EVT_D2H_ASYNC_HS_NOTIFY_SAP = BIT(5),
+ DEV_EVT_D2H_ASYNC_HS_NOTIFY_MD = BIT(6),
+ DEV_EVT_D2H_MAX = BIT(11)
+};
+
+struct mtk_md_dev;
+
+struct mtk_dev_ops {
+ u32 (*get_dev_state)(struct mtk_md_dev *mdev);
+ void (*ack_dev_state)(struct mtk_md_dev *mdev, u32 state);
+ u32 (*get_dev_cfg)(struct mtk_md_dev *mdev);
+ int (*register_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt,
+ int (*evt_cb)(u32 status, void *data), void *data);
+ void (*unregister_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
+ void (*mask_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
+ void (*unmask_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
+ void (*clear_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
+ int (*send_dev_evt)(struct mtk_md_dev *mdev, u32 dev_evt);
+};
+
+/* mtk_md_dev defines the structure of MTK modem device */
+struct mtk_md_dev {
+ struct device *dev;
+ const struct mtk_dev_ops *dev_ops;
+ void *hw_priv;
+ u32 hw_ver;
+ char dev_str[MTK_DEV_STR_LEN];
+};
+
+static inline u32 mtk_dev_get_dev_state(struct mtk_md_dev *mdev)
+{
+ return mdev->dev_ops->get_dev_state(mdev);
+}
+
+static inline void mtk_dev_ack_dev_state(struct mtk_md_dev *mdev, u32 state)
+{
+	mdev->dev_ops->ack_dev_state(mdev, state);
+}
+
+static inline u32 mtk_dev_get_dev_cfg(struct mtk_md_dev *mdev)
+{
+ return mdev->dev_ops->get_dev_cfg(mdev);
+}
+
+static inline int mtk_dev_register_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt,
+ int (*evt_cb)(u32 status, void *data), void *data)
+{
+ return mdev->dev_ops->register_dev_evt(mdev, dev_evt, evt_cb, data);
+}
+
+static inline void mtk_dev_unregister_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
+{
+ mdev->dev_ops->unregister_dev_evt(mdev, dev_evt);
+}
+
+static inline void mtk_dev_mask_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
+{
+ mdev->dev_ops->mask_dev_evt(mdev, dev_evt);
+}
+
+static inline void mtk_dev_unmask_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
+{
+ mdev->dev_ops->unmask_dev_evt(mdev, dev_evt);
+}
+
+static inline void mtk_dev_clear_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
+{
+ mdev->dev_ops->clear_dev_evt(mdev, dev_evt);
+}
+
+static inline int mtk_dev_send_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
+{
+ return mdev->dev_ops->send_dev_evt(mdev, dev_evt);
+}
+
+#endif /* __MTK_DEV_H__ */
diff --git a/drivers/net/wwan/t9xx/pcie/mtk_pci.c b/drivers/net/wwan/t9xx/pcie/mtk_pci.c
new file mode 100644
index 000000000000..c6a7196fcdd6
--- /dev/null
+++ b/drivers/net/wwan/t9xx/pcie/mtk_pci.c
@@ -0,0 +1,1049 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+
+#include <linux/acpi.h>
+#include <linux/aer.h>
+#include <linux/bitfield.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "mtk_dev.h"
+#include "mtk_pci.h"
+#include "mtk_pci_reg.h"
+
+#define MTK_PCI_BAR_NUM 6
+#define MTK_PCI_TRANSPARENT_ATR_SIZE (0x3F)
+#define MTK_PCI_MINIMUM_ATR_SIZE (0x1000)
+#define ATR_SIZE_LO32_MASK GENMASK_ULL(31, 0)
+#define ATR_SIZE_HI32_MASK GENMASK_ULL(63, 32)
+#define ATR_SIZE_BIAS_FROM_LO32 2
+#define ATR_ADDR_ALIGN_MASK 0xFFFFF000
+#define ATR_EN BIT(0)
+#define ATR_PARAM_OFFSET 16
+/* Delay between ACPI PXP._OFF and _ON for modem power cycle stabilization */
+#define MTK_PLDR_POWER_OFF_DELAY_MS 500
+#define LE32_TO_U32(x) ((__force u32)(__le32)(x))
+#define SET_HW_BITS(dest, chs, mhccif, dev) \
+ ({ \
+ if ((chs) & (dev)) \
+ (dest) |= FIELD_PREP(mhccif, 1); \
+ })
+
+struct mtk_mhccif_cb {
+ struct list_head entry;
+ int (*evt_cb)(u32 status, void *data);
+ void *data;
+ u32 chs;
+};
+
+/**
+ * mtk_pci_setup_atr() - Configure a PCIe address translation rule
+ * @mdev: MTK MD device
+ * @cfg: ATR configuration parameters
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_setup_atr(struct mtk_md_dev *mdev, struct mtk_atr_cfg *cfg)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ u32 addr, val, size_h, size_l;
+ int atr_size, pos, offset;
+
+ if (cfg->transparent) {
+ /* No address conversion is performed */
+ atr_size = MTK_PCI_TRANSPARENT_ATR_SIZE;
+ } else {
+ if (cfg->size < MTK_PCI_MINIMUM_ATR_SIZE)
+ cfg->size = MTK_PCI_MINIMUM_ATR_SIZE;
+
+ if (cfg->src_addr & (cfg->size - 1)) {
+ dev_err((mdev)->dev, "Invalid atr src addr is not aligned to size\n");
+ return -EFAULT;
+ }
+
+ if (cfg->trsl_addr & (cfg->size - 1)) {
+ dev_err((mdev)->dev,
+ "Invalid atr trsl addr is not aligned to size, %llx, %llx\n",
+ cfg->trsl_addr, cfg->size - 1);
+ return -EFAULT;
+ }
+
+ size_l = FIELD_GET(ATR_SIZE_LO32_MASK, cfg->size);
+ size_h = FIELD_GET(ATR_SIZE_HI32_MASK, cfg->size);
+ pos = ffs(size_l);
+ if (pos) {
+ atr_size = pos - ATR_SIZE_BIAS_FROM_LO32;
+ } else {
+ pos = ffs(size_h);
+ atr_size = pos + 32 - ATR_SIZE_BIAS_FROM_LO32;
+ }
+ }
+
+ /* Calculate table offset */
+ offset = ATR_PORT_OFFSET * cfg->port + ATR_TABLE_OFFSET * cfg->table;
+ addr = REG_ATR_PCIE_WIN0_T0_SRC_ADDR_MSB + offset;
+ val = (u32)(cfg->src_addr >> 32);
+ mtk_pci_mac_write32(priv, addr, val);
+
+ addr = REG_ATR_PCIE_WIN0_T0_SRC_ADDR_LSB + offset;
+ val = (u32)(cfg->src_addr & ATR_ADDR_ALIGN_MASK) | (atr_size << 1) | ATR_EN;
+ mtk_pci_mac_write32(priv, addr, val);
+
+ addr = REG_ATR_PCIE_WIN0_T0_TRSL_ADDR_MSB + offset;
+ val = (u32)(cfg->trsl_addr >> 32);
+ mtk_pci_mac_write32(priv, addr, val);
+
+ addr = REG_ATR_PCIE_WIN0_T0_TRSL_ADDR_LSB + offset;
+ val = (u32)(cfg->trsl_addr & ATR_ADDR_ALIGN_MASK);
+ mtk_pci_mac_write32(priv, addr, val);
+
+ /* TRSL_PARAM */
+ addr = REG_ATR_PCIE_WIN0_T0_TRSL_PARAM + offset;
+ val = (cfg->trsl_param << ATR_PARAM_OFFSET) | cfg->trsl_id;
+ mtk_pci_mac_write32(priv, addr, val);
+
+ return 0;
+}
+
+/**
+ * mtk_pci_atr_disable() - Disable all PCIe address translation rules
+ * @priv: MTK PCI private data
+ */
+void mtk_pci_atr_disable(struct mtk_pci_priv *priv)
+{
+	int port, tbl;
+	u32 reg, val;
+
+	/* Walk every table of every port and clear its enable bit */
+	for (port = ATR_SRC_PCI_WIN0; port <= ATR_SRC_AXIS_3; port++) {
+		for (tbl = 0; tbl < ATR_TABLE_NUM_PER_ATR; tbl++) {
+			/* A table is disabled through bit 0 of SRC_ADDR_LSB */
+			reg = REG_ATR_PCIE_WIN0_T0_SRC_ADDR_LSB +
+			      ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * tbl;
+			val = mtk_pci_mac_read32(priv, reg);
+			mtk_pci_mac_write32(priv, reg, val & ~ATR_EN);
+		}
+	}
+}
+
+static void mtk_pci_set_msix_merged(struct mtk_pci_priv *priv, int irq_cnt)
+{
+ mtk_pci_mac_write32(priv, REG_PCIE_CFG_MSIX, ffs(irq_cnt) * 2 - 1);
+}
+
+/**
+ * mtk_pci_get_dev_state() - Read the device state from the modem
+ * @mdev: MTK MD device
+ *
+ * Return: Device state value.
+ */
+u32 mtk_pci_get_dev_state(struct mtk_md_dev *mdev)
+{
+ return mtk_pci_mac_read32(mdev->hw_priv, REG_PCIE_DEBUG_DUMMY_7);
+}
+
+/**
+ * mtk_pci_ack_dev_state() - Acknowledge the device state to the modem
+ * @mdev: MTK MD device
+ * @state: State value to acknowledge
+ */
+void mtk_pci_ack_dev_state(struct mtk_md_dev *mdev, u32 state)
+{
+ mtk_pci_mac_write32(mdev->hw_priv, REG_PCIE_DEBUG_DUMMY_7, state);
+}
+
+/**
+ * mtk_pci_get_irq_id() - Map an IRQ source to its hardware IRQ ID
+ * @mdev: MTK MD device
+ * @irq_src: IRQ source enum
+ *
+ * Return: IRQ ID on success, -EINVAL on failure.
+ */
+int mtk_pci_get_irq_id(struct mtk_md_dev *mdev, enum mtk_irq_src irq_src)
+{
+	struct mtk_pci_priv *priv = mdev->hw_priv;
+	int id;
+
+	/* Only sources strictly between the MIN and MAX sentinels are valid */
+	if (irq_src <= MTK_IRQ_SRC_MIN || irq_src >= MTK_IRQ_SRC_MAX)
+		return -EINVAL;
+
+	/* A table entry outside [0, MTK_IRQ_CNT_MAX) is rejected as well */
+	id = priv->cfg->irq_tbl[irq_src];
+
+	return (id < 0 || id >= MTK_IRQ_CNT_MAX) ? -EINVAL : id;
+}
+
+/**
+ * mtk_pci_get_virq_id() - Get the Linux virtual IRQ for a hardware IRQ ID
+ * @mdev: MTK MD device
+ * @irq_id: Hardware IRQ ID
+ *
+ * Return: Virtual IRQ number on success, negative error code on failure.
+ */
+int mtk_pci_get_virq_id(struct mtk_md_dev *mdev, int irq_id)
+{
+ struct pci_dev *pdev = to_pci_dev(mdev->dev);
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ if (!priv->irq_cnt || irq_id < 0)
+ return -EINVAL;
+
+ return pci_irq_vector(pdev, irq_id % priv->irq_cnt);
+}
+
+/**
+ * mtk_pci_register_irq() - Register a callback for a hardware IRQ
+ * @mdev: MTK MD device
+ * @irq_id: Hardware IRQ ID
+ * @irq_cb: Callback function
+ * @data: Private data passed to callback
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_register_irq(struct mtk_md_dev *mdev, int irq_id,
+ int (*irq_cb)(int irq_id, void *data), void *data)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ if ((irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX) || !irq_cb)
+ return -EINVAL;
+
+ if (priv->irq_cb_list[irq_id]) {
+ dev_err((mdev)->dev,
+ "Unable to register irq, irq_id=%d, it's already been register by %ps.\n",
+ irq_id, priv->irq_cb_list[irq_id]);
+ return -EFAULT;
+ }
+ priv->irq_cb_list[irq_id] = irq_cb;
+ priv->irq_cb_data[irq_id] = data;
+
+ return 0;
+}
+
+/**
+ * mtk_pci_unregister_irq() - Unregister a hardware IRQ callback
+ * @mdev: MTK MD device
+ * @irq_id: Hardware IRQ ID
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_unregister_irq(struct mtk_md_dev *mdev, int irq_id)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX)
+ return -EINVAL;
+
+ if (!priv->irq_cb_list[irq_id]) {
+ dev_err((mdev)->dev, "irq_id=%d has not been registered\n", irq_id);
+ return -EFAULT;
+ }
+ priv->irq_cb_list[irq_id] = NULL;
+ priv->irq_cb_data[irq_id] = NULL;
+
+ return 0;
+}
+
+/**
+ * mtk_pci_mask_irq() - Mask (disable) a hardware IRQ
+ * @mdev: MTK MD device
+ * @irq_id: Hardware IRQ ID
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_mask_irq(struct mtk_md_dev *mdev, int irq_id)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX ||
+ priv->irq_type != PCI_IRQ_MSIX) {
+ dev_err(mdev->dev, "Failed to mask irq: input irq_id=%d\n", irq_id);
+ return -EINVAL;
+ }
+
+ mtk_pci_mac_write32(priv, REG_IMASK_HOST_MSIX_CLR_GRP0_0, BIT(irq_id));
+
+ return 0;
+}
+
+/**
+ * mtk_pci_unmask_irq() - Unmask (enable) a hardware IRQ
+ * @mdev: MTK MD device
+ * @irq_id: Hardware IRQ ID
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_unmask_irq(struct mtk_md_dev *mdev, int irq_id)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX ||
+ priv->irq_type != PCI_IRQ_MSIX) {
+ dev_err(mdev->dev, "Failed to unmask irq: input irq_id=%d\n", irq_id);
+ return -EINVAL;
+ }
+
+ mtk_pci_mac_write32(priv, REG_IMASK_HOST_MSIX_SET_GRP0_0, BIT(irq_id));
+
+ return 0;
+}
+
+/**
+ * mtk_pci_clear_irq() - Clear (acknowledge) a hardware IRQ
+ * @mdev: MTK MD device
+ * @irq_id: Hardware IRQ ID
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_clear_irq(struct mtk_md_dev *mdev, int irq_id)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ if (irq_id < 0 || irq_id >= MTK_IRQ_CNT_MAX ||
+ priv->irq_type != PCI_IRQ_MSIX) {
+ dev_err(mdev->dev, "Failed to clear irq: input irq_id=%d\n", irq_id);
+ return -EINVAL;
+ }
+
+ mtk_pci_mac_write32(priv, REG_MSIX_ISTATUS_HOST_GRP0_0, BIT(irq_id));
+
+ return 0;
+}
+
+static u32 mtk_pci_ext_d2h_evt_hw_bits(u32 chs)
+{
+ u32 hw_bits = 0;
+
+ SET_HW_BITS(hw_bits, chs, MHCCIF_EP2RC_EVT_BOOT_FLOW_SYNC,
+ DEV_EVT_D2H_BOOT_FLOW_SYNC);
+ SET_HW_BITS(hw_bits, chs, MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_SAP,
+ DEV_EVT_D2H_ASYNC_HS_NOTIFY_SAP);
+ SET_HW_BITS(hw_bits, chs, MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_MD,
+ DEV_EVT_D2H_ASYNC_HS_NOTIFY_MD);
+
+ return LE32_TO_U32(cpu_to_le32(hw_bits));
+}
+
+static u32 mtk_pci_ext_d2h_evt_chs(u32 hw_bits)
+{
+ u32 chs = 0;
+
+ if (!hw_bits)
+ return chs;
+
+ chs = FIELD_PREP(DEV_EVT_D2H_BOOT_FLOW_SYNC,
+ FIELD_GET(MHCCIF_EP2RC_EVT_BOOT_FLOW_SYNC, hw_bits)) |
+ FIELD_PREP(DEV_EVT_D2H_ASYNC_HS_NOTIFY_SAP,
+ FIELD_GET(MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_SAP, hw_bits)) |
+ FIELD_PREP(DEV_EVT_D2H_ASYNC_HS_NOTIFY_MD,
+ FIELD_GET(MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_MD, hw_bits));
+
+ return chs;
+}
+
+/**
+ * mtk_pci_register_ext_evt() - Register a callback for MHCCIF device events
+ * @mdev: MTK MD device
+ * @chs: Bitmask of event channels to register
+ * @evt_cb: Callback function
+ * @data: Private data passed to callback
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_register_ext_evt(struct mtk_md_dev *mdev, u32 chs,
+ int (*evt_cb)(u32 status, void *data), void *data)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ struct mtk_mhccif_cb *cb;
+ int ret = 0;
+
+ if (!chs || !evt_cb)
+ return -EINVAL;
+
+ spin_lock_bh(&priv->mhccif_lock);
+ list_for_each_entry(cb, &priv->mhccif_cb_list, entry) {
+ if (cb->chs & chs) {
+ ret = -EFAULT;
+ dev_err((mdev)->dev,
+ "Unable to register evt, intersection: chs=0x%08x&0x%08x cb=%ps\n",
+ chs, cb->chs, cb->evt_cb);
+ goto err_spin_unlock;
+ }
+ }
+ cb = devm_kzalloc(mdev->dev, sizeof(*cb), GFP_ATOMIC);
+ if (!cb) {
+ ret = -ENOMEM;
+ goto err_spin_unlock;
+ }
+ cb->evt_cb = evt_cb;
+ cb->data = data;
+ cb->chs = chs;
+ list_add_tail(&cb->entry, &priv->mhccif_cb_list);
+err_spin_unlock:
+ spin_unlock_bh(&priv->mhccif_lock);
+
+ return ret;
+}
+
+/**
+ * mtk_pci_unregister_ext_evt() - Unregister an MHCCIF device event callback
+ * @mdev: MTK MD device
+ * @chs: Bitmask of event channels to unregister
+ */
+void mtk_pci_unregister_ext_evt(struct mtk_md_dev *mdev, u32 chs)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ struct mtk_mhccif_cb *cb, *next;
+
+ if (!chs)
+ return;
+
+ spin_lock_bh(&priv->mhccif_lock);
+ list_for_each_entry_safe(cb, next, &priv->mhccif_cb_list, entry) {
+ if (cb->chs == chs) {
+ list_del(&cb->entry);
+ devm_kfree(mdev->dev, cb);
+ goto out;
+ }
+ }
+ dev_warn((mdev)->dev,
+ "Unable to unregister evt, no chs=0x%08x has been registered.\n", chs);
+out:
+ spin_unlock_bh(&priv->mhccif_lock);
+}
+
+/**
+ * mtk_pci_mask_ext_evt() - Mask (disable) MHCCIF device events
+ * @mdev: MTK MD device
+ * @chs: Bitmask of event channels to mask
+ */
+void mtk_pci_mask_ext_evt(struct mtk_md_dev *mdev, u32 chs)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ u32 hw_bits = mtk_pci_ext_d2h_evt_hw_bits(chs);
+
+ mtk_pci_write32(mdev, priv->cfg->mhccif_rc_base_addr +
+ MHCCIF_EP2RC_SW_INT_EAP_MASK_SET, hw_bits);
+}
+
+/**
+ * mtk_pci_unmask_ext_evt() - Unmask (enable) MHCCIF device events
+ * @mdev: MTK MD device
+ * @chs: Bitmask of event channels to unmask
+ */
+void mtk_pci_unmask_ext_evt(struct mtk_md_dev *mdev, u32 chs)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ u32 hw_bits = mtk_pci_ext_d2h_evt_hw_bits(chs);
+
+ mtk_pci_write32(mdev, priv->cfg->mhccif_rc_base_addr +
+ MHCCIF_EP2RC_SW_INT_EAP_MASK_CLR, hw_bits);
+}
+
+/**
+ * mtk_pci_clear_ext_evt() - Clear (acknowledge) MHCCIF device events
+ * @mdev: MTK MD device
+ * @chs: Bitmask of event channels to clear
+ */
+void mtk_pci_clear_ext_evt(struct mtk_md_dev *mdev, u32 chs)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ u32 hw_bits = mtk_pci_ext_d2h_evt_hw_bits(chs);
+
+ mtk_pci_write32(mdev, priv->cfg->mhccif_rc_base_addr +
+ MHCCIF_EP2RC_SW_INT_ACK, hw_bits);
+}
+
+static u32 mtk_pci_ext_h2d_evt_hw_bits(u32 chs)
+{
+ u32 hw_bits = 0;
+
+ SET_HW_BITS(hw_bits, chs, MHCCIF_RC2EP_EVT_DEVICE_RESET,
+ DEV_EVT_H2D_DEVICE_RESET);
+ return LE32_TO_U32(cpu_to_le32(hw_bits));
+}
+
+/**
+ * mtk_pci_send_ext_evt() - Send an MHCCIF event to the modem
+ * @mdev: MTK MD device
+ * @ch: Event channel to trigger (must be a single bit)
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_send_ext_evt(struct mtk_md_dev *mdev, u32 ch)
+{
+	struct mtk_pci_priv *priv = mdev->hw_priv;
+	u32 rc_base = priv->cfg->mhccif_rc_base_addr;
+	u32 hw_bits = mtk_pci_ext_h2d_evt_hw_bits(ch);
+
+	/* Only one channel at a time, and it must map to a hardware bit:
+	 * ffs(0) - 1 must never be written as the channel number.
+	 */
+	if (!is_power_of_2(ch) || !hw_bits) {
+		dev_err((mdev)->dev, "Unsupported ext evt ch=0x%08x\n", ch);
+		return -EINVAL;
+	}
+
+	mtk_pci_write32(mdev, rc_base + MHCCIF_RC2EP_SW_BSY, hw_bits);
+	mtk_pci_write32(mdev, rc_base + MHCCIF_RC2EP_SW_TCHNUM, ffs(hw_bits) - 1);
+	return 0;
+}
+
+static u32 mtk_pci_get_ext_evt_hw_status(struct mtk_md_dev *mdev)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ return mtk_pci_read32(mdev, priv->cfg->mhccif_rc_base_addr +
+ MHCCIF_EP2RC_SW_INT_STS);
+}
+
+/**
+ * mtk_pci_fldr() - Perform a Function Level Device Reset via ACPI _RST
+ * @mdev: MTK MD device
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_fldr(struct mtk_md_dev *mdev)
+{
+#ifdef CONFIG_ACPI
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ acpi_status acpi_ret;
+ acpi_handle handle;
+
+ if (acpi_disabled) {
+ dev_err((mdev)->dev, "Unsupported, acpi function isn't enable\n");
+ return -ENODEV;
+ }
+
+ handle = ACPI_HANDLE(mdev->dev);
+
+ if (!handle) {
+ dev_err((mdev)->dev, "Unsupported, acpi handle isn't found\n");
+ return -ENODEV;
+ }
+
+ if (!acpi_has_method(handle, "_RST")) {
+ dev_err((mdev)->dev, "Unsupported, _RST method isn't found\n");
+ return -ENODEV;
+ }
+
+ acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, &buffer);
+ if (ACPI_FAILURE(acpi_ret)) {
+ dev_err((mdev)->dev, "Failed to execute _RST method: %s\n",
+ acpi_format_exception(acpi_ret));
+ return -EFAULT;
+ }
+
+ acpi_os_free(buffer.pointer);
+
+ return 0;
+#else /* !CONFIG_ACPI */
+ dev_err((mdev)->dev, "Unsupported, CONFIG ACPI hasn't been set to 'y'\n");
+
+ return -ENODEV;
+#endif /* !CONFIG_ACPI */
+}
+
+/**
+ * mtk_pci_pldr() - Perform a PCIe Link Down Reset via ACPI PXP._OFF/_ON
+ * @mdev: MTK MD device
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_pldr(struct mtk_md_dev *mdev)
+{
+#ifdef CONFIG_ACPI
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct pci_dev *bridge;
+ acpi_status acpi_ret;
+ acpi_handle handle;
+
+ if (acpi_disabled) {
+ dev_err((mdev)->dev, "Unsupported, acpi function isn't enable\n");
+ return -ENODEV;
+ }
+
+ bridge = pci_upstream_bridge(to_pci_dev(mdev->dev));
+ if (!bridge) {
+ dev_err((mdev)->dev, "Unable to find bridge\n");
+ return -ENODEV;
+ }
+
+ handle = ACPI_HANDLE(&bridge->dev);
+ if (!handle) {
+ dev_err((mdev)->dev, "Unsupported, acpi handle isn't found\n");
+ return -ENODEV;
+ }
+ if (!acpi_has_method(handle, "PXP._OFF") ||
+ !acpi_has_method(handle, "PXP._ON")) {
+ dev_err((mdev)->dev, "Unsupported, pldr method isn't supported\n");
+ return -ENODEV;
+ }
+ acpi_ret = acpi_evaluate_object(handle, "PXP._OFF", NULL, &buffer);
+ if (ACPI_FAILURE(acpi_ret)) {
+ dev_err((mdev)->dev, "Failed to execute _OFF method: %s\n",
+ acpi_format_exception(acpi_ret));
+ return -EFAULT;
+ }
+ acpi_os_free(buffer.pointer);
+
+ msleep(MTK_PLDR_POWER_OFF_DELAY_MS);
+
+ buffer.length = ACPI_ALLOCATE_BUFFER;
+ buffer.pointer = NULL;
+ acpi_ret = acpi_evaluate_object(handle, "PXP._ON", NULL, &buffer);
+ if (ACPI_FAILURE(acpi_ret)) {
+ dev_err((mdev)->dev, "Failed to execute _ON method: %s\n",
+ acpi_format_exception(acpi_ret));
+ return -EFAULT;
+ }
+ acpi_os_free(buffer.pointer);
+
+ return 0;
+#else
+ dev_err((mdev)->dev, "Unsupported, CONFIG ACPI hasn't been set to 'y'\n");
+
+ return -ENODEV;
+#endif
+}
+
+/**
+ * mtk_pci_get_dev_cfg() - Read the device configuration from the modem
+ * @mdev: MTK MD device
+ *
+ * Return: Device configuration value.
+ */
+u32 mtk_pci_get_dev_cfg(struct mtk_md_dev *mdev)
+{
+ u32 val;
+
+ val = mtk_pci_mac_read32(mdev->hw_priv, REG_PCIE_DEBUG_DUMMY_4);
+ return (val >> MTK_CFG_INFO_BIT_SHIFT);
+}
+
+static int mtk_pci_dev_reset(struct mtk_md_dev *mdev, enum mtk_reset_type type)
+{
+ switch (type) {
+ case RESET_MHCCIF:
+ return mtk_pci_send_ext_evt(mdev, DEV_EVT_H2D_DEVICE_RESET);
+ case RESET_FLDR:
+ return mtk_pci_fldr(mdev);
+ case RESET_PLDR:
+ return mtk_pci_pldr(mdev);
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * mtk_pci_reset() - Reset the modem device
+ * @mdev: MTK MD device
+ * @type: Reset type (MHCCIF, FLDR, or PLDR)
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int mtk_pci_reset(struct mtk_md_dev *mdev, enum mtk_reset_type type)
+{
+ return mtk_pci_dev_reset(mdev, type);
+}
+
+/**
+ * mtk_pci_link_check() - Check if the PCIe link to the modem is active
+ * @mdev: MTK MD device
+ *
+ * Return: true if the device is present, false otherwise.
+ */
+bool mtk_pci_link_check(struct mtk_md_dev *mdev)
+{
+ return pci_device_is_present(to_pci_dev(mdev->dev));
+}
+
+static void mtk_mhccif_isr_work(struct work_struct *work)
+{
+ struct mtk_pci_priv *priv =
+ container_of(work, struct mtk_pci_priv, mhccif_work);
+ struct mtk_md_dev *mdev = priv->irq_desc->mdev;
+ struct mtk_mhccif_cb *cb;
+ u32 stat, mask, chs;
+
+ stat = mtk_pci_get_ext_evt_hw_status(mdev);
+ mask = mtk_pci_read32(mdev, priv->cfg->mhccif_rc_base_addr
+ + MHCCIF_EP2RC_SW_INT_EAP_MASK);
+ if (unlikely(stat == U32_MAX && !(mtk_pci_link_check(mdev)))) {
+ /* When link failed, we don't need to unmask/clear. */
+ dev_err((mdev)->dev, "Failed to check link in MHCCIF handler.\n");
+ return;
+ }
+
+ stat &= ~mask;
+ chs = mtk_pci_ext_d2h_evt_chs(stat);
+ spin_lock_bh(&priv->mhccif_lock);
+ list_for_each_entry(cb, &priv->mhccif_cb_list, entry) {
+ if (cb->chs & chs)
+ cb->evt_cb(cb->chs & chs, cb->data);
+ }
+ spin_unlock_bh(&priv->mhccif_lock);
+
+ mtk_pci_clear_irq(mdev, priv->mhccif_irq_id);
+ mtk_pci_unmask_irq(mdev, priv->mhccif_irq_id);
+}
+
+static const struct pci_device_id t9xx_pci_table[] = {
+ MTK_PCI_DEV_CFG(0x0900, mtk_dev_cfg_0900),
+ CEI_PCI_DEV_CFG(0x01CA, mtk_dev_cfg_0900),
+ {/* end: all zeroes */}
+};
+
+MODULE_DEVICE_TABLE(pci, t9xx_pci_table);
+
+/*
+ * Map the BARs selected by MTK_REQUESTED_BARS and cache the MAC and external
+ * register base addresses in the PCI private data.
+ * Return: 0 on success, negative error code on failure.
+ */
+static int mtk_pci_bar_init(struct mtk_md_dev *mdev)
+{
+	struct pci_dev *pdev = to_pci_dev(mdev->dev);
+	struct mtk_pci_priv *priv = mdev->hw_priv;
+	int ret;
+
+	ret = pcim_iomap_regions(pdev, MTK_REQUESTED_BARS, mdev->dev_str);
+	if (ret) {
+		dev_err((mdev)->dev, "Failed to init MMIO. ret=%d\n", ret);
+		return ret;
+	}
+
+	/* get ioremapped memory */
+	priv->mac_reg_base = pcim_iomap_table(pdev)[MTK_BAR_0_1_IDX];
+	priv->bar23_addr = pcim_iomap_table(pdev)[MTK_BAR_2_3_IDX];
+	if (!priv->mac_reg_base || !priv->bar23_addr) {
+		dev_err((mdev)->dev, "Failed to init BAR.\n");
+		return -EINVAL;
+	}
+	/* We use MD view base address "0" to observe registers */
+	priv->ext_reg_base = priv->bar23_addr - ATR_PCIE_REG_TRSL_ADDR;
+
+	return 0;
+}
+
+static int mtk_mhccif_irq_cb(int irq_id, void *data)
+{
+	struct mtk_md_dev *mdev = data;
+	struct mtk_pci_priv *priv = mdev->hw_priv;
+
+	/* Hand the MHCCIF event over to the mhccif work item */
+	queue_work(system_highpri_wq, &priv->mhccif_work);
+
+	return 0;
+}
+
+static int mtk_mhccif_init(struct mtk_md_dev *mdev)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ int ret;
+
+ INIT_LIST_HEAD(&priv->mhccif_cb_list);
+ spin_lock_init(&priv->mhccif_lock);
+ INIT_WORK(&priv->mhccif_work, mtk_mhccif_isr_work);
+
+ ret = mtk_pci_get_irq_id(mdev, MTK_IRQ_SRC_MHCCIF);
+ if (ret < 0) {
+ dev_err((mdev)->dev, "Failed to get mhccif_irq_id. ret=%d\n", ret);
+ return ret;
+ }
+ priv->mhccif_irq_id = ret;
+
+ ret = mtk_pci_register_irq(mdev, priv->mhccif_irq_id, mtk_mhccif_irq_cb, mdev);
+ if (ret) {
+ dev_err((mdev)->dev, "Failed to register mhccif_irq callback\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mtk_mhccif_exit(struct mtk_md_dev *mdev)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+
+ mtk_pci_unregister_irq(mdev, priv->mhccif_irq_id);
+ cancel_work_sync(&priv->mhccif_work);
+}
+
+static irqreturn_t mtk_pci_irq_handler(struct mtk_md_dev *mdev, u32 irq_state)
+{
+	struct mtk_pci_priv *priv = mdev->hw_priv;
+	int irq_id;
+
+	/* Dispatch set bits from MSB to LSB; fls(0) - 1 would be an invalid id */
+	while (irq_state) {
+		irq_id = fls(irq_state) - 1;
+		irq_state &= ~BIT(irq_id);
+		if (likely(priv->irq_cb_list[irq_id]))
+			priv->irq_cb_list[irq_id](irq_id, priv->irq_cb_data[irq_id]);
+		else
+			dev_err((mdev)->dev, "Unhandled irq_id=%d, no callback for it.\n", irq_id);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mtk_pci_irq_msix(int irq, void *data)
+{
+ struct mtk_pci_irq_desc *irq_desc = data;
+ struct mtk_md_dev *mdev = irq_desc->mdev;
+ struct mtk_pci_priv *priv;
+ u32 irq_state, irq_enable;
+
+ priv = mdev->hw_priv;
+ irq_state = mtk_pci_mac_read32(priv, REG_MSIX_ISTATUS_HOST_GRP0_0);
+ irq_enable = mtk_pci_mac_read32(priv, REG_IMASK_HOST_MSIX_GRP0_0);
+ irq_state &= irq_enable;
+
+ if (unlikely(!irq_state) ||
+ unlikely(!((irq_state & GENMASK(priv->irq_cnt - 1, 0)) &
+ irq_desc->msix_bits)))
+ return IRQ_NONE;
+
+ /* Mask the bit and user needs to unmask by itself */
+ mtk_pci_mac_write32(priv, REG_IMASK_HOST_MSIX_CLR_GRP0_0,
+ irq_state & ~BIT(30));
+
+ return mtk_pci_irq_handler(mdev, irq_state);
+}
+
+static int mtk_pci_request_irq_msix(struct mtk_md_dev *mdev,
+ int irq_cnt_allocated)
+{
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ struct mtk_pci_irq_desc *irq_desc;
+ struct pci_dev *pdev;
+ int irq_cnt;
+ int ret, i;
+
+ /* calculate the nearest 2's power number */
+ irq_cnt = BIT(fls(irq_cnt_allocated) - 1);
+ pdev = to_pci_dev(mdev->dev);
+ irq_desc = priv->irq_desc;
+ for (i = 0; i < irq_cnt; i++) {
+ irq_desc[i].mdev = mdev;
+ irq_desc[i].msix_bits = BIT(i);
+ snprintf(irq_desc[i].name, MTK_IRQ_NAME_LEN, "msix%d-%s", i, mdev->dev_str);
+ ret = pci_request_irq(pdev, i, mtk_pci_irq_msix, NULL,
+ &irq_desc[i], irq_desc[i].name);
+ if (ret) {
+ dev_err((mdev)->dev, "Failed to request %s: ret=%d\n",
+ irq_desc[i].name, ret);
+ for (i--; i >= 0; i--)
+ pci_free_irq(pdev, i, &irq_desc[i]);
+ return ret;
+ }
+ }
+ priv->irq_cnt = irq_cnt;
+ priv->irq_type = PCI_IRQ_MSIX;
+
+ if (irq_cnt != MTK_IRQ_CNT_MAX)
+ mtk_pci_set_msix_merged(priv, irq_cnt);
+
+ return 0;
+}
+
+static int mtk_pci_request_irq(struct mtk_md_dev *mdev)
+{
+	struct pci_dev *pdev = to_pci_dev(mdev->dev);
+	int irq_cnt, ret;
+
+	irq_cnt = pci_alloc_irq_vectors(pdev, MTK_IRQ_CNT_MIN,
+					MTK_IRQ_CNT_MAX, PCI_IRQ_MSIX);
+	/* A failed allocation returns a negative errno: report it unchanged */
+	if (irq_cnt < MTK_IRQ_CNT_MIN) {
+		dev_err(mdev->dev,
+			"Unable to alloc pci irq vectors. ret=%d maxirqcnt=%d irqtype=0x%x\n",
+			irq_cnt, MTK_IRQ_CNT_MAX, PCI_IRQ_MSIX);
+		return irq_cnt < 0 ? irq_cnt : -ENOSPC;
+	}
+
+	ret = mtk_pci_request_irq_msix(mdev, irq_cnt);
+	if (ret)
+		pci_free_irq_vectors(pdev);
+
+	return ret;
+}
+
+static void mtk_pci_free_irq(struct mtk_md_dev *mdev)
+{
+ struct pci_dev *pdev = to_pci_dev(mdev->dev);
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ int i;
+
+ for (i = 0; i < priv->irq_cnt; i++)
+ pci_free_irq(pdev, i, &priv->irq_desc[i]);
+
+ pci_free_irq_vectors(pdev);
+}
+
+static const struct mtk_dev_ops pci_hw_ops = {
+ .get_dev_state = mtk_pci_get_dev_state,
+ .ack_dev_state = mtk_pci_ack_dev_state,
+ .get_dev_cfg = mtk_pci_get_dev_cfg,
+ .register_dev_evt = mtk_pci_register_ext_evt,
+ .unregister_dev_evt = mtk_pci_unregister_ext_evt,
+ .mask_dev_evt = mtk_pci_mask_ext_evt,
+ .unmask_dev_evt = mtk_pci_unmask_ext_evt,
+ .clear_dev_evt = mtk_pci_clear_ext_evt,
+ .send_dev_evt = mtk_pci_send_ext_evt,
+};
+
+static int mtk_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct mtk_pci_priv *priv;
+ struct mtk_md_dev *mdev;
+ int ret;
+
+ mdev = devm_kzalloc(dev, sizeof(*mdev), GFP_KERNEL);
+ if (!mdev) {
+ ret = -ENOMEM;
+ goto log_err;
+ }
+ mdev->dev_ops = &pci_hw_ops;
+ mdev->dev = dev;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto free_cntx_data;
+ }
+
+ pci_set_drvdata(pdev, mdev);
+ priv->cfg = (void *)id->driver_data;
+ priv->mdev = mdev;
+ mdev->hw_ver = pdev->device;
+ mdev->hw_priv = priv;
+ mdev->dev = dev;
+ snprintf(mdev->dev_str, MTK_DEV_STR_LEN, "%02x%02x%d",
+ pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ if (pdev->state_saved)
+ pci_restore_state(pdev);
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ dev_err((mdev)->dev, "Failed to enable pci device.\n");
+ goto free_priv_data;
+ }
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ dev_err((mdev)->dev, "Failed to set DMA Mask and Coherent. (ret=%d)\n", ret);
+ goto free_priv_data;
+ }
+
+ ret = mtk_pci_bar_init(mdev);
+ if (ret)
+ goto free_priv_data;
+
+ ret = priv->cfg->atr_init(mdev);
+ if (ret)
+ goto free_priv_data;
+
+ ret = mtk_mhccif_init(mdev);
+ if (ret)
+ goto free_priv_data;
+
+ /* mask all irqs */
+ if (priv->cfg->flag & MTK_CFG_IRQ_DFLT_MASK)
+ mtk_pci_mac_write32(priv, REG_IMASK_HOST_MSIX_CLR_GRP0_0, U32_MAX);
+
+ ret = mtk_pci_request_irq(mdev);
+ if (ret)
+ goto free_mhccif;
+
+ pci_set_master(pdev);
+ mtk_pci_unmask_irq(mdev, priv->mhccif_irq_id);
+
+ if (mtk_pci_link_check(mdev)) {
+ pci_save_state(pdev);
+ } else {
+ ret = -ENOLINK;
+ goto clear_master;
+ }
+
+ priv->saved_state = pci_store_saved_state(pdev);
+ if (!priv->saved_state) {
+ ret = -EFAULT;
+ goto clear_master;
+ }
+
+ return 0;
+
+clear_master:
+ pci_clear_master(pdev);
+ mtk_pci_free_irq(mdev);
+free_mhccif:
+ mtk_mhccif_exit(mdev);
+free_priv_data:
+ devm_kfree(dev, priv);
+free_cntx_data:
+ devm_kfree(dev, mdev);
+log_err:
+ dev_err(dev, "Failed to probe device, ret=%d\n", ret);
+
+ return ret;
+}
+
+static void mtk_pci_remove(struct pci_dev *pdev)
+{
+ struct mtk_md_dev *mdev = pci_get_drvdata(pdev);
+ struct mtk_pci_priv *priv = mdev->hw_priv;
+ struct device *dev = &pdev->dev;
+
+ mtk_pci_mask_irq(mdev, priv->mhccif_irq_id);
+
+ if (mtk_pci_pldr(mdev)) {
+ dev_warn(dev, "Failed to execute PLDR, try external event\n");
+ mtk_pci_reset(mdev, RESET_MHCCIF);
+ }
+
+ pci_clear_master(pdev);
+ mtk_pci_free_irq(mdev);
+ mtk_mhccif_exit(mdev);
+ pci_load_and_free_saved_state(pdev, &priv->saved_state);
+
+ devm_kfree(dev, priv);
+ devm_kfree(dev, mdev);
+}
+
+static pci_ers_result_t mtk_pci_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct mtk_md_dev *mdev = pci_get_drvdata(pdev);
+
+ dev_err((mdev)->dev, "AER detected: pci_channel_state_t=%d\n", state);
+
+ /* NOTE(review): CAN_RECOVER does not request a slot reset (NEED_RESET does); confirm intent. */
+ return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static const struct pci_error_handlers mtk_pci_err_handler = {
+ .error_detected = mtk_pci_error_detected,
+};
+
+static struct pci_driver mtk_pci_drv = {
+ .name = "mtk_pci_drv",
+ .id_table = t9xx_pci_table,
+ .probe = mtk_pci_probe,
+ .remove = mtk_pci_remove,
+ .err_handler = &mtk_pci_err_handler
+};
+
+module_pci_driver(mtk_pci_drv);
+
+MODULE_DESCRIPTION("MediaTek T9xx PCIe WWAN driver pcie layer");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/wwan/t9xx/pcie/mtk_pci.h b/drivers/net/wwan/t9xx/pcie/mtk_pci.h
new file mode 100644
index 000000000000..9819a1b07c1b
--- /dev/null
+++ b/drivers/net/wwan/t9xx/pcie/mtk_pci.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+
+#ifndef __MTK_PCI_H__
+#define __MTK_PCI_H__
+
+#include <linux/pci.h>
+
+#include "../mtk_dev.h"
+
+/*
+ * Interrupt sources. The values index mtk_pci_dev_cfg.irq_tbl[], which is
+ * sized by MTK_IRQ_SRC_MAX, and are the irq_src argument of
+ * mtk_pci_get_irq_id().
+ */
+enum mtk_irq_src {
+ MTK_IRQ_SRC_MIN,
+ MTK_IRQ_SRC_MHCCIF,
+ MTK_IRQ_SRC_DPMAIF,
+ MTK_IRQ_SRC_DPMAIF2,
+ MTK_IRQ_SRC_CLDMA0,
+ MTK_IRQ_SRC_CLDMA1,
+ MTK_IRQ_SRC_CLDMA2,
+ MTK_IRQ_SRC_CLDMA3,
+ MTK_IRQ_SRC_PM_LOCK,
+ MTK_IRQ_SRC_DPMAIF3,
+ MTK_IRQ_SRC_DPMAIF6,
+ MTK_IRQ_SRC_MAX
+};
+
+/* Reset kinds accepted by mtk_pci_reset(). */
+enum mtk_reset_type {
+ RESET_FLDR,
+ RESET_PLDR,
+ RESET_MHCCIF,
+};
+
+/* Translation direction, stored in mtk_atr_cfg.type. */
+enum mtk_atr_type {
+ ATR_PCI2AXI = 0,
+ ATR_AXI2PCI,
+};
+
+/* ATR source ports, stored in mtk_atr_cfg.port. */
+enum mtk_atr_src_port {
+ ATR_SRC_PCI_WIN0 = 0,
+ ATR_SRC_PCI_WIN1,
+ ATR_SRC_AXIS_0,
+ ATR_SRC_AXIS_1,
+ ATR_SRC_AXIS_2,
+ ATR_SRC_AXIS_3,
+};
+
+/* ATR destination ports, stored in mtk_atr_cfg.trsl_id. */
+enum mtk_atr_dst_port {
+ ATR_DST_PCI_TRX = 0,
+ ATR_DST_AXIM_0 = 4,
+ ATR_DST_AXIM_1,
+ ATR_DST_AXIM_2,
+ ATR_DST_AXIM_3,
+};
+
+/*
+ * PCIe-layer H2D events that extend the common DEV_EVT_H2D_* set: each
+ * value is DEV_EVT_H2D_EXTEND_BASE (== DEV_EVT_H2D_MAX) shifted left by a
+ * fixed amount.
+ * NOTE(review): shift 10 is not assigned here - confirm it is reserved on
+ * purpose.
+ */
+enum mtk_pci_evt_h2d {
+ DEV_EVT_H2D_EXTEND_BASE = DEV_EVT_H2D_MAX,
+ EXT_EVT_H2D_RESERVED_FOR_CLDMA0 = DEV_EVT_H2D_EXTEND_BASE << 1,
+ EXT_EVT_H2D_RESERVED_FOR_CLDMA1 = DEV_EVT_H2D_EXTEND_BASE << 2,
+ EXT_EVT_H2D_RESERVED_FOR_CLDMA3 = DEV_EVT_H2D_EXTEND_BASE << 3,
+ EXT_EVT_H2D_RESERVED_FOR_CLDMA2 = DEV_EVT_H2D_EXTEND_BASE << 4,
+ EXT_EVT_H2D_RESERVED_FOR_DPMAIF = DEV_EVT_H2D_EXTEND_BASE << 5,
+ EXT_EVT_H2D_PCIE_PM_SUSPEND_REQ = DEV_EVT_H2D_EXTEND_BASE << 6,
+ EXT_EVT_H2D_PCIE_PM_RESUME_REQ = DEV_EVT_H2D_EXTEND_BASE << 7,
+ EXT_EVT_H2D_PCIE_PM_SUSPEND_REQ_AP = DEV_EVT_H2D_EXTEND_BASE << 8,
+ EXT_EVT_H2D_PCIE_PM_RESUME_REQ_AP = DEV_EVT_H2D_EXTEND_BASE << 9,
+ EXT_EVT_H2D_RESERVED_FOR_TEST = DEV_EVT_H2D_EXTEND_BASE << 11,
+};
+
+/*
+ * PCIe-layer D2H events that extend the common DEV_EVT_D2H_* set: each
+ * value is DEV_EVT_D2H_EXTEND_BASE (== DEV_EVT_D2H_MAX) shifted left by a
+ * fixed amount (1 through 13).
+ */
+enum mtk_pci_evt_d2h {
+ DEV_EVT_D2H_EXTEND_BASE = DEV_EVT_D2H_MAX,
+ EXT_EVT_D2H_RESERVED_FOR_CLDMA0 = DEV_EVT_D2H_EXTEND_BASE << 1,
+ EXT_EVT_D2H_RESERVED_FOR_CLDMA1 = DEV_EVT_D2H_EXTEND_BASE << 2,
+ EXT_EVT_D2H_RESERVED_FOR_CLDMA3 = DEV_EVT_D2H_EXTEND_BASE << 3,
+ EXT_EVT_D2H_RESERVED_FOR_CLDMA2 = DEV_EVT_D2H_EXTEND_BASE << 4,
+ EXT_EVT_D2H_RESERVED_FOR_DPMAIF = DEV_EVT_D2H_EXTEND_BASE << 5,
+ EXT_EVT_D2H_PCIE_PM_SUSPEND_ACK = DEV_EVT_D2H_EXTEND_BASE << 6,
+ EXT_EVT_D2H_PCIE_PM_RESUME_ACK = DEV_EVT_D2H_EXTEND_BASE << 7,
+ EXT_EVT_D2H_PCIE_PM_SUSPEND_ACK_AP = DEV_EVT_D2H_EXTEND_BASE << 8,
+ EXT_EVT_D2H_PCIE_PM_RESUME_ACK_AP = DEV_EVT_D2H_EXTEND_BASE << 9,
+ EXT_EVT_D2H_SOFT_OFF_NOTIFY = DEV_EVT_D2H_EXTEND_BASE << 10,
+ EXT_EVT_D2H_FRC_DONE_NOTIFY = DEV_EVT_D2H_EXTEND_BASE << 11,
+ EXT_EVT_D2H_RESERVED_FOR_TEST1 = DEV_EVT_D2H_EXTEND_BASE << 12,
+ EXT_EVT_D2H_RESERVED_FOR_TEST2 = DEV_EVT_D2H_EXTEND_BASE << 13,
+};
+
+#define MTK_PCI_CLASS 0x0D4000
+#define MTK_PCI_VENDOR_ID 0x14C3
+#define CEI_PCI_VENDOR_ID 0x03F0
+
+#define MTK_CFG_INFO_BIT_SHIFT 4
+
+/*
+ * ID-table entry for MediaTek device @id. The entry matches on
+ * MTK_PCI_CLASS and carries the address of @cfg in driver_data.
+ */
+#define MTK_PCI_DEV_CFG(id, cfg)				\
+{								\
+	PCI_DEVICE(MTK_PCI_VENDOR_ID, id),			\
+	.class = MTK_PCI_CLASS,					\
+	.class_mask = PCI_ANY_ID,				\
+	.driver_data = (kernel_ulong_t)&(cfg),			\
+}
+
+/*
+ * ID-table entry for device @id under CEI_PCI_VENDOR_ID. The entry matches
+ * on MTK_PCI_CLASS and carries the address of @cfg in driver_data.
+ */
+#define CEI_PCI_DEV_CFG(id, cfg)				\
+{								\
+	PCI_DEVICE(CEI_PCI_VENDOR_ID, id),			\
+	.class = MTK_PCI_CLASS,					\
+	.class_mask = PCI_ANY_ID,				\
+	.driver_data = (kernel_ulong_t)&(cfg),			\
+}
+
+#define MTK_CFG_IRQ_DFLT_MASK BIT(0)
+#define MTK_CFG_DISABLE_AP_DRM BIT(2)
+#define MTK_CFG_PM_SW_IRQ BIT(6)
+
+#define MTK_BAR_0_1_IDX 0
+#define MTK_BAR_2_3_IDX 2
+
+#define MTK_REQUESTED_BARS \
+ ((1 << MTK_BAR_0_1_IDX) | \
+ (1 << MTK_BAR_2_3_IDX))
+
+#define MTK_IRQ_CNT_MIN 1
+#define MTK_IRQ_CNT_MAX 32
+#define MTK_IRQ_NAME_LEN 32
+
+#define ATR_PORT_OFFSET 0x100
+#define ATR_TABLE_OFFSET 0x20
+#define ATR_TABLE_NUM_PER_ATR 8
+#define ATR_PCIE_REG_TRSL_ADDR 0x10000000
+#define ATR_PCIE_REG_SIZE 0x00400000
+#define ATR_PCIE_REG_PORT ATR_SRC_PCI_WIN0
+#define ATR_PCIE_REG_TABLE_NUM 1
+#define ATR_PCIE_REG_TRSL_PORT ATR_DST_AXIM_0
+#define ATR_PCIE_DEV_DMA_SRC_ADDR 0x00000000
+#define ATR_PCIE_DEV_DMA_TRANSPARENT 1
+#define ATR_PCIE_DEV_DMA_SIZE 0
+#define ATR_PCIE_DEV_DMA_TABLE_NUM 0
+#define ATR_PCIE_DEV_DMA_TRSL_ADDR 0x00000000
+
+/*
+ * Per-interrupt descriptor; struct mtk_pci_priv holds MTK_IRQ_CNT_MAX of
+ * these. @name is a MTK_IRQ_NAME_LEN-byte buffer.
+ */
+struct mtk_pci_irq_desc {
+ struct mtk_md_dev *mdev;
+ u32 msix_bits;
+ char name[MTK_IRQ_NAME_LEN];
+};
+
+/*
+ * Per-chip configuration. An instance is attached to a PCI ID-table entry
+ * through driver_data (see MTK_PCI_DEV_CFG()/CEI_PCI_DEV_CFG()).
+ *
+ * @flag: MTK_CFG_* bits.
+ * @irq_tbl: one entry per enum mtk_irq_src value.
+ * @atr_init: chip-specific ATR setup hook; returns an int status.
+ */
+struct mtk_pci_dev_cfg {
+ u32 flag;
+ u32 mhccif_rc_base_addr;
+ u32 istatus_host_ctrl_addr;
+ int irq_tbl[MTK_IRQ_SRC_MAX];
+ int (*atr_init)(struct mtk_md_dev *mdev);
+};
+
+extern const struct mtk_pci_dev_cfg mtk_dev_cfg_0900;
+
+/*
+ * PCIe-layer private data, reached through mtk_md_dev.hw_priv.
+ *
+ * @cfg: per-chip configuration.
+ * @mac_reg_base: base used by mtk_pci_mac_read32()/mtk_pci_mac_write32().
+ * @ext_reg_base: base used by mtk_pci_read32()/mtk_pci_write32().
+ * @irq_cb_list, @irq_cb_data: callback and callback argument slots, up to
+ *	MTK_IRQ_CNT_MAX of each (presumably indexed by irq_id as passed to
+ *	mtk_pci_register_irq() - TODO confirm).
+ * @mhccif_irq_id: interrupt id masked first in mtk_pci_remove().
+ * @saved_state: PCI state obtained from pci_store_saved_state() in probe
+ *	and released with pci_load_and_free_saved_state() in remove.
+ */
+struct mtk_pci_priv {
+ struct mtk_md_dev *mdev;
+ const struct mtk_pci_dev_cfg *cfg;
+ void __iomem *bar23_addr;
+ void __iomem *mac_reg_base;
+ void __iomem *ext_reg_base;
+ int irq_cnt;
+ int irq_type;
+ void *irq_cb_data[MTK_IRQ_CNT_MAX];
+
+ int (*irq_cb_list[MTK_IRQ_CNT_MAX])(int irq_id, void *data);
+ struct mtk_pci_irq_desc irq_desc[MTK_IRQ_CNT_MAX];
+ struct list_head mhccif_cb_list;
+ /* mhccif_lock: lock to protect mhccif_cb_list */
+ spinlock_t mhccif_lock;
+ struct work_struct mhccif_work;
+ int mhccif_irq_id;
+ struct pci_saved_state *saved_state;
+};
+
+/*
+ * Parameters for one address-translation (ATR) table entry, passed to
+ * mtk_pci_setup_atr(). @type takes an enum mtk_atr_type value, @port an
+ * enum mtk_atr_src_port value and @trsl_id an enum mtk_atr_dst_port value.
+ */
+struct mtk_atr_cfg {
+ u64 src_addr;
+ u64 trsl_addr;
+ u64 size;
+ u32 type; /* Port type */
+ u32 port; /* Port number */
+ u32 table; /* Table number (8 tables for each port) */
+ u32 trsl_id;
+ u32 trsl_param;
+ u32 transparent;
+};
+
+/* BAR 0/1 MMIO access */
+/* Read the 32-bit register at offset @addr from priv->mac_reg_base. */
+static inline u32 mtk_pci_mac_read32(struct mtk_pci_priv *priv, u64 addr)
+{
+	void __iomem *reg = priv->mac_reg_base + addr;
+
+	return ioread32(reg);
+}
+
+/* Write @val to the 32-bit register at offset @addr from priv->mac_reg_base. */
+static inline void mtk_pci_mac_write32(struct mtk_pci_priv *priv, u64 addr, u32 val)
+{
+	void __iomem *reg = priv->mac_reg_base + addr;
+
+	iowrite32(val, reg);
+}
+
+/* BAR 2/3 MMIO access */
+/* Read the 32-bit register at offset @addr from the ext_reg_base of @mdev. */
+static inline u32 mtk_pci_read32(struct mtk_md_dev *mdev, u64 addr)
+{
+	struct mtk_pci_priv *priv = mdev->hw_priv;
+
+	return ioread32(priv->ext_reg_base + addr);
+}
+
+/* Write @val to the 32-bit register at offset @addr from the ext_reg_base of @mdev. */
+static inline void mtk_pci_write32(struct mtk_md_dev *mdev, u64 addr, u32 val)
+{
+	struct mtk_pci_priv *priv = mdev->hw_priv;
+
+	iowrite32(val, priv->ext_reg_base + addr);
+}
+
+/* Device operations */
+u32 mtk_pci_get_dev_state(struct mtk_md_dev *mdev);
+void mtk_pci_ack_dev_state(struct mtk_md_dev *mdev, u32 state);
+u32 mtk_pci_get_dev_cfg(struct mtk_md_dev *mdev);
+/* IRQ Related operations */
+int mtk_pci_get_irq_id(struct mtk_md_dev *mdev, enum mtk_irq_src irq_src);
+int mtk_pci_get_virq_id(struct mtk_md_dev *mdev, int irq_id);
+int mtk_pci_register_irq(struct mtk_md_dev *mdev, int irq_id,
+ int (*irq_cb)(int irq_id, void *data), void *data);
+int mtk_pci_unregister_irq(struct mtk_md_dev *mdev, int irq_id);
+int mtk_pci_mask_irq(struct mtk_md_dev *mdev, int irq_id);
+int mtk_pci_unmask_irq(struct mtk_md_dev *mdev, int irq_id);
+int mtk_pci_clear_irq(struct mtk_md_dev *mdev, int irq_id);
+/* External event related */
+int mtk_pci_register_ext_evt(struct mtk_md_dev *mdev, u32 chs,
+ int (*evt_cb)(u32 status, void *data), void *data);
+void mtk_pci_unregister_ext_evt(struct mtk_md_dev *mdev, u32 chs);
+void mtk_pci_mask_ext_evt(struct mtk_md_dev *mdev, u32 chs);
+void mtk_pci_unmask_ext_evt(struct mtk_md_dev *mdev, u32 chs);
+void mtk_pci_clear_ext_evt(struct mtk_md_dev *mdev, u32 chs);
+int mtk_pci_send_ext_evt(struct mtk_md_dev *mdev, u32 ch);
+int mtk_pci_fldr(struct mtk_md_dev *mdev);
+int mtk_pci_pldr(struct mtk_md_dev *mdev);
+int mtk_pci_reset(struct mtk_md_dev *mdev, enum mtk_reset_type type);
+bool mtk_pci_link_check(struct mtk_md_dev *mdev);
+int mtk_pci_setup_atr(struct mtk_md_dev *mdev, struct mtk_atr_cfg *cfg);
+void mtk_pci_atr_disable(struct mtk_pci_priv *priv);
+
+#endif /* __MTK_PCI_H__ */
diff --git a/drivers/net/wwan/t9xx/pcie/mtk_pci_drv_m9xx.c b/drivers/net/wwan/t9xx/pcie/mtk_pci_drv_m9xx.c
new file mode 100644
index 000000000000..88b44142afb7
--- /dev/null
+++ b/drivers/net/wwan/t9xx/pcie/mtk_pci_drv_m9xx.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+#include <linux/types.h>
+#include "mtk_pci.h"
+#include "mtk_pci_reg.h"
+
+/*
+ * Program the ATR tables: first disable ATR, then set up one PCI-to-AXI
+ * entry whose source is the start of BAR 2/3, followed by one transparent
+ * AXI-to-PCI entry for each port from ATR_SRC_AXIS_0 to ATR_SRC_AXIS_3.
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * mtk_pci_setup_atr().
+ */
+static int mtk_pci_atr_init_m9xx(struct mtk_md_dev *mdev)
+{
+	struct pci_dev *pdev = to_pci_dev(mdev->dev);
+	struct mtk_atr_cfg cfg;
+	int port, ret;
+
+	mtk_pci_atr_disable(mdev->hw_priv);
+
+	/* Config ATR for RC to access device's register */
+	cfg = (struct mtk_atr_cfg) {
+		.src_addr	= pci_resource_start(pdev, MTK_BAR_2_3_IDX),
+		.trsl_addr	= ATR_PCIE_REG_TRSL_ADDR,
+		.size		= ATR_PCIE_REG_SIZE,
+		.type		= ATR_PCI2AXI,
+		.port		= ATR_PCIE_REG_PORT,
+		.table		= ATR_PCIE_REG_TABLE_NUM,
+		.trsl_id	= ATR_PCIE_REG_TRSL_PORT,
+		.trsl_param	= 0x0,
+		.transparent	= 0x0,
+	};
+	ret = mtk_pci_setup_atr(mdev, &cfg);
+	if (ret)
+		return ret;
+
+	/* Config ATR for EP to access RC's memory */
+	for (port = ATR_SRC_AXIS_0; port <= ATR_SRC_AXIS_3; port++) {
+		/* Rebuilt on every pass: setup takes a non-const pointer. */
+		cfg = (struct mtk_atr_cfg) {
+			.src_addr	= ATR_PCIE_DEV_DMA_SRC_ADDR,
+			.trsl_addr	= ATR_PCIE_DEV_DMA_TRSL_ADDR,
+			.size		= ATR_PCIE_DEV_DMA_SIZE,
+			.type		= ATR_AXI2PCI,
+			.port		= port,
+			.table		= ATR_PCIE_DEV_DMA_TABLE_NUM,
+			.trsl_id	= ATR_DST_PCI_TRX,
+			.trsl_param	= 0x0,
+			/* Enable transparent translation */
+			.transparent	= ATR_PCIE_DEV_DMA_TRANSPARENT,
+		};
+		ret = mtk_pci_setup_atr(mdev, &cfg);
+		if (ret)
+			break;
+	}
+
+	/* Zero here means every entry above was accepted. */
+	return ret;
+}
+
+/*
+ * Chip configuration exported through mtk_pci.h. The irq_tbl entries are
+ * listed in enum mtk_irq_src order; MTK_IRQ_SRC_MIN is left at its
+ * implicit zero.
+ */
+const struct mtk_pci_dev_cfg mtk_dev_cfg_0900 = {
+	.flag			= MTK_CFG_PM_SW_IRQ,
+	.mhccif_rc_base_addr	= 0x1000A000,
+	.istatus_host_ctrl_addr	= REG_ISTATUS_HOST_CTRL_NEW,
+	.irq_tbl = {
+		[MTK_IRQ_SRC_MHCCIF]	= 28,
+		[MTK_IRQ_SRC_DPMAIF]	= 24,
+		[MTK_IRQ_SRC_DPMAIF2]	= 29,
+		[MTK_IRQ_SRC_CLDMA0]	= 27,
+		[MTK_IRQ_SRC_CLDMA1]	= 26,
+		[MTK_IRQ_SRC_CLDMA2]	= 25,
+		[MTK_IRQ_SRC_CLDMA3]	= 31,
+		[MTK_IRQ_SRC_PM_LOCK]	= 0,
+		[MTK_IRQ_SRC_DPMAIF3]	= 7,
+		[MTK_IRQ_SRC_DPMAIF6]	= 10,
+	},
+	.atr_init		= mtk_pci_atr_init_m9xx,
+};
diff --git a/drivers/net/wwan/t9xx/pcie/mtk_pci_reg.h b/drivers/net/wwan/t9xx/pcie/mtk_pci_reg.h
new file mode 100644
index 000000000000..3f0667e8a846
--- /dev/null
+++ b/drivers/net/wwan/t9xx/pcie/mtk_pci_reg.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+
+#ifndef __MTK_PCI_REG_H__
+#define __MTK_PCI_REG_H__
+
+#define REG_ISTATUS_HOST_CTRL_NEW 0x031C
+#define REG_PCIE_MISC_CTRL 0x0348
+#define REG_PCIE_CFG_MSIX 0x03EC
+#define REG_ATR_PCIE_WIN0_T0_SRC_ADDR_LSB 0x0600
+#define REG_ATR_PCIE_WIN0_T0_SRC_ADDR_MSB 0x0604
+#define REG_ATR_PCIE_WIN0_T0_TRSL_ADDR_LSB 0x0608
+#define REG_ATR_PCIE_WIN0_T0_TRSL_ADDR_MSB 0x060C
+#define REG_ATR_PCIE_WIN0_T0_TRSL_PARAM 0x0610
+#define REG_PCIE_DEBUG_DUMMY_3 0x0D0C
+#define REG_PCIE_DEBUG_DUMMY_4 0x0D10
+#define REG_PCIE_DEBUG_DUMMY_7 0x0D1C
+#define REG_MSIX_ISTATUS_HOST_GRP0_0 0x0F00
+#define REG_IMASK_HOST_MSIX_SET_GRP0_0 0x3000
+#define REG_IMASK_HOST_MSIX_CLR_GRP0_0 0x3080
+#define REG_IMASK_HOST_MSIX_GRP0_0 0x3100
+
+/* mhccif registers */
+#define MHCCIF_RC2EP_SW_BSY 0x4
+#define MHCCIF_RC2EP_SW_TCHNUM 0xC
+#define MHCCIF_RC2EP_EVT_RESERVED_FOR_CLDMA0 BIT(4)
+#define MHCCIF_RC2EP_EVT_RESERVED_FOR_CLDMA1 BIT(5)
+#define MHCCIF_RC2EP_EVT_RESERVED_FOR_CLDMA3 BIT(6)
+#define MHCCIF_RC2EP_EVT_RESERVED_FOR_CLDMA2 BIT(7)
+#define MHCCIF_RC2EP_EVT_RESERVED_FOR_DPMAIF BIT(8)
+#define MHCCIF_RC2EP_EVT_PCIE_PM_SUSPEND_REQ BIT(9)
+#define MHCCIF_RC2EP_EVT_PCIE_PM_RESUME_REQ BIT(10)
+#define MHCCIF_RC2EP_EVT_PCIE_PM_SUSPEND_REQ_AP BIT(11)
+#define MHCCIF_RC2EP_EVT_PCIE_PM_RESUME_REQ_AP BIT(12)
+#define MHCCIF_RC2EP_EVT_DEVICE_RESET BIT(13)
+#define MHCCIF_RC2EP_EVT_RESERVED_FOR_TEST BIT(31)
+
+#define MHCCIF_EP2RC_SW_INT_STS 0x10
+#define MHCCIF_EP2RC_SW_INT_ACK 0x14
+#define MHCCIF_EP2RC_SW_INT_EAP_MASK 0x20
+#define MHCCIF_EP2RC_SW_INT_EAP_MASK_SET 0x30
+#define MHCCIF_EP2RC_SW_INT_EAP_MASK_CLR 0x40
+#define MHCCIF_EP2RC_SPARE_REG_1 0x0104
+#define MHCCIF_EP2RC_SPARE_REG_5 0x0114
+#define MHCCIF_EP2RC_SPARE_REG_13 0x0134
+#define MHCCIF_EP2RC_SPARE_REG_14 0x0138
+#define MHCCIF_EP2RC_EVT_BOOT_FLOW_SYNC BIT(5)
+#define MHCCIF_EP2RC_EVT_RESERVED_FOR_CLDMA0 BIT(6)
+#define MHCCIF_EP2RC_EVT_RESERVED_FOR_CLDMA1 BIT(7)
+#define MHCCIF_EP2RC_EVT_RESERVED_FOR_CLDMA3 BIT(8)
+#define MHCCIF_EP2RC_EVT_RESERVED_FOR_CLDMA2 BIT(9)
+#define MHCCIF_EP2RC_EVT_RESERVED_FOR_DPMAIF BIT(10)
+#define MHCCIF_EP2RC_EVT_PCIE_PM_SUSPEND_ACK BIT(11)
+#define MHCCIF_EP2RC_EVT_PCIE_PM_RESUME_ACK BIT(12)
+#define MHCCIF_EP2RC_EVT_PCIE_PM_SUSPEND_ACK_AP BIT(13)
+#define MHCCIF_EP2RC_EVT_PCIE_PM_RESUME_ACK_AP BIT(14)
+#define MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_SAP BIT(15)
+#define MHCCIF_EP2RC_EVT_ASYNC_HS_NOTIFY_MD BIT(16)
+#define MHCCIF_EP2RC_EVT_SOFT_OFF_NOTIFY BIT(17)
+#define MHCCIF_EP2RC_EVT_MD_REBOOT BIT(19)
+#define MHCCIF_EP2RC_EVT_MD_POWEROFF BIT(20)
+#define MHCCIF_EP2RC_EVT_GNSS_ENABLE BIT(21)
+#define MHCCIF_EP2RC_EVT_GNSS_DISABLE BIT(22)
+#define MHCCIF_EP2RC_EVT_FRC_DONE_NOTIFY BIT(24)
+#define MHCCIF_EP2RC_EVT_RESERVED_FOR_TEST1 BIT(30)
+#define MHCCIF_EP2RC_EVT_RESERVED_FOR_TEST2 BIT(31)
+
+#endif /* __MTK_PCI_REG_H__ */
--
2.34.1
^ permalink raw reply related
* [PATCH v3 2/7] net: wwan: t9xx: Add control plane transaction layer
From: Jack Wu via B4 Relay @ 2026-06-24 10:04 UTC (permalink / raw)
To: Loic Poulain, Sergey Ryazanov, Johannes Berg, Andrew Lunn,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Jack Wu, Wen-Zhi Huang, Shi-Wei Yeh, Minano Tseng,
Matthias Brugger, AngeloGioacchino Del Regno, Simon Horman,
Jonathan Corbet, Shuah Khan
Cc: linux-kernel, netdev, linux-arm-kernel, linux-mediatek, linux-doc
In-Reply-To: <20260624-t9xx_driver_v1-v3-0-73ff03f60c48@compal.com>
From: Jack Wu <jackbb_wu@compal.com>
The control plane implements TX services that reside in the
transaction layer. The services receive the packets from the
port layer and call the corresponding DMA components to
transmit data to the device. Meanwhile, TX services receive
and manage the port control commands from the port layer.
The control plane implements RX services that reside in the
transaction layer. The services receive the downlink packets
from the modem and transfer the packets to the corresponding
port layer interfaces.
Signed-off-by: Jack Wu <jackbb_wu@compal.com>
---
drivers/net/wwan/Kconfig | 5 +++
drivers/net/wwan/t9xx/Makefile | 5 +--
drivers/net/wwan/t9xx/mtk_ctrl_plane.c | 48 +++++++++++++++++++++++++++++
drivers/net/wwan/t9xx/mtk_ctrl_plane.h | 22 +++++++++++++
drivers/net/wwan/t9xx/mtk_dev.c | 44 ++++++++++++++++++++++++++
drivers/net/wwan/t9xx/mtk_dev.h | 5 +++
drivers/net/wwan/t9xx/pcie/Makefile | 10 ++++++
drivers/net/wwan/t9xx/pcie/mtk_pci.c | 10 +++---
drivers/net/wwan/t9xx/pcie/mtk_trans_ctrl.h | 21 +++++++++++++
9 files changed, 163 insertions(+), 7 deletions(-)
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index 4cee537c739f..7019b44494f8 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -124,6 +124,7 @@ config MTK_T7XX
config MTK_T9XX
tristate "MediaTek PCIe 5G WWAN modem T9xx device"
depends on PCI
+ select MTK_T9XX_PCI
select NET_DEVLINK
help
Enables MediaTek PCIe based 5G WWAN modem (T9xx series) device.
@@ -133,6 +134,10 @@ config MTK_T9XX
If unsure, say N.
+config MTK_T9XX_PCI
+ tristate
+ depends on PCI
+
endif # WWAN
endmenu
diff --git a/drivers/net/wwan/t9xx/Makefile b/drivers/net/wwan/t9xx/Makefile
index 6f2dd3f91454..ae9d6f2344ab 100644
--- a/drivers/net/wwan/t9xx/Makefile
+++ b/drivers/net/wwan/t9xx/Makefile
@@ -4,7 +4,8 @@ ccflags-y += -I$(src)/pcie
ccflags-y += -I$(src)
obj-$(CONFIG_MTK_T9XX) += mtk_t9xx.o
+obj-$(CONFIG_MTK_T9XX_PCI) += pcie/
mtk_t9xx-y := \
- pcie/mtk_pci.o \
- pcie/mtk_pci_drv_m9xx.o
+ mtk_dev.o \
+ mtk_ctrl_plane.o
diff --git a/drivers/net/wwan/t9xx/mtk_ctrl_plane.c b/drivers/net/wwan/t9xx/mtk_ctrl_plane.c
new file mode 100644
index 000000000000..07938f3e6fe2
--- /dev/null
+++ b/drivers/net/wwan/t9xx/mtk_ctrl_plane.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, MediaTek Inc.
+ * Copyright (c) 2022-2023, Intel Corporation.
+ */
+
+#include <linux/device.h>
+
+#include "mtk_ctrl_plane.h"
+
+/**
+ * mtk_ctrl_init() - Create the control plane block for a modem device.
+ * @mdev: Modem device that will own the control block.
+ *
+ * Allocates a zeroed, device-managed &struct mtk_ctrl_blk, points it back
+ * at @mdev and stores it in @mdev->ctrl_blk.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+int mtk_ctrl_init(struct mtk_md_dev *mdev)
+{
+	struct mtk_ctrl_blk *blk = devm_kzalloc(mdev->dev, sizeof(*blk), GFP_KERNEL);
+
+	if (!blk)
+		return -ENOMEM;
+
+	blk->mdev = mdev;
+	mdev->ctrl_blk = blk;
+
+	return 0;
+}
+EXPORT_SYMBOL(mtk_ctrl_init);
+
+/**
+ * mtk_ctrl_exit() - Release the control plane block of a modem device.
+ * @mdev: Modem device whose control block is released.
+ *
+ * Frees @mdev->ctrl_blk through devres and clears the pointer.
+ */
+void mtk_ctrl_exit(struct mtk_md_dev *mdev)
+{
+	devm_kfree(mdev->dev, mdev->ctrl_blk);
+	mdev->ctrl_blk = NULL;
+}
+EXPORT_SYMBOL(mtk_ctrl_exit);
diff --git a/drivers/net/wwan/t9xx/mtk_ctrl_plane.h b/drivers/net/wwan/t9xx/mtk_ctrl_plane.h
new file mode 100644
index 000000000000..c141876ef95d
--- /dev/null
+++ b/drivers/net/wwan/t9xx/mtk_ctrl_plane.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+
+#ifndef __MTK_CTRL_PLANE_H__
+#define __MTK_CTRL_PLANE_H__
+
+#include <linux/kref.h>
+#include <linux/skbuff.h>
+
+#include "mtk_dev.h"
+
+/*
+ * Control plane block, allocated by mtk_ctrl_init().
+ *
+ * @mdev: owning modem device, set by mtk_ctrl_init().
+ * @trans: transaction-layer context; left NULL by mtk_ctrl_init().
+ */
+struct mtk_ctrl_blk {
+ struct mtk_md_dev *mdev;
+ struct mtk_ctrl_trans *trans;
+};
+
+int mtk_ctrl_init(struct mtk_md_dev *mdev);
+void mtk_ctrl_exit(struct mtk_md_dev *mdev);
+
+#endif /* __MTK_CTRL_PLANE_H__ */
diff --git a/drivers/net/wwan/t9xx/mtk_dev.c b/drivers/net/wwan/t9xx/mtk_dev.c
new file mode 100644
index 000000000000..f254ca7ed877
--- /dev/null
+++ b/drivers/net/wwan/t9xx/mtk_dev.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+
+#include <linux/module.h>
+
+#include "mtk_dev.h"
+
+/**
+ * mtk_dev_alloc() - Allocate a modem device context.
+ * @pdev: Device that owns the devres allocation and is stored in ->dev.
+ * @dev_ops: Hardware operations stored in ->dev_ops.
+ *
+ * Return: the zero-initialised context with ->dev and ->dev_ops filled in,
+ * or NULL if the allocation fails.
+ */
+struct mtk_md_dev *mtk_dev_alloc(struct device *pdev, const struct mtk_dev_ops *dev_ops)
+{
+	struct mtk_md_dev *mdev = devm_kzalloc(pdev, sizeof(*mdev), GFP_KERNEL);
+
+	if (mdev) {
+		mdev->dev_ops = dev_ops;
+		mdev->dev = pdev;
+	}
+
+	return mdev;
+}
+EXPORT_SYMBOL(mtk_dev_alloc);
+
+/**
+ * mtk_dev_free() - Release a context obtained from mtk_dev_alloc().
+ * @mdev: Modem device context to free; its ->dev owns the allocation.
+ */
+void mtk_dev_free(struct mtk_md_dev *mdev)
+{
+	devm_kfree(mdev->dev, mdev);
+}
+EXPORT_SYMBOL(mtk_dev_free);
+
+/* Module init hook: performs no setup and always returns 0. */
+static int __init mtk_common_drv_init(void)
+{
+ return 0;
+}
+module_init(mtk_common_drv_init);
+
+/* Module exit hook: there is nothing to tear down. */
+static void __exit mtk_common_drv_exit(void)
+{
+}
+module_exit(mtk_common_drv_exit);
+
+MODULE_DESCRIPTION("MediaTek T9xx PCIe WWAN driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/wwan/t9xx/mtk_dev.h b/drivers/net/wwan/t9xx/mtk_dev.h
index 8278a0e2875e..bb3ea68890ea 100644
--- a/drivers/net/wwan/t9xx/mtk_dev.h
+++ b/drivers/net/wwan/t9xx/mtk_dev.h
@@ -36,6 +36,7 @@ enum mtk_dev_evt_d2h {
};
struct mtk_md_dev;
+struct mtk_ctrl_blk;
struct mtk_dev_ops {
u32 (*get_dev_state)(struct mtk_md_dev *mdev);
@@ -57,6 +58,7 @@ struct mtk_md_dev {
void *hw_priv;
u32 hw_ver;
char dev_str[MTK_DEV_STR_LEN];
+ struct mtk_ctrl_blk *ctrl_blk;
};
static inline u32 mtk_dev_get_dev_state(struct mtk_md_dev *mdev)
@@ -105,4 +107,7 @@ static inline int mtk_dev_send_dev_evt(struct mtk_md_dev *mdev, u32 dev_evt)
return mdev->dev_ops->send_dev_evt(mdev, dev_evt);
}
+struct mtk_md_dev *mtk_dev_alloc(struct device *pdev, const struct mtk_dev_ops *dev_ops);
+void mtk_dev_free(struct mtk_md_dev *mdev);
+
#endif /* __MTK_DEV_H__ */
diff --git a/drivers/net/wwan/t9xx/pcie/Makefile b/drivers/net/wwan/t9xx/pcie/Makefile
new file mode 100644
index 000000000000..7410d1796d27
--- /dev/null
+++ b/drivers/net/wwan/t9xx/pcie/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+ccflags-y += -I$(src)
+ccflags-y += -I$(src)/..
+
+obj-$(CONFIG_MTK_T9XX_PCI) += mtk_t9xx_pcie.o
+
+mtk_t9xx_pcie-y := \
+ mtk_pci_drv_m9xx.o \
+ mtk_pci.o
diff --git a/drivers/net/wwan/t9xx/pcie/mtk_pci.c b/drivers/net/wwan/t9xx/pcie/mtk_pci.c
index c6a7196fcdd6..90b33dd6effd 100644
--- a/drivers/net/wwan/t9xx/pcie/mtk_pci.c
+++ b/drivers/net/wwan/t9xx/pcie/mtk_pci.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include "mtk_dev.h"
+#include "mtk_trans_ctrl.h"
#include "mtk_pci.h"
#include "mtk_pci_reg.h"
@@ -467,6 +468,7 @@ static u32 mtk_pci_ext_h2d_evt_hw_bits(u32 chs)
SET_HW_BITS(hw_bits, chs, MHCCIF_RC2EP_EVT_DEVICE_RESET,
DEV_EVT_H2D_DEVICE_RESET);
+
return LE32_TO_U32(cpu_to_le32(hw_bits));
}
@@ -908,13 +910,11 @@ static int mtk_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
struct mtk_md_dev *mdev;
int ret;
- mdev = devm_kzalloc(dev, sizeof(*mdev), GFP_KERNEL);
+ mdev = mtk_dev_alloc(dev, &pci_hw_ops);
if (!mdev) {
ret = -ENOMEM;
goto log_err;
}
- mdev->dev_ops = &pci_hw_ops;
- mdev->dev = dev;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv) {
@@ -991,7 +991,7 @@ static int mtk_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
free_priv_data:
devm_kfree(dev, priv);
free_cntx_data:
- devm_kfree(dev, mdev);
+ mtk_dev_free(mdev);
log_err:
dev_err(dev, "Failed to probe device, ret=%d\n", ret);
@@ -1017,7 +1017,7 @@ static void mtk_pci_remove(struct pci_dev *pdev)
pci_load_and_free_saved_state(pdev, &priv->saved_state);
devm_kfree(dev, priv);
- devm_kfree(dev, mdev);
+ mtk_dev_free(mdev);
}
static pci_ers_result_t mtk_pci_error_detected(struct pci_dev *pdev,
diff --git a/drivers/net/wwan/t9xx/pcie/mtk_trans_ctrl.h b/drivers/net/wwan/t9xx/pcie/mtk_trans_ctrl.h
new file mode 100644
index 000000000000..d6de4c43b529
--- /dev/null
+++ b/drivers/net/wwan/t9xx/pcie/mtk_trans_ctrl.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2022, MediaTek Inc.
+ */
+
+#ifndef __MTK_TRANS_CTRL_H__
+#define __MTK_TRANS_CTRL_H__
+
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+
+#include "mtk_dev.h"
+
+/*
+ * Control-plane transaction context; struct mtk_ctrl_blk holds a pointer
+ * to it in its trans member.
+ */
+struct mtk_ctrl_trans {
+ struct mtk_ctrl_blk *ctrl_blk;
+ struct mtk_md_dev *mdev;
+};
+
+#endif
--
2.34.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox