From: "Michael S. Tsirkin" <mst@redhat.com>
To: Longjun Tang <lange_tang@163.com>
Cc: jasowang@redhat.com, xuanzhuo@linux.alibaba.com,
tanglongjun@kylinos.cn, virtualization@lists.linux.dev
Subject: Re: [PATCH v1 3/7] tools/virtio/virtnet_mon: add kprobe start_xmit
Date: Wed, 10 Dec 2025 04:03:21 -0500 [thread overview]
Message-ID: <20251210040228-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20251127032407.33475-4-lange_tang@163.com>
On Thu, Nov 27, 2025 at 11:24:03AM +0800, Longjun Tang wrote:
> From: Tang Longjun <tanglongjun@kylinos.cn>
>
> track skb and virtqueue through the kprobe start_xmit function
>
> Signed-off-by: Tang Longjun <tanglongjun@kylinos.cn>
> ---
> tools/virtio/virtnet_mon/virtnet_mon.c | 793 ++++++++++++++++++++++++-
> 1 file changed, 772 insertions(+), 21 deletions(-)
>
> diff --git a/tools/virtio/virtnet_mon/virtnet_mon.c b/tools/virtio/virtnet_mon/virtnet_mon.c
> index 696e621cf803..36b51d0a13d4 100644
> --- a/tools/virtio/virtnet_mon/virtnet_mon.c
> +++ b/tools/virtio/virtnet_mon/virtnet_mon.c
> @@ -6,15 +6,724 @@
> #include <linux/uaccess.h>
> #include <linux/miscdevice.h>
> #include <linux/poll.h>
> +#include <linux/string.h>
> +#include <linux/if_ether.h>
> +
> +#include <linux/kprobes.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <linux/tcp.h>
> +#include <linux/udp.h>
> +#include <linux/icmp.h>
> +#include <linux/icmpv6.h>
> +#include <linux/version.h>
> +#include <linux/time.h>
> +#include <linux/smp.h>
> +#include <linux/virtio.h>
> +#include <linux/scatterlist.h>
> +#include <linux/bpf.h>
> +#include <linux/dim.h>
> +#include <linux/mutex.h>
> +#include <linux/workqueue.h>
> +#include <linux/spinlock.h>
> +
> +#include <linux/u64_stats_sync.h>
> +#include <linux/mm_types_task.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +#include <net/xdp.h>
> +
>
> #define DEVICE_NAME "virtnet_mon"
> -#define KFIFO_SIZE 1024 // ring buffer size
> +#define KFIFO_SIZE 65536 // ring buffer size
> +#define WRITE_SIZE 1024
> +#define READ_SIZE 16384
> +#define LINE_MAX_SIZE 1024
> +
> +#if defined(CONFIG_X86_64)
> +#define KP_GET_ARG(regs, idx) \
> + ((idx) == 0 ? (unsigned long)(regs)->di : \
> + (idx) == 1 ? (unsigned long)(regs)->si : 0UL)
> +#elif defined(CONFIG_ARM64)
> +#define KP_GET_ARG(regs, idx) \
> + ((idx) < 8 ? (unsigned long)(regs)->regs[(idx)] : 0UL)
> +#endif
> +
> +struct _virtnet_sq_stats {
> + struct u64_stats_sync syncp;
> + u64_stats_t packets;
> + u64_stats_t bytes;
> + u64_stats_t xdp_tx;
> + u64_stats_t xdp_tx_drops;
> + u64_stats_t kicks;
> + u64_stats_t tx_timeouts;
> + u64_stats_t stop;
> + u64_stats_t wake;
> +};
> +
> +struct _virtnet_interrupt_coalesce {
> + u32 max_packets;
> + u32 max_usecs;
> +};
> +
> +struct _send_queue {
> + /* Virtqueue associated with this send_queue */
> + struct virtqueue *vq;
> +
> + /* TX: fragments + linear part + virtio header */
> + struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> + /* Name of the send queue: output.$index */
> + char name[16];
> +
> + struct _virtnet_sq_stats stats;
> +
> + struct _virtnet_interrupt_coalesce intr_coal;
> +
> + struct napi_struct napi;
> +
> + /* Record whether sq is in reset state. */
> + bool reset;
> +
> + struct xsk_buff_pool *xsk_pool;
> +
> + dma_addr_t xsk_hdr_dma_addr;
> +};
> +
> +struct _virtnet_rq_stats {
> + struct u64_stats_sync syncp;
> + u64_stats_t packets;
> + u64_stats_t bytes;
> + u64_stats_t drops;
> + u64_stats_t xdp_packets;
> + u64_stats_t xdp_tx;
> + u64_stats_t xdp_redirects;
> + u64_stats_t xdp_drops;
> + u64_stats_t kicks;
> +};
> +
> +struct _ewma_pkt_len {
> + unsigned long internal;
> +};
> +
> +struct _virtnet_rq_dma {
> + dma_addr_t addr;
> + u32 ref;
> + u16 len;
> + u16 need_sync;
> +};
> +
> +struct _receive_queue {
> + /* Virtqueue associated with this receive_queue */
> + struct virtqueue *vq;
> +
> + struct napi_struct napi;
> +
> + struct bpf_prog __rcu *xdp_prog;
> +
> + struct _virtnet_rq_stats stats;
> +
> + /* The number of rx notifications */
> + u16 calls;
> +
> + /* Is dynamic interrupt moderation enabled? */
> + bool dim_enabled;
> +
> + /* Used to protect dim_enabled and inter_coal */
> + struct mutex dim_lock;
> +
> + /* Dynamic Interrupt Moderation */
> + struct dim dim;
> +
> + u32 packets_in_napi;
> +
> + struct _virtnet_interrupt_coalesce intr_coal;
> +
> + /* Chain pages by the private ptr. */
> + struct page *pages;
> +
> + /* Average packet length for mergeable receive buffers. */
> + struct _ewma_pkt_len mrg_avg_pkt_len;
> +
> + /* Page frag for packet buffer allocation. */
> + struct page_frag alloc_frag;
> +
> + /* RX: fragments + linear part + virtio header */
> + struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> + /* Min single buffer size for mergeable buffers case. */
> + unsigned int min_buf_len;
> +
> + /* Name of this receive queue: input.$index */
> + char name[16];
> +
> + struct xdp_rxq_info xdp_rxq;
> +
> + /* Record the last dma info to free after new pages is allocated. */
> + struct _virtnet_rq_dma *last_dma;
> +
> + struct xsk_buff_pool *xsk_pool;
> +
> + /* xdp rxq used by xsk */
> + struct xdp_rxq_info xsk_rxq_info;
> +
> + struct xdp_buff **xsk_buffs;
> +};
> +
> +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
> +
> +struct _control_buf {
> + struct virtio_net_ctrl_hdr hdr;
> + virtio_net_ctrl_ack status;
> +};
> +
> +struct _virtnet_info {
> + struct virtio_device *vdev;
> + struct virtqueue *cvq;
> + struct net_device *dev;
> + struct _send_queue *sq;
> + struct _receive_queue *rq;
> + unsigned int status;
> +
> + /* Max # of queue pairs supported by the device */
> + u16 max_queue_pairs;
> +
> + /* # of queue pairs currently used by the driver */
> + u16 curr_queue_pairs;
> +
> + /* # of XDP queue pairs currently used by the driver */
> + u16 xdp_queue_pairs;
> +
> + /* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
> + bool xdp_enabled;
> +
> + /* I like... big packets and I cannot lie! */
> + bool big_packets;
> +
> + /* number of sg entries allocated for big packets */
> + unsigned int big_packets_num_skbfrags;
> +
> + /* Host will merge rx buffers for big packets (shake it! shake it!) */
> + bool mergeable_rx_bufs;
> +
> + /* Host supports rss and/or hash report */
> + bool has_rss;
> + bool has_rss_hash_report;
> + u8 rss_key_size;
> + u16 rss_indir_table_size;
> + u32 rss_hash_types_supported;
> + u32 rss_hash_types_saved;
> + struct virtio_net_rss_config_hdr *rss_hdr;
> + struct virtio_net_rss_config_trailer rss_trailer;
> + u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> +
> + /* Has control virtqueue */
> + bool has_cvq;
> +
> + /* Lock to protect the control VQ */
> + struct mutex cvq_lock;
> +
> + /* Host can handle any s/g split between our header and packet data */
> + bool any_header_sg;
> +
> + /* Packet virtio header size */
> + u8 hdr_len;
> +
> + /* Work struct for delayed refilling if we run low on memory. */
> + struct delayed_work refill;
> +
> + /* UDP tunnel support */
> + bool tx_tnl;
> +
> + bool rx_tnl;
> +
> + bool rx_tnl_csum;
> +
> + /* Is delayed refill enabled? */
> + bool refill_enabled;
> +
> + /* The lock to synchronize the access to refill_enabled */
> + spinlock_t refill_lock;
> +
> + /* Work struct for config space updates */
> + struct work_struct config_work;
> +
> + /* Work struct for setting rx mode */
> + struct work_struct rx_mode_work;
> +
> + /* OK to queue work setting RX mode? */
> + bool rx_mode_work_enabled;
> +
> + /* Is the affinity hint set for virtqueues? */
> +
> + bool affinity_hint_set;
> +
> + /* CPU hotplug instances for online & dead */
> +
> + struct hlist_node node;
> +
> + struct hlist_node node_dead;
> +
> + struct _control_buf *ctrl;
> +
> + /* Ethtool settings */
> + u8 duplex;
> + u32 speed;
> +
> + /* Is rx dynamic interrupt moderation enabled? */
> + bool rx_dim_enabled;
> +
> + /* Interrupt coalescing settings */
> + struct _virtnet_interrupt_coalesce intr_coal_tx;
> + struct _virtnet_interrupt_coalesce intr_coal_rx;
> +
> + unsigned long guest_offloads;
> + unsigned long guest_offloads_capable;
> +
> + /* failover when STANDBY feature enabled */
> + struct failover *failover;
> +
> + u64 device_stats_cap;
> +};
> +
> +
> +struct _vring_desc_state_split {
> + void *data; /* Data for callback. */
> + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
> +};
> +
> +struct _vring_desc_extra {
> + dma_addr_t addr; /* Descriptor DMA addr. */
> + u32 len; /* Descriptor length. */
> + u16 flags; /* Descriptor flags. */
> + u16 next; /* The next desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_split {
> + /* Actual memory layout for this queue. */
> + struct vring vring;
> +
> + /* Last written value to avail->flags */
> + u16 avail_flags_shadow;
> +
> + /*
> + * Last written value to avail->idx in
> + * guest byte order.
> + */
> + u16 avail_idx_shadow;
> +
> + /* Per-descriptor state. */
> + struct _vring_desc_state_split *desc_state;
> + struct _vring_desc_extra *desc_extra;
> +
> + /* DMA address and size information */
> + dma_addr_t queue_dma_addr;
> + size_t queue_size_in_bytes;
> +
> + /*
> + * The parameters for creating vrings are reserved for creating new
> + * vring.
> + */
> + u32 vring_align;
> + bool may_reduce_num;
> +};
> +
> +struct _vring_desc_state_packed {
> + void *data; /* Data for callback. */
> + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
> + u16 num; /* Descriptor list length. */
> + u16 last; /* The last desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_packed {
> + /* Actual memory layout for this queue. */
> + struct {
> + unsigned int num;
> + struct vring_packed_desc *desc;
> + struct vring_packed_desc_event *driver;
> + struct vring_packed_desc_event *device;
> + } vring;
> +
> + /* Driver ring wrap counter. */
> + bool avail_wrap_counter;
> +
> + /* Avail used flags. */
> + u16 avail_used_flags;
> +
> + /* Index of the next avail descriptor. */
> + u16 next_avail_idx;
> +
> + /*
> + * Last written value to driver->flags in
> + * guest byte order.
> + */
> + u16 event_flags_shadow;
> +
> + /* Per-descriptor state. */
> + struct _vring_desc_state_packed *desc_state;
> + struct _vring_desc_extra *desc_extra;
> +
> + /* DMA address and size information */
> + dma_addr_t ring_dma_addr;
> + dma_addr_t driver_event_dma_addr;
> + dma_addr_t device_event_dma_addr;
> + size_t ring_size_in_bytes;
> + size_t event_size_in_bytes;
> +};
> +
> +struct _vring_virtqueue {
> + struct virtqueue vq;
> +
> + /* Is this a packed ring? */
> + bool packed_ring;
> +
> + /* Is DMA API used? */
> + bool use_dma_api;
> +
> + /* Can we use weak barriers? */
> + bool weak_barriers;
> +
> + /* Other side has made a mess, don't try any more. */
> + bool broken;
> +
> + /* Host supports indirect buffers */
> + bool indirect;
> +
> + /* Host publishes avail event idx */
> + bool event;
> +
> + /* Head of free buffer list. */
> + unsigned int free_head;
> + /* Number we've added since last sync. */
> + unsigned int num_added;
> +
> + /* Last used index we've seen.
> + * for split ring, it just contains last used index
> + * for packed ring:
> + * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
> + * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
> + */
> + u16 last_used_idx;
>
> -static DEFINE_KFIFO(virtnet_mon_kfifo, char, KFIFO_SIZE);
> + /* Hint for event idx: already triggered no need to disable. */
> + bool event_triggered;
> +
> + union {
> + /* Available for split ring */
> + struct _vring_virtqueue_split split;
> +
> + /* Available for packed ring */
> + struct _vring_virtqueue_packed packed;
> + };
> +
> + /* How to notify other side. FIXME: commonalize hcalls! */
> + bool (*notify)(struct virtqueue *vq);
> +
> + /* DMA, allocation, and size information */
> + bool we_own_ring;
> +
> + union virtio_map map;
> +};
> +
> +/* RX or TX */
> +enum pkt_dir {
> + PKT_DIR_UN = 0, /* Unknown */
> + PKT_DIR_RX = 1, /* RX */
> + PKT_DIR_TX = 2, /* TX */
> + PKT_DIR_MAX
> +};
> +
> +enum event_type {
> + START_XMIT_PRE_EVENT = 1,
> + START_XMIT_POST_EVENT = 2,
> +};
> +
> +struct iph_info {
> + struct sk_buff *skb; /* SKB */
> + u8 iph_proto; /* iph protocol type */
> + u32 seq; /* absolute sequence number */
> +};
> +
> +struct queue_info {
> + struct virtqueue *vq;
> + char name[16];
> + unsigned int num_free;
> + unsigned int num;
> + __virtio16 avail_flags;
> + __virtio16 avail_idx;
> + u16 avail_flags_shadow;
> + u16 avail_idx_shadow;
> + __virtio16 used_flags;
> + __virtio16 used_idx;
> + u16 last_used_idx;
> + bool broken;
> +};
Not at all excited about all the code duplication going on here.
next prev parent reply other threads:[~2025-12-10 9:03 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-27 3:24 [PATCH v1 0/7] introduce virtnet_mon for monitor virtio_net Longjun Tang
2025-11-27 3:24 ` [PATCH v1 1/7] tools/virtio/virtnet_mon: create misc driver for virtnet_mon Longjun Tang
2025-11-27 3:24 ` [PATCH v1 2/7] tools/virtio/virtnet_mon: add kfifo to virtnet_mon Longjun Tang
2025-11-27 3:24 ` [PATCH v1 3/7] tools/virtio/virtnet_mon: add kprobe start_xmit Longjun Tang
2025-11-28 2:21 ` Jason Wang
2025-11-28 3:25 ` Lange Tang
2025-12-10 9:03 ` Michael S. Tsirkin [this message]
2025-11-27 3:24 ` [PATCH v1 4/7] tools/virtio/virtnet_mon: add kprobe gro_receive_skb Longjun Tang
2025-11-27 3:24 ` [PATCH v1 5/7] tools/virtio/virtnet_mon: add kprobe ip_local_deliver Longjun Tang
2025-11-27 3:24 ` [PATCH v1 6/7] tools/virtio/virtnet_mon: add kprobe skb_xmit_done and skb_recv_done Longjun Tang
2025-11-27 3:24 ` [PATCH v1 7/7] tools/virtio/virtnet_mon: add README file for virtnet_moin Longjun Tang
2025-12-10 9:04 ` [PATCH v1 0/7] introduce virtnet_mon for monitor virtio_net Michael S. Tsirkin
2025-12-11 2:51 ` Lange Tang
2025-12-11 7:10 ` Michael S. Tsirkin
2025-12-11 8:32 ` Jason Wang
2025-12-13 2:41 ` Lange Tang
2025-12-15 6:41 ` Jason Wang
2025-12-15 8:12 ` Lange Tang
2025-12-16 3:59 ` Jason Wang
2025-12-16 7:47 ` Lange Tang
2025-12-16 8:27 ` Jason Wang
2025-12-21 13:46 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251210040228-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=jasowang@redhat.com \
--cc=lange_tang@163.com \
--cc=tanglongjun@kylinos.cn \
--cc=virtualization@lists.linux.dev \
--cc=xuanzhuo@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).