virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Longjun Tang <lange_tang@163.com>
Cc: jasowang@redhat.com, xuanzhuo@linux.alibaba.com,
	tanglongjun@kylinos.cn, virtualization@lists.linux.dev
Subject: Re: [PATCH v1 3/7] tools/virtio/virtnet_mon: add kprobe start_xmit
Date: Wed, 10 Dec 2025 04:03:21 -0500	[thread overview]
Message-ID: <20251210040228-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20251127032407.33475-4-lange_tang@163.com>

On Thu, Nov 27, 2025 at 11:24:03AM +0800, Longjun Tang wrote:
> From: Tang Longjun <tanglongjun@kylinos.cn>
> 
> track skb and virtqueue through the kprobe start_xmit function
> 
> Signed-off-by: Tang Longjun <tanglongjun@kylinos.cn>
> ---
>  tools/virtio/virtnet_mon/virtnet_mon.c | 793 ++++++++++++++++++++++++-
>  1 file changed, 772 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/virtio/virtnet_mon/virtnet_mon.c b/tools/virtio/virtnet_mon/virtnet_mon.c
> index 696e621cf803..36b51d0a13d4 100644
> --- a/tools/virtio/virtnet_mon/virtnet_mon.c
> +++ b/tools/virtio/virtnet_mon/virtnet_mon.c
> @@ -6,15 +6,724 @@
>  #include <linux/uaccess.h>
>  #include <linux/miscdevice.h>
>  #include <linux/poll.h>
> +#include <linux/string.h>
> +#include <linux/if_ether.h>
> +
> +#include <linux/kprobes.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <linux/tcp.h>
> +#include <linux/udp.h>
> +#include <linux/icmp.h>
> +#include <linux/icmpv6.h>
> +#include <linux/version.h>
> +#include <linux/time.h>
> +#include <linux/smp.h>
> +#include <linux/virtio.h>
> +#include <linux/scatterlist.h>
> +#include <linux/bpf.h>
> +#include <linux/dim.h>
> +#include <linux/mutex.h>
> +#include <linux/workqueue.h>
> +#include <linux/spinlock.h>
> +
> +#include <linux/u64_stats_sync.h>
> +#include <linux/mm_types_task.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +#include <net/xdp.h>
> +
>  
>  #define DEVICE_NAME "virtnet_mon"
> -#define KFIFO_SIZE 1024     // ring buffer size
> +#define KFIFO_SIZE 65536     // ring buffer size
> +#define WRITE_SIZE 1024
> +#define READ_SIZE 16384
> +#define LINE_MAX_SIZE 1024
> +
> +#if defined(CONFIG_X86_64)
> +#define KP_GET_ARG(regs, idx) \
> +	((idx) == 0 ? (unsigned long)(regs)->di : \
> +	(idx) == 1 ? (unsigned long)(regs)->si : 0UL)
> +#elif defined(CONFIG_ARM64)
> +#define KP_GET_ARG(regs, idx) \
> +	((idx) < 8 ? (unsigned long)(regs)->regs[(idx)] : 0UL)
> +#endif
> +
> +struct _virtnet_sq_stats {
> +	struct u64_stats_sync syncp;
> +	u64_stats_t packets;
> +	u64_stats_t bytes;
> +	u64_stats_t xdp_tx;
> +	u64_stats_t xdp_tx_drops;
> +	u64_stats_t kicks;
> +	u64_stats_t tx_timeouts;
> +	u64_stats_t stop;
> +	u64_stats_t wake;
> +};
> +
> +struct _virtnet_interrupt_coalesce {
> +	u32 max_packets;
> +	u32 max_usecs;
> +};
> +
> +struct _send_queue {
> +	/* Virtqueue associated with this send _queue */
> +	struct virtqueue *vq;
> +
> +	/* TX: fragments + linear part + virtio header */
> +	struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> +	/* Name of the send queue: output.$index */
> +	char name[16];
> +
> +	struct _virtnet_sq_stats stats;
> +
> +	struct _virtnet_interrupt_coalesce intr_coal;
> +
> +	struct napi_struct napi;
> +
> +	/* Record whether sq is in reset state. */
> +	bool reset;
> +
> +	struct xsk_buff_pool *xsk_pool;
> +
> +	dma_addr_t xsk_hdr_dma_addr;
> +};
> +
> +struct _virtnet_rq_stats {
> +	struct u64_stats_sync syncp;
> +	u64_stats_t packets;
> +	u64_stats_t bytes;
> +	u64_stats_t drops;
> +	u64_stats_t xdp_packets;
> +	u64_stats_t xdp_tx;
> +	u64_stats_t xdp_redirects;
> +	u64_stats_t xdp_drops;
> +	u64_stats_t kicks;
> +};
> +
> +struct _ewma_pkt_len {
> +	unsigned long internal;
> +};
> +
> +struct _virtnet_rq_dma {
> +	dma_addr_t addr;
> +	u32 ref;
> +	u16 len;
> +	u16 need_sync;
> +};
> +
> +struct _receive_queue {
> +	/* Virtqueue associated with this receive_queue */
> +	struct virtqueue *vq;
> +
> +	struct napi_struct napi;
> +
> +	struct bpf_prog __rcu *xdp_prog;
> +
> +	struct _virtnet_rq_stats stats;
> +
> +	/* The number of rx notifications */
> +	u16 calls;
> +
> +	/* Is dynamic interrupt moderation enabled? */
> +	bool dim_enabled;
> +
> +	/* Used to protect dim_enabled and inter_coal */
> +	struct mutex dim_lock;
> +
> +	/* Dynamic Interrupt Moderation */
> +	struct dim dim;
> +
> +	u32 packets_in_napi;
> +
> +	struct _virtnet_interrupt_coalesce intr_coal;
> +
> +	/* Chain pages by the private ptr. */
> +	struct page *pages;
> +
> +	/* Average packet length for mergeable receive buffers. */
> +	struct _ewma_pkt_len mrg_avg_pkt_len;
> +
> +	/* Page frag for packet buffer allocation. */
> +	struct page_frag alloc_frag;
> +
> +	/* RX: fragments + linear part + virtio header */
> +	struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> +	/* Min single buffer size for mergeable buffers case. */
> +	unsigned int min_buf_len;
> +
> +	/* Name of this receive queue: input.$index */
> +	char name[16];
> +
> +	struct xdp_rxq_info xdp_rxq;
> +
> +	/* Record the last dma info to free after new pages is allocated. */
> +	struct _virtnet_rq_dma *last_dma;
> +
> +	struct xsk_buff_pool *xsk_pool;
> +
> +	/* xdp rxq used by xsk */
> +	struct xdp_rxq_info xsk_rxq_info;
> +
> +	struct xdp_buff **xsk_buffs;
> +};
> +
> +#define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
> +
> +struct _control_buf {
> +	struct virtio_net_ctrl_hdr hdr;
> +	virtio_net_ctrl_ack status;
> +};
> +
> +struct _virtnet_info {
> +	struct virtio_device *vdev;
> +	struct virtqueue *cvq;
> +	struct net_device *dev;
> +	struct _send_queue *sq;
> +	struct _receive_queue *rq;
> +	unsigned int status;
> +
> +	/* Max # of queue pairs supported by the device */
> +	u16 max_queue_pairs;
> +
> +	/* # of queue pairs currently used by the driver */
> +	u16 curr_queue_pairs;
> +
> +	/* # of XDP queue pairs currently used by the driver */
> +	u16 xdp_queue_pairs;
> +
> +	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
> +	bool xdp_enabled;
> +
> +	/* I like... big packets and I cannot lie! */
> +	bool big_packets;
> +
> +	/* number of sg entries allocated for big packets */
> +	unsigned int big_packets_num_skbfrags;
> +
> +	/* Host will merge rx buffers for big packets (shake it! shake it!) */
> +	bool mergeable_rx_bufs;
> +
> +	/* Host supports rss and/or hash report */
> +	bool has_rss;
> +	bool has_rss_hash_report;
> +	u8 rss_key_size;
> +	u16 rss_indir_table_size;
> +	u32 rss_hash_types_supported;
> +	u32 rss_hash_types_saved;
> +	struct virtio_net_rss_config_hdr *rss_hdr;
> +	struct virtio_net_rss_config_trailer rss_trailer;
> +	u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> +
> +	/* Has control virtqueue */
> +	bool has_cvq;
> +
> +	/* Lock to protect the control VQ */
> +	struct mutex cvq_lock;
> +
> +	/* Host can handle any s/g split between our header and packet data */
> +	bool any_header_sg;
> +
> +	/* Packet virtio header size */
> +	u8 hdr_len;
> +
> +	/* Work struct for delayed refilling if we run low on memory. */
> +	struct delayed_work refill;
> +
> +	/* UDP tunnel support */
> +	bool tx_tnl;
> +
> +	bool rx_tnl;
> +
> +	bool rx_tnl_csum;
> +
> +	/* Is delayed refill enabled? */
> +	bool refill_enabled;
> +
> +	/* The lock to synchronize the access to refill_enabled */
> +	spinlock_t refill_lock;
> +
> +	/* Work struct for config space updates */
> +	struct work_struct config_work;
> +
> +	/* Work struct for setting rx mode */
> +	struct work_struct rx_mode_work;
> +
> +	/* OK to queue work setting RX mode? */
> +	bool rx_mode_work_enabled;
> +
> +	/* Does the affinity hint is set for virtqueues? */
> +
> +	bool affinity_hint_set;
> +
> +	/* CPU hotplug instances for online & dead */
> +
> +	struct hlist_node node;
> +
> +	struct hlist_node node_dead;
> +
> +	struct _control_buf *ctrl;
> +
> +	/* Ethtool settings */
> +	u8 duplex;
> +	u32 speed;
> +
> +	/* Is rx dynamic interrupt moderation enabled? */
> +	bool rx_dim_enabled;
> +
> +	/* Interrupt coalescing settings */
> +	struct _virtnet_interrupt_coalesce intr_coal_tx;
> +	struct _virtnet_interrupt_coalesce intr_coal_rx;
> +
> +	unsigned long guest_offloads;
> +	unsigned long guest_offloads_capable;
> +
> +	/* failover when STANDBY feature enabled */
> +	struct failover *failover;
> +
> +	u64 device_stats_cap;
> +};
> +
> +
> +struct _vring_desc_state_split {
> +	void *data;			/* Data for callback. */
> +	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
> +};
> +
> +struct _vring_desc_extra {
> +	dma_addr_t addr;		/* Descriptor DMA addr. */
> +	u32 len;			/* Descriptor length. */
> +	u16 flags;			/* Descriptor flags. */
> +	u16 next;			/* The next desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_split {
> +	/* Actual memory layout for this queue. */
> +	struct vring vring;
> +
> +	/* Last written value to avail->flags */
> +	u16 avail_flags_shadow;
> +
> +	/*
> +	 * Last written value to avail->idx in
> +	 * guest byte order.
> +	 */
> +	u16 avail_idx_shadow;
> +
> +	/* Per-descriptor state. */
> +	struct _vring_desc_state_split *desc_state;
> +	struct _vring_desc_extra *desc_extra;
> +
> +	/* DMA address and size information */
> +	dma_addr_t queue_dma_addr;
> +	size_t queue_size_in_bytes;
> +
> +	/*
> +	 * The parameters for creating vrings are reserved for creating new
> +	 * vring.
> +	 */
> +	u32 vring_align;
> +	bool may_reduce_num;
> +};
> +
> +struct _vring_desc_state_packed {
> +	void *data;			/* Data for callback. */
> +	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
> +	u16 num;			/* Descriptor list length. */
> +	u16 last;			/* The last desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_packed {
> +	/* Actual memory layout for this queue. */
> +	struct {
> +		unsigned int num;
> +		struct vring_packed_desc *desc;
> +		struct vring_packed_desc_event *driver;
> +		struct vring_packed_desc_event *device;
> +	} vring;
> +
> +	/* Driver ring wrap counter. */
> +	bool avail_wrap_counter;
> +
> +	/* Avail used flags. */
> +	u16 avail_used_flags;
> +
> +	/* Index of the next avail descriptor. */
> +	u16 next_avail_idx;
> +
> +	/*
> +	 * Last written value to driver->flags in
> +	 * guest byte order.
> +	 */
> +	u16 event_flags_shadow;
> +
> +	/* Per-descriptor state. */
> +	struct _vring_desc_state_packed *desc_state;
> +	struct _vring_desc_extra *desc_extra;
> +
> +	/* DMA address and size information */
> +	dma_addr_t ring_dma_addr;
> +	dma_addr_t driver_event_dma_addr;
> +	dma_addr_t device_event_dma_addr;
> +	size_t ring_size_in_bytes;
> +	size_t event_size_in_bytes;
> +};
> +
> +struct _vring_virtqueue {
> +	struct virtqueue vq;
> +
> +	/* Is this a packed ring? */
> +	bool packed_ring;
> +
> +	/* Is DMA API used? */
> +	bool use_dma_api;
> +
> +	/* Can we use weak barriers? */
> +	bool weak_barriers;
> +
> +	/* Other side has made a mess, don't try any more. */
> +	bool broken;
> +
> +	/* Host supports indirect buffers */
> +	bool indirect;
> +
> +	/* Host publishes avail event idx */
> +	bool event;
> +
> +	/* Head of free buffer list. */
> +	unsigned int free_head;
> +	/* Number we've added since last sync. */
> +	unsigned int num_added;
> +
> +	/* Last used index  we've seen.
> +	 * for split ring, it just contains last used index
> +	 * for packed ring:
> +	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
> +	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
> +	 */
> +	u16 last_used_idx;
>  
> -static DEFINE_KFIFO(virtnet_mon_kfifo, char, KFIFO_SIZE);
> +	/* Hint for event idx: already triggered no need to disable. */
> +	bool event_triggered;
> +
> +	union {
> +		/* Available for split ring */
> +		struct _vring_virtqueue_split split;
> +
> +		/* Available for packed ring */
> +		struct _vring_virtqueue_packed packed;
> +	};
> +
> +	/* How to notify other side. FIXME: commonalize hcalls! */
> +	bool (*notify)(struct virtqueue *vq);
> +
> +	/* DMA, allocation, and size information */
> +	bool we_own_ring;
> +
> +	union virtio_map map;
> +};
> +
> +/* RX or TX */
> +enum pkt_dir {
> +	PKT_DIR_UN = 0,      /* Unknown */
> +	PKT_DIR_RX = 1,           /* RX */
> +	PKT_DIR_TX = 2,           /* TX */
> +	PKT_DIR_MAX
> +};
> +
> +enum event_type {
> +	START_XMIT_PRE_EVENT = 1,
> +	START_XMIT_POST_EVENT = 2,
> +};
> +
> +struct iph_info {
> +	struct sk_buff *skb;        /* SKB */
> +	u8 iph_proto;    /* iph protocol type */
> +	u32 seq;         /* absolute sequence number */
> +};
> +
> +struct queue_info {
> +	struct virtqueue *vq;
> +	char name[16];
> +	unsigned int num_free;
> +	unsigned int num;
> +	__virtio16 avail_flags;
> +	__virtio16 avail_idx;
> +	u16 avail_flags_shadow;
> +	u16 avail_idx_shadow;
> +	__virtio16 used_flags;
> +	__virtio16 used_idx;
> +	u16 last_used_idx;
> +	bool broken;
> +};


Not at all excited about all the code duplication going on here.


  parent reply	other threads:[~2025-12-10  9:03 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-27  3:24 [PATCH v1 0/7] introduce virtnet_mon for monitor virtio_net Longjun Tang
2025-11-27  3:24 ` [PATCH v1 1/7] tools/virtio/virtnet_mon: create misc driver for virtnet_mon Longjun Tang
2025-11-27  3:24 ` [PATCH v1 2/7] tools/virtio/virtnet_mon: add kfifo to virtnet_mon Longjun Tang
2025-11-27  3:24 ` [PATCH v1 3/7] tools/virtio/virtnet_mon: add kprobe start_xmit Longjun Tang
2025-11-28  2:21   ` Jason Wang
2025-11-28  3:25     ` Lange Tang
2025-12-10  9:03   ` Michael S. Tsirkin [this message]
2025-11-27  3:24 ` [PATCH v1 4/7] tools/virtio/virtnet_mon: add kprobe gro_receive_skb Longjun Tang
2025-11-27  3:24 ` [PATCH v1 5/7] tools/virtio/virtnet_mon: add kprobe ip_local_deliver Longjun Tang
2025-11-27  3:24 ` [PATCH v1 6/7] tools/virtio/virtnet_mon: add kprobe skb_xmit_done and skb_recv_done Longjun Tang
2025-11-27  3:24 ` [PATCH v1 7/7] tools/virtio/virtnet_mon: add README file for virtnet_mon Longjun Tang
2025-12-10  9:04 ` [PATCH v1 0/7] introduce virtnet_mon for monitor virtio_net Michael S. Tsirkin
2025-12-11  2:51   ` Lange Tang
2025-12-11  7:10     ` Michael S. Tsirkin
2025-12-11  8:32     ` Jason Wang
2025-12-13  2:41       ` Lange Tang
2025-12-15  6:41         ` Jason Wang
2025-12-15  8:12           ` Lange Tang
2025-12-16  3:59             ` Jason Wang
2025-12-16  7:47               ` Lange Tang
2025-12-16  8:27                 ` Jason Wang
2025-12-21 13:46                 ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251210040228-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=lange_tang@163.com \
    --cc=tanglongjun@kylinos.cn \
    --cc=virtualization@lists.linux.dev \
    --cc=xuanzhuo@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).