All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Longjun Tang <lange_tang@163.com>
Cc: jasowang@redhat.com, xuanzhuo@linux.alibaba.com,
	tanglongjun@kylinos.cn, virtualization@lists.linux.dev
Subject: Re: [PATCH v1 3/7] tools/virtio/virtnet_mon: add kprobe start_xmit
Date: Wed, 10 Dec 2025 04:03:21 -0500	[thread overview]
Message-ID: <20251210040228-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20251127032407.33475-4-lange_tang@163.com>

On Thu, Nov 27, 2025 at 11:24:03AM +0800, Longjun Tang wrote:
> From: Tang Longjun <tanglongjun@kylinos.cn>
> 
> track skb and virtqueue through the kprobe start_xmit function
> 
> Signed-off-by: Tang Longjun <tanglongjun@kylinos.cn>
> ---
>  tools/virtio/virtnet_mon/virtnet_mon.c | 793 ++++++++++++++++++++++++-
>  1 file changed, 772 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/virtio/virtnet_mon/virtnet_mon.c b/tools/virtio/virtnet_mon/virtnet_mon.c
> index 696e621cf803..36b51d0a13d4 100644
> --- a/tools/virtio/virtnet_mon/virtnet_mon.c
> +++ b/tools/virtio/virtnet_mon/virtnet_mon.c
> @@ -6,15 +6,724 @@
>  #include <linux/uaccess.h>
>  #include <linux/miscdevice.h>
>  #include <linux/poll.h>
> +#include <linux/string.h>
> +#include <linux/if_ether.h>
> +
> +#include <linux/kprobes.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <linux/tcp.h>
> +#include <linux/udp.h>
> +#include <linux/icmp.h>
> +#include <linux/icmpv6.h>
> +#include <linux/version.h>
> +#include <linux/time.h>
> +#include <linux/smp.h>
> +#include <linux/virtio.h>
> +#include <linux/scatterlist.h>
> +#include <linux/bpf.h>
> +#include <linux/dim.h>
> +#include <linux/mutex.h>
> +#include <linux/workqueue.h>
> +#include <linux/spinlock.h>
> +
> +#include <linux/u64_stats_sync.h>
> +#include <linux/mm_types_task.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +#include <net/xdp.h>
> +
>  
>  #define DEVICE_NAME "virtnet_mon"
> -#define KFIFO_SIZE 1024     // ring buffer size
> +#define KFIFO_SIZE 65536     // ring buffer size
> +#define WRITE_SIZE 1024
> +#define READ_SIZE 16384
> +#define LINE_MAX_SIZE 1024
> +
> +#if defined(CONFIG_X86_64)
> +#define KP_GET_ARG(regs, idx) \
> +	((idx) == 0 ? (unsigned long)(regs)->di : \
> +	(idx) == 1 ? (unsigned long)(regs)->si : 0UL)
> +#elif defined(CONFIG_ARM64)
> +#define KP_GET_ARG(regs, idx) \
> +	((idx) < 8 ? (unsigned long)(regs)->regs[(idx)] : 0UL)
> +#endif
> +
> +struct _virtnet_sq_stats {
> +	struct u64_stats_sync syncp;
> +	u64_stats_t packets;
> +	u64_stats_t bytes;
> +	u64_stats_t xdp_tx;
> +	u64_stats_t xdp_tx_drops;
> +	u64_stats_t kicks;
> +	u64_stats_t tx_timeouts;
> +	u64_stats_t stop;
> +	u64_stats_t wake;
> +};
> +
> +struct _virtnet_interrupt_coalesce {
> +	u32 max_packets;
> +	u32 max_usecs;
> +};
> +
> +struct _send_queue {
> +	/* Virtqueue associated with this send _queue */
> +	struct virtqueue *vq;
> +
> +	/* TX: fragments + linear part + virtio header */
> +	struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> +	/* Name of the send queue: output.$index */
> +	char name[16];
> +
> +	struct _virtnet_sq_stats stats;
> +
> +	struct _virtnet_interrupt_coalesce intr_coal;
> +
> +	struct napi_struct napi;
> +
> +	/* Record whether sq is in reset state. */
> +	bool reset;
> +
> +	struct xsk_buff_pool *xsk_pool;
> +
> +	dma_addr_t xsk_hdr_dma_addr;
> +};
> +
> +struct _virtnet_rq_stats {
> +	struct u64_stats_sync syncp;
> +	u64_stats_t packets;
> +	u64_stats_t bytes;
> +	u64_stats_t drops;
> +	u64_stats_t xdp_packets;
> +	u64_stats_t xdp_tx;
> +	u64_stats_t xdp_redirects;
> +	u64_stats_t xdp_drops;
> +	u64_stats_t kicks;
> +};
> +
> +struct _ewma_pkt_len {
> +	unsigned long internal;
> +};
> +
> +struct _virtnet_rq_dma {
> +	dma_addr_t addr;
> +	u32 ref;
> +	u16 len;
> +	u16 need_sync;
> +};
> +
> +struct _receive_queue {
> +	/* Virtqueue associated with this receive_queue */
> +	struct virtqueue *vq;
> +
> +	struct napi_struct napi;
> +
> +	struct bpf_prog __rcu *xdp_prog;
> +
> +	struct _virtnet_rq_stats stats;
> +
> +	/* The number of rx notifications */
> +	u16 calls;
> +
> +	/* Is dynamic interrupt moderation enabled? */
> +	bool dim_enabled;
> +
> +	/* Used to protect dim_enabled and inter_coal */
> +	struct mutex dim_lock;
> +
> +	/* Dynamic Interrupt Moderation */
> +	struct dim dim;
> +
> +	u32 packets_in_napi;
> +
> +	struct _virtnet_interrupt_coalesce intr_coal;
> +
> +	/* Chain pages by the private ptr. */
> +	struct page *pages;
> +
> +	/* Average packet length for mergeable receive buffers. */
> +	struct _ewma_pkt_len mrg_avg_pkt_len;
> +
> +	/* Page frag for packet buffer allocation. */
> +	struct page_frag alloc_frag;
> +
> +	/* RX: fragments + linear part + virtio header */
> +	struct scatterlist sg[MAX_SKB_FRAGS + 2];
> +
> +	/* Min single buffer size for mergeable buffers case. */
> +	unsigned int min_buf_len;
> +
> +	/* Name of this receive queue: input.$index */
> +	char name[16];
> +
> +	struct xdp_rxq_info xdp_rxq;
> +
> +	/* Record the last dma info to free after new pages is allocated. */
> +	struct _virtnet_rq_dma *last_dma;
> +
> +	struct xsk_buff_pool *xsk_pool;
> +
> +	/* xdp rxq used by xsk */
> +	struct xdp_rxq_info xsk_rxq_info;
> +
> +	struct xdp_buff **xsk_buffs;
> +};
> +
> +#define VIRTIO_NET_RSS_MAX_KEY_SIZE     40
> +
> +struct _control_buf {
> +	struct virtio_net_ctrl_hdr hdr;
> +	virtio_net_ctrl_ack status;
> +};
> +
> +struct _virtnet_info {
> +	struct virtio_device *vdev;
> +	struct virtqueue *cvq;
> +	struct net_device *dev;
> +	struct _send_queue *sq;
> +	struct _receive_queue *rq;
> +	unsigned int status;
> +
> +	/* Max # of queue pairs supported by the device */
> +	u16 max_queue_pairs;
> +
> +	/* # of queue pairs currently used by the driver */
> +	u16 curr_queue_pairs;
> +
> +	/* # of XDP queue pairs currently used by the driver */
> +	u16 xdp_queue_pairs;
> +
> +	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
> +	bool xdp_enabled;
> +
> +	/* I like... big packets and I cannot lie! */
> +	bool big_packets;
> +
> +	/* number of sg entries allocated for big packets */
> +	unsigned int big_packets_num_skbfrags;
> +
> +	/* Host will merge rx buffers for big packets (shake it! shake it!) */
> +	bool mergeable_rx_bufs;
> +
> +	/* Host supports rss and/or hash report */
> +	bool has_rss;
> +	bool has_rss_hash_report;
> +	u8 rss_key_size;
> +	u16 rss_indir_table_size;
> +	u32 rss_hash_types_supported;
> +	u32 rss_hash_types_saved;
> +	struct virtio_net_rss_config_hdr *rss_hdr;
> +	struct virtio_net_rss_config_trailer rss_trailer;
> +	u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> +
> +	/* Has control virtqueue */
> +	bool has_cvq;
> +
> +	/* Lock to protect the control VQ */
> +	struct mutex cvq_lock;
> +
> +	/* Host can handle any s/g split between our header and packet data */
> +	bool any_header_sg;
> +
> +	/* Packet virtio header size */
> +	u8 hdr_len;
> +
> +	/* Work struct for delayed refilling if we run low on memory. */
> +	struct delayed_work refill;
> +
> +	/* UDP tunnel support */
> +	bool tx_tnl;
> +
> +	bool rx_tnl;
> +
> +	bool rx_tnl_csum;
> +
> +	/* Is delayed refill enabled? */
> +	bool refill_enabled;
> +
> +	/* The lock to synchronize the access to refill_enabled */
> +	spinlock_t refill_lock;
> +
> +	/* Work struct for config space updates */
> +	struct work_struct config_work;
> +
> +	/* Work struct for setting rx mode */
> +	struct work_struct rx_mode_work;
> +
> +	/* OK to queue work setting RX mode? */
> +	bool rx_mode_work_enabled;
> +
> +	/* Does the affinity hint is set for virtqueues? */
> +
> +	bool affinity_hint_set;
> +
> +	/* CPU hotplug instances for online & dead */
> +
> +	struct hlist_node node;
> +
> +	struct hlist_node node_dead;
> +
> +	struct _control_buf *ctrl;
> +
> +	/* Ethtool settings */
> +	u8 duplex;
> +	u32 speed;
> +
> +	/* Is rx dynamic interrupt moderation enabled? */
> +	bool rx_dim_enabled;
> +
> +	/* Interrupt coalescing settings */
> +	struct _virtnet_interrupt_coalesce intr_coal_tx;
> +	struct _virtnet_interrupt_coalesce intr_coal_rx;
> +
> +	unsigned long guest_offloads;
> +	unsigned long guest_offloads_capable;
> +
> +	/* failover when STANDBY feature enabled */
> +	struct failover *failover;
> +
> +	u64 device_stats_cap;
> +};
> +
> +
> +struct _vring_desc_state_split {
> +	void *data;			/* Data for callback. */
> +	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
> +};
> +
> +struct _vring_desc_extra {
> +	dma_addr_t addr;		/* Descriptor DMA addr. */
> +	u32 len;			/* Descriptor length. */
> +	u16 flags;			/* Descriptor flags. */
> +	u16 next;			/* The next desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_split {
> +	/* Actual memory layout for this queue. */
> +	struct vring vring;
> +
> +	/* Last written value to avail->flags */
> +	u16 avail_flags_shadow;
> +
> +	/*
> +	 * Last written value to avail->idx in
> +	 * guest byte order.
> +	 */
> +	u16 avail_idx_shadow;
> +
> +	/* Per-descriptor state. */
> +	struct _vring_desc_state_split *desc_state;
> +	struct _vring_desc_extra *desc_extra;
> +
> +	/* DMA address and size information */
> +	dma_addr_t queue_dma_addr;
> +	size_t queue_size_in_bytes;
> +
> +	/*
> +	 * The parameters for creating vrings are reserved for creating new
> +	 * vring.
> +	 */
> +	u32 vring_align;
> +	bool may_reduce_num;
> +};
> +
> +struct _vring_desc_state_packed {
> +	void *data;			/* Data for callback. */
> +	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
> +	u16 num;			/* Descriptor list length. */
> +	u16 last;			/* The last desc state in a list. */
> +};
> +
> +struct _vring_virtqueue_packed {
> +	/* Actual memory layout for this queue. */
> +	struct {
> +		unsigned int num;
> +		struct vring_packed_desc *desc;
> +		struct vring_packed_desc_event *driver;
> +		struct vring_packed_desc_event *device;
> +	} vring;
> +
> +	/* Driver ring wrap counter. */
> +	bool avail_wrap_counter;
> +
> +	/* Avail used flags. */
> +	u16 avail_used_flags;
> +
> +	/* Index of the next avail descriptor. */
> +	u16 next_avail_idx;
> +
> +	/*
> +	 * Last written value to driver->flags in
> +	 * guest byte order.
> +	 */
> +	u16 event_flags_shadow;
> +
> +	/* Per-descriptor state. */
> +	struct _vring_desc_state_packed *desc_state;
> +	struct _vring_desc_extra *desc_extra;
> +
> +	/* DMA address and size information */
> +	dma_addr_t ring_dma_addr;
> +	dma_addr_t driver_event_dma_addr;
> +	dma_addr_t device_event_dma_addr;
> +	size_t ring_size_in_bytes;
> +	size_t event_size_in_bytes;
> +};
> +
> +struct _vring_virtqueue {
> +	struct virtqueue vq;
> +
> +	/* Is this a packed ring? */
> +	bool packed_ring;
> +
> +	/* Is DMA API used? */
> +	bool use_dma_api;
> +
> +	/* Can we use weak barriers? */
> +	bool weak_barriers;
> +
> +	/* Other side has made a mess, don't try any more. */
> +	bool broken;
> +
> +	/* Host supports indirect buffers */
> +	bool indirect;
> +
> +	/* Host publishes avail event idx */
> +	bool event;
> +
> +	/* Head of free buffer list. */
> +	unsigned int free_head;
> +	/* Number we've added since last sync. */
> +	unsigned int num_added;
> +
> +	/* Last used index  we've seen.
> +	 * for split ring, it just contains last used index
> +	 * for packed ring:
> +	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
> +	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
> +	 */
> +	u16 last_used_idx;
>  
> -static DEFINE_KFIFO(virtnet_mon_kfifo, char, KFIFO_SIZE);
> +	/* Hint for event idx: already triggered no need to disable. */
> +	bool event_triggered;
> +
> +	union {
> +		/* Available for split ring */
> +		struct _vring_virtqueue_split split;
> +
> +		/* Available for packed ring */
> +		struct _vring_virtqueue_packed packed;
> +	};
> +
> +	/* How to notify other side. FIXME: commonalize hcalls! */
> +	bool (*notify)(struct virtqueue *vq);
> +
> +	/* DMA, allocation, and size information */
> +	bool we_own_ring;
> +
> +	union virtio_map map;
> +};
> +
> +/* RX or TX */
> +enum pkt_dir {
> +	PKT_DIR_UN = 0,      /* Unknown */
> +	PKT_DIR_RX = 1,           /* RX */
> +	PKT_DIR_TX = 2,           /* TX */
> +	PKT_DIR_MAX
> +};
> +
> +enum event_type {
> +	START_XMIT_PRE_EVENT = 1,
> +	START_XMIT_POST_EVENT = 2,
> +};
> +
> +struct iph_info {
> +	struct sk_buff *skb;        /* SKB */
> +	u8 iph_proto;    /* iph protocol type */
> +	u32 seq;         /* absolute sequence number */
> +};
> +
> +struct queue_info {
> +	struct virtqueue *vq;
> +	char name[16];
> +	unsigned int num_free;
> +	unsigned int num;
> +	__virtio16 avail_flags;
> +	__virtio16 avail_idx;
> +	u16 avail_flags_shadow;
> +	u16 avail_idx_shadow;
> +	__virtio16 used_flags;
> +	__virtio16 used_idx;
> +	u16 last_used_idx;
> +	bool broken;
> +};


Not at all excited about all the code duplication going on here.


  parent reply	other threads:[~2025-12-10  9:03 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-27  3:24 [PATCH v1 0/7] introduce virtnet_mon for monitor virtio_net Longjun Tang
2025-11-27  3:24 ` [PATCH v1 1/7] tools/virtio/virtnet_mon: create misc driver for virtnet_mon Longjun Tang
2025-11-27  3:24 ` [PATCH v1 2/7] tools/virtio/virtnet_mon: add kfifo to virtnet_mon Longjun Tang
2025-11-27  3:24 ` [PATCH v1 3/7] tools/virtio/virtnet_mon: add kprobe start_xmit Longjun Tang
2025-11-28  2:21   ` Jason Wang
2025-11-28  3:25     ` Lange Tang
2025-12-10  9:03   ` Michael S. Tsirkin [this message]
2025-11-27  3:24 ` [PATCH v1 4/7] tools/virtio/virtnet_mon: add kprobe gro_receive_skb Longjun Tang
2025-11-27  3:24 ` [PATCH v1 5/7] tools/virtio/virtnet_mon: add kprobe ip_local_deliver Longjun Tang
2025-11-27  3:24 ` [PATCH v1 6/7] tools/virtio/virtnet_mon: add kprobe skb_xmit_done and skb_recv_done Longjun Tang
2025-11-27  3:24 ` [PATCH v1 7/7] tools/virtio/virtnet_mon: add README file for virtnet_moin Longjun Tang
2025-12-10  9:04 ` [PATCH v1 0/7] introduce virtnet_mon for monitor virtio_net Michael S. Tsirkin
2025-12-11  2:51   ` Lange Tang
2025-12-11  7:10     ` Michael S. Tsirkin
2025-12-11  8:32     ` Jason Wang
2025-12-13  2:41       ` Lange Tang
2025-12-15  6:41         ` Jason Wang
2025-12-15  8:12           ` Lange Tang
2025-12-16  3:59             ` Jason Wang
2025-12-16  7:47               ` Lange Tang
2025-12-16  8:27                 ` Jason Wang
2025-12-21 13:46                 ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251210040228-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=lange_tang@163.com \
    --cc=tanglongjun@kylinos.cn \
    --cc=virtualization@lists.linux.dev \
    --cc=xuanzhuo@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.