All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Daniel P. Berrangé" <berrange@redhat.com>
To: Cindy Lu <lulu@redhat.com>
Cc: mst@redhat.com, jasowang@redhat.com, zhangckid@gmail.com,
	lizhijian@fujitsu.com, jmarcin@redhat.com, qemu-devel@nongnu.org
Subject: Re: [RFC v4 5/5] chardev/socket: add AF_PACKET capture path
Date: Wed, 8 Apr 2026 13:13:19 +0100	[thread overview]
Message-ID: <adZGX5nA76feGRuV@redhat.com> (raw)
In-Reply-To: <20260407050818.2249570-6-lulu@redhat.com>

On Tue, Apr 07, 2026 at 01:05:52PM +0800, Cindy Lu wrote:
> Add the AF_PACKET capture read path for socket chardevs. When opened
> with af-packet-mode=capture, the read side drains raw frames with
> recvfrom(), keeps only PACKET_OUTGOING traffic, and feeds the result
> through the normal chardev frontend interface.
> 
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  chardev/char-socket.c | 133 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 131 insertions(+), 2 deletions(-)
> 
> diff --git a/chardev/char-socket.c b/chardev/char-socket.c
> index 45d06fda8f..76a51a853d 100644
> --- a/chardev/char-socket.c
> +++ b/chardev/char-socket.c
> @@ -107,9 +107,17 @@ static void tcp_chr_accept(QIONetListener *listener,
>  
>  static int tcp_chr_read_poll(void *opaque);
>  static void tcp_chr_disconnect_locked(Chardev *chr);
> +static void tcp_chr_deliver_af_packet(Chardev *chr);
>  
>  #define TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE 65536
>  
> +static bool
> +tcp_chr_uses_af_packet_capture(SocketChardev *s)
> +{
> +    return s->is_af_packet && s->af_packet_mode_set &&
> +           s->af_packet_mode == CHARDEV_SOCKET_AF_PACKET_MODE_CAPTURE;
> +}
> +
>  static bool tcp_chr_uses_af_packet_inject(SocketChardev *s)
>  {
>      return s->is_af_packet &&
> @@ -300,6 +308,9 @@ static int tcp_chr_read_poll(void *opaque)
>          return 0;
>      }
>      s->max_size = qemu_chr_be_can_write(chr);
> +    if (tcp_chr_uses_af_packet_capture(s) && s->af_packet_buf_len) {
> +        tcp_chr_deliver_af_packet(chr);
> +    }
>      return s->max_size;
>  }
>  
> @@ -500,6 +511,98 @@ static void tcp_chr_reset_af_packet_send(SocketChardev *s)
>      s->af_packet_send_len_bytes = 0;
>  }
>  
> +/* Push buffered AF_PACKET capture data into the chardev frontend. */
> +static void
> +tcp_chr_deliver_af_packet(Chardev *chr)
> +{
> +    SocketChardev *s = SOCKET_CHARDEV(chr);
> +
> +    while (s->max_size > 0 && s->af_packet_buf_offset < s->af_packet_buf_len) {
> +        size_t remaining = s->af_packet_buf_len - s->af_packet_buf_offset;
> +        size_t chunk = MIN((size_t)s->max_size, remaining);
> +
> +        qemu_chr_be_write(chr, s->af_packet_buf + s->af_packet_buf_offset,
> +                          (int)chunk);
> +        s->af_packet_buf_offset += chunk;
> +        s->max_size = qemu_chr_be_can_write(chr);
> +    }
> +
> +    if (s->af_packet_buf_offset == s->af_packet_buf_len) {
> +        tcp_chr_reset_af_packet_buf(s);
> +    }
> +}
> +
> +/* Copy buffered AF_PACKET capture data into a synchronous read buffer. */
> +static int tcp_chr_copy_af_packet_buf(SocketChardev *s, uint8_t *buf,
> +                                      int len) {
> +    size_t remaining = s->af_packet_buf_len - s->af_packet_buf_offset;
> +    size_t copied = MIN((size_t)len, remaining);
> +
> +    memcpy(buf, s->af_packet_buf + s->af_packet_buf_offset, copied);
> +    s->af_packet_buf_offset += copied;
> +
> +    if (s->af_packet_buf_offset == s->af_packet_buf_len) {
> +        tcp_chr_reset_af_packet_buf(s);
> +    }
> +
> +    return (int)copied;
> +}
> +
> +static ssize_t
> +tcp_chr_capture_af_packet(Chardev *chr)
> +{
> +#ifdef CONFIG_LINUX
> +    SocketChardev *s = SOCKET_CHARDEV(chr);
> +    struct sockaddr_ll sll;
> +    socklen_t sll_len;
> +    ssize_t size;
> +    uint32_t len;
> +
> +    if (!tcp_chr_uses_af_packet_capture(s)) {
> +        errno = EIO;
> +        return -1;
> +    }
> +
> +    if (s->af_packet_buf_size <
> +        sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE) {
> +        s->af_packet_buf =
> +            g_realloc(s->af_packet_buf,
> +                      sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE);
> +        s->af_packet_buf_size =
> +            sizeof(len) + TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE;
> +    }
> +
> +    for (;;) {
> +        sll_len = sizeof(sll);
> +        do {
> +            size = recvfrom(s->sioc->fd, s->af_packet_buf + sizeof(len),
> +                            TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE, 0,
> +                            (struct sockaddr *)&sll, &sll_len);
> +        } while (size < 0 && errno == EINTR);
> +
> +        if (size <= 0) {
> +            if (size < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
> +                trace_chr_socket_recv_err(chr, chr->label, g_strerror(errno));
> +            }
> +            return size;
> +        }
> +
> +        if (sll.sll_pkttype != PACKET_OUTGOING) {
> +            continue;
> +        }
> +
> +        len = htonl(size);
> +        memcpy(s->af_packet_buf, &len, sizeof(len));
> +        s->af_packet_buf_len = sizeof(len) + size;
> +        s->af_packet_buf_offset = 0;
> +        return (ssize_t)s->af_packet_buf_len;
> +    }
> +#else
> +    errno = EPROTONOSUPPORT;
> +    return -1;
> +#endif
> +}
> +
>  static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond)
>  {
>      SocketChardev *s = SOCKET_CHARDEV(chr);
> @@ -682,6 +785,22 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
>      if (len > s->max_size) {
>          len = s->max_size;
>      }
> +    if (tcp_chr_uses_af_packet_capture(s)) {
> +        tcp_chr_deliver_af_packet(chr);
> +        if (s->max_size <= 0 || s->af_packet_buf_len) {
> +            return TRUE;
> +        }
> +
> +        size = tcp_chr_capture_af_packet(chr);
> +        if (size == 0 || (size == -1 && errno != EAGAIN)) {
> +            tcp_chr_disconnect(chr);
> +        } else if (size > 0) {
> +            tcp_chr_deliver_af_packet(chr);
> +        }
> +
> +        return TRUE;
> +    }
> +
>      size = tcp_chr_recv(chr, (void *)buf, len);
>      if (size == 0 || (size == -1 && errno != EAGAIN)) {
>          /* connection closed */
> @@ -715,6 +834,10 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>      int saved_errno;
>      Error *local_err = NULL;
>  
> +    if (tcp_chr_uses_af_packet_capture(s) && s->af_packet_buf_len) {
> +        return tcp_chr_copy_af_packet_buf(s, (uint8_t *)buf, len);
> +    }
> +
>      if (s->state != TCP_CHARDEV_STATE_CONNECTED) {
>          return 0;
>      }
> @@ -723,7 +846,14 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
>          error_report_err(local_err);
>          return -1;
>      }
> -    size = tcp_chr_recv(chr, (void *) buf, len);
> +    if (tcp_chr_uses_af_packet_capture(s)) {
> +        size = tcp_chr_capture_af_packet(chr);
> +        if (size > 0) {
> +            size = tcp_chr_copy_af_packet_buf(s, (uint8_t *)buf, len);
> +        }
> +    } else {
> +        size = tcp_chr_recv(chr, (void *)buf, len);
> +    }


Similarly to the send side, I don't really think we should have this
packet re-assembly logic in the chardev code. We should just be
calling the normal qio_channel_read APIs and letting the netfilter code
re-assemble the packets it gets from the chardev. Mostly, it seems we
would just need to use TCP_CHARDEV_AF_PACKET_MAX_FRAME_SIZE instead of
CHR_READ_BUF_LEN in the existing code paths.

>      saved_errno = errno;
>      if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) {
>          if (!qio_channel_set_blocking(s->ioc, false, &local_err)) {
> @@ -1448,7 +1578,6 @@ static gboolean socket_reconnect_timeout(gpointer opaque)
>      return false;
>  }
>  
> -
>  static int qmp_chardev_open_socket_server(Chardev *chr,
>                                            bool is_telnet,
>                                            bool is_waitconnect,
> -- 
> 2.52.0
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com       ~~        https://hachyderm.io/@berrange :|
|: https://libvirt.org          ~~          https://entangle-photo.org :|
|: https://pixelfed.art/berrange   ~~    https://fstop138.berrange.com :|



  reply	other threads:[~2026-04-08 19:08 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-07  5:05 [RFC v4 0/5] net/filter: Add AF_PACKET support for vhost-net Cindy Lu
2026-04-07  5:05 ` [RFC v4 1/5] net/filter: allow filters on vhost netdevs Cindy Lu
2026-04-07  5:05 ` [RFC v4 2/5] chardev/socket: add AF_PACKET initialization Cindy Lu
2026-04-07  5:05 ` [RFC v4 3/5] io/channel-socket: tolerate AF_PACKET getpeername Cindy Lu
2026-04-08 12:00   ` Daniel P. Berrangé
2026-04-07  5:05 ` [RFC v4 4/5] chardev/socket: add AF_PACKET inject path Cindy Lu
2026-04-08 12:07   ` Daniel P. Berrangé
2026-04-07  5:05 ` [RFC v4 5/5] chardev/socket: add AF_PACKET capture path Cindy Lu
2026-04-08 12:13   ` Daniel P. Berrangé [this message]
2026-04-08 12:16 ` [RFC v4 0/5] net/filter: Add AF_PACKET support for vhost-net Daniel P. Berrangé

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=adZGX5nA76feGRuV@redhat.com \
    --to=berrange@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jmarcin@redhat.com \
    --cc=lizhijian@fujitsu.com \
    --cc=lulu@redhat.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=zhangckid@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.