Re: [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Daniel Borkmann <dborkman@redhat.com>
To: John Fastabend <john.fastabend@gmail.com>
Cc: netdev@vger.kernel.org, danny.zhou@intel.com,
	nhorman@tuxdriver.com, john.ronciak@intel.com,
	hannes@stressinduktion.org, brouer@redhat.com
Subject: Re: [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space
Date: Tue, 13 Jan 2015 16:12:34 +0100	[thread overview]
Message-ID: <54B535E2.7040600@redhat.com> (raw)
In-Reply-To: <20150113043509.29985.33515.stgit@nitbit.x32>

On 01/13/2015 05:35 AM, John Fastabend wrote:
...
>   struct net_device_ops {
>   	int			(*ndo_init)(struct net_device *dev);
> @@ -1190,6 +1240,35 @@ struct net_device_ops {
>   	int			(*ndo_switch_port_stp_update)(struct net_device *dev,
>   							      u8 state);
>   #endif
> +	int			(*ndo_split_queue_pairs)(struct net_device *dev,
> +					 unsigned int qpairs_start_from,
> +					 unsigned int qpairs_num,
> +					 struct sock *sk);
...
> +	int			(*ndo_get_dma_region_info)
> +					(struct net_device *dev,
> +					 struct tpacket_dma_mem_region *region,
> +					 struct sock *sk);
>   };

Any slight chance these 8 ndo ops could be further reduced? ;)

>   /**
> diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
> index da2d668..eb7a727 100644
> --- a/include/uapi/linux/if_packet.h
> +++ b/include/uapi/linux/if_packet.h
...
> +struct tpacket_dev_qpair_map_region_info {
> +	unsigned int tp_dev_bar_sz;		/* size of BAR */
> +	unsigned int tp_dev_sysm_sz;		/* size of systerm memory */
> +	/* number of contiguous memory on BAR mapping to user space */
> +	unsigned int tp_num_map_regions;
> +	/* number of contiguous memory on system mapping to user apce */
> +	unsigned int tp_num_sysm_map_regions;
> +	struct map_page_region {
> +		unsigned page_offset;	/* offset to start of region */
> +		unsigned page_sz;	/* size of page */
> +		unsigned page_cnt;	/* number of pages */

Please use unsigned int et al, or preferably __u* variants consistently
in the uapi structs.

> +	} tp_regions[MAX_MAP_MEMORY_REGIONS];
> +};
...
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 6880f34..8cd17da 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
...
> @@ -2633,6 +2636,16 @@ static int packet_release(struct socket *sock)
>   	sock_prot_inuse_add(net, sk->sk_prot, -1);
>   	preempt_enable();
>
> +	if (po->tp_owns_queue_pairs) {
> +		struct net_device *dev;
> +
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (dev) {
> +			dev->netdev_ops->ndo_return_queue_pairs(dev, sk);
> +			umem_release(dev, po);
> +		}
> +	}
> +
...
> +static int
>   packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
>   {
>   	struct sock *sk = sock->sk;
> @@ -3428,6 +3525,167 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
>   		po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
>   		return 0;
>   	}
> +	case PACKET_RXTX_QPAIRS_SPLIT:
> +	{
...
> +		/* This call only works after a bind call which calls a dev_hold
> +		 * operation so we do not need to increment dev ref counter
> +		 */
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (!dev)
> +			return -EINVAL;
> +		ops = dev->netdev_ops;
> +		if (!ops->ndo_split_queue_pairs)
> +			return -EOPNOTSUPP;
> +
> +		err =  ops->ndo_split_queue_pairs(dev,
> +						  qpairs.tp_qpairs_start_from,
> +						  qpairs.tp_qpairs_num, sk);
> +		if (!err)
> +			po->tp_owns_queue_pairs = true;

When this is being set here, above test in packet_release() and the chunk
quoted below in packet_mmap() are not guaranteed to work since we don't
test if some ndos are actually implemented by the driver. Seems a bit
fragile, I'm wondering if we should test this capability as a _whole_,
iow if all necessary functions to make this work are being provided by the
driver, e.g. flag the netdev as such and test for that instead.

> +		return err;
> +	}
> +	case PACKET_RXTX_QPAIRS_RETURN:
> +	{
...
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (!dev)
> +			return -EINVAL;
> +		ops = dev->netdev_ops;
> +		if (!ops->ndo_split_queue_pairs)
> +			return -EOPNOTSUPP;

Should test for ndo_return_queue_pairs.

> +		err =  dev->netdev_ops->ndo_return_queue_pairs(dev, sk);
> +		if (!err)
> +			po->tp_owns_queue_pairs = false;
> +
...
> +	case PACKET_RXTX_QPAIRS_SPLIT:
> +	{
...
> +		/* This call only work after a bind call which calls a dev_hold
> +		 * operation so we do not need to increment dev ref counter
> +		 */
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (!dev)
> +			return -EINVAL;
> +		if (!dev->netdev_ops->ndo_split_queue_pairs)
> +			return -EOPNOTSUPP;

Copy-paste (although not quite, since here's no extra ops var). :)
Should be ndo_get_split_queue_pairs.

> +		err =  dev->netdev_ops->ndo_get_split_queue_pairs(dev,
> +					&qpairs_info.tp_qpairs_start_from,
> +					&qpairs_info.tp_qpairs_num, sk);
> +
...
> @@ -3927,8 +4309,20 @@ static int packet_mmap(struct file *file, struct socket *sock,
>   	if (vma->vm_pgoff)
>   		return -EINVAL;
>
> +	dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +	if (!dev)
> +		return -EINVAL;
> +
>   	mutex_lock(&po->pg_vec_lock);
>
> +	if (po->tp_owns_queue_pairs) {
> +		ops = dev->netdev_ops;
> +		err = ops->ndo_direct_qpair_page_map(vma, dev);
> +		if (err)
> +			goto out;
> +		goto done;
> +	}
> +

next prev parent reply	other threads:[~2015-01-13 15:13 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-13  4:35 [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space John Fastabend
2015-01-13  4:35 ` [RFC PATCH v2 2/2] net: ixgbe: implement af_packet direct queue mappings John Fastabend
2015-01-13 12:05   ` Hannes Frederic Sowa
2015-01-13 14:26   ` Daniel Borkmann
2015-01-13 15:46     ` John Fastabend
2015-01-13 18:18       ` Daniel Borkmann
2015-01-13 18:58   ` Willem de Bruijn
2015-01-13  4:42 ` [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space John Fastabend
2015-01-13 12:35 ` Hannes Frederic Sowa
2015-01-13 13:21   ` Daniel Borkmann
2015-01-13 15:24     ` John Fastabend
2015-01-13 17:15       ` David Laight
2015-01-13 17:27         ` David Miller
2015-01-14 15:28           ` Zhou, Danny
2015-01-13 15:12 ` Daniel Borkmann [this message]
2015-01-13 15:58   ` John Fastabend
2015-01-13 16:05     ` Daniel Borkmann
2015-01-13 16:19 ` Neil Horman
2015-01-13 18:52 ` Willem de Bruijn
2015-01-14 15:26   ` Zhou, Danny
2015-01-14 20:35 ` David Miller
2015-01-17 17:35   ` John Fastabend
2015-01-18 22:02   ` Neil Horman
2015-01-19 21:45   ` Neil Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54B535E2.7040600@redhat.com \
    --to=dborkman@redhat.com \
    --cc=brouer@redhat.com \
    --cc=danny.zhou@intel.com \
    --cc=hannes@stressinduktion.org \
    --cc=john.fastabend@gmail.com \
    --cc=john.ronciak@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@tuxdriver.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.