Re: [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Daniel Borkmann <dborkman@redhat.com>
To: John Fastabend <john.fastabend@gmail.com>
Cc: netdev@vger.kernel.org, danny.zhou@intel.com,
	nhorman@tuxdriver.com, john.ronciak@intel.com,
	hannes@stressinduktion.org, brouer@redhat.com
Subject: Re: [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space
Date: Tue, 13 Jan 2015 16:12:34 +0100	[thread overview]
Message-ID: <54B535E2.7040600@redhat.com> (raw)
In-Reply-To: <20150113043509.29985.33515.stgit@nitbit.x32>

On 01/13/2015 05:35 AM, John Fastabend wrote:
...
>   struct net_device_ops {
>   	int			(*ndo_init)(struct net_device *dev);
> @@ -1190,6 +1240,35 @@ struct net_device_ops {
>   	int			(*ndo_switch_port_stp_update)(struct net_device *dev,
>   							      u8 state);
>   #endif
> +	int			(*ndo_split_queue_pairs)(struct net_device *dev,
> +					 unsigned int qpairs_start_from,
> +					 unsigned int qpairs_num,
> +					 struct sock *sk);
...
> +	int			(*ndo_get_dma_region_info)
> +					(struct net_device *dev,
> +					 struct tpacket_dma_mem_region *region,
> +					 struct sock *sk);
>   };

Any slight chance these 8 ndo ops could be further reduced? ;)

>   /**
> diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
> index da2d668..eb7a727 100644
> --- a/include/uapi/linux/if_packet.h
> +++ b/include/uapi/linux/if_packet.h
...
> +struct tpacket_dev_qpair_map_region_info {
> +	unsigned int tp_dev_bar_sz;		/* size of BAR */
> +	unsigned int tp_dev_sysm_sz;		/* size of systerm memory */
> +	/* number of contiguous memory on BAR mapping to user space */
> +	unsigned int tp_num_map_regions;
> +	/* number of contiguous memory on system mapping to user apce */
> +	unsigned int tp_num_sysm_map_regions;
> +	struct map_page_region {
> +		unsigned page_offset;	/* offset to start of region */
> +		unsigned page_sz;	/* size of page */
> +		unsigned page_cnt;	/* number of pages */

Please use unsigned int et al, or preferably __u* variants consistently
in the uapi structs.

> +	} tp_regions[MAX_MAP_MEMORY_REGIONS];
> +};
...
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 6880f34..8cd17da 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
...
> @@ -2633,6 +2636,16 @@ static int packet_release(struct socket *sock)
>   	sock_prot_inuse_add(net, sk->sk_prot, -1);
>   	preempt_enable();
>
> +	if (po->tp_owns_queue_pairs) {
> +		struct net_device *dev;
> +
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (dev) {
> +			dev->netdev_ops->ndo_return_queue_pairs(dev, sk);
> +			umem_release(dev, po);
> +		}
> +	}
> +
...
> +static int
>   packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
>   {
>   	struct sock *sk = sock->sk;
> @@ -3428,6 +3525,167 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
>   		po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
>   		return 0;
>   	}
> +	case PACKET_RXTX_QPAIRS_SPLIT:
> +	{
...
> +		/* This call only works after a bind call which calls a dev_hold
> +		 * operation so we do not need to increment dev ref counter
> +		 */
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (!dev)
> +			return -EINVAL;
> +		ops = dev->netdev_ops;
> +		if (!ops->ndo_split_queue_pairs)
> +			return -EOPNOTSUPP;
> +
> +		err =  ops->ndo_split_queue_pairs(dev,
> +						  qpairs.tp_qpairs_start_from,
> +						  qpairs.tp_qpairs_num, sk);
> +		if (!err)
> +			po->tp_owns_queue_pairs = true;

When this is being set here, above test in packet_release() and the chunk
quoted below in packet_mmap() are not guaranteed to work since we don't
test if some ndos are actually implemented by the driver. Seems a bit
fragile, I'm wondering if we should test this capability as a _whole_,
iow if all necessary functions to make this work are being provided by the
driver, e.g. flag the netdev as such and test for that instead.

> +		return err;
> +	}
> +	case PACKET_RXTX_QPAIRS_RETURN:
> +	{
...
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (!dev)
> +			return -EINVAL;
> +		ops = dev->netdev_ops;
> +		if (!ops->ndo_split_queue_pairs)
> +			return -EOPNOTSUPP;

Should test for ndo_return_queue_pairs.

> +		err =  dev->netdev_ops->ndo_return_queue_pairs(dev, sk);
> +		if (!err)
> +			po->tp_owns_queue_pairs = false;
> +
...
> +	case PACKET_RXTX_QPAIRS_SPLIT:
> +	{
...
> +		/* This call only work after a bind call which calls a dev_hold
> +		 * operation so we do not need to increment dev ref counter
> +		 */
> +		dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +		if (!dev)
> +			return -EINVAL;
> +		if (!dev->netdev_ops->ndo_split_queue_pairs)
> +			return -EOPNOTSUPP;

Copy-paste (although not quite, since here's no extra ops var). :)
Should be ndo_get_split_queue_pairs.

> +		err =  dev->netdev_ops->ndo_get_split_queue_pairs(dev,
> +					&qpairs_info.tp_qpairs_start_from,
> +					&qpairs_info.tp_qpairs_num, sk);
> +
...
> @@ -3927,8 +4309,20 @@ static int packet_mmap(struct file *file, struct socket *sock,
>   	if (vma->vm_pgoff)
>   		return -EINVAL;
>
> +	dev = __dev_get_by_index(sock_net(sk), po->ifindex);
> +	if (!dev)
> +		return -EINVAL;
> +
>   	mutex_lock(&po->pg_vec_lock);
>
> +	if (po->tp_owns_queue_pairs) {
> +		ops = dev->netdev_ops;
> +		err = ops->ndo_direct_qpair_page_map(vma, dev);
> +		if (err)
> +			goto out;
> +		goto done;
> +	}
> +

next prev parent reply	other threads:[~2015-01-13 15:13 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-13  4:35 [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space John Fastabend
2015-01-13  4:35 ` [RFC PATCH v2 2/2] net: ixgbe: implement af_packet direct queue mappings John Fastabend
2015-01-13 12:05   ` Hannes Frederic Sowa
2015-01-13 14:26   ` Daniel Borkmann
2015-01-13 15:46     ` John Fastabend
2015-01-13 18:18       ` Daniel Borkmann
2015-01-13 18:58   ` Willem de Bruijn
2015-01-13  4:42 ` [RFC PATCH v2 1/2] net: af_packet support for direct ring access in user space John Fastabend
2015-01-13 12:35 ` Hannes Frederic Sowa
2015-01-13 13:21   ` Daniel Borkmann
2015-01-13 15:24     ` John Fastabend
2015-01-13 17:15       ` David Laight
2015-01-13 17:27         ` David Miller
2015-01-14 15:28           ` Zhou, Danny
2015-01-13 15:12 ` Daniel Borkmann [this message]
2015-01-13 15:58   ` John Fastabend
2015-01-13 16:05     ` Daniel Borkmann
2015-01-13 16:19 ` Neil Horman
2015-01-13 18:52 ` Willem de Bruijn
2015-01-14 15:26   ` Zhou, Danny
2015-01-14 20:35 ` David Miller
2015-01-17 17:35   ` John Fastabend
2015-01-18 22:02   ` Neil Horman
2015-01-19 21:45   ` Neil Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54B535E2.7040600@redhat.com \
    --to=dborkman@redhat.com \
    --cc=brouer@redhat.com \
    --cc=danny.zhou@intel.com \
    --cc=hannes@stressinduktion.org \
    --cc=john.fastabend@gmail.com \
    --cc=john.ronciak@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@tuxdriver.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).