netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: davem@davemloft.net, netdev@vger.kernel.org, gospo@redhat.com,
	Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Subject: Re: [net-next-2.6,	v4 1/3] ethtool: Introduce n-tuple filter programming support
Date: Wed, 10 Feb 2010 12:21:15 +0100	[thread overview]
Message-ID: <4B7296AB.6020004@trash.net> (raw)
In-Reply-To: <20100210100718.10855.96567.stgit@localhost.localdomain>

Jeff Kirsher wrote:
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index ef4a2d8..4e9ef85 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> +struct ethtool_rx_ntuple_list {
> +#define ETHTOOL_MAX_NTUPLE_LIST_ENTRY 1024
> +#define ETHTOOL_MAX_NTUPLE_STRING_PER_ENTRY 14
> +	struct list_head	list;
> +	int			count;

unsigned int seems more appropriate.

> diff --git a/net/core/dev.c b/net/core/dev.c
> index 94c1eee..4593abe 100644
> @@ -5447,6 +5449,7 @@ EXPORT_SYMBOL(alloc_netdev_mq);
>  void free_netdev(struct net_device *dev)
>  {
>  	struct napi_struct *p, *n;
> +	struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
>  
>  	release_net(dev_net(dev));
>  
> @@ -5455,6 +5458,12 @@ void free_netdev(struct net_device *dev)
>  	/* Flush device addresses */
>  	dev_addr_flush(dev);
>  
> +	/* Clear ethtool n-tuple list */
> +	list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
> +		list_del(&fsc->list);

Shouldn't the filters be freed here?

> +	}
> +	dev->ethtool_ntuple_list.count = 0;
> +
>  	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
>  		netif_napi_del(p);
>  
> diff --git a/net/core/ethtool.c b/net/core/ethtool.c
> index d8aee58..fa703c6 100644
> --- a/net/core/ethtool.c
> +++ b/net/core/ethtool.c
> @@ -120,7 +120,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
>   * NETIF_F_xxx values in include/linux/netdevice.h
>   */
>  static const u32 flags_dup_features =
> -	ETH_FLAG_LRO;
> +	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
>  
>  u32 ethtool_op_get_flags(struct net_device *dev)
>  {
> @@ -139,6 +139,11 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
>  	else
>  		dev->features &= ~NETIF_F_LRO;
>  
> +	if (data & ETH_FLAG_NTUPLE)
> +		dev->features |= NETIF_F_NTUPLE;
> +	else
> +		dev->features &= ~NETIF_F_NTUPLE;

Shouldn't this check for the real capabilities of the device first?

> +static int __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
> +                                  struct ethtool_rx_ntuple_flow_spec *spec)
> +{
> +	struct ethtool_rx_ntuple_flow_spec_container *fsc;
> +	int alloc_size;
> +
> +	/* don't add filters forever */
> +	if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY)
> +		return 0;
> +
> +	list_for_each_entry(fsc, &list->list, list) { /* run to end */ }

What does this do? fsc is allocated below, so it seems useless.

> +
> +	alloc_size = sizeof(*fsc);
> +	if (alloc_size < L1_CACHE_BYTES)
> +		alloc_size = L1_CACHE_BYTES;

Is that really necessary? I thought the filters would be offloaded
to hardware, so I'd expect aligning them to the cache line size
shouldn't make any difference.

> +	fsc = kmalloc(alloc_size, GFP_ATOMIC);
> +	if (!fsc)
> +		return -ENOMEM;
> +
> +	/* Copy the whole filter over */
> +	fsc->fs.flow_type = spec->flow_type;
> +	memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
> +	memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
> +
> +	fsc->fs.vlan_tag = spec->vlan_tag;
> +	fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
> +	fsc->fs.data = spec->data;
> +	fsc->fs.data_mask = spec->data_mask;
> +	fsc->fs.action = spec->action;
> +
> +	/* add to the list */
> +	list_add_tail_rcu(&fsc->list, &list->list);
> +	list->count++;
> +
> +	return 0;
> +}
> +
> +static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
> +{
> +	struct ethtool_gstrings gstrings;
> +	const struct ethtool_ops *ops = dev->ethtool_ops;
> +	struct ethtool_rx_ntuple_flow_spec_container *fsc;
> +	u8 *data;
> +	char *p;
> +	int ret, i, num_strings = 0;
> +
> +	if (!ops->get_sset_count)
> +		return -EOPNOTSUPP;
> +
> +	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
> +		return -EFAULT;
> +
> +	ret = ops->get_sset_count(dev, gstrings.string_set);
> +	if (ret < 0)
> +		return ret;
> +
> +	gstrings.len = ret;
> +
> +	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
> +	if (!data)
> +		return -ENOMEM;
> +
> +	if (ops->get_rx_ntuple) {
> +		/* driver-specific filter grab */
> +		ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
> +		goto copy;
> +	}
> +
> +	/* default ethtool filter grab */
> +	i = 0;
> +	p = (char *)data;
> +	list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
> +		sprintf(p, "Filter %d:\n", i);

Providing a textual representation from within the kernel doesn't
seem like a good interface to me. If userspace wants to do anything
but simply display them, it will have to parse them again. Additionally
it seems a driver providing a ->get_rx_ntuple() callback would have
to duplicate the entire conversion code, which is error prone.

> +		p += ETH_GSTRING_LEN;
> +		num_strings++;
> +
> +		switch (fsc->fs.flow_type) {
> +		case TCP_V4_FLOW:
> +			sprintf(p, "\tFlow Type: TCP\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case UDP_V4_FLOW:
> +			sprintf(p, "\tFlow Type: UDP\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case SCTP_V4_FLOW:
> +			sprintf(p, "\tFlow Type: SCTP\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case AH_ESP_V4_FLOW:
> +			sprintf(p, "\tFlow Type: AH ESP\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case ESP_V4_FLOW:
> +			sprintf(p, "\tFlow Type: ESP\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case IP_USER_FLOW:
> +			sprintf(p, "\tFlow Type: Raw IP\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case IPV4_FLOW:
> +			sprintf(p, "\tFlow Type: IPv4\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		default:
> +			sprintf(p, "\tFlow Type: Unknown\n");
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			goto unknown_filter;
> +		};
> +
> +		/* now the rest of the filters */
> +		switch (fsc->fs.flow_type) {
> +		case TCP_V4_FLOW:
> +		case UDP_V4_FLOW:
> +		case SCTP_V4_FLOW:
> +			sprintf(p, "\tSrc IP addr: 0x%x\n",
> +			        fsc->fs.h_u.tcp_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tSrc IP mask: 0x%x\n",
> +			        fsc->fs.m_u.tcp_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP addr: 0x%x\n",
> +			        fsc->fs.h_u.tcp_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP mask: 0x%x\n",
> +			        fsc->fs.m_u.tcp_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.tcp_ip4_spec.psrc,
> +			        fsc->fs.m_u.tcp_ip4_spec.psrc);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.tcp_ip4_spec.pdst,
> +			        fsc->fs.m_u.tcp_ip4_spec.pdst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.tcp_ip4_spec.tos,
> +			        fsc->fs.m_u.tcp_ip4_spec.tos);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case AH_ESP_V4_FLOW:
> +		case ESP_V4_FLOW:
> +			sprintf(p, "\tSrc IP addr: 0x%x\n",
> +			        fsc->fs.h_u.ah_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tSrc IP mask: 0x%x\n",
> +			        fsc->fs.m_u.ah_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP addr: 0x%x\n",
> +			        fsc->fs.h_u.ah_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP mask: 0x%x\n",
> +			        fsc->fs.m_u.ah_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tSPI: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.ah_ip4_spec.spi,
> +			        fsc->fs.m_u.ah_ip4_spec.spi);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.ah_ip4_spec.tos,
> +			        fsc->fs.m_u.ah_ip4_spec.tos);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case IP_USER_FLOW:
> +			sprintf(p, "\tSrc IP addr: 0x%x\n",
> +			        fsc->fs.h_u.raw_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tSrc IP mask: 0x%x\n",
> +			        fsc->fs.m_u.raw_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP addr: 0x%x\n",
> +			        fsc->fs.h_u.raw_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP mask: 0x%x\n",
> +			        fsc->fs.m_u.raw_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		case IPV4_FLOW:
> +			sprintf(p, "\tSrc IP addr: 0x%x\n",
> +			        fsc->fs.h_u.usr_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tSrc IP mask: 0x%x\n",
> +			        fsc->fs.m_u.usr_ip4_spec.ip4src);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP addr: 0x%x\n",
> +			        fsc->fs.h_u.usr_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tDest IP mask: 0x%x\n",
> +			        fsc->fs.m_u.usr_ip4_spec.ip4dst);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
> +			        fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
> +			        fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.usr_ip4_spec.tos,
> +			        fsc->fs.m_u.usr_ip4_spec.tos);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.usr_ip4_spec.ip_ver,
> +			        fsc->fs.m_u.usr_ip4_spec.ip_ver);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
> +			        fsc->fs.h_u.usr_ip4_spec.proto,
> +			        fsc->fs.m_u.usr_ip4_spec.proto);
> +			p += ETH_GSTRING_LEN;
> +			num_strings++;
> +			break;
> +		};
> +		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
> +		        fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
> +		p += ETH_GSTRING_LEN;
> +		num_strings++;
> +		sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
> +		p += ETH_GSTRING_LEN;
> +		num_strings++;
> +		sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
> +		p += ETH_GSTRING_LEN;
> +		num_strings++;
> +		if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
> +			sprintf(p, "\tAction: Drop\n");
> +		else
> +			sprintf(p, "\tAction: Direct to queue %d\n",
> +			        fsc->fs.action);
> +		p += ETH_GSTRING_LEN;
> +		num_strings++;
> +unknown_filter:
> +		i++;
> +	}
> +copy:
> +	/* indicate to userspace how many strings we actually have */
> +	gstrings.len = num_strings;
> +	ret = -EFAULT;
> +	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
> +		goto out;
> +	useraddr += sizeof(gstrings);
> +	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
> +		goto out;
> +	ret = 0;
> +
> +out:
> +	kfree(data);
> +	return ret;
> +}
> +
>  static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
>  {
>  	struct ethtool_regs regs;
> @@ -313,6 +626,12 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr)
>  	if (copy_from_user(&reset, useraddr, sizeof(reset)))
>  		return -EFAULT;
>  
> +	/* Clear ethtool n-tuple list */
> +	list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
> +		list_del(&fsc->list);

This should also free the filters if I'm not mistaken.

> +	}
> +	dev->ethtool_ntuple_list.count = 0;
> +
>  	ret = dev->ethtool_ops->reset(dev, &reset.data);
>  	if (ret)
>  		return ret;
> @@ -351,9 +670,17 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
>  
>  static int ethtool_nway_reset(struct net_device *dev)
>  {
> +	struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
> +
>  	if (!dev->ethtool_ops->nway_reset)
>  		return -EOPNOTSUPP;
>  
> +	/* Clear ethtool n-tuple list */
> +	list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
> +		list_del(&fsc->list);

Same here. But why are they cleared here at all? It doesn't
seem very intuitive that filters are lost when restarting
auto-negotiation.

> +	}
> +	dev->ethtool_ntuple_list.count = 0;
> +
>  	return dev->ethtool_ops->nway_reset(dev);
>  }

  reply	other threads:[~2010-02-10 11:21 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-10 10:10 [net-next-2.6, v4 1/3] ethtool: Introduce n-tuple filter programming support Jeff Kirsher
2010-02-10 11:21 ` Patrick McHardy [this message]
2010-02-10 23:53   ` Waskiewicz Jr, Peter P
2010-02-11  6:16     ` Patrick McHardy
2010-02-11  7:07       ` Peter P Waskiewicz Jr
2010-02-11  7:53         ` Patrick McHardy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B7296AB.6020004@trash.net \
    --to=kaber@trash.net \
    --cc=davem@davemloft.net \
    --cc=gospo@redhat.com \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=peter.p.waskiewicz.jr@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).