From mboxrd@z Thu Jan 1 00:00:00 1970 From: Patrick McHardy Subject: Re: [net-next-2.6, v4 1/3] ethtool: Introduce n-tuple filter programming support Date: Wed, 10 Feb 2010 12:21:15 +0100 Message-ID: <4B7296AB.6020004@trash.net> References: <20100210100718.10855.96567.stgit@localhost.localdomain> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-15 Content-Transfer-Encoding: 7bit Cc: davem@davemloft.net, netdev@vger.kernel.org, gospo@redhat.com, Peter P Waskiewicz Jr To: Jeff Kirsher Return-path: Received: from stinky.trash.net ([213.144.137.162]:48152 "EHLO stinky.trash.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754242Ab0BJLVY (ORCPT ); Wed, 10 Feb 2010 06:21:24 -0500 In-Reply-To: <20100210100718.10855.96567.stgit@localhost.localdomain> Sender: netdev-owner@vger.kernel.org List-ID: Jeff Kirsher wrote: > diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h > index ef4a2d8..4e9ef85 100644 > --- a/include/linux/ethtool.h > +++ b/include/linux/ethtool.h > +struct ethtool_rx_ntuple_list { > +#define ETHTOOL_MAX_NTUPLE_LIST_ENTRY 1024 > +#define ETHTOOL_MAX_NTUPLE_STRING_PER_ENTRY 14 > + struct list_head list; > + int count; unsigned int seems more appropriate. > diff --git a/net/core/dev.c b/net/core/dev.c > index 94c1eee..4593abe 100644 > @@ -5447,6 +5449,7 @@ EXPORT_SYMBOL(alloc_netdev_mq); > void free_netdev(struct net_device *dev) > { > struct napi_struct *p, *n; > + struct ethtool_rx_ntuple_flow_spec_container *fsc, *f; > > release_net(dev_net(dev)); > > @@ -5455,6 +5458,12 @@ void free_netdev(struct net_device *dev) > /* Flush device addresses */ > dev_addr_flush(dev); > > + /* Clear ethtool n-tuple list */ > + list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) { > + list_del(&fsc->list); Shouldn't the filters be freed here? > + } > + dev->ethtool_ntuple_list.count = 0; > + > list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) > netif_napi_del(p); > > diff --git a/net/core/ethtool.c b/net/core/ethtool.c > index d8aee58..fa703c6 100644 > --- a/net/core/ethtool.c > +++ b/net/core/ethtool.c > @@ -120,7 +120,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) > * NETIF_F_xxx values in include/linux/netdevice.h > */ > static const u32 flags_dup_features = > - ETH_FLAG_LRO; > + (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); > > u32 ethtool_op_get_flags(struct net_device *dev) > { > @@ -139,6 +139,11 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data) > else > dev->features &= ~NETIF_F_LRO; > > + if (data & ETH_FLAG_NTUPLE) > + dev->features |= NETIF_F_NTUPLE; > + else > + dev->features &= ~NETIF_F_NTUPLE; Shouldn't this check for the real capabilities of the device first? > +static int __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, > + struct ethtool_rx_ntuple_flow_spec *spec) > +{ > + struct ethtool_rx_ntuple_flow_spec_container *fsc; > + int alloc_size; > + > + /* don't add filters forever */ > + if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) > + return 0; > + > + list_for_each_entry(fsc, &list->list, list) { /* run to end */ } What does this do? fsc is allocated below, so it seems useless. > + > + alloc_size = sizeof(*fsc); > + if (alloc_size < L1_CACHE_BYTES) > + alloc_size = L1_CACHE_BYTES; Is that really necessary? I thought the filters would be offloaded to hardware, so I'd expect aligning them to the cache line size shouldn't make any difference. > + fsc = kmalloc(alloc_size, GFP_ATOMIC); > + if (!fsc) > + return -ENOMEM; > + > + /* Copy the whole filter over */ > + fsc->fs.flow_type = spec->flow_type; > + memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u)); > + memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u)); > + > + fsc->fs.vlan_tag = spec->vlan_tag; > + fsc->fs.vlan_tag_mask = spec->vlan_tag_mask; > + fsc->fs.data = spec->data; > + fsc->fs.data_mask = spec->data_mask; > + fsc->fs.action = spec->action; > + > + /* add to the list */ > + list_add_tail_rcu(&fsc->list, &list->list); > + list->count++; > + > + return 0; > +} > + > +static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) > +{ > + struct ethtool_gstrings gstrings; > + const struct ethtool_ops *ops = dev->ethtool_ops; > + struct ethtool_rx_ntuple_flow_spec_container *fsc; > + u8 *data; > + char *p; > + int ret, i, num_strings = 0; > + > + if (!ops->get_sset_count) > + return -EOPNOTSUPP; > + > + if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) > + return -EFAULT; > + > + ret = ops->get_sset_count(dev, gstrings.string_set); > + if (ret < 0) > + return ret; > + > + gstrings.len = ret; > + > + data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); > + if (!data) > + return -ENOMEM; > + > + if (ops->get_rx_ntuple) { > + /* driver-specific filter grab */ > + ret = ops->get_rx_ntuple(dev, gstrings.string_set, data); > + goto copy; > + } > + > + /* default ethtool filter grab */ > + i = 0; > + p = (char *)data; > + list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) { > + sprintf(p, "Filter %d:\n", i); Providing a textual representation from within the kernel doesn't seem like a good interface to me. If userspace wants to do anything but simply display them, it will have to parse them again. Additionally it seems a driver providing a ->get_rx_ntuple() callback would have to duplicate the entire conversion code, which is error prone. > + p += ETH_GSTRING_LEN; > + num_strings++; > + > + switch (fsc->fs.flow_type) { > + case TCP_V4_FLOW: > + sprintf(p, "\tFlow Type: TCP\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case UDP_V4_FLOW: > + sprintf(p, "\tFlow Type: UDP\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case SCTP_V4_FLOW: > + sprintf(p, "\tFlow Type: SCTP\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case AH_ESP_V4_FLOW: > + sprintf(p, "\tFlow Type: AH ESP\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case ESP_V4_FLOW: > + sprintf(p, "\tFlow Type: ESP\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case IP_USER_FLOW: > + sprintf(p, "\tFlow Type: Raw IP\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case IPV4_FLOW: > + sprintf(p, "\tFlow Type: IPv4\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + default: > + sprintf(p, "\tFlow Type: Unknown\n"); > + p += ETH_GSTRING_LEN; > + num_strings++; > + goto unknown_filter; > + }; > + > + /* now the rest of the filters */ > + switch (fsc->fs.flow_type) { > + case TCP_V4_FLOW: > + case UDP_V4_FLOW: > + case SCTP_V4_FLOW: > + sprintf(p, "\tSrc IP addr: 0x%x\n", > + fsc->fs.h_u.tcp_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tSrc IP mask: 0x%x\n", > + fsc->fs.m_u.tcp_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP addr: 0x%x\n", > + fsc->fs.h_u.tcp_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP mask: 0x%x\n", > + fsc->fs.m_u.tcp_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", > + fsc->fs.h_u.tcp_ip4_spec.psrc, > + fsc->fs.m_u.tcp_ip4_spec.psrc); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest Port: %d, mask: 0x%x\n", > + fsc->fs.h_u.tcp_ip4_spec.pdst, > + fsc->fs.m_u.tcp_ip4_spec.pdst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tTOS: %d, mask: 0x%x\n", > + fsc->fs.h_u.tcp_ip4_spec.tos, > + fsc->fs.m_u.tcp_ip4_spec.tos); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case AH_ESP_V4_FLOW: > + case ESP_V4_FLOW: > + sprintf(p, "\tSrc IP addr: 0x%x\n", > + fsc->fs.h_u.ah_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tSrc IP mask: 0x%x\n", > + fsc->fs.m_u.ah_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP addr: 0x%x\n", > + fsc->fs.h_u.ah_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP mask: 0x%x\n", > + fsc->fs.m_u.ah_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tSPI: %d, mask: 0x%x\n", > + fsc->fs.h_u.ah_ip4_spec.spi, > + fsc->fs.m_u.ah_ip4_spec.spi); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tTOS: %d, mask: 0x%x\n", > + fsc->fs.h_u.ah_ip4_spec.tos, > + fsc->fs.m_u.ah_ip4_spec.tos); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case IP_USER_FLOW: > + sprintf(p, "\tSrc IP addr: 0x%x\n", > + fsc->fs.h_u.raw_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tSrc IP mask: 0x%x\n", > + fsc->fs.m_u.raw_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP addr: 0x%x\n", > + fsc->fs.h_u.raw_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP mask: 0x%x\n", > + fsc->fs.m_u.raw_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + case IPV4_FLOW: > + sprintf(p, "\tSrc IP addr: 0x%x\n", > + fsc->fs.h_u.usr_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tSrc IP mask: 0x%x\n", > + fsc->fs.m_u.usr_ip4_spec.ip4src); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP addr: 0x%x\n", > + fsc->fs.h_u.usr_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tDest IP mask: 0x%x\n", > + fsc->fs.m_u.usr_ip4_spec.ip4dst); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", > + fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, > + fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tTOS: %d, mask: 0x%x\n", > + fsc->fs.h_u.usr_ip4_spec.tos, > + fsc->fs.m_u.usr_ip4_spec.tos); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tIP Version: %d, mask: 0x%x\n", > + fsc->fs.h_u.usr_ip4_spec.ip_ver, > + fsc->fs.m_u.usr_ip4_spec.ip_ver); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tProtocol: %d, mask: 0x%x\n", > + fsc->fs.h_u.usr_ip4_spec.proto, > + fsc->fs.m_u.usr_ip4_spec.proto); > + p += ETH_GSTRING_LEN; > + num_strings++; > + break; > + }; > + sprintf(p, "\tVLAN: %d, mask: 0x%x\n", > + fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); > + p += ETH_GSTRING_LEN; > + num_strings++; > + sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask); > + p += ETH_GSTRING_LEN; > + num_strings++; > + if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) > + sprintf(p, "\tAction: Drop\n"); > + else > + sprintf(p, "\tAction: Direct to queue %d\n", > + fsc->fs.action); > + p += ETH_GSTRING_LEN; > + num_strings++; > +unknown_filter: > + i++; > + } > +copy: > + /* indicate to userspace how many strings we actually have */ > + gstrings.len = num_strings; > + ret = -EFAULT; > + if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) > + goto out; > + useraddr += sizeof(gstrings); > + if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) > + goto out; > + ret = 0; > + > +out: > + kfree(data); > + return ret; > +} > + > static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) > { > struct ethtool_regs regs; > @@ -313,6 +626,12 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) > if (copy_from_user(&reset, useraddr, sizeof(reset))) > return -EFAULT; > > + /* Clear ethtool n-tuple list */ > + list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) { > + list_del(&fsc->list); This should also free the filters if I'm not mistaken. > + } > + dev->ethtool_ntuple_list.count = 0; > + > ret = dev->ethtool_ops->reset(dev, &reset.data); > if (ret) > return ret; > @@ -351,9 +670,17 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) > > static int ethtool_nway_reset(struct net_device *dev) > { > + struct ethtool_rx_ntuple_flow_spec_container *fsc, *f; > + > if (!dev->ethtool_ops->nway_reset) > return -EOPNOTSUPP; > > + /* Clear ethtool n-tuple list */ > + list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) { > + list_del(&fsc->list); Same here. But why are they cleared here at all? It doesn't seem very intuitive that filters are lost when restarting auto-negotiation. > + } > + dev->ethtool_ntuple_list.count = 0; > + > return dev->ethtool_ops->nway_reset(dev); > }