Netdev List
 help / color / mirror / Atom feed
* [PATCH] moxa: fix the error handling in moxart_mac_probe()
From: Wei Yongjun @ 2013-10-08  3:26 UTC (permalink / raw)
  To: grant.likely, rob.herring, davem, jg1.han, b.zolnierkie,
	kyungmin.park
  Cc: yongjun_wei, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

This patch fix the error handling in moxart_mac_probe():
 - return -ENOMEM in some memory alloc fail cases
 - add missing free_netdev() in the error handling case

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 drivers/net/ethernet/moxa/moxart_ether.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index bd1a2d2..ea54d95 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -448,7 +448,8 @@ static int moxart_mac_probe(struct platform_device *pdev)
 	irq = irq_of_parse_and_map(node, 0);
 	if (irq <= 0) {
 		netdev_err(ndev, "irq_of_parse_and_map failed\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto irq_map_fail;
 	}
 
 	priv = netdev_priv(ndev);
@@ -472,24 +473,32 @@ static int moxart_mac_probe(struct platform_device *pdev)
 	priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE *
 						TX_DESC_NUM, &priv->tx_base,
 						GFP_DMA | GFP_KERNEL);
-	if (priv->tx_desc_base == NULL)
+	if (priv->tx_desc_base == NULL) {
+		ret = -ENOMEM;
 		goto init_fail;
+	}
 
 	priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE *
 						RX_DESC_NUM, &priv->rx_base,
 						GFP_DMA | GFP_KERNEL);
-	if (priv->rx_desc_base == NULL)
+	if (priv->rx_desc_base == NULL) {
+		ret = -ENOMEM;
 		goto init_fail;
+	}
 
 	priv->tx_buf_base = kmalloc(priv->tx_buf_size * TX_DESC_NUM,
 				    GFP_ATOMIC);
-	if (!priv->tx_buf_base)
+	if (!priv->tx_buf_base) {
+		ret = -ENOMEM;
 		goto init_fail;
+	}
 
 	priv->rx_buf_base = kmalloc(priv->rx_buf_size * RX_DESC_NUM,
 				    GFP_ATOMIC);
-	if (!priv->rx_buf_base)
+	if (!priv->rx_buf_base) {
+		ret = -ENOMEM;
 		goto init_fail;
+	}
 
 	platform_set_drvdata(pdev, ndev);
 
@@ -522,7 +531,8 @@ static int moxart_mac_probe(struct platform_device *pdev)
 init_fail:
 	netdev_err(ndev, "init failed\n");
 	moxart_mac_free_memory(ndev);
-
+irq_map_fail:
+	free_netdev(ndev);
 	return ret;
 }
 

^ permalink raw reply related

* [PATCH] qlcnic: add missing destroy_workqueue() on error path in qlcnic_probe()
From: Wei Yongjun @ 2013-10-08  3:32 UTC (permalink / raw)
  To: himanshu.madhani, rajesh.borundia, shahed.shaikh,
	jitendra.kalsaria, sony.chacko, sucheta.chakraborty
  Cc: yongjun_wei, linux-driver, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

Add the missing destroy_workqueue() before return from
qlcnic_probe() in the error handling case.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 21d00a0..f07f2b0 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2257,7 +2257,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	err = qlcnic_alloc_adapter_resources(adapter);
 	if (err)
-		goto err_out_free_netdev;
+		goto err_out_free_wq;
 
 	adapter->dev_rst_time = jiffies;
 	adapter->ahw->revision_id = pdev->revision;
@@ -2396,6 +2396,9 @@ err_out_disable_msi:
 err_out_free_hw:
 	qlcnic_free_adapter_resources(adapter);
 
+err_out_free_wq:
+	destroy_workqueue(adapter->qlcnic_wq);
+
 err_out_free_netdev:
 	free_netdev(netdev);
 

^ permalink raw reply related

* [PATCH net-next] fib_trie: only calc for the un-first node
From: baker.kernel @ 2013-10-08  3:36 UTC (permalink / raw)
  To: davem, kuznet, jmorris, yoshfuji, kaber, netdev, linux-kernel; +Cc: baker.zhang

From: "baker.zhang" <baker.kernel@gmail.com>

This is a enhancement.

for the first node in fib_trie, newpos is 0, bit is 1.
Only for the leaf or node with unmatched key need calc pos.

Signed-off-by: baker.zhang <baker.kernel@gmail.com>
---
 net/ipv4/fib_trie.c |    6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 45c74ba..ec9a9ef 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1117,12 +1117,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
 		 *  first tnode need some special handling
 		 */
 
-		if (tp)
-			pos = tp->pos+tp->bits;
-		else
-			pos = 0;
-
 		if (n) {
+			pos = tp ? tp->pos+tp->bits : 0;
 			newpos = tkey_mismatch(key, pos, n->key);
 			tn = tnode_new(n->key, newpos, 1);
 		} else {
-- 
1.7.9.5

^ permalink raw reply related

* RE: [PATCH] moxa: fix the error handling in moxart_mac_probe()
From: yongjun_wei @ 2013-10-08  3:29 UTC (permalink / raw)
  To: Wei Yongjun, grant.likely@linaro.org, rob.herring@calxeda.com,
	davem@davemloft.net, jg1.han@samsung.com,
	b.zolnierkie@samsung.com, kyungmin.park@samsung.com
  Cc: netdev@vger.kernel.org
In-Reply-To: <CAPgLHd95Rv=sCCWFsuLwtGz5GL5+7N-y6zQ195uOXF3ebgcj3A@mail.gmail.com>

Sorry, this mail is dup by mail server, please ignore it, thanks.

-----Original Message-----
From: Wei Yongjun [mailto:weiyj.lk@gmail.com] 
Sent: 2013年10月8日 11:27
To: grant.likely@linaro.org; rob.herring@calxeda.com; davem@davemloft.net; jg1.han@samsung.com; b.zolnierkie@samsung.com; kyungmin.park@samsung.com
Cc: Yongjun Wei (RD-CN); netdev@vger.kernel.org
Subject: [PATCH] moxa: fix the error handling in moxart_mac_probe()

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

This patch fix the error handling in moxart_mac_probe():
 - return -ENOMEM in some memory alloc fail cases
 - add missing free_netdev() in the error handling case

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>


<table class="TM_EMAIL_NOTICE"><tr><td><pre>
TREND MICRO EMAIL NOTICE
The information contained in this email and any attachments is confidential 
and may be subject to copyright or other intellectual property protection. 
If you are not the intended recipient, you are not authorized to use or 
disclose this information, and we request that you notify us by reply mail or
telephone and delete the original message from your mail system.
</pre></td></tr></table>

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Michael Ellerman @ 2013-10-08  4:33 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, linux-s390,
	Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar, linux-pci,
	iss_storagedev, linux-driver, Tejun Heo, Bjorn Helgaas,
	Dan Williams, Jon Mason, Solarflare linux maintainers, netdev,
	linux-kernel, Ralf Baechle, e1000-devel, Martin Schwidefsky,
	linux390, linuxppc-dev
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

On Wed, Oct 02, 2013 at 12:29:04PM +0200, Alexander Gordeev wrote:
> This series is against "next" branch in Bjorn's repo:
> git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git
> 
> Currently pci_enable_msi_block() and pci_enable_msix() interfaces
> return a error code in case of failure, 0 in case of success and a
> positive value which indicates the number of MSI-X/MSI interrupts
> that could have been allocated. The latter value should be passed
> to a repeated call to the interfaces until a failure or success:
> 
> 
> 	for (i = 0; i < FOO_DRIVER_MAXIMUM_NVEC; i++)
> 		adapter->msix_entries[i].entry = i;
> 
> 	while (nvec >= FOO_DRIVER_MINIMUM_NVEC) {
> 		rc = pci_enable_msix(adapter->pdev,
> 				     adapter->msix_entries, nvec);
> 		if (rc > 0)
> 			nvec = rc;
> 		else
> 			return rc;
> 	}
> 
> 	return -ENOSPC;
> 
> 
> This technique proved to be confusing and error-prone. Vast share
> of device drivers simply fail to follow the described guidelines.

To clarify "Vast share of device drivers":

 - 58 drivers call pci_enable_msix()
 - 24 try a single allocation and then fallback to MSI/LSI
 - 19 use the loop style allocation as above
 - 14 try an allocation, and if it fails retry once
 - 1  incorrectly continues when pci_enable_msix() returns > 0

So 33 drivers (> 50%) successfully make use of the "confusing and
error-prone" return value.

Another 24 happily ignore it, which is also entirely fine.

And yes, one is buggy, so obviously the interface is too complex. Thanks
drivers/ntb/ntb_hw.c

cheers

^ permalink raw reply

* Re: [PATCH] net ipv4: Allow unprivileged users to use most of the per net systctls
From: David Miller @ 2013-10-08  4:45 UTC (permalink / raw)
  To: ebiederm; +Cc: netdev
In-Reply-To: <878uy4skek.fsf@xmission.com>

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Mon, 07 Oct 2013 16:58:43 -0700

> 
> Allow unprivileged users to use:
> /proc/sys/net/ipv4/icmp_echo_ignore_all
> /proc/sys/net/ipv4/icmp_echo_ignore_broadcasts
> /proc/sys/net/ipv4/icmp_ignore_bogus_error_response
> /proc/sys/net/ipv4/icmp_errors_use_inbound_ifaddr
> /proc/sys/net/ipv4/icmp_ratelimit
> /proc/sys/net/ipv4/icmp_ratemask
> /proc/sys/net/ipv4/ping_group_range
> /proc/sys/net/ipv4/tcp_ecn
> /proc/sys/net/ipv4/ip_local_ports_range
 ...
> -			table[0].procname = NULL;
> +			table[9].procname = NULL;

Regardless of what I think semantically of this change, you really
have to find some way to avoid this magic constant.

Thanks.

^ permalink raw reply

* Re: [PATCH] eisa: standardize on eisa_register_driver like similar bus registrations
From: James Bottomley @ 2013-10-08  4:53 UTC (permalink / raw)
  To: David Miller; +Cc: tedheadster, netdev, linux-scsi
In-Reply-To: <20131007.160225.1679994399484815968.davem@davemloft.net>

On Mon, 2013-10-07 at 16:02 -0400, David Miller wrote:
> From: Matthew Whitehead <tedheadster@gmail.com>
> Date: Sat,  5 Oct 2013 22:35:58 -0400
> 
> > The other buses (isa, pci, pnp, parport, usb, tty, etc) all use the convention
> > of ${BUSNAME}_register_driver. Rewrite the little remaining code that uses EISA
> > to follow this convention for easier readability.
> > 
> > This affects the EISA bus, SCSI, and networking subsystems so only one should
> > ultimately merge the patch if it is accepted.
> > 
> > Signed-off-by: Matthew Whitehead <tedheadster@gmail.com>
> 
> I'm fine with someone else taking this, for networking parts:

I don't really see much value in the rename, especially as eisa is
probably not long for this world.  However, its a trivial rename, so
bundle it all up into a single patch and send it to Jiří Kosina
<trivial@vger.kernel.org> he'll take care of it.

James



--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* XGMII patch
From: Rayagond K @ 2013-10-08  5:18 UTC (permalink / raw)
  To: netdev

Hi All,

I came across some old patch series to support XGMII in PHYLIB -
http://lwn.net/Articles/463344/

These patch are not available in mainline kernel still and also PHYLIB
doesn't support XGMII also.

Any plan for applying these patch to mainline kernel.


Thanks
Rayagond

^ permalink raw reply

* Re: [raw v1 2/4] [NET] Use raw_cpu ops for SNMP stats
From: Ingo Molnar @ 2013-10-08  7:21 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Tejun Heo, akpm, Eric Dumazet, netdev, Steven Rostedt,
	linux-kernel, Peter Zijlstra, Thomas Gleixner
In-Reply-To: <0000014194309205-d0af4d2d-a046-4189-b4f0-ffea37374988-000000@email.amazonses.com>


* Christoph Lameter <cl@linux.com> wrote:

> SNMP stats are not protected by preemption but by bh handling.

Most forms of bh exclusion work via the preemption count though, and 
softirq contexts themselves are generally not preemptible [to other CPUs] 
either.

So the warnings should, in most cases, not trigger.

> Since protection is provided outside of preemption raw_cpu_ops
> need to be used to avoid false positives.

Could you quote the warning that pops up? It might be better to annotate 
the specific safe usages than to potentially hide bugs:

> --- linux.orig/include/net/snmp.h	2013-10-07 09:16:07.595206864 -0500
> +++ linux/include/net/snmp.h	2013-10-07 09:16:07.591206909 -0500
> @@ -126,7 +126,7 @@ struct linux_xfrm_mib {
>  	extern __typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
>  
>  #define SNMP_INC_STATS_BH(mib, field)	\
> -			__this_cpu_inc(mib[0]->mibs[field])
> +			raw_cpu_inc(mib[0]->mibs[field])
>  
>  #define SNMP_INC_STATS_USER(mib, field)	\
>  			this_cpu_inc(mib[0]->mibs[field])
> @@ -141,7 +141,7 @@ struct linux_xfrm_mib {
>  			this_cpu_dec(mib[0]->mibs[field])
>  
>  #define SNMP_ADD_STATS_BH(mib, field, addend)	\
> -			__this_cpu_add(mib[0]->mibs[field], addend)
> +			raw_cpu_add(mib[0]->mibs[field], addend)
>  
>  #define SNMP_ADD_STATS_USER(mib, field, addend)	\
>  			this_cpu_add(mib[0]->mibs[field], addend)

Doing this will hide any true bugs if any of these primitives is used in 
an unsafe context.

Thanks,

	Ingo

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-08  7:33 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linux-kernel, Bjorn Helgaas, Ralf Baechle, Benjamin Herrenschmidt,
	Martin Schwidefsky, Ingo Molnar, Tejun Heo, Dan Williams,
	Andy King, Jon Mason, Matt Porter, linux-pci, linux-mips,
	linuxppc-dev, linux390, linux-s390, x86, linux-ide,
	iss_storagedev, linux-nvme, linux-rdma, netdev, e1000-devel,
	linux-driver, Solarflare 
In-Reply-To: <20131008043330.GF31666@concordia>

On Tue, Oct 08, 2013 at 03:33:30PM +1100, Michael Ellerman wrote:
> On Wed, Oct 02, 2013 at 12:29:04PM +0200, Alexander Gordeev wrote:
> > This technique proved to be confusing and error-prone. Vast share
> > of device drivers simply fail to follow the described guidelines.
> 
> To clarify "Vast share of device drivers":
> 
>  - 58 drivers call pci_enable_msix()
>  - 24 try a single allocation and then fallback to MSI/LSI
>  - 19 use the loop style allocation as above
>  - 14 try an allocation, and if it fails retry once
>  - 1  incorrectly continues when pci_enable_msix() returns > 0
> 
> So 33 drivers (> 50%) successfully make use of the "confusing and
> error-prone" return value.

Ok, you caught me - 'vast share' is incorrect and is a subject to
rewording. But out of 19/58 how many drivers tested fallbacks on the
real hardware? IOW, which drivers are affected by the pSeries quota?

> And yes, one is buggy, so obviously the interface is too complex. Thanks
> drivers/ntb/ntb_hw.c

vmxnet3 would be the best example.

> cheers

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* Re: [PATCH RFC 07/77] PCI/MSI: Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-08  7:48 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Dan Williams, Andy King, Jon Mason, Matt Porter, linux-pci,
	linux-doc, linux-mips, linuxppc-dev, linux390, linux-s390, x86,
	linux-ide, iss_storagedev, linux-nvme, linux-rdma, netdev,
	e1000-devel, linux-dr
In-Reply-To: <20131007181749.GB27396@htj.dyndns.org>

On Mon, Oct 07, 2013 at 02:17:49PM -0400, Tejun Heo wrote:
> Hello,
> 
> On Wed, Oct 02, 2013 at 12:48:23PM +0200, Alexander Gordeev wrote:
> > +static int foo_driver_enable_msi(struct foo_adapter *adapter, int nvec)
> > +{
> > +	rc = pci_get_msi_cap(adapter->pdev);
> > +	if (rc < 0)
> > +		return rc;
> > +
> > +	nvec = min(nvec, rc);
> > +	if (nvec < FOO_DRIVER_MINIMUM_NVEC) {
> > +		return -ENOSPC;
> > +
> > +	rc = pci_enable_msi_block(adapter->pdev, nvec);
> > +	return rc;
> > +}
> 
> If there are many which duplicate the above pattern, it'd probably be
> worthwhile to provide a helper?  It's usually a good idea to reduce
> the amount of boilerplate code in drivers.

I wanted to limit discussion in v1 to as little changes as possible.
I 'planned' those helper(s) for a separate effort if/when the most
important change is accepted and soaked a bit.

> > @@ -975,7 +951,7 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
> >  	if (nr_entries < 0)
> >  		return nr_entries;
> >  	if (nvec > nr_entries)
> > -		return nr_entries;
> > +		return -EINVAL;
> >  
> >  	/* Check for any invalid entries */
> >  	for (i = 0; i < nvec; i++) {
> 
> If we do things this way, it breaks all drivers using this interface
> until they're converted, right?

Right. And the rest of the series does it.

> Also, it probably isn't the best idea
> to flip the behavior like this as this can go completely unnoticed (no
> compiler warning or anything, the same function just behaves
> differently).  Maybe it'd be a better idea to introduce a simpler
> interface that most can be converted to?

Well, an *other* interface is a good idea. What do you mean with the
simpler here?

> tejun

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* Re: [PATCH RFC 05/77] PCI/MSI: Convert pci_msix_table_size() to a public interface
From: Alexander Gordeev @ 2013-10-08  7:56 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Dan Williams, Andy King, Jon Mason, Matt Porter, linux-pci,
	linux-mips, linuxppc-dev, linux390, linux-s390, x86, linux-ide,
	iss_storagedev, linux-nvme, linux-rdma, netdev, e1000-devel,
	linux-driver, So
In-Reply-To: <20131007181043.GA27396@htj.dyndns.org>

On Mon, Oct 07, 2013 at 02:10:43PM -0400, Tejun Heo wrote:
> On Wed, Oct 02, 2013 at 12:48:21PM +0200, Alexander Gordeev wrote:
> > Make pci_msix_table_size() to return a error code if the device
> > does not support MSI-X. This update is needed to facilitate a
> > forthcoming re-design MSI/MSI-X interrupts enabling pattern.
> > 
> > Device drivers will use this interface to obtain maximum number
> > of MSI-X interrupts the device supports and use that value in
> > the following call to pci_enable_msix() interface.
> 
> Hmmm... I probably missed something but why is this necessary?  To
> discern between -EINVAL and -ENOSPC?  If so, does that really matter?

pci_msix_table_size() is kind of helper and returns 0 if a device does
not have MSI-X table. If we want drivers to use it we need return -EINVAL
for MSI-X incapable/disabled devices. Nothing about -ENOSPC.

> tejun

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* Re: [PATCH nf-next] netfilter: xtables: lightweight process control group matching
From: Daniel Borkmann @ 2013-10-08  8:05 UTC (permalink / raw)
  To: Tejun Heo; +Cc: pablo, netfilter-devel, netdev, cgroups
In-Reply-To: <20131007164647.GA2481@htj.dyndns.org>

Hi Tejun,

On 10/07/2013 06:46 PM, Tejun Heo wrote:
> On Fri, Oct 04, 2013 at 08:20:55PM +0200, Daniel Borkmann wrote:
>> It would be useful e.g. in a server or desktop environment to have
>> a facility in the notion of fine-grained "per application" or "per
>> application group" firewall policies. Probably, users in the mobile/
>> embedded area (e.g. Android based) with different security policy
>> requirements for application groups could have great benefit from
>> that as well. For example, with a little bit of configuration effort,
>> an admin could whitelist well-known applications, and thus block
>> otherwise unwanted "hard-to-track" applications like [1] from a
>> user's machine.
>>
>> Implementation of PID-based matching would not be appropriate
>> as they frequently change, and child tracking would make that
>> even more complex and ugly. Cgroups would be a perfect candidate
>> for accomplishing that as they associate a set of tasks with a
>> set of parameters for one or more subsystems, in our case the
>> netfilter subsystem, which, of course, can be combined with other
>> cgroup subsystems into something more complex.
>>
>> As mentioned, to overcome this constraint, such processes could
>> be placed into one or multiple cgroups where different fine-grained
>> rules can be defined depending on the application scenario, while
>> e.g. everything else that is not part of that could be dropped (or
>> vice versa), thus making life harder for unwanted processes to
>> communicate to the outside world. So, we make use of cgroups here
>> to track jobs and limit their resources in terms of iptables
>> policies; in other words, limiting what they are allowed to
>> communicate.
>>
>> We have similar cgroup facilities in networking for traffic
>> classifier, and netprio cgroups. This feature adds a lightweight
>> cgroup id matching in terms of network security resp. network
>> traffic isolation as part of netfilter's xtables subsystem.
>
> I don't think the two net cgroups were a good idea and definitely
> don't want to continue the trend.  I think this is being done
> backwards.  Wouldn't it be more logical to implement netfilter rule to
> match the target cgroup paths?  It really doesn't make much sense to
> me to add separate controllers to just tag processes.  Please classify
> tasks in cgroup and let netfilter match the cgroups.

Thanks for your feedback!

Could you elaborate on "Wouldn't it be more logical to implement netfilter
rule to match the target cgroup paths?". I don't think (or hope) you mean
some string comparison on the dentry path here? :) With our proposal, we
have in the network stack's critical path only the following code that is
being executed here to match the cgroup ...

static bool
cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_cgroup_info *info = par->matchinfo;

	if (skb->sk == NULL)
		return false;

	return (info->id == skb->sk->sk_cgrp_fwid) ^ info->invert;
}

... where ``info->id == skb->sk->sk_cgrp_fwid'' is the actual work, so very
lightweight, which is good for high loads (1Gbit/s, 10Gbit/s and beyond), of
course. Also, it would be intuitive for admins familiar with other subsystems
to just set up and use these cgroup ids in iptabels. I'm not yet quite sure
how your suggestion would look like, so you would need to setup some "dummy"
subgroups first just to have a path that you can match on?

Thanks,

Daniel

^ permalink raw reply

* Re: Neterion and UFO handling [was: Re: [PATCH] ipv6: udp packets following an UFO enqueued packet need also be handled by UFO]
From: Jon Mason @ 2013-10-08  8:07 UTC (permalink / raw)
  To: Jon Mason, Jiri Pirko, netdev, yoshfuji, David Miller, kuznet,
	jmorris, kaber, herbert, Eric Dumazet
In-Reply-To: <20131002162730.GA3991@order.stressinduktion.org>

On Wed, Oct 2, 2013 at 9:27 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> Hi!
>
> I have a question regarding UFO and the neterion driver, which as the only one
> advertises hardware UFO support:
>
> The patch discusses in this thread
> http://thread.gmane.org/gmane.linux.network/284348/focus=285405 could change
> some semantics how packets are constructed before submitted to the driver.
>
> We currently guarantee that we have the MAC/IP/UDP header in skb->data and the
> payload is attached in the skb's frags. With the changes discussed in this
> thread it is possible that we also append to skb->data some amount of data
> which is not targeted for the header. From reading the driver sources it seems
> the hardware interprets the skb->data to skb_headlen as the header, so we
> could include some data in the fragments more than once.

>From my reading of the HW Spec and a quick look at the driver, it
appears that the driver is using one entry in the TX ring for the
header and another for the body of the packet to be fragmented (which
is what the hardware wants).  I don't understand what you are saying,
but if you are asking if simply appending a new header & data to the
end of skb->data will get it out on the wire correct, I don't believe
it will.

I do have hardware that I can try the patch on, if you can walk me
through the use case (unless it is as easy as setup an IPv6 connection
and ping).

Time for sleep now....

Thanks,
Jon

>
> Do you think this change is safe? Otherwise I would suggest that the UFO
> capability is switched off until the driver signals the hardware the start and
> end of the headers correctly?
>
> I left the mail below intact which points to the specific place in s2io.c
> where I think the problem is.
>
> On Wed, Oct 02, 2013 at 08:14:27AM -0700, Eric Dumazet wrote:
>> On Wed, 2013-10-02 at 15:03 +0200, Hannes Frederic Sowa wrote:
>> > On Wed, Oct 02, 2013 at 02:12:07PM +0200, Hannes Frederic Sowa wrote:
>> > > Hi Eric!
>> > >
>> > > On Wed, Oct 02, 2013 at 03:41:28AM -0700, Eric Dumazet wrote:
>> > > > On Wed, 2013-10-02 at 10:58 +0200, Jiri Pirko wrote:
>> > > > > Wed, Oct 02, 2013 at 01:25:34AM CEST, hannes@stressinduktion.org wrote:
>> > > > > >-    if (((length > mtu) || (skb && skb_is_gso(skb))) &&
>> > > > > >+    if (((length > mtu) || (skb && skb_has_frags(skb))) &&
>> > > >
>> > > > >
>> > > > > This seems correct to me. sk_is_gso would work as well is you apply my
>> > > > > patch "[patch net] ip6_output: do skb ufo init for peeked non ufo skb as
>> > > > > well" which does the setting of gso_size.
>> > > >
>> > > > Well, skb having frags or not should not be a concern :
>> > > > Thats an allocation choice (lets say to avoid high order allocations).
>> > > >
>> > > > Setting gso_size is probably better.
>> > >
>> > > e89e9cf539a28df7d0eb1d0a545368e9920b34ac ("[IPv4/IPv6]: UFO Scatter-gather
>> > > approach") states:
>> > >
>> > > "
>> > > skb->data will contain MAC/IP/UDP header and skb_shinfo(skb)->frags[]
>> > > contains the data payload. The skb->ip_summed will be set to CHECKSUM_HW
>> > > indicating that hardware has to do checksum calculation. Hardware should
>> > > compute the UDP checksum of complete datagram and also ip header checksum of
>> > > each fragmented IP packet.
>> > > "
>> > >
>> > > This is the reason why I tried not to update the gso_size. If it is ok, I am
>> > > fine with that.
>> >
>> > Especially, drivers/net/ethernet/neterion/s2io.c states that the first dma
>> > mapping (skb->data with skb_headlen, which is fine) is used as the inband
>> > header:
>> >
>> >         if (offload_type == SKB_GSO_UDP)
>> >                 frg_cnt++; /* as Txd0 was used for inband header */
>> >
>> > That is my only other hint that we maybe should not update gso_size and
>> > gso_type. I guess software fallback does not have this problem, but I won't
>> > have time to check until this evening.
>> >
>> > I am really not sure if just setting gso_size does not break neterion UFO
>> > offloading. :/
>>
>> Well, just ask Jon Mason to double check ;)
>>
>> I think the commit intent was to set gso_size :
>>
>>    skb_shinfo(skb)->ufo_size will indicate the length of data part in each IP
>>     fragment going out of the adapter after IP fragmentation by hardware.
>>
>> The fact that it states "skb->data will contain MAC/IP/UDP header and
>> skb_shinfo(skb)->frags[] contains the data payload." seems irrelevant.
>>
>> If Neterion driver mandates that skb->head *only* contains the
>> MAC/IP/UDP header, that should be handled in the driver itself.
>
> Thanks Eric for clearing this up.
>
> I really thought it would be the common pattern for UFO to have only headers
> in skb->data, so I didn't bother to ask in the first place.
>
> Thanks,
>
>   Hannes
>

^ permalink raw reply

* RE: [PATCH] mac80211: Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...))
From: David Laight @ 2013-10-08  8:33 UTC (permalink / raw)
  To: djduanjiong, Johannes Berg, John W. Linville, David S. Miller
  Cc: linux-wireless, netdev, Duan Jiong
In-Reply-To: <1381190953-6362-1-git-send-email-duanj.fnst@cn.fujitsu.com>

> Signed-off-by: Duan Jiong <duanj.fnst@cn.fujitsu.com>
> ---
>  net/mac80211/key.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/net/mac80211/key.c b/net/mac80211/key.c
> index 620677e..3e51dd7 100644
> --- a/net/mac80211/key.c
> +++ b/net/mac80211/key.c
> @@ -879,7 +879,7 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
>  				  keyconf->keylen, keyconf->key,
>  				  0, NULL);
>  	if (IS_ERR(key))
> -		return ERR_PTR(PTR_ERR(key));
> +		return ERR_CAST(key);

I suspect the old code is a deliberate attempt to indicate that it
the error value that is being returned rather than the pointer.

Looking higher up that file there seems to be a fubar when setting
up the TKIP key.
It seems to put the same 6 bytes into every TID.
(I'm sure seq_len shouldn't be ignored either.0

	David

^ permalink raw reply

* Re: [PATCH] mac80211: Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...))
From: Johannes Berg @ 2013-10-08  8:37 UTC (permalink / raw)
  To: David Laight
  Cc: djduanjiong-Re5JQEeQqe8AvxtiuMwx3w, John W. Linville,
	David S. Miller, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, Duan Jiong
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B7379-CgBM+Bx2aUAnGFn1LkZF6NBPR1lH4CV8@public.gmane.org>

On Tue, 2013-10-08 at 09:33 +0100, David Laight wrote:

> >  	if (IS_ERR(key))
> > -		return ERR_PTR(PTR_ERR(key));
> > +		return ERR_CAST(key);
> 
> I suspect the old code is a deliberate attempt to indicate that it
> the error value that is being returned rather than the pointer.

I was under the impression that's what ERR_CAST means.

> Looking higher up that file there seems to be a fubar when setting
> up the TKIP key.
> It seems to put the same 6 bytes into every TID.
> (I'm sure seq_len shouldn't be ignored either.0

Both are fine.

johannes

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: bug in passing file descriptors
From: David Laight @ 2013-10-08  8:43 UTC (permalink / raw)
  To: Steve Rago, David Miller; +Cc: luto, netdev, mtk.manpages, ebiederm
In-Reply-To: <5253199B.3000109@nec-labs.com>

> > There is no compatability issue.
> >
> > 32-bit tasks will always see the 4-byte align/length.
> > 64-bit tasks will always see the 8-byte align/length.
> >
> 
> Really?  So when I compile my application on a 32-bit Linux box and then try to run it on a 64-bit
> Linux box, you're not
> going to overrun my buffer when CMSG_SPACE led me to allocate an insufficient amount of memory needed
> to account for
> padding on the 64-bit platform?

Worth checking what happens if you try to send an fd between a 32 bit
application and a 64bit one.
Also is the fd actually closed if the receiving applications buffer
isn't big enough.

	David

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Alexander Gordeev @ 2013-10-08  9:07 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-kernel, Bjorn Helgaas, Ralf Baechle, Michael Ellerman,
	Benjamin Herrenschmidt, Martin Schwidefsky, Ingo Molnar,
	Dan Williams, Andy King, Jon Mason, Matt Porter, linux-pci,
	linux-mips, linuxppc-dev, linux390, linux-s390, x86, linux-ide,
	iss_storagedev, linux-nvme, linux-rdma, netdev, e1000-devel,
	linux-driver, So
In-Reply-To: <20131007182117.GC27396@htj.dyndns.org>

On Mon, Oct 07, 2013 at 02:21:17PM -0400, Tejun Heo wrote:
> Whee.... that's a lot more than I expected.  I was just scanning
> multiple msi users.  Maybe we can stage the work in more manageable
> steps so that you don't have to go through massive conversion only to
> do it all over again afterwards and likewise people don't get
> bombarded on each iteration?  Maybe we can first update pci / msi code
> proper, msi and then msix?

Multipe MSIs is just a handful of drivers, really. MSI-X impact still
will be huge. But if we opt a different name for the new pci_enable_msix()
then we could first update pci/msi, then drivers (in few stages possibly)
and finally remove the old implementation.

> tejun

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* I NEED YOUR TRUST,
From: Mr_Badmons @ 2013-10-08  9:27 UTC (permalink / raw)


I NEED YOUR TRUST,

I am Mr,Badmons Dukaz,The Head of file Department in Bank Of Africa.I seek your assistance and i assured of your capability to champion this business opportunity to remit $18.2 million U.S.A dollars. Into your account if you are interested let me know so that i can send you the details of this transaction.
I agree that 30% of this money will be for you 10% will be set aside for expenses incurred during the business and 60% would be for me.
Regards,
Badmons Dukaz. 

^ permalink raw reply

* [PATCH net-next] Revert "xen-netback: improve ring effeciency for guest RX"
From: Wei Liu @ 2013-10-08  9:54 UTC (permalink / raw)
  To: netdev
  Cc: xen-devel, Wei Liu, Ian Campbell, Annie Li, Matt Wilson, Xi Xiong,
	David Vrabel, Paul Durrant

This reverts commit 4f0581d25827d5e864bcf07b05d73d0d12a20a5c.

The named changeset is causing problem. Let's aim to make this part less
fragile before trying to improve things.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Annie Li <annie.li@oracle.com>
Cc: Matt Wilson <msw@amazon.com>
Cc: Xi Xiong <xixiong@amazon.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Paul Durrant <paul.durrant@citrix.com>
---
 drivers/net/xen-netback/netback.c |  144 +++++++++++++++++++++----------------
 1 file changed, 83 insertions(+), 61 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index d0b0feb..f3e591c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,14 +47,6 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 
-/* SKB control block overlay is used to store useful information when
- * doing guest RX.
- */
-struct skb_cb_overlay {
-	int meta_slots_used;
-	int peek_slots_count;
-};
-
 /* Provide an option to disable split event channels at load time as
  * event channels are limited resource. Split event channels are
  * enabled by default.
@@ -220,6 +212,49 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
 	return false;
 }
 
+struct xenvif_count_slot_state {
+	unsigned long copy_off;
+	bool head;
+};
+
+unsigned int xenvif_count_frag_slots(struct xenvif *vif,
+				     unsigned long offset, unsigned long size,
+				     struct xenvif_count_slot_state *state)
+{
+	unsigned count = 0;
+
+	offset &= ~PAGE_MASK;
+
+	while (size > 0) {
+		unsigned long bytes;
+
+		bytes = PAGE_SIZE - offset;
+
+		if (bytes > size)
+			bytes = size;
+
+		if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
+			count++;
+			state->copy_off = 0;
+		}
+
+		if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
+			bytes = MAX_BUFFER_OFFSET - state->copy_off;
+
+		state->copy_off += bytes;
+
+		offset += bytes;
+		size -= bytes;
+
+		if (offset == PAGE_SIZE)
+			offset = 0;
+
+		state->head = false;
+	}
+
+	return count;
+}
+
 /*
  * Figure out how many ring slots we're going to need to send @skb to
  * the guest. This function is essentially a dry run of
@@ -227,53 +262,40 @@ static bool start_new_rx_buffer(int offset, unsigned long size, int head)
  */
 unsigned int xenvif_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
 {
+	struct xenvif_count_slot_state state;
 	unsigned int count;
-	int i, copy_off;
-	struct skb_cb_overlay *sco;
+	unsigned char *data;
+	unsigned i;
 
-	count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);
+	state.head = true;
+	state.copy_off = 0;
 
-	copy_off = skb_headlen(skb) % PAGE_SIZE;
+	/* Slot for the first (partial) page of data. */
+	count = 1;
 
+	/* Need a slot for the GSO prefix for GSO extra data? */
 	if (skb_shinfo(skb)->gso_size)
 		count++;
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
-		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
-		unsigned long bytes;
-
-		offset &= ~PAGE_MASK;
-
-		while (size > 0) {
-			BUG_ON(offset >= PAGE_SIZE);
-			BUG_ON(copy_off > MAX_BUFFER_OFFSET);
-
-			bytes = PAGE_SIZE - offset;
-
-			if (bytes > size)
-				bytes = size;
+	data = skb->data;
+	while (data < skb_tail_pointer(skb)) {
+		unsigned long offset = offset_in_page(data);
+		unsigned long size = PAGE_SIZE - offset;
 
-			if (start_new_rx_buffer(copy_off, bytes, 0)) {
-				count++;
-				copy_off = 0;
-			}
+		if (data + size > skb_tail_pointer(skb))
+			size = skb_tail_pointer(skb) - data;
 
-			if (copy_off + bytes > MAX_BUFFER_OFFSET)
-				bytes = MAX_BUFFER_OFFSET - copy_off;
+		count += xenvif_count_frag_slots(vif, offset, size, &state);
 
-			copy_off += bytes;
+		data += size;
+	}
 
-			offset += bytes;
-			size -= bytes;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
 
-			if (offset == PAGE_SIZE)
-				offset = 0;
-		}
+		count += xenvif_count_frag_slots(vif, offset, size, &state);
 	}
-
-	sco = (struct skb_cb_overlay *)skb->cb;
-	sco->peek_slots_count = count;
 	return count;
 }
 
@@ -305,11 +327,14 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 	return meta;
 }
 
-/* Set up the grant operations for this fragment. */
+/*
+ * Set up the grant operations for this fragment. If it's a flipping
+ * interface, we also set up the unmap request from here.
+ */
 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 				 struct netrx_pending_operations *npo,
 				 struct page *page, unsigned long size,
-				 unsigned long offset, int head, int *first)
+				 unsigned long offset, int *head)
 {
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
@@ -333,12 +358,12 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		if (bytes > size)
 			bytes = size;
 
-		if (start_new_rx_buffer(npo->copy_off, bytes, head)) {
+		if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
 			/*
 			 * Netfront requires there to be some data in the head
 			 * buffer.
 			 */
-			BUG_ON(*first);
+			BUG_ON(*head);
 
 			meta = get_next_rx_buffer(vif, npo);
 		}
@@ -372,10 +397,10 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		}
 
 		/* Leave a gap for the GSO descriptor. */
-		if (*first && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
 			vif->rx.req_cons++;
 
-		*first = 0; /* There must be something in this buffer now. */
+		*head = 0; /* There must be something in this buffer now. */
 
 	}
 }
@@ -401,7 +426,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	struct xen_netif_rx_request *req;
 	struct xenvif_rx_meta *meta;
 	unsigned char *data;
-	int first = 1;
+	int head = 1;
 	int old_meta_prod;
 
 	old_meta_prod = npo->meta_prod;
@@ -437,7 +462,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 			len = skb_tail_pointer(skb) - data;
 
 		xenvif_gop_frag_copy(vif, skb, npo,
-				     virt_to_page(data), len, offset, 1, &first);
+				     virt_to_page(data), len, offset, &head);
 		data += len;
 	}
 
@@ -446,7 +471,7 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
 				     skb_shinfo(skb)->frags[i].page_offset,
-				     0, &first);
+				     &head);
 	}
 
 	return npo->meta_prod - old_meta_prod;
@@ -504,6 +529,10 @@ static void xenvif_add_frag_responses(struct xenvif *vif, int status,
 	}
 }
 
+struct skb_cb_overlay {
+	int meta_slots_used;
+};
+
 static void xenvif_kick_thread(struct xenvif *vif)
 {
 	wake_up(&vif->wq);
@@ -534,26 +563,19 @@ void xenvif_rx_action(struct xenvif *vif)
 	count = 0;
 
 	while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) {
-		RING_IDX old_rx_req_cons;
-
 		vif = netdev_priv(skb->dev);
 		nr_frags = skb_shinfo(skb)->nr_frags;
 
-		old_rx_req_cons = vif->rx.req_cons;
 		sco = (struct skb_cb_overlay *)skb->cb;
 		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
 
-		count += vif->rx.req_cons - old_rx_req_cons;
+		count += nr_frags + 1;
 
 		__skb_queue_tail(&rxq, skb);
 
-		skb = skb_peek(&vif->rx_queue);
-		if (skb == NULL)
-			break;
-		sco = (struct skb_cb_overlay *)skb->cb;
-
 		/* Filled the batch queue? */
-		if (count + sco->peek_slots_count >= XEN_NETIF_RX_RING_SIZE)
+		/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
+		if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
 			break;
 	}
 
-- 
1.7.10.4

^ permalink raw reply related

* Re: [raw v1 2/4] [NET] Use raw_cpu ops for SNMP stats
From: Peter Zijlstra @ 2013-10-08 10:21 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Christoph Lameter, Tejun Heo, akpm, Eric Dumazet, netdev,
	Steven Rostedt, linux-kernel, Thomas Gleixner
In-Reply-To: <20131008072114.GA4455@gmail.com>

On Tue, Oct 08, 2013 at 09:21:14AM +0200, Ingo Molnar wrote:
> 
> * Christoph Lameter <cl@linux.com> wrote:
> 
> > SNMP stats are not protected by preemption but by bh handling.
> 
> Most forms of bh exclusion work via the preemption count though, and 
> softirq contexts themselves are generally not preemptible [to other CPUs] 
> either.
> 
> So the warnings should, in most cases, not trigger.

Right, so softirqs run either in the irq tail at which point
preempt_count += SOFTIRQ_OFFSET and thus preemption is disabled, or it
runs in ksoftirqd which has strict cpu affinity which also disables the
warning, and it also increments preempt_count with SOFTIRQ_OFFSET to
exclude the softirq from interrupts while its running, also disabling
the warning.

So it should very much not trigger.. if it does you want to know about
it.

^ permalink raw reply

* Re: [PATCH net-next] Revert "xen-netback: improve ring effeciency for guest RX"
From: Ian Campbell @ 2013-10-08 10:21 UTC (permalink / raw)
  To: Wei Liu
  Cc: netdev, xen-devel, Annie Li, Matt Wilson, Xi Xiong, David Vrabel,
	Paul Durrant
In-Reply-To: <1381226061-6521-1-git-send-email-wei.liu2@citrix.com>

On Tue, 2013-10-08 at 10:54 +0100, Wei Liu wrote:
> This reverts commit 4f0581d25827d5e864bcf07b05d73d0d12a20a5c.
> 
> The named changeset is causing problem. Let's aim to make this part less
> fragile before trying to improve things.
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>

Although I thought davem would just run git revert so I don't know if it
is needed.

^ permalink raw reply

* Re: [raw v1 2/4] [NET] Use raw_cpu ops for SNMP stats
From: Ingo Molnar @ 2013-10-08 10:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Christoph Lameter, Tejun Heo, akpm, Eric Dumazet, netdev,
	Steven Rostedt, linux-kernel, Thomas Gleixner
In-Reply-To: <20131008102142.GT3081@twins.programming.kicks-ass.net>


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Tue, Oct 08, 2013 at 09:21:14AM +0200, Ingo Molnar wrote:
> > 
> > * Christoph Lameter <cl@linux.com> wrote:
> > 
> > > SNMP stats are not protected by preemption but by bh handling.
> > 
> > Most forms of bh exclusion work via the preemption count though, and 
> > softirq contexts themselves are generally not preemptible [to other CPUs] 
> > either.
> > 
> > So the warnings should, in most cases, not trigger.
> 
> Right, so softirqs run either in the irq tail at which point 
> preempt_count += SOFTIRQ_OFFSET and thus preemption is disabled, or it 
> runs in ksoftirqd which has strict cpu affinity which also disables the 
> warning, and it also increments preempt_count with SOFTIRQ_OFFSET to 
> exclude the softirq from interrupts while its running, also disabling 
> the warning.

A third context would be syscall-level code that runs with 
local_bh_disable()/enable() - but that too ought to have the preempt count 
elevated.

> So it should very much not trigger.. if it does you want to know about 
> it.

Yes. If nothing else then for the education value.

Thanks,

	Ingo

^ permalink raw reply

* [PATCH net-next 0/5] xen-netback: IPv6 offload support
From: Paul Durrant @ 2013-10-08 10:44 UTC (permalink / raw)
  To: xen-devel, netdev

This patch series adds support for checksum and large packet offloads into
xen-netback.
Testing has mainly been done using the Microsoft network hardware
certification suite running in Server 2008R2 VMs with Citrix PV frontends.

^ permalink raw reply

* [PATCH net-next 1/5] xen-netback: add IPv6 checksum offload to guest
From: Paul Durrant @ 2013-10-08 10:44 UTC (permalink / raw)
  To: xen-devel, netdev; +Cc: Paul Durrant, Wei Liu, David Vrabel, Ian Campbell
In-Reply-To: <1381229081-17814-1-git-send-email-paul.durrant@citrix.com>

Check xenstore flag feature-ipv6-csum-offload to determine if a
guest is happy to accept IPv6 packets with only partial checksum.
Also check analogous feature-ip-csum-offload to determone if a
guest is happy to accept IPv4 packets with only partial checksum
as a replacement for a negated feature-no-csum-offload value and
add a comment to deprecate use of feature-no-csum-offload.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liuw2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/net/xen-netback/common.h    |    3 ++-
 drivers/net/xen-netback/interface.c |   10 +++++++---
 drivers/net/xen-netback/xenbus.c    |   24 +++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5715318..b4a9a3c 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -153,7 +153,8 @@ struct xenvif {
 	u8 can_sg:1;
 	u8 gso:1;
 	u8 gso_prefix:1;
-	u8 csum:1;
+	u8 ip_csum:1;
+	u8 ipv6_csum:1;
 
 	/* Internal feature information. */
 	u8 can_queue:1;	    /* can queue packets for receiver? */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 01bb854..8e92783 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -216,8 +216,10 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev,
 		features &= ~NETIF_F_SG;
 	if (!vif->gso && !vif->gso_prefix)
 		features &= ~NETIF_F_TSO;
-	if (!vif->csum)
+	if (!vif->ip_csum)
 		features &= ~NETIF_F_IP_CSUM;
+	if (!vif->ipv6_csum)
+		features &= ~NETIF_F_IPV6_CSUM;
 
 	return features;
 }
@@ -306,7 +308,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->domid  = domid;
 	vif->handle = handle;
 	vif->can_sg = 1;
-	vif->csum = 1;
+	vif->ip_csum = 1;
 	vif->dev = dev;
 
 	vif->credit_bytes = vif->remaining_credit = ~0UL;
@@ -316,7 +318,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->credit_timeout.expires = jiffies;
 
 	dev->netdev_ops	= &xenvif_netdev_ops;
-	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+	dev->hw_features = NETIF_F_SG |
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		NETIF_F_TSO;
 	dev->features = dev->hw_features;
 	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
 
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index b45bce2..5e45b00 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -567,10 +567,32 @@ static int connect_rings(struct backend_info *be)
 		val = 0;
 	vif->gso_prefix = !!val;
 
+	/* Before feature-ipv6-csum-offload was introduced, checksum offload
+	 * was turned on by a zero value in (or lack of)
+	 * feature-no-csum-offload. Therefore, for compatibility, assume
+	 * that if feature-ip-csum-offload is missing that IP (v4) offload
+	 * is turned on. If this is not the case then the flag will be
+	 * cleared when we subsequently sample feature-no-csum-offload.
+	 */
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ip-csum-offload",
+			 "%d", &val) < 0)
+		val = 1;
+	vif->ip_csum = !!val;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ipv6-csum-offload",
+			 "%d", &val) < 0)
+		val = 0;
+	vif->ipv6_csum = !!val;
+
+	/* This is deprecated. Frontends should set IP v4 and v6 checksum
+	 * offload using feature-ip-csum-offload and
+	 * feature-ipv6-csum-offload respectively.
+	 */
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
 			 "%d", &val) < 0)
 		val = 0;
-	vif->csum = !val;
+	if (val)
+		vif->ip_csum = 0;
 
 	/* Map the shared frame, irq etc. */
 	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref,
-- 
1.7.10.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox