Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] netfilter: xt_recent: Add optional mask option for xt_recent
From: Pablo Neira Ayuso @ 2012-06-06  0:01 UTC (permalink / raw)
  To: Denys Fedoryshchenko; +Cc: Linux netdev
In-Reply-To: <1337285238-13589-1-git-send-email-denys@visp.net.lb>

On Thu, May 17, 2012 at 11:07:18PM +0300, Denys Fedoryshchenko wrote:
> Use case for this feature:
> 1)In some occasions if you need to allow,block,match specific subnet.
> 2)I can use recent as a trigger when netfilter rule matches, with mask 0.0.0.0
> 
> Tested for backward compatibility:
> )old (userspace) iptables, new kernel
> )old kernel, new iptables
> )new kernel, new iptables
> 
> For v2:
>  As Pablo Neira Ayuso suggested, moved nf_inet_addr_mask to xt_recent.h
>  and made info_v1 as a stack variable.

Applied with some minor glitches (see below).

Thanks Denys.

> Signed-off-by: Denys Fedoryshchenko <denys@visp.net.lb>
> CC: Pablo Neira Ayuso <pablo@netfilter.org>
> ---
>  include/linux/netfilter/xt_recent.h |   20 +++++++++++
>  net/netfilter/xt_recent.c           |   62 ++++++++++++++++++++++++++++++----
>  2 files changed, 74 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h
> index 83318e0..5f69ebc 100644
> --- a/include/linux/netfilter/xt_recent.h
> +++ b/include/linux/netfilter/xt_recent.h
> @@ -32,4 +32,24 @@ struct xt_recent_mtinfo {
>  	__u8 side;
>  };
>  
> +struct xt_recent_mtinfo_v1 {
> +	__u32 seconds;
> +	__u32 hit_count;
> +	__u8 check_set;
> +	__u8 invert;
> +	char name[XT_RECENT_NAME_LEN];
> +	__u8 side;
> +	union nf_inet_addr mask;
> +};
> +
> +static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
> +				    union nf_inet_addr *result,
> +				    const union nf_inet_addr *mask)
> +{
> +	result->all[0] = a1->all[0] & mask->all[0];
> +	result->all[1] = a1->all[1] & mask->all[1];
> +	result->all[2] = a1->all[2] & mask->all[2];
> +	result->all[3] = a1->all[3] & mask->all[3];

Yes, I told you to move this to xt_recent.h. But then I noticed
nf_inet_addr_cmp in linux/netfilter.h, and well, this is static inline
and others may get some benefit from it.

Yes, I'm changing my mind :-).

> +}
> +
>  #endif /* _LINUX_NETFILTER_XT_RECENT_H */
> diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
> index fc0d6db..ca4375c 100644
> --- a/net/netfilter/xt_recent.c
> +++ b/net/netfilter/xt_recent.c
> @@ -75,6 +75,7 @@ struct recent_entry {
>  struct recent_table {
>  	struct list_head	list;
>  	char			name[XT_RECENT_NAME_LEN];
> +	union nf_inet_addr	mask;
>  	unsigned int		refcnt;
>  	unsigned int		entries;
>  	struct list_head	lru_list;
> @@ -228,10 +229,11 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
>  {
>  	struct net *net = dev_net(par->in ? par->in : par->out);
>  	struct recent_net *recent_net = recent_pernet(net);
> -	const struct xt_recent_mtinfo *info = par->matchinfo;
> +	const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
>  	struct recent_table *t;
>  	struct recent_entry *e;
>  	union nf_inet_addr addr = {};
> +	union nf_inet_addr addr_masked;

I've put addr_masked with addr (same line).

>  	u_int8_t ttl;
>  	bool ret = info->invert;
>  
> @@ -261,12 +263,15 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
>  
>  	spin_lock_bh(&recent_lock);
>  	t = recent_table_lookup(recent_net, info->name);
> -	e = recent_entry_lookup(t, &addr, par->family,
> +
> +	nf_inet_addr_mask(&addr, &addr_masked, &t->mask);
> +
> +	e = recent_entry_lookup(t, &addr_masked, par->family,
>  				(info->check_set & XT_RECENT_TTL) ? ttl : 0);
>  	if (e == NULL) {
>  		if (!(info->check_set & XT_RECENT_SET))
>  			goto out;
> -		e = recent_entry_init(t, &addr, par->family, ttl);
> +		e = recent_entry_init(t, &addr_masked, par->family, ttl);
>  		if (e == NULL)
>  			par->hotdrop = true;
>  		ret = !ret;
> @@ -306,10 +311,10 @@ out:
>  	return ret;
>  }
>  
> -static int recent_mt_check(const struct xt_mtchk_param *par)
> +static int recent_mt_check(const struct xt_mtchk_param *par,
> +	const struct xt_recent_mtinfo_v1 *info)
>  {
>  	struct recent_net *recent_net = recent_pernet(par->net);
> -	const struct xt_recent_mtinfo *info = par->matchinfo;
>  	struct recent_table *t;
>  #ifdef CONFIG_PROC_FS
>  	struct proc_dir_entry *pde;
> @@ -361,6 +366,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par)
>  		goto out;
>  	}
>  	t->refcnt = 1;
> +
> +	memcpy(&t->mask, &info->mask, sizeof(t->mask));
>  	strcpy(t->name, info->name);
>  	INIT_LIST_HEAD(&t->lru_list);
>  	for (i = 0; i < ip_list_hash_size; i++)
> @@ -385,10 +392,29 @@ out:
>  	return ret;
>  }
>  
> +static int recent_mt_check_v0(const struct xt_mtchk_param *par)
> +{
> +	const struct xt_recent_mtinfo_v0 *info_v0 = par->matchinfo;
> +	struct xt_recent_mtinfo_v1 info_v1;
> +	int ret;
> +
> +	/* Copy old data */
> +	memcpy(&info_v1, info_v0, sizeof(struct xt_recent_mtinfo));
> +	/* Default mask will make same behavior as old recent */
> +	memset(info_v1.mask.all, 0xFF, sizeof(info_v1.mask.all));
> +	ret = recent_mt_check(par, &info_v1);
> +	return ret;

return recent_mt_check(...)

I removed that ret variable.

> +}
> +
> +static int recent_mt_check_v1(const struct xt_mtchk_param *par)
> +{
> +	return recent_mt_check(par, par->matchinfo);
> +}
> +
>  static void recent_mt_destroy(const struct xt_mtdtor_param *par)
>  {
>  	struct recent_net *recent_net = recent_pernet(par->net);
> -	const struct xt_recent_mtinfo *info = par->matchinfo;
> +	const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
>  	struct recent_table *t;
>  
>  	mutex_lock(&recent_mutex);
> @@ -625,7 +651,7 @@ static struct xt_match recent_mt_reg[] __read_mostly = {
>  		.family     = NFPROTO_IPV4,
>  		.match      = recent_mt,
>  		.matchsize  = sizeof(struct xt_recent_mtinfo),
> -		.checkentry = recent_mt_check,
> +		.checkentry = recent_mt_check_v0,
>  		.destroy    = recent_mt_destroy,
>  		.me         = THIS_MODULE,
>  	},
> @@ -635,10 +661,30 @@ static struct xt_match recent_mt_reg[] __read_mostly = {
>  		.family     = NFPROTO_IPV6,
>  		.match      = recent_mt,
>  		.matchsize  = sizeof(struct xt_recent_mtinfo),
> -		.checkentry = recent_mt_check,
> +		.checkentry = recent_mt_check_v0,
> +		.destroy    = recent_mt_destroy,
> +		.me         = THIS_MODULE,
> +	},
> +	{
> +		.name       = "recent",
> +		.revision   = 1,
> +		.family     = NFPROTO_IPV4,
> +		.match      = recent_mt,
> +		.matchsize  = sizeof(struct xt_recent_mtinfo_v1),
> +		.checkentry = recent_mt_check_v1,
>  		.destroy    = recent_mt_destroy,
>  		.me         = THIS_MODULE,
>  	},
> +	{
> +		.name       = "recent",
> +		.revision   = 1,
> +		.family     = NFPROTO_IPV6,
> +		.match      = recent_mt,
> +		.matchsize  = sizeof(struct xt_recent_mtinfo_v1),
> +		.checkentry = recent_mt_check_v1,
> +		.destroy    = recent_mt_destroy,
> +		.me         = THIS_MODULE,
> +	}
>  };
>  
>  static int __init recent_mt_init(void)
> -- 
> 1.7.3.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 2/6] netfilter: decnet: switch hook PFs to nfproto
From: Pablo Neira Ayuso @ 2012-06-06  0:02 UTC (permalink / raw)
  To: Alban Crequy
  Cc: Patrick McHardy, Vincent Sanders, Javier Martinez Canillas,
	netfilter-devel, netdev
In-Reply-To: <1337003799-2517-2-git-send-email-alban.crequy@collabora.co.uk>

On Mon, May 14, 2012 at 02:56:35PM +0100, Alban Crequy wrote:
> NFPROTO_* constants were usually equal to PF_* constants but it is not
> necessary and it will waste less memory if we don't do so (see commit 7e9c6e
> "netfilter: Introduce NFPROTO_* constants")

Applied, thanks.

But I rewrote the description to just say that this is a consistency
cleanup (other hooks use NFPROTO_*).

Hm, by grepping net/netfilter for PF_* still some netfilter subsystems
(like ipset) show up using it.

Perhaps we can take another patch for these.

^ permalink raw reply

* Re: [PATCH 3/6] netfilter: bridge: switch hook PFs to nfproto
From: Pablo Neira Ayuso @ 2012-06-06  0:03 UTC (permalink / raw)
  To: Alban Crequy
  Cc: Patrick McHardy, Vincent Sanders, Javier Martinez Canillas,
	netfilter-devel, netdev
In-Reply-To: <1337003799-2517-3-git-send-email-alban.crequy@collabora.co.uk>

On Mon, May 14, 2012 at 02:56:36PM +0100, Alban Crequy wrote:
> NFPROTO_* constants were usually equal to PF_* constants but it is not
> necessary and it will waste less memory if we don't do so (see commit 7e9c6e
> "netfilter: Introduce NFPROTO_* constants")

Applied, thanks.

^ permalink raw reply

* Re: [PATCH 4/6] netfilter: ipv4, defrag: switch hook PFs to nfproto
From: Pablo Neira Ayuso @ 2012-06-06  0:03 UTC (permalink / raw)
  To: Alban Crequy
  Cc: Patrick McHardy, Vincent Sanders, Javier Martinez Canillas,
	netfilter-devel, netdev
In-Reply-To: <1337003799-2517-4-git-send-email-alban.crequy@collabora.co.uk>

On Mon, May 14, 2012 at 02:56:37PM +0100, Alban Crequy wrote:
> NFPROTO_* constants were usually equal to PF_* constants but it is not
> necessary and it will waste less memory if we don't do so (see commit 7e9c6e
> "netfilter: Introduce NFPROTO_* constants")

Applied (with that description rewrite as said), thanks.

^ permalink raw reply

* Re: [PATCH 5/6] netfilter: ipvs: switch hook PFs to nfproto
From: Pablo Neira Ayuso @ 2012-06-06  0:03 UTC (permalink / raw)
  To: Alban Crequy
  Cc: Patrick McHardy, Vincent Sanders, Javier Martinez Canillas,
	netfilter-devel, netdev
In-Reply-To: <1337003799-2517-5-git-send-email-alban.crequy@collabora.co.uk>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH 6/6] netfilter: selinux: switch hook PFs to nfproto
From: Pablo Neira Ayuso @ 2012-06-06  0:03 UTC (permalink / raw)
  To: Alban Crequy
  Cc: Patrick McHardy, Vincent Sanders, Javier Martinez Canillas,
	netfilter-devel, netdev
In-Reply-To: <1337003799-2517-6-git-send-email-alban.crequy@collabora.co.uk>

And finally this applied as well.

^ permalink raw reply

* Re: NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out
From: Jonathan Nieder @ 2012-06-06  0:38 UTC (permalink / raw)
  To: Thomas Meyer
  Cc: Eric Dumazet, Linux Kernel Mailing List, jcliburn, chris.snook,
	netdev, Josh Boyer
In-Reply-To: <1330163565.1419.5.camel@localhost.localdomain>

In February, 2012, Thomas Meyer wrote:
> Am Freitag, den 24.02.2012, 20:20 +0100 schrieb Eric Dumazet:

>> Here is a cumulative patch to hopefuly remove the races in this driver,
>> could you please test it ?
[...]
> just building a 3.2.7 kernel with your patch applied. I will watch out
> for the warning in the next days.

Well, did it work? :)

In suspense,
Jonathan

^ permalink raw reply

* Re: [PATCH] netdev: mv643xx_eth: Prevent build on PPC32
From: Lennert Buytenhek @ 2012-06-06  0:49 UTC (permalink / raw)
  To: Josh Boyer; +Cc: Andrew Lunn, Olof Johansson, netdev
In-Reply-To: <20120605192820.GC7683@zod.bos.redhat.com>

On Tue, Jun 05, 2012 at 03:28:21PM -0400, Josh Boyer wrote:

> Commit 452503ebc (ARM: Orion: Eth: Add clk/clkdev support.) added use of
> the clk driver API which results in compile errors on architectures that
> don't implement the clk API.
> 
> ERROR: "clk_enable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> ERROR: "clk_disable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> ERROR: "clk_put" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> ERROR: "clk_get_rate" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> ERROR: "clk_get" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> 
> Selecting CLKDEV_LOOKUP doesn't fix this either, as the build then fails with:
> 
> In file included from drivers/clk/clkdev.c:21:0:
> include/linux/clkdev.h:15:24: fatal error: asm/clkdev.h: No such file or directory
> 
> So we just prevent this from building at all on PPC32.

If the PPC32 dependency is no longer relevant (e.g. if Pegasos platform
support was removed from the kernel), then the commit message should
mention that -- the above reasoning is a poor sole justification for
this change.

^ permalink raw reply

* Re: [PATCH 01/15] netfilter: add namespace support for l4proto
From: Gao feng @ 2012-06-06  0:56 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: netfilter-devel, netdev, serge.hallyn, ebiederm, dlezcano,
	Gao feng
In-Reply-To: <20120605235600.GB27222@1984>

Hi pablo
于 2012年06月06日 07:56, Pablo Neira Ayuso 写道:
> On Tue, May 29, 2012 at 03:04:09PM +0800, Gao feng wrote:
>> From: Gao feng <gaofeng@cn.fujitus.com>
>>
>> struct nf_proto_net stroes proto's ctl_table_header and ctl_table,
>> nf_ct_l4proto_(un)register_sysctl use it to register sysctl.
>> because AF_INET6's protocols need not do compat, so register or
>> unregister sysctl when l4proto.l3proto != AF_INET6.
>>
>> - the net_id field is used to store the pernet_operations id
>>   that belones to l4proto.
>>
>> - init_net will be used to initial the proto's pernet data
>>
>> - nf_ct_(un)register_sysctl are changed to support net namespace,
>>   use (un)register_net_sysctl_table replaces (un)register_sysctl_paths.
>>   and in nf_ct_unregister_sysctl,kfree table only when users is 0.
>>
>> - Add the struct net as param of nf_conntrack_l4proto_(un)register.
>>   register or unregister the l4proto only when the net is init_net.
>>
>> - nf_conntrack_l4proto_register call init_net to initial the pernet
>>   data of l4proto.
>>
>> - nf_ct_l4proto_net is used to get the pernet data of l4proto.
>>
>> - use init_net as a param of nf_conntrack_l4proto_(un)register.
> 
> I have applied this patchset, but I had to rewrite the patch
> descriptions.
> 
> I don't blame your English neither your writing abilities (I'm not
> native speaker and not that good at writing either) but I think you
> can make it better next time.

I apologize for my poor English and writing abilities.
Maybe I should take an English course...

> 
> Basically, you don't need to comment every single change that the
> patch does. That's easy to see by looking at the patchset.
> 
> Instead, just provide brief explanation on your intentions with the
> patch, clarify things that may look not obvious to the reviewer and
> what we'll get with this.

Got it, thanks for teaching me this. ;)

> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Crash in network stack
From: Naveen B N (nbn) @ 2012-06-06  2:28 UTC (permalink / raw)
  To: ipsec-tools-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org

[-- Attachment #1: Type: text/plain, Size: 3205 bytes --]

Hi All
I am trying to modify the ESP flow in the kernel. I have edited ip_rcv() to avoid the xfrm lookup;
for IPsec processing I keep a simple database that stores the xfrm_state values.
In ip_rcv() I decrypt the ESP packet and then call netif_rx(skb). I can successfully
receive the packet in user space, where the user application is listening on the inner IP
address, but the kernel crashes if I send multiple packets.
I am unable to figure this out. I am using a user-mode (UML) kernel on x86; the host Linux is version 2.6.34,
and the same code base is used to build the UML kernel.

I am attaching the net/ipv4/ip_input.c file. Please help me figure out this crash.

Kernel panic - not syncing: Kernel mode signal 4
0823c748:  [<081beb4b>] dump_stack+0x1c/0x20
0823c760:  [<081beb84>] panic+0x35/0x9d
0823c788:  [<0805a107>] relay_signal+0x31/0x6c
0823c7a8:  [<08065a9a>] sig_handler_common+0x61/0x70
0823c820:  [<08065bc2>] sig_handler+0x31/0x3d
0823c82c:  [<08065d5b>] handle_signal+0x4f/0x7d
0823c84c:  [<080673f7>] hard_handler+0xf/0x14
0823c85c:  [<b783d400>] 0xb783d400
0823cb7c:  [<080b3772>] __kmalloc+0x70/0x9c
0823cb9c:  [<0816b95d>] __alloc_skb+0x52/0x11c
0823cbc8:  [<0816c344>] dev_alloc_skb+0x15/0x2c
0823cbe0:  [<0805e871>] uml_net_rx+0x19/0x9e
0823cbfc:  [<0805e9d9>] uml_net_interrupt+0x16/0x5c
0823cc08:  [<080947d0>] handle_IRQ_event+0x20/0xa7
0823cc24:  [<080948aa>] __do_IRQ+0x53/0x93
0823cc40:  [<080581a8>] do_IRQ+0x1f/0x34
0823cc50:  [<08058340>] sigio_handler+0x46/0x5c
0823cc68:  [<08065a9a>] sig_handler_common+0x61/0x70
0823cce0:  [<08065bc2>] sig_handler+0x31/0x3d
0823ccec:  [<08065d5b>] handle_signal+0x4f/0x7d
0823cd0c:  [<080673f7>] hard_handler+0xf/0x14
0823cd1c:  [<b783d400>] 0xb783d400



EIP: 0073:[<b7723861>] CPU: 0 Tainted: P           ESP: 007b:bfe1cbfc EFLAGS: 00000246
    Tainted: P
EAX: 00000000 EBX: 00006222 ECX: 00000013 EDX: 00006222
ESI: 0000621e EDI: 0000001c EBP: bfe1cc18 DS: 007b ES: 007b
0823c6f8:  [<0806aae7>] show_regs+0xc4/0xc9
0823c724:  [<0805a70a>] panic_exit+0x25/0x3b
0823c738:  [<08086063>] notifier_call_chain+0x27/0x4c
0823c760:  [<080860b6>] atomic_notifier_call_chain+0x15/0x17
0823c770:  [<081beb9f>] panic+0x50/0x9d
0823c788:  [<0805a107>] relay_signal+0x31/0x6c
0823c7a8:  [<08065a9a>] sig_handler_common+0x61/0x70
0823c820:  [<08065bc2>] sig_handler+0x31/0x3d
0823c82c:  [<08065d5b>] handle_signal+0x4f/0x7d
0823c84c:  [<080673f7>] hard_handler+0xf/0x14
0823c85c:  [<b783d400>] 0xb783d400
0823cb7c:  [<080b3772>] __kmalloc+0x70/0x9c
0823cb9c:  [<0816b95d>] __alloc_skb+0x52/0x11c
0823cbc8:  [<0816c344>] dev_alloc_skb+0x15/0x2c
0823cbe0:  [<0805e871>] uml_net_rx+0x19/0x9e
0823cbfc:  [<0805e9d9>] uml_net_interrupt+0x16/0x5c
0823cc08:  [<080947d0>] handle_IRQ_event+0x20/0xa7
0823cc24:  [<080948aa>] __do_IRQ+0x53/0x93
0823cc40:  [<080581a8>] do_IRQ+0x1f/0x34
0823cc50:  [<08058340>] sigio_handler+0x46/0x5c
0823cc68:  [<08065a9a>] sig_handler_common+0x61/0x70
0823cce0:  [<08065bc2>] sig_handler+0x31/0x3d
0823ccec:  [<08065d5b>] handle_signal+0x4f/0x7d
0823cd0c:  [<080673f7>] hard_handler+0xf/0x14
0823cd1c:  [<b783d400>] 0xb783d400


Thanks and Regards
Naveen

[-- Attachment #2: ip_input.c --]
[-- Type: text/plain, Size: 24045 bytes --]

/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 *
 * Fixes:
 *		Alan Cox	:	Commented a couple of minor bits of surplus code
 *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
 *					(just stops a compiler warning).
 *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
 *					are junked rather than corrupting things.
 *		Alan Cox	:	Frames to bad broadcast subnets are dumped
 *					We used to process them non broadcast and
 *					boy could that cause havoc.
 *		Alan Cox	:	ip_forward sets the free flag on the
 *					new frame it queues. Still crap because
 *					it copies the frame but at least it
 *					doesn't eat memory too.
 *		Alan Cox	:	Generic queue code and memory fixes.
 *		Fred Van Kempen :	IP fragment support (borrowed from NET2E)
 *		Gerhard Koerting:	Forward fragmented frames correctly.
 *		Gerhard Koerting: 	Fixes to my fix of the above 8-).
 *		Gerhard Koerting:	IP interface addressing fix.
 *		Linus Torvalds	:	More robustness checks
 *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
 *		Alan Cox	:	Save IP header pointer for later
 *		Alan Cox	:	ip option setting
 *		Alan Cox	:	Use ip_tos/ip_ttl settings
 *		Alan Cox	:	Fragmentation bogosity removed
 *					(Thanks to Mark.Bush@prg.ox.ac.uk)
 *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
 *		Alan Cox	:	Silly ip bug when an overlength
 *					fragment turns up. Now frees the
 *					queue.
 *		Linus Torvalds/ :	Memory leakage on fragmentation
 *		Alan Cox	:	handling.
 *		Gerhard Koerting:	Forwarding uses IP priority hints
 *		Teemu Rantanen	:	Fragment problems.
 *		Alan Cox	:	General cleanup, comments and reformat
 *		Alan Cox	:	SNMP statistics
 *		Alan Cox	:	BSD address rule semantics. Also see
 *					UDP as there is a nasty checksum issue
 *					if you do things the wrong way.
 *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
 *		Alan Cox	: 	IP options adjust sk->priority.
 *		Pedro Roque	:	Fix mtu/length error in ip_forward.
 *		Alan Cox	:	Avoid ip_chk_addr when possible.
 *	Richard Underwood	:	IP multicasting.
 *		Alan Cox	:	Cleaned up multicast handlers.
 *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
 *		Gunther Mayer	:	Fix the SNMP reporting typo
 *		Alan Cox	:	Always in group 224.0.0.1
 *	Pauline Middelink	:	Fast ip_checksum update when forwarding
 *					Masquerading support.
 *		Alan Cox	:	Multicast loopback error for 224.0.0.1
 *		Alan Cox	:	IP_MULTICAST_LOOP option.
 *		Alan Cox	:	Use notifiers.
 *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
 *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
 *		Stefan Becker   :       Send out ICMP HOST REDIRECT
 *	Arnt Gulbrandsen	:	ip_build_xmit
 *		Alan Cox	:	Per socket routing cache
 *		Alan Cox	:	Fixed routing cache, added header cache.
 *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
 *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
 *		Alan Cox	:	Incoming IP option handling.
 *		Alan Cox	:	Set saddr on raw output frames as per BSD.
 *		Alan Cox	:	Stopped broadcast source route explosions.
 *		Alan Cox	:	Can disable source routing
 *		Takeshi Sone    :	Masquerading didn't work.
 *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
 *		Alan Cox	:	Memory leaks, tramples, misc debugging.
 *		Alan Cox	:	Fixed multicast (by popular demand 8))
 *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
 *		Alan Cox	:	Fixed SNMP statistics [I think]
 *	Gerhard Koerting	:	IP fragmentation forwarding fix
 *		Alan Cox	:	Device lock against page fault.
 *		Alan Cox	:	IP_HDRINCL facility.
 *	Werner Almesberger	:	Zero fragment bug
 *		Alan Cox	:	RAW IP frame length bug
 *		Alan Cox	:	Outgoing firewall on build_xmit
 *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
 *		Alan Cox	:	Multicast routing hooks
 *		Jos Vos		:	Do accounting *before* call_in_firewall
 *	Willy Konynenberg	:	Transparent proxying support
 *
 *
 *
 * To Fix:
 *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 *		and could be made very efficient with the addition of some virtual memory hacks to permit
 *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
 *		Output fragmentation wants updating along with the buffer management to use a single
 *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 *		fragmentation anyway.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/system.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/slab.h>

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <net/xfrm.h>
#include <linux/mroute.h>
#include <linux/netlink.h>


#if 1
 /*
  * NOTE(review): declaration only -- both the xfrm_syfer_cb type and the
  * x_syfer_cb object are defined outside this file.  Presumably a hook for
  * the custom ESP receive path added further down; confirm against its
  * definition.
  */
 extern xfrm_syfer_cb x_syfer_cb;
#endif


/*
 *	Process the Router Alert IP option.
 *
 *	Walks ip_ra_chain under ip_ra_lock and passes the packet to raw_rcv()
 *	for every socket whose protocol number, bound device and network
 *	namespace match.  Every matching socket except the last one gets a
 *	clone; the last one gets the original skb.
 *
 *	Returns 1 if ip_defrag() returned non-zero for a fragmented packet or
 *	if the skb was passed to raw_rcv(), 0 if no socket matched.
 */
int ip_call_ra_chain(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	u8 protocol = ip_hdr(skb)->protocol;
	struct sock *last = NULL;
	struct ip_ra_chain *ra;
	int handled = 0;

	read_lock(&ip_ra_lock);
	for (ra = ip_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;

		if (!sk || inet_sk(sk)->inet_num != protocol)
			continue;

		/* A socket bound to an interface only sees packets that
		 * arrived on that interface.
		 */
		if (sk->sk_bound_dev_if &&
		    sk->sk_bound_dev_if != dev->ifindex)
			continue;

		if (!net_eq(sock_net(sk), dev_net(dev)))
			continue;

		if ((ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) &&
		    ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
			handled = 1;
			goto unlock;
		}

		/* Deliver a clone to the previous match; the original skb is
		 * kept for the final match after the loop.
		 */
		if (last) {
			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

			if (clone)
				raw_rcv(last, clone);
		}
		last = sk;
	}

	if (last) {
		raw_rcv(last, skb);
		handled = 1;
	}

unlock:
	read_unlock(&ip_ra_lock);
	return handled;
}

/*
 * Final step of local delivery (passed to NF_HOOK by ip_local_deliver()).
 *
 * Pulls the IP header off the skb, offers the packet to raw sockets via
 * raw_local_deliver() and then to the handler registered in inet_protos[]
 * for its protocol number.  A negative handler return value is negated and
 * used as the protocol number for another delivery round.
 *
 * Always returns 0.
 */
static int ip_local_deliver_finish(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);

	__skb_pull(skb, ip_hdrlen(skb));

	/* Point into the IP datagram, just past the header. */
	skb_reset_transport_header(skb);

	rcu_read_lock();
	{
		int protocol = ip_hdr(skb)->protocol;
		int hash, raw;
		const struct net_protocol *ipprot;

	resubmit:
		/* Re-entered with a new protocol number when a handler
		 * returns a negative value (see below).
		 */
		raw = raw_local_deliver(skb, protocol);

		hash = protocol & (MAX_INET_PROTOS - 1);
		ipprot = rcu_dereference(inet_protos[hash]);
		if (ipprot != NULL) {
			int ret;

			/* Outside init_net only handlers flagged netns_ok
			 * may run; otherwise the packet is freed.
			 */
			if (!net_eq(net, &init_net) && !ipprot->netns_ok) {
				if (net_ratelimit())
					printk("%s: proto %d isn't netns-ready\n",
						__func__, protocol);
				kfree_skb(skb);
				goto out;
			}
			/* The inbound xfrm policy check (and nf_reset) is
			 * compiled out entirely when CONFIG_SYFER_IPSEC is
			 * defined.
			 */
#ifndef CONFIG_SYFER_IPSEC
			if (!ipprot->no_policy) {
				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
					kfree_skb(skb);
					goto out;
				}
				nf_reset(skb);
			}
#endif
			ret = ipprot->handler(skb);/* xfrm4_rcv */
			if (ret < 0) {
				/* -ret is the protocol to deliver next. */
				protocol = -ret;
				goto resubmit;
			}
			IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
		} else {
			/* No registered handler: if no raw socket took the
			 * packet either, count it as an unknown protocol and
			 * send ICMP protocol-unreachable (subject to the
			 * policy check); in every case the skb is freed here.
			 */
			if (!raw) {
				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
					IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
					icmp_send(skb, ICMP_DEST_UNREACH,
						  ICMP_PROT_UNREACH, 0);
				}
			} else
				IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
			kfree_skb(skb);
		}
	}
 out:
	rcu_read_unlock();

	return 0;
}

/*
 *	Deliver an IP packet to the higher protocol layers.
 *
 *	A packet with the more-fragments flag or a non-zero fragment offset
 *	goes through ip_defrag() first; when ip_defrag() returns non-zero the
 *	function returns 0 without delivering anything.  Otherwise the packet
 *	is run through the NF_INET_LOCAL_IN hook with
 *	ip_local_deliver_finish() as the continuation, and the hook's result
 *	is returned.
 */
int ip_local_deliver(struct sk_buff *skb)
{
	if ((ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) &&
	    ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
		return 0;

	return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
		       ip_local_deliver_finish);
}

/*
 * Parse the IP options of a received packet into IPCB(skb)->opt and, when a
 * source-route option is present, apply the device's source-route setting
 * and process the option.
 *
 * Returns 0 on success and -1 when the caller should drop the packet.
 */
static inline int ip_rcv_options(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct ip_options *opt;
	struct iphdr *iph;

	/* Get a private copy of the header area before compiling the
	 * options.  Not every option needs the packet mangled, but doing it
	 * unconditionally is the simplest approach, and options combined
	 * with a running sniffer are rare.
	 */
	if (skb_cow(skb, skb_headroom(skb))) {
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		return -1;
	}

	/* Fetch the header only after skb_cow(). */
	iph = ip_hdr(skb);
	opt = &(IPCB(skb)->opt);
	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);

	if (ip_options_compile(dev_net(dev), opt, skb)) {
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
		return -1;
	}

	if (unlikely(opt->srr)) {
		struct in_device *in_dev = in_dev_get(dev);

		if (in_dev) {
			int srr_allowed = IN_DEV_SOURCE_ROUTE(in_dev);

			if (!srr_allowed &&
			    IN_DEV_LOG_MARTIANS(in_dev) &&
			    net_ratelimit())
				printk(KERN_INFO "source route option %pI4 -> %pI4\n",
				       &iph->saddr, &iph->daddr);

			in_dev_put(in_dev);
			if (!srr_allowed)
				return -1;
		}

		if (ip_options_rcv_srr(skb))
			return -1;
	}

	return 0;
}

/*
 * Second half of IP receive processing: make sure the skb has a route,
 * account it per routing class (CONFIG_NET_CLS_ROUTE), process IP options
 * when the header is longer than five words, update the multicast/broadcast
 * counters and pass the packet to dst_input().
 *
 * Returns the result of dst_input(), or NET_RX_DROP after freeing the skb
 * when routing or option processing fails.
 */
static int ip_rcv_finish(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct rtable *rt;

	/* Look up the input route unless the skb already carries one. */
	if (!skb_dst(skb)) {
		int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
					 skb->dev);

		if (unlikely(err)) {
			switch (err) {
			case -EHOSTUNREACH:
				IP_INC_STATS_BH(dev_net(skb->dev),
						IPSTATS_MIB_INADDRERRORS);
				break;
			case -ENETUNREACH:
				IP_INC_STATS_BH(dev_net(skb->dev),
						IPSTATS_MIB_INNOROUTES);
				break;
			default:
				break;
			}
			goto drop;
		}
	}

#ifdef CONFIG_NET_CLS_ROUTE
	if (unlikely(skb_dst(skb)->tclassid)) {
		struct ip_rt_acct *acct = per_cpu_ptr(ip_rt_acct, smp_processor_id());
		u32 classid = skb_dst(skb)->tclassid;
		u32 out_slot = classid & 0xFF;
		u32 in_slot = (classid >> 16) & 0xFF;

		/* Low byte indexes the "o_" counters, bits 16-23 the "i_" ones. */
		acct[out_slot].o_packets++;
		acct[out_slot].o_bytes += skb->len;
		acct[in_slot].i_packets++;
		acct[in_slot].i_bytes += skb->len;
	}
#endif

	if (iph->ihl > 5 && ip_rcv_options(skb))
		goto drop;

	rt = skb_rtable(skb);
	switch (rt->rt_type) {
	case RTN_MULTICAST:
		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
				skb->len);
		break;
	case RTN_BROADCAST:
		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
				skb->len);
		break;
	default:
		break;
	}

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}
#include <net/esp.h>
#include <linux/crypto.h>
#include <crypto/aead.h>
#include <crypto/authenc.h>


/*
 * DUMP_SKB_PTR - debug helper that prints an sk_buff's buffer pointers,
 * header fields and length fields with printk().
 *
 * The body is wrapped in do { } while (0) so the whole dump is a single
 * statement; unwrapped, a preceding `if` guarded only the first printk().
 * Every use of the argument is parenthesised.  Note that the argument is
 * evaluated several times, so it must not have side effects.
 *
 * The trailing semicolon is kept on purpose: this file invokes the macro
 * without one, so removing it would break that call site.
 */
#define DUMP_SKB_PTR(skb) do {\
	printk("\n*** SKB ***\n");\
	printk("Head=%p Data=%p Tail=%p END =%p \n",(skb)->head,(skb)->data,(skb)->tail,(skb)->end);\
	printk("Tran_header=%p Net_header=%p Max_header=%p \n",(skb)->transport_header,(skb)->network_header,(skb)->mac_header);\
	printk("len=%d data_len=%d hdr_len=%d mac_len=%d \n",(skb)->len,(skb)->data_len,(skb)->hdr_len,(skb)->mac_len);\
} while (0);


/*
 * Per-packet ESP state overlaid on the skb control buffer (skb->cb).
 *
 * @xfrm: xfrm control-buffer data, kept as the first member.
 *        NOTE(review): presumably so the generic xfrm view of skb->cb stays
 *        valid -- confirm against struct xfrm_skb_cb users.
 * @tmp:  scratch allocation for the crypto request; it is released with
 *        kfree() at the start of syfer_esp_input_done2().
 */
struct esp_skb_cb_my {
	struct xfrm_skb_cb xfrm;
	void *tmp;
};

/* View the control buffer of @__skb as a struct esp_skb_cb_my. */
#define ESP_SKB_CB_MY(__skb) ((struct esp_skb_cb_my *)((__skb)->cb))


/*
 * Allocate one scratch buffer sized to hold, in this order: the IV (with
 * alignment slack), an AEAD request plus the transform's request context,
 * and @nfrags scatterlist entries.
 *
 * Uses GFP_ATOMIC; returns whatever kmalloc() returns, which the caller
 * must check and eventually kfree().
 */
static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags)
{
	unsigned int iv_bytes = crypto_aead_ivsize(aead);
	unsigned int total = 0;

	/* IV area: only reserved (and padded for alignment) when the
	 * transform actually uses an IV.
	 */
	if (iv_bytes) {
		total = iv_bytes + (crypto_aead_alignmask(aead) &
				    ~(crypto_tfm_ctx_alignment() - 1));
		total = ALIGN(total, crypto_tfm_ctx_alignment());
	}

	/* Request header followed by the transform's private request data. */
	total += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead);

	/* Scatterlist array, placed on its natural alignment. */
	total = ALIGN(total, __alignof__(struct scatterlist));
	total += sizeof(struct scatterlist) * nfrags;

	return kmalloc(total, GFP_ATOMIC);
}

/*
 * Locate the IV inside the scratch buffer @tmp.
 *
 * With a zero IV size @tmp is returned unchanged; otherwise @tmp is rounded
 * up to the transform's alignment (alignmask + 1).
 */
static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp)
{
	if (!crypto_aead_ivsize(aead))
		return tmp;

	return PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1);
}

/*
 * Locate the givcrypt request that follows the IV in the scratch buffer and
 * bind it to @aead.
 *
 * @iv is the IV position as returned by esp_tmp_iv(); the request starts at
 * the first crypto_tfm_ctx_alignment()-aligned address after the IV.
 */
static inline struct aead_givcrypt_request *esp_tmp_givreq(
	struct crypto_aead *aead, u8 *iv)
{
	u8 *iv_end = iv + crypto_aead_ivsize(aead);
	struct aead_givcrypt_request *givreq;

	givreq = (void *)PTR_ALIGN(iv_end, crypto_tfm_ctx_alignment());
	aead_givcrypt_set_tfm(givreq, aead);

	return givreq;
}
/*
 * Locate the scatterlist array that follows a givcrypt request: skip the
 * request structure and the transform's request context, then round up to
 * the alignment of struct scatterlist.
 */
static inline struct scatterlist *esp_givreq_sg(
	struct crypto_aead *aead, struct aead_givcrypt_request *req)
{
	unsigned long past_req = (unsigned long)(req + 1) +
				 crypto_aead_reqsize(aead);

	return (void *)ALIGN(past_req, __alignof__(struct scatterlist));
}
/*
 * Locate the AEAD request that follows the IV in the scratch buffer and bind
 * it to @aead.
 *
 * @iv is the IV position as returned by esp_tmp_iv(); the request starts at
 * the first crypto_tfm_ctx_alignment()-aligned address after the IV.
 */
static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
{
	u8 *iv_end = iv + crypto_aead_ivsize(aead);
	struct aead_request *areq;

	areq = (void *)PTR_ALIGN(iv_end, crypto_tfm_ctx_alignment());
	aead_request_set_tfm(areq, aead);

	return areq;
}
/*
 * Locate the scatterlist array that follows an AEAD request: skip the
 * request structure and the transform's request context, then round up to
 * the alignment of struct scatterlist.
 */
static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
                                 struct aead_request *req)
{
	unsigned long past_req = (unsigned long)(req + 1) +
				 crypto_aead_reqsize(aead);

	return (void *)ALIGN(past_req, __alignof__(struct scatterlist));
}


/*
 * Second half of ESP input processing, run once decryption has finished
 * (called directly from syfer_esp_input() or from the async callback
 * esp_input_done()).
 *
 * @skb: packet being processed; its xfrm state is taken from
 *       xfrm_input_state(skb).
 * @err: result of the decrypt operation.
 *
 * Frees the scratch buffer stored in the skb control block, validates the
 * ESP trailer, optionally reports a changed NAT-T mapping, then trims the
 * trailer/ICV and pulls the ESP header + IV off the front of the skb.
 *
 * Returns @err unchanged when it is non-zero, -EINVAL when the trailer is
 * inconsistent or the next-header field is IPPROTO_NONE, otherwise the
 * next-header value read from the trailer.
 */
static int syfer_esp_input_done2(struct sk_buff *skb, int err)
{
	struct iphdr *iph;
	struct xfrm_state *x = xfrm_input_state(skb);
	struct esp_data *esp = x->data;
	struct crypto_aead *aead = esp->aead;
	int alen = crypto_aead_authsize(aead);
	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
	int elen = skb->len - hlen;
	int ihl;
	u8 nexthdr[2];
	int padlen;
    printk("\nEntered syfer_esp_input_done2 \n");
	/* The scratch buffer is released on every path, including errors. */
	kfree(ESP_SKB_CB_MY(skb)->tmp);

	if (unlikely(err))
		goto out;

	/*
	 * Read the two bytes that precede the authentication data:
	 * nexthdr[0] is the pad length, nexthdr[1] the next-header value.
	 */
	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
		BUG();

	err = -EINVAL;
	padlen = nexthdr[0];
    
    printk("\npadlen=%d alen=%d elen=%d\n",padlen,alen,elen);
	/* Padding + 2 trailer bytes + ICV must leave some payload. */
	if (padlen + 2 + alen >= elen)
		goto out;

	/* ... check padding bits here. Silly. :-) */

	iph = ip_hdr(skb);
	ihl = iph->ihl * 4;
    printk("\nsyfer_esp_input_done2 saddr=%x daddr=%x \n",iph->saddr, iph->daddr);
  	if (x->encap) {
		struct xfrm_encap_tmpl *encap = x->encap;
		struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);

		/*
		 * 1) if the NAT-T peer's IP or port changed then
		 *    advertize the change to the keying daemon.
		 *    This is an inbound SA, so just compare
		 *    SRC ports.
		 */
		if (iph->saddr != x->props.saddr.a4 ||
		    uh->source != encap->encap_sport) {
			xfrm_address_t ipaddr;

			ipaddr.a4 = iph->saddr;
			km_new_mapping(x, &ipaddr, uh->source);

			/* XXX: perhaps add an extra
			 * policy check here, to see
			 * if we should allow or
			 * reject a packet from a
			 * different source
			 * address/port.
			 */
		}

		/*
		 * 2) ignore UDP/TCP checksums in case
		 *    of NAT-T in Transport Mode, or
		 *    perform other post-processing fixes
		 *    as per draft-ietf-ipsec-udp-encaps-06,
		 *    section 3.1.2
		 */
		if (x->props.mode == XFRM_MODE_TRANSPORT)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/*
	 * Drop ICV, padding and the 2-byte trailer from the tail, then the
	 * ESP header + IV from the head.
	 * NOTE(review): the pskb_trim() result is not checked - confirm
	 * that a failure here is acceptable.
	 */
	pskb_trim(skb, skb->len - alen - padlen - 2);
	__skb_pull(skb, hlen);
    printk("\n After syfer_esp_input_done2  __skb_pull\n");
     DUMP_SKB_PTR(skb)
         
	/*
	 * NOTE(review): the transport header is placed ihl bytes *after*
	 * skb->data here; the stock esp4 code is believed to use -ihl at
	 * this point - confirm the positive offset is intentional.
	 */
	skb_set_transport_header(skb, ihl);

    printk("\n AFTER set skb_set_transport_header  \n");
     DUMP_SKB_PTR(skb)
	err = nexthdr[1];
    printk("\n Leaving syfer_esp_input_done2 \n");
	/* RFC4303: Drop dummy packets without any error */
	if (err == IPPROTO_NONE)
		err = -EINVAL;

out:
	return err;
}

/*
 * Completion callback registered with the AEAD request by
 * syfer_esp_input(); base->data is the skb that was passed to
 * aead_request_set_callback().
 *
 * NOTE(review): the value returned by syfer_esp_input_done2() is
 * discarded and nothing here resumes or frees the skb - confirm how
 * asynchronously completed packets are meant to continue.
 */
static void esp_input_done(struct crypto_async_request *base, int err)
{
	syfer_esp_input_done2(base->data, err);
}

/*
 * Decrypt an inbound ESP packet using the AEAD transform attached to @x.
 *
 * @x:   xfrm state; x->data is read as a struct esp_data.
 * @skb: packet to decrypt.
 *
 * Returns:
 *   -EINVAL      if the ESP header + IV cannot be pulled or no payload
 *                remains after them;
 *   a negative   value from skb_cow_data() if the skb cannot be made
 *                writable;
 *   -ENOMEM      if the scratch buffer cannot be allocated;
 *   -EINPROGRESS if the decrypt was queued (esp_input_done() runs later);
 *   otherwise the result of syfer_esp_input_done2().
 *
 * Note: detecting truncated vs. non-truncated authentication data is very
 * expensive, so we only support truncated data, which is the recommended
 * and common case.
 */
static int syfer_esp_input(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ip_esp_hdr *esph;
	struct esp_data *esp = x->data;
	struct crypto_aead *aead = esp->aead;
	struct aead_request *req;
	struct sk_buff *trailer;
	/* Bytes to decrypt: everything after the ESP header and IV. */
	int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead);
	int nfrags;
	void *tmp;
	u8 *iv;
	struct scatterlist *sg;
	struct scatterlist *asg;
	int err = -EINVAL;
    DUMP_SKB_PTR(skb)
                 
    
    printk("\n Entered syfer_esp_input x->data =%p \n",x->data);
	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
		goto out;

	if (elen <= 0)
		goto out;

	/* On success skb_cow_data() returns the fragment count. */
	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
		goto out;
	nfrags = err;

	err = -ENOMEM;
	/* One extra scatterlist entry for the associated data (asg). */
	tmp = esp_alloc_tmp(aead, nfrags + 1);
	if (!tmp)
		goto out;

	/* Stored so syfer_esp_input_done2() can free it. */
	ESP_SKB_CB_MY(skb)->tmp = tmp;
	iv = esp_tmp_iv(aead, tmp);
	req = esp_tmp_req(aead, iv);
	asg = esp_req_sg(aead, req);
	sg = asg + 1;

	skb->ip_summed = CHECKSUM_NONE;

///	esph = (struct ip_esp_hdr *)skb->data;
	/*
	 * NOTE(review): this assumes skb->network_header is a pointer and
	 * that the IP header carries no options (fixed sizeof(struct iphdr))
	 * - confirm for the target configuration.
	 */
    esph= (struct ip_esp_hdr *)(skb->network_header+sizeof(struct iphdr));
	/* Get ivec. This can be wrong, check against another impls. */
	iv = esph->enc_data;

	sg_init_table(sg, nfrags);
	skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen);
	sg_init_one(asg, esph, sizeof(*esph));
     
	/* Decrypt in place (src == dst) with the ESP header as associated data. */
	aead_request_set_callback(req, 0, esp_input_done, skb);
	aead_request_set_crypt(req, sg, sg, elen, iv);
	aead_request_set_assoc(req, asg, sizeof(*esph));

	err = crypto_aead_decrypt(req);
	if (err == -EINPROGRESS)
		goto out;
   printk("\n Calling syfer_esp_input_done2 err =%d \n",err);
   err = syfer_esp_input_done2(skb, err);
  
    printk("\n Leaving  syfer_esp_input \n");
out:
	return err;
}



/*
 * 	Main IP Receive routine.
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct iphdr *iph;
	u32 len;

	/* When the interface is in promisc. mode, drop all the crap
	 * that it receives, do not try to analyse it.
	 */
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto drop;


	IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

	iph = ip_hdr(skb);

	/*
	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 */

	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

	if (!pskb_may_pull(skb, iph->ihl*4))
		goto inhdr_error;

	iph = ip_hdr(skb);

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto inhdr_error;

	len = ntohs(iph->tot_len);
	if (skb->len < len) {
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	} else if (len < (iph->ihl*4))
		goto inhdr_error;

	/* Our transport medium may have padded the buffer out. Now we know it
	 * is IP we can trim to the true length of the frame.
	 * Note this now means skb->len holds ntohs(iph->tot_len).
	 */
	if (pskb_trim_rcsum(skb, len)) {
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	/* Remove any debris in the socket control block */
	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));

	/* Must drop socket now because of tproxy. */
	skb_orphan(skb);
    if(iph->protocol ==IPPROTO_ESP)
    {
        struct xfrm_state * x;
        struct ip_esp_hdr *esph=NULL;
        esph=(struct ip_esp_hdr *)(skb->network_header+sizeof(struct iphdr));
        if (x_syfer_cb.syfer_input_state_find)
        {
            x = x_syfer_cb.syfer_input_state_find(esph->spi);
            x->km.state = XFRM_STATE_VALID;     
            if (x){
                //skb->data = skb->data +sizeof(struct iphdr)+sizeof(struct ip_esp_hdr)+8; 

spin_lock(&x->lock);
                printk("Before ESP INBOUND processing \n");
                DUMP_SKB_PTR(skb)
                printk("\n struct iphdr =%d struct ip_esp_hdr=%d \n",sizeof(struct iphdr),sizeof(struct ip_esp_hdr));
#if 1

                skb->sp->xvec[skb->sp->len++] = x;
                skb->data=skb->data+sizeof(struct iphdr);
                skb->len=skb->len-sizeof(struct iphdr);
                //skb->network_header=skb->head+0x2e;
                skb->transport_header=skb->data; 
#endif
#if 0
                    skb->network_header=skb->head+0x2e;
                    skb->data = skb->head +0x42;
                    skb->transport_header=skb->data; 
                    skb->len=372;
#endif
                //skb->len=skb->len-(sizeof(struct iphdr)+sizeof(struct ip_esp_hdr)+8); 
                DUMP_SKB_PTR(skb)
                syfer_esp_input(x, skb);
                printk("AFTER ESP INBOUND processing \n");
                DUMP_SKB_PTR(skb)
            }
        }
        const unsigned char *old_mac;
        old_mac = skb_mac_header(skb);
        //    skb->data = skb_data +sizeof(struct iphdr)+sizeof(struct ip_esp_hdr)+8; 
        skb_set_mac_header(skb, -skb->mac_len);
        //skb->data=skb->mac_header;
        memmove(skb_mac_header(skb), old_mac, skb->mac_len);
        skb_reset_network_header(skb);
       // skb->network_header=skb->data+skb->mac_len;
        printk("\n AFTER xfrm_prepare_input \n");
        //      skb->len=skb->len-(sizeof(struct iphdr)+sizeof(struct ip_esp_hdr)+8); 
        DUMP_SKB_PTR(skb)
            //err=syfer_xfrm4_mode_tunnel_input(x,skb);
            //if(err <0)
            //   printk("\n xfrm_prepare_input Failed \n");
            //  iph = (struct ip_esp_hdr *)(skb->network_header+sizeof(struct iphdr)+sizeof(struct ip_esp_hdr));
            // printk("\nip_rcv Inner saddr=%x daddr=%x \n",iph->saddr, iph->daddr);
#if 0
            skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);
        char dst_mac_addr[] = {0x9C, 0xDF, 0xB2, 0x23, 0x63, 0xD6};

        dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
                &dst_mac_addr, NULL, skb->len);
        dev_queue_xmit(skb);
#endif

        // ip_rcv(skb,dev,pt,orig_dev);
        //return 0;
       nf_reset(skb);
        skb_dst_drop(skb);

// int async =1;
//        x->inner_mode->afinfo->transport_finish(skb, async);
        netif_rx(skb);

//      ip_rcv(skb,dev,pt,orig_dev);
//      ip_rcv_finish(skb);
//spin_unlock(&x->lock);
    }else  

        return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
                ip_rcv_finish);

inhdr_error:
    IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
    kfree_skb(skb);
out:
    return NET_RX_DROP;
}

[-- Attachment #3: Type: text/plain, Size: 395 bytes --]

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/

[-- Attachment #4: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply

* skb_gso_segment oops
From: kendo @ 2012-06-06  2:36 UTC (permalink / raw)
  To: netdev

Hello!

I am using an Intel 82574L network adapter with GSO/SG/RX checksum/TX checksum offload enabled, and the system occasionally hits the skb_gso_segment Oops shown below. Is this caused by a configuration error on my side, or is it a kernel bug? (In fact, I have also seen it once when using the igb driver.)
------------------------------------------
Jun  5 14:03:32 AnShion <12> klogd: [14579.562843] ------------[ cut here ]------------
Jun  5 14:03:32 AnShion <12> klogd: [14579.562859] WARNING: at net/core/dev.c:1861 skb_gso_segment+0x225/0x300()
Jun  5 14:03:32 AnShion <12> klogd: [14579.562869] Hardware name: To Be Filled By O.E.M.
Jun  5 14:03:32 AnShion <12> klogd: [14579.562874] : caps=(0x2000, 0x0) len=40 data_len=0 ip_summed=1
Jun  5 14:03:32 AnShion <12> klogd: [14579.562878] Modules linked in: ipmi_watchdog  igb e1000e
Jun  5 14:03:32 AnShion <12> klogd: [14579.562913] Pid: 0, comm: kworker/0:0 Not tainted 2.6.38.8 #360
Jun  5 14:03:32 AnShion <12> klogd: [14579.562917] Call Trace:
Jun  5 14:03:32 AnShion <12> klogd: [14579.562925]  [<c06a7c25>] ? skb_gso_segment+0x225/0x300
Jun  5 14:03:32 AnShion <12> klogd: [14579.562934]  [<c0145efc>] ? warn_slowpath_common+0x6c/0xa0
Jun  5 14:03:32 AnShion <12> klogd: [14579.562941]  [<c06a7c25>] ? skb_gso_segment+0x225/0x300
Jun  5 14:03:32 AnShion <12> klogd: [14579.562948]  [<c0145fae>] ? warn_slowpath_fmt+0x2e/0x30
Jun  5 14:03:32 AnShion <12> klogd: [14579.562954]  [<c06a7c25>] ? skb_gso_segment+0x225/0x300
Jun  5 14:03:32 AnShion <12> klogd: [14579.562964]  [<c06a7f1f>] ? dev_hard_start_xmit+0x21f/0x5d0
Jun  5 14:03:32 AnShion <12> klogd: [14579.562972]  [<c0742723>] ? ipv4_confirm+0xb3/0x170
Jun  5 14:03:32 AnShion <12> klogd: [14579.562979]  [<c06bdd8d>] ? sch_direct_xmit+0x13d/0x1a0
Jun  5 14:03:32 AnShion <12> klogd: [14579.562986]  [<c06aa6cd>] ? dev_queue_xmit+0x26d/0x470
Jun  5 14:03:32 AnShion <12> klogd: [14579.562993]  [<c06db887>] ? nf_iterate+0x67/0x90
Jun  5 14:03:32 AnShion <12> klogd: [14579.563001]  [<c0705f00>] ? ip_finish_output+0x240/0x2f0
Jun  5 14:03:32 AnShion <12> klogd: [14579.563007]  [<c0705cc0>] ? ip_finish_output+0x0/0x2f0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563014]  [<c0706067>] ? ip_output+0xb7/0xc0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563020]  [<c0705cc0>] ? ip_finish_output+0x0/0x2f0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563028]  [<c0701e96>] ? ip_forward_finish+0x46/0x60
Jun  5 14:03:33 AnShion <12> klogd: [14579.563035]  [<c070209f>] ? ip_forward+0x1ef/0x430
Jun  5 14:03:33 AnShion <12> klogd: [14579.563041]  [<c0701e50>] ? ip_forward_finish+0x0/0x60
Jun  5 14:03:33 AnShion <12> klogd: [14579.563049]  [<c0700751>] ? ip_rcv_finish+0x241/0x3c0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563056]  [<c0700b76>] ? ip_rcv+0x2a6/0x320
Jun  5 14:03:33 AnShion <12> klogd: [14579.563063]  [<c0700510>] ? ip_rcv_finish+0x0/0x3c0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563070]  [<c06a94b8>] ? __netif_receive_skb+0x258/0x520
Jun  5 14:03:33 AnShion <12> klogd: [14579.563076]  [<c0706067>] ? ip_output+0xb7/0xc0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563083]  [<c06a98b3>] ? netif_receive_skb+0x23/0x50
Jun  5 14:03:33 AnShion <12> klogd: [14579.563090]  [<c06a99d7>] ? napi_skb_finish+0x37/0x50
Jun  5 14:03:33 AnShion <12> klogd: [14579.563097]  [<c06aa01b>] ? napi_gro_receive+0xdb/0xf0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563113]  [<f8202c17>] ? e1000_receive_skb+0x47/0x60 [e1000e]
Jun  5 14:03:33 AnShion <12> klogd: [14579.563125]  [<f82040a7>] ? e1000_clean_rx_irq+0x2a7/0x360 [e1000e]
Jun  5 14:03:33 AnShion <12> klogd: [14579.563139]  [<f8208053>] ? e1000e_poll+0x73/0x320 [e1000e]
Jun  5 14:03:33 AnShion <12> klogd: [14579.563146]  [<c06a9e4a>] ? net_rx_action+0xaa/0x1a0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563154]  [<c014bff1>] ? __do_softirq+0xb1/0x190
Jun  5 14:03:33 AnShion <12> klogd: [14579.563161]  [<c014bf40>] ? __do_softirq+0x0/0x190
Jun  5 14:03:33 AnShion <12> klogd: [14579.563165]  <IRQ>  [<c014bebd>] ? irq_exit+0x5d/0x80
Jun  5 14:03:33 AnShion <12> klogd: [14579.563177]  [<c0104bc5>] ? do_IRQ+0x45/0xb0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563183]  [<c014bed5>] ? irq_exit+0x75/0x80
Jun  5 14:03:33 AnShion <12> klogd: [14579.563191]  [<c011c9f6>] ? smp_apic_timer_interrupt+0x56/0x90
Jun  5 14:03:33 AnShion <12> klogd: [14579.563198]  [<c01036f0>] ? common_interrupt+0x30/0x38
Jun  5 14:03:33 AnShion <12> klogd: [14579.563205]  [<c010b0be>] ? mwait_idle+0xfe/0x190
Jun  5 14:03:33 AnShion <12> klogd: [14579.563212]  [<c010214a>] ? cpu_idle+0x8a/0xc0
Jun  5 14:03:33 AnShion <12> klogd: [14579.563219]  [<c0813789>] ? start_secondary+0x1a1/0x1e8
Jun  5 14:03:33 AnShion <12> klogd: [14579.563225] ---[ end trace c07d72448ce8e376 ]---


        kendo
        kendo999@163.com
          2012-06-06

^ permalink raw reply

* Re: [PATCH] netdev: mv643xx_eth: Prevent build on PPC32
From: Josh Boyer @ 2012-06-06  2:38 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Lennert Buytenhek, Andrew Lunn, Olof Johansson, netdev
In-Reply-To: <1338939005.21665.54.camel@deadeye.wl.decadent.org.uk>

On Wed, Jun 06, 2012 at 12:30:05AM +0100, Ben Hutchings wrote:
> On Tue, 2012-06-05 at 15:28 -0400, Josh Boyer wrote:
> > Commit 452503ebc (ARM: Orion: Eth: Add clk/clkdev support.) added use of
> > the clk driver API which results in compile errors on architectures that
> > don't implement the clk API.
> > 
> > ERROR: "clk_enable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_disable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_put" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_get_rate" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_get" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > 
> > Selecting CLKDEV_LOOKUP doesn't fix this either, as the build then fails with:
> > 
> > In file included from drivers/clk/clkdev.c:21:0:
> > include/linux/clkdev.h:15:24: fatal error: asm/clkdev.h: No such file or directory
> > 
> > So we just prevent this from building at all on PPC32.
> [...]
> 
> This dependency was introduced by:
> 
> commit 16b817579fb61050f1abcc0e81089974328a9c27
> Author: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Date:   Sat Apr 16 15:24:30 2005 -0700
> 
>     [PATCH] ppc32: MV643XX ethernet is an option for Pegasos
> 
> commit 06ede91017d015a03cf8c1c87b3ff668f9a846e0
> Author: Dale Farnsworth <dale@farnsworth.org>
> Date:   Wed Sep 20 12:24:34 2006 -0700
> 
>     [PATCH] mv643xx_eth: restrict to 32-bit PPC_MULTIPLATFORM
> 
> If Pegasos is still supposed to be supported then this needs to be fixed
> properly.

The proper fix, from my minimal looking, was one of:

1) revert the change for ARM that introduced the clk stuff
2) do a similar change as the original commit but with a bunch of
#ifdef-ery
3) implement the clkdev API stuff for 32-bit ppc

Honestly, I'd go for either 1 or 2.  The commit that introduced it was
broken to begin with, but that isn't my call.

josh

^ permalink raw reply

* Re: [PATCH] netdev: mv643xx_eth: Prevent build on PPC32
From: Josh Boyer @ 2012-06-06  2:40 UTC (permalink / raw)
  To: Lennert Buytenhek; +Cc: Andrew Lunn, Olof Johansson, netdev
In-Reply-To: <20120606004907.GG725@wantstofly.org>

On Wed, Jun 06, 2012 at 02:49:07AM +0200, Lennert Buytenhek wrote:
> On Tue, Jun 05, 2012 at 03:28:21PM -0400, Josh Boyer wrote:
> 
> > Commit 452503ebc (ARM: Orion: Eth: Add clk/clkdev support.) added use of
> > the clk driver API which results in compile errors on architectures that
> > don't implement the clk API.
> > 
> > ERROR: "clk_enable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_disable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_put" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_get_rate" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > ERROR: "clk_get" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > 
> > Selecting CLKDEV_LOOKUP doesn't fix this either, as the build then fails with:
> > 
> > In file included from drivers/clk/clkdev.c:21:0:
> > include/linux/clkdev.h:15:24: fatal error: asm/clkdev.h: No such file or directory
> > 
> > So we just prevent this from building at all on PPC32.
> 
> If the PPC32 dependency is no longer relevant (e.g. if Pegasos platform
> support was removed from the kernel), then the commit message should
> mention that -- the above reasoning is a poor sole justification for
> this change.

You are correct.  As it stands, it's no better than just breaking it
outright with commit 452503ebc.  I've described the 3 possible solutions
in my other reply.

josh

^ permalink raw reply

* Re: [PATCH] netdev: mv643xx_eth: Prevent build on PPC32
From: Lennert Buytenhek @ 2012-06-06  3:02 UTC (permalink / raw)
  To: Josh Boyer
  Cc: Andrew Lunn, Olof Johansson, Jamie Lentin, Mike Turquette, netdev
In-Reply-To: <20120606024013.GE7683@zod.bos.redhat.com>

On Tue, Jun 05, 2012 at 10:40:14PM -0400, Josh Boyer wrote:

> > > Commit 452503ebc (ARM: Orion: Eth: Add clk/clkdev support.) added use of
> > > the clk driver API which results in compile errors on architectures that
> > > don't implement the clk API.
> > > 
> > > ERROR: "clk_enable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > > ERROR: "clk_disable" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > > ERROR: "clk_put" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > > ERROR: "clk_get_rate" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > > ERROR: "clk_get" [drivers/net/ethernet/marvell/mv643xx_eth.ko] undefined!
> > > 
> > > Selecting CLKDEV_LOOKUP doesn't fix this either, as the build then fails with:
> > > 
> > > In file included from drivers/clk/clkdev.c:21:0:
> > > include/linux/clkdev.h:15:24: fatal error: asm/clkdev.h: No such file or directory
> > > 
> > > So we just prevent this from building at all on PPC32.
> > 
> > If the PPC32 dependency is no longer relevant (e.g. if Pegasos platform
> > support was removed from the kernel), then the commit message should
> > mention that -- the above reasoning is a poor sole justification for
> > this change.
> 
> You are correct.  As it stands, it's no better than just breaking it
> outright with commit 452503ebc.

ACK.  If I'd have seen that commit ("ARM: Orion: Eth: Add clk/clkdev
support.") come by I would have said something about it, but no one
bothered to CC me on it -- and it doesn't seem that it was CCed to
netdev@ either...?

^ permalink raw reply

* RE: [PATCH RFC] c_can_pci: generic module for c_can on PCI
From: Bhupesh SHARMA @ 2012-06-06  3:50 UTC (permalink / raw)
  To: rubini@gnudd.com, anilkumar@ti.com
  Cc: mkl@pengutronix.de, federico.vaga@gmail.com,
	alan@lxorguk.ukuu.org.uk, wg@grandegger.com, Giancarlo ASNAGHI,
	alan@linux.intel.com, linux-can@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20120605165008.GA21871@mail.gnudd.com>

Hi,

> -----Original Message-----
> From: rubini@gnudd.com [mailto:rubini@gnudd.com]
> Sent: Tuesday, June 05, 2012 10:20 PM
> To: anilkumar@ti.com
> Cc: mkl@pengutronix.de; Bhupesh SHARMA; federico.vaga@gmail.com;
> alan@lxorguk.ukuu.org.uk; wg@grandegger.com; Giancarlo ASNAGHI;
> alan@linux.intel.com; linux-can@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH RFC] c_can_pci: generic module for c_can on PCI
> 
> > I am late to the discussion, is there any specific reason to maintain
> a
> > separate platform file (c_can_pci.c).
> 
> Because it depends on pci and ifdef is bad.
> 
> > I think 90% of the code is copied from c_can_paltform.c, code
> > changes will be less if you merge to existing c_can platform driver.
> 
> Yes, but then we need to ifdef around, which merges two bad files
> into a single but worse file.
> 
> But since the only current user of c_can is the platform device, why
> not merging the platform with the core and having pci just register a
> platform device?  The only problem I see is that we need cooperation,
> because neither me nor federico have a c_can equipped board besides
> the pci one.
> 

I can see examples of where different platform files are present for SJA CAN controller
as well depending on the underlying bus being used: OpenFirmware, ISA, PCI, etc..,
whilst there is a single core file there as well 'sja1000.c'

[1] Kvaser PCI platform driver, using services exposed by sja1000 core:
	http://lxr.linux.no/linux+v3.4.1/drivers/net/can/sja1000/kvaser_pci.c

[2] EMS PCI platform driver, using services exposed by sja1000 core:
	http://lxr.linux.no/linux+v3.4.1/drivers/net/can/sja1000/ems_pci.c

[3] SJA1000 core:
	http://lxr.linux.no/linux+v3.4.1/drivers/net/can/sja1000/sja1000.c

Here each platform driver has its own version of register read/write routine implementation.
The C_CAN approach is similar to that used by SJA1000. Instead of merging the "platform with the core",
I would instead suggest to have two separate platform drivers (for each bus type) and invoke common
routines kept in say another file 'c_can_platform_common.c', thus ensuring that there is no code
duplication and we have a clean hierarchical structure as well. So we can have:
	- Core file, c_can.c
	- Common platform file, c_can_platform_common.c
	- Platform file, c_can_platform.c, c_can_pci.c, etc..

This ensures that nothing breaks at the end of the existing C_CAN users and we have a clean
file structure as well.

Of course, Wolfgang has a better idea of this structure, as he defined the same for SJA1000 and I
consulted with him on this, while he was reviewing my initial C_CAN patch set. I will let him and Marc
comment further on my proposal. Your comments are also most welcome :)

Regards,
Bhupesh

^ permalink raw reply

* [net 0/3][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2012-06-06  4:08 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

This series contains 2 fixes for ixgbe and a fix for e1000e.

The following are changes since commit dc5cd894cace7bda4a743487a9f87d59a3f0a095:
  net/hyperv: Use wait_event on outstanding sends during device removal
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net master

Bruce Allan (1):
  e1000e: test for valid check_reset_block function pointer

John Fastabend (2):
  ixgbe: fix_features rxvlan is independent of DCB and needs to be set
  ixgbe: IXGBE_RXD_STAT_VP set even with Rx stripping enabled

 drivers/net/ethernet/intel/e1000e/ethtool.c   |    6 ++++--
 drivers/net/ethernet/intel/e1000e/mac.c       |    2 +-
 drivers/net/ethernet/intel/e1000e/netdev.c    |    4 ++--
 drivers/net/ethernet/intel/e1000e/phy.c       |    8 +++++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   22 ++++++++++------------
 5 files changed, 22 insertions(+), 20 deletions(-)

-- 
1.7.10.2

^ permalink raw reply

* [net 1/3] e1000e: test for valid check_reset_block function pointer
From: Jeff Kirsher @ 2012-06-06  4:08 UTC (permalink / raw)
  To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1338955735-8967-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Bruce Allan <bruce.w.allan@intel.com>

commit 44abd5c12767a8c567dc4e45fd9aec3b13ca85e0 introduced NULL pointer
dereferences when attempting to access the check_reset_block function
pointer on 8257x and 80003es2lan non-copper devices.

This fix should be applied back through 3.4.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ethtool.c |    6 ++++--
 drivers/net/ethernet/intel/e1000e/mac.c     |    2 +-
 drivers/net/ethernet/intel/e1000e/netdev.c  |    4 ++--
 drivers/net/ethernet/intel/e1000e/phy.c     |    8 +++++---
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index d863075..905e214 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -258,7 +258,8 @@ static int e1000_set_settings(struct net_device *netdev,
 	 * When SoL/IDER sessions are active, autoneg/speed/duplex
 	 * cannot be changed
 	 */
-	if (hw->phy.ops.check_reset_block(hw)) {
+	if (hw->phy.ops.check_reset_block &&
+	    hw->phy.ops.check_reset_block(hw)) {
 		e_err("Cannot change link characteristics when SoL/IDER is active.\n");
 		return -EINVAL;
 	}
@@ -1615,7 +1616,8 @@ static int e1000_loopback_test(struct e1000_adapter *adapter, u64 *data)
 	 * PHY loopback cannot be performed if SoL/IDER
 	 * sessions are active
 	 */
-	if (hw->phy.ops.check_reset_block(hw)) {
+	if (hw->phy.ops.check_reset_block &&
+	    hw->phy.ops.check_reset_block(hw)) {
 		e_err("Cannot do PHY loopback test when SoL/IDER is active.\n");
 		*data = 0;
 		goto out;
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index 026e8b3..a134399 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -709,7 +709,7 @@ s32 e1000e_setup_link_generic(struct e1000_hw *hw)
 	 * In the case of the phy reset being blocked, we already have a link.
 	 * We do not need to set it up again.
 	 */
-	if (hw->phy.ops.check_reset_block(hw))
+	if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw))
 		return 0;
 
 	/*
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a4b0435..31d37a2 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6237,7 +6237,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 		adapter->hw.phy.ms_type = e1000_ms_hw_default;
 	}
 
-	if (hw->phy.ops.check_reset_block(hw))
+	if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw))
 		e_info("PHY reset is blocked due to SOL/IDER session.\n");
 
 	/* Set initial default active device features */
@@ -6404,7 +6404,7 @@ err_register:
 	if (!(adapter->flags & FLAG_HAS_AMT))
 		e1000e_release_hw_control(adapter);
 err_eeprom:
-	if (!hw->phy.ops.check_reset_block(hw))
+	if (hw->phy.ops.check_reset_block && !hw->phy.ops.check_reset_block(hw))
 		e1000_phy_hw_reset(&adapter->hw);
 err_hw_init:
 	kfree(adapter->tx_ring);
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 0334d01..b860d4f 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -2155,9 +2155,11 @@ s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw)
 	s32 ret_val;
 	u32 ctrl;
 
-	ret_val = phy->ops.check_reset_block(hw);
-	if (ret_val)
-		return 0;
+	if (phy->ops.check_reset_block) {
+		ret_val = phy->ops.check_reset_block(hw);
+		if (ret_val)
+			return 0;
+	}
 
 	ret_val = phy->ops.acquire(hw);
 	if (ret_val)
-- 
1.7.10.2

^ permalink raw reply related

* [net 2/3] ixgbe: fix_features rxvlan is independent of DCB and needs to be set
From: Jeff Kirsher @ 2012-06-06  4:08 UTC (permalink / raw)
  To: davem
  Cc: John Fastabend, netdev, gospo, sassmann, Alexander Duyck,
	Jeff Kirsher
In-Reply-To: <1338955735-8967-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

DCB can be used independent of if RX VLAN stripping is enabled
or disabled so remove erroneous check.

Also enable or disable VLAN stripping when features are applied so
hardware and feature flags are in sync.

CC: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Marcus Dennis <marcusx.e.dennis@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index bf20457..b8b2087 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3607,10 +3607,6 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		netif_set_gso_max_size(adapter->netdev, 32768);
 
-
-	/* Enable VLAN tag insert/strip */
-	adapter->netdev->features |= NETIF_F_HW_VLAN_RX;
-
 	hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true);
 
 #ifdef IXGBE_FCOE
@@ -6701,11 +6697,6 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
-#ifdef CONFIG_DCB
-	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
-		features &= ~NETIF_F_HW_VLAN_RX;
-#endif
-
 	/* return error if RXHASH is being enabled when RSS is not supported */
 	if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED))
 		features &= ~NETIF_F_RXHASH;
@@ -6718,7 +6709,6 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
 	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE))
 		features &= ~NETIF_F_LRO;
 
-
 	return features;
 }
 
@@ -6766,6 +6756,11 @@ static int ixgbe_set_features(struct net_device *netdev,
 		need_reset = true;
 	}
 
+	if (features & NETIF_F_HW_VLAN_RX)
+		ixgbe_vlan_strip_enable(adapter);
+	else
+		ixgbe_vlan_strip_disable(adapter);
+
 	if (changed & NETIF_F_RXALL)
 		need_reset = true;
 
-- 
1.7.10.2

^ permalink raw reply related

* [net 3/3] ixgbe: IXGBE_RXD_STAT_VP set even with Rx stripping enabled
From: Jeff Kirsher @ 2012-06-06  4:08 UTC (permalink / raw)
  To: davem; +Cc: John Fastabend, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1338955735-8967-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: John Fastabend <john.r.fastabend@intel.com>

The hardware bit IXGBE_RXD_STAT_VP appears to be set even when Rx
stripping is disabled. This results in passing frames up the stack
which do not have the 802.1Q tag stripped but have the tci bits
set as if it was.

Working around this with a check for the feature flag bit. I
would welcome any better ideas or a pointer to exactly which
bits in the hardware register need to be cleared to get the
IXGBE_RXD_STAT_VP bit to be set per data sheet.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Marcus Dennis <marcusx.e.dennis@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b8b2087..17ad6a3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1390,6 +1390,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
 				     union ixgbe_adv_rx_desc *rx_desc,
 				     struct sk_buff *skb)
 {
+	struct net_device *dev = rx_ring->netdev;
+
 	ixgbe_update_rsc_stats(rx_ring, skb);
 
 	ixgbe_rx_hash(rx_ring, rx_desc, skb);
@@ -1401,14 +1403,15 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
 		ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, skb);
 #endif
 
-	if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
+	if ((dev->features & NETIF_F_HW_VLAN_RX) &&
+	    ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
 		u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
 		__vlan_hwaccel_put_tag(skb, vid);
 	}
 
 	skb_record_rx_queue(skb, rx_ring->queue_index);
 
-	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+	skb->protocol = eth_type_trans(skb, dev);
 }
 
 static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
-- 
1.7.10.2

^ permalink raw reply related

* Re: tg3: transmit timed out, resetting
From: Eric Dumazet @ 2012-06-06  4:12 UTC (permalink / raw)
  To: ethan zhao; +Cc: Matt Carlson, Christian Kujau, LKML, netdev
In-Reply-To: <CABawtvNVKkxAgrQqJS7FJGoswQ9ZVyJcHcNwbSJQk4yscTn8gw@mail.gmail.com>

On Wed, 2012-06-06 at 10:29 +0800, ethan zhao wrote:
> So no way to fix it via firmware update or Linux driver ? :<

Yes, but you need to cooperate, or else it might take more time than
necessary.

Asking questions like that on lkml is not going to help very much.

So, once again, we kindly ask you try a recent kernel and post
register dump and some additional information when transmit timeouts
happen.

The 'latest kernel' is either linux-3.5.rc1, or one of David Miller's
trees:

http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git;a=summary

or

http://git.kernel.org/?p=linux/kernel/git/davem/net.git;a=summary

Thanks

^ permalink raw reply

* Re: skb_gso_segment oops
From: Eric Dumazet @ 2012-06-06  4:41 UTC (permalink / raw)
  To: kendo; +Cc: netdev
In-Reply-To: <4FCEC21F.257F91.32344@m12-16.163.com>

On Wed, 2012-06-06 at 10:36 +0800, kendo wrote:
> Hello!
> 
> I use Intel 82574L network, open the GSO/SG/RX Checksum/txchecksum
> support, but the system may occasionally occur caused by
> skb_gso_segment Oops, this was where do I configure error, kernel Bug
> is this? (In fact, using the igb also met once)

New kernels have a debugging facility to better diagnose what is going
on. (gso_size, gso_type)

Please upgrade your kernel.

^ permalink raw reply

* Re: tg3: transmit timed out, resetting
From: ethan zhao @ 2012-06-06  4:52 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Matt Carlson, Christian Kujau, LKML, netdev
In-Reply-To: <1338955977.2760.3603.camel@edumazet-glaptop>

Eric,
  That was a request for confirmation from Matt Carlson of Broadcom.

Ethan

On Wed, Jun 6, 2012 at 12:12 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Wed, 2012-06-06 at 10:29 +0800, ethan zhao wrote:
>> So no way to fix it via firmware update or Linux driver ? :<
>
> Yes, but you need to cooperate, or else it might take more time than
> necessary.
>
> Asking questions like that on lkml is not going to help very much.
>
> So, once again, we kindly ask you try a recent kernel and post
> register dump and some additional information when transmit timeouts
> happen.
>
> The 'latest kernel' is either linux-3.5.rc1, or one of David Miller
> tree :
>
> http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git;a=summary
>
> or
>
> http://git.kernel.org/?p=linux/kernel/git/davem/net.git;a=summary
>
> Thanks
>
>

^ permalink raw reply

* Re: [net-next RFC PATCH] virtio_net: collect satistics and export through ethtool
From: Jason Wang @ 2012-06-06  5:02 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization
In-Reply-To: <20120605101037.GA19834@redhat.com>

On 06/05/2012 06:10 PM, Michael S. Tsirkin wrote:
> On Tue, Jun 05, 2012 at 04:38:41PM +0800, Jason Wang wrote:
>> Satistics counters is useful for debugging and performance optimization, so this
>> patch lets virtio_net driver collect following and export them to userspace
>> through "ethtool -S":
>>
>> - number of packets sent/received
>> - number of bytes sent/received
>> - number of callbacks for tx/rx
>> - number of kick for tx/rx
>> - number of bytes/packets queued for tx
>>
>> As virtnet_stats were per-cpu, so both per-cpu and gloabl satistics were exposed
>> like:
>>
>> NIC statistics:
>>       tx_bytes[0]: 2551
>>       tx_packets[0]: 12
>>       tx_kick[0]: 12
>>       tx_callbacks[0]: 1
>>       tx_queued_packets[0]: 12
>>       tx_queued_bytes[0]: 3055
>>       rx_bytes[0]: 0
>>       rx_packets[0]: 0
>>       rx_kick[0]: 0
>>       rx_callbacks[0]: 0
>>       tx_bytes[1]: 5575
>>       tx_packets[1]: 37
>>       tx_kick[1]: 38
>>       tx_callbacks[1]: 0
>>       tx_queued_packets[1]: 38
>>       tx_queued_bytes[1]: 5217
>>       rx_bytes[1]: 4175
>>       rx_packets[1]: 25
>>       rx_kick[1]: 1
>>       rx_callbacks[1]: 16
>>       tx_bytes: 8126
>>       tx_packets: 49
>>       tx_kick: 50
>>       tx_callbacks: 1
>>       tx_queued_packets: 50
>>       tx_queued_bytes: 8272
>>       rx_bytes: 4175
>>       rx_packets: 25
>>       rx_kick: 1
>>       rx_callbacks: 16
>>
>> TODO:
>>
>> - more satistics
>> - unitfy the ndo_get_stats64 and get_ethtool_stats
>> - calculate the pending bytes/pkts
>>
>> Signed-off-by: Jason Wang<jasowang@redhat.com>
>> ---
>>   drivers/net/virtio_net.c |  130 +++++++++++++++++++++++++++++++++++++++++++++-
>>   1 files changed, 127 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 5214b1e..7ab0cc1 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -41,6 +41,10 @@ module_param(gso, bool, 0444);
>>   #define VIRTNET_SEND_COMMAND_SG_MAX    2
>>   #define VIRTNET_DRIVER_VERSION "1.0.0"
>>
>> +#define VIRTNET_STAT_OFF(m) offsetof(struct virtnet_stats, m)
>> +#define VIRTNET_STAT(stat, i) (*((u64 *)((char *)stat + \
> What's going on? Why cast to char *?

It's used to let the pointer advance in units of bytes instead of the
whole stat structure.
>> +			       virtnet_stats_str_attr[i].stat_offset)))
> These are confusing unless you see what virtnet_stats_str_attr
> is so please move them near that definition.

ok.
>> +
>>   struct virtnet_stats {
>>   	struct u64_stats_sync syncp;
>>   	u64 tx_bytes;
>> @@ -48,8 +52,33 @@ struct virtnet_stats {
>>
>>   	u64 rx_bytes;
>>   	u64 rx_packets;
>> +
>> +	u64 tx_kick;
>> +	u64 rx_kick;
>> +	u64 tx_callbacks;
>> +	u64 rx_callbacks;
>> +	u64 tx_queued_packets;
>> +	u64 tx_queued_bytes;
>> +};
> I have an idea (not a must): why don't we simply create an enum
> enum virtnet_stats {
> 	VIRTNET_TX_KICK,
> 	VIRTNET_RX_KICK,
> 	....
> 	VIRTNET_MAX_STAT,
> }
>
>
> now stats can just do
> 	stats->data[VIRTNET_RX_KICK] instead of stats->rx_kick
> which is not a big problem, but copying them in bulk
> becomes straight-forward, no need for macros at all.
>
> If we decide to do this, needs to be a separate patch,
> then this one on top.

Makes sense, will do this.
>> +
>> +static struct {
> static const.
>
>> +	char string[ETH_GSTRING_LEN];
>> +	int stat_offset;
>> +} virtnet_stats_str_attr[] = {
>> +	{ "tx_bytes", VIRTNET_STAT_OFF(tx_bytes)},
>> +	{ "tx_packets", VIRTNET_STAT_OFF(tx_packets)},
>> +	{ "tx_kick", VIRTNET_STAT_OFF(tx_kick)},
>> +	{ "tx_callbacks", VIRTNET_STAT_OFF(tx_callbacks)},
>> +	{ "tx_queued_packets", VIRTNET_STAT_OFF(tx_queued_packets)},
>> +	{ "tx_queued_bytes", VIRTNET_STAT_OFF(tx_queued_bytes)},
>> +	{ "rx_bytes" , VIRTNET_STAT_OFF(rx_bytes)},
>> +	{ "rx_packets", VIRTNET_STAT_OFF(rx_packets)},
>> +	{ "rx_kick", VIRTNET_STAT_OFF(rx_kick)},
>> +	{ "rx_callbacks", VIRTNET_STAT_OFF(rx_callbacks)},
> VIRTNET_STAT_OFF does not save much here, but if you are after
> saving characters then make the macro instanciate the string
> as well.
>
>>   };
>>
>> +#define VIRTNET_NUM_STATS ARRAY_SIZE(virtnet_stats_str_attr)
>> +
> if you pass virtnet_stats_str_attr to VIRTNET_STAT macro,
> then it's explicit and VIRTNET_NUM_STATS won't be needed either.

It's used to report the number of statistics through .get_sset_count.
>
>>   struct virtnet_info {
>>   	struct virtio_device *vdev;
>>   	struct virtqueue *rvq, *svq, *cvq;
>> @@ -142,6 +171,11 @@ static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
>>   static void skb_xmit_done(struct virtqueue *svq)
>>   {
>>   	struct virtnet_info *vi = svq->vdev->priv;
>> +	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
>> +
>> +	u64_stats_update_begin(&stats->syncp);
>> +	stats->tx_callbacks++;
>> +	u64_stats_update_end(&stats->syncp);
>>
>>   	/* Suppress further interrupts. */
>>   	virtqueue_disable_cb(svq);
>> @@ -461,6 +495,7 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
>>   {
>>   	int err;
>>   	bool oom;
>> +	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
>>
>>   	do {
>>   		if (vi->mergeable_rx_bufs)
>> @@ -477,13 +512,24 @@ static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
>>   	} while (err>  0);
>>   	if (unlikely(vi->num>  vi->max))
>>   		vi->max = vi->num;
>> -	virtqueue_kick(vi->rvq);
>> +	if (virtqueue_kick_prepare(vi->rvq)) {
>> +		virtqueue_notify(vi->rvq);
>> +		u64_stats_update_begin(&stats->syncp);
>> +		stats->rx_kick++;
>> +		u64_stats_update_end(&stats->syncp);
>> +	}
>>   	return !oom;
>>   }
>>
>>   static void skb_recv_done(struct virtqueue *rvq)
>>   {
>>   	struct virtnet_info *vi = rvq->vdev->priv;
>> +	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
>> +
>> +	u64_stats_update_begin(&stats->syncp);
>> +	stats->rx_callbacks++;
>> +	u64_stats_update_end(&stats->syncp);
>> +
>>   	/* Schedule NAPI, Suppress further interrupts if successful. */
>>   	if (napi_schedule_prep(&vi->napi)) {
>>   		virtqueue_disable_cb(rvq);
>> @@ -626,7 +672,9 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
>>   static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>>   	struct virtnet_info *vi = netdev_priv(dev);
>> +	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
>>   	int capacity;
>> +	bool kick;
>>
>>   	/* Free up any pending old buffers before queueing new ones. */
>>   	free_old_xmit_skbs(vi);
>> @@ -651,7 +699,17 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>>   		kfree_skb(skb);
>>   		return NETDEV_TX_OK;
>>   	}
>> -	virtqueue_kick(vi->svq);
>> +
>> +	kick = virtqueue_kick_prepare(vi->svq);
>> +	if (kick)
> probably
> 	 if (unlikely(kick))
>
>> +		virtqueue_notify(vi->svq);
>> +
>> +	u64_stats_update_begin(&stats->syncp);
>> +	if (kick)
> this too
>
>> +		stats->tx_kick++;
>> +	stats->tx_queued_bytes += skb->len;
>> +	stats->tx_queued_packets++;
>> +	u64_stats_update_end(&stats->syncp);
>>
>>   	/* Don't wait up for transmitted skbs to be freed. */
>>   	skb_orphan(skb);
>> @@ -926,7 +984,6 @@ static void virtnet_get_ringparam(struct net_device *dev,
>>
>>   }
>>
>> -
>>   static void virtnet_get_drvinfo(struct net_device *dev,
>>   				struct ethtool_drvinfo *info)
>>   {
>> @@ -939,10 +996,77 @@ static void virtnet_get_drvinfo(struct net_device *dev,
>>
>>   }
>>
>> +static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
>> +{
>> +	int i, cpu;
>> +	switch (stringset) {
>> +	case ETH_SS_STATS:
>> +		for_each_possible_cpu(cpu)
>> +			for (i = 0; i<  VIRTNET_NUM_STATS; i++) {
>> +				sprintf(buf, "%s[%u]",
>> +					virtnet_stats_str_attr[i].string, cpu);
>> +				buf += ETH_GSTRING_LEN;
>> +			}
>> +		for (i = 0; i<  VIRTNET_NUM_STATS; i++) {
>> +			memcpy(buf, virtnet_stats_str_attr[i].string,
>> +				ETH_GSTRING_LEN);
>> +			buf += ETH_GSTRING_LEN;
>> +		}
>> +		break;
>> +	}
>> +}
>> +
>> +static int virtnet_get_sset_count(struct net_device *dev, int sset)
>> +{
>> +	switch (sset) {
>> +	case ETH_SS_STATS:
>> +		return VIRTNET_NUM_STATS * (num_online_cpus() + 1);
> This will allocate buffers for online cpus only, but the above
> will fill them in for all possible cpus.
> Will this overrun some buffer?
>

Yes, a typo here, should be num_possible_cpus().
Thanks
>> +	default:
>> +		return -EOPNOTSUPP;
>> +	}
>> +}
>> +
>> +static void virtnet_get_ethtool_stats(struct net_device *dev,
>> +				struct ethtool_stats *stats, u64 *buf)
> The coding style says
> 	Descendants are always substantially shorter than the parent and
> 	are placed substantially to the right.
>
> you can't call it substantially to the right if it's to the left of
> the opening '('  :), so please indent it aligning on the opening.

Looks like something is wrong in my emacs c-style configuration, will
check it.
>> +{
>> +	struct virtnet_info *vi = netdev_priv(dev);
>> +	int cpu, i;
>> +	unsigned int start;
>> +	struct virtnet_stats sample, total;
>> +
>> +	memset(&total, 0, sizeof(total));
>> +	memset(&sample, 0, sizeof(sample));
>> +
>> +	for_each_possible_cpu(cpu) {
>> +		struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
>> +		do {
>> +			start = u64_stats_fetch_begin(&stats->syncp);
>> +			for (i = 0; i<  VIRTNET_NUM_STATS; i++) {
>> +				VIRTNET_STAT(&sample, i) =
>> +					VIRTNET_STAT(stats, i);
> when you feel the need to break lines like this - don't :)
> use an inline function instead.
>
>> +
> kill empty line here
>> +			}
> don't put {} around single statements pls.

Sure
>> +		} while (u64_stats_fetch_retry(&stats->syncp, start));
>> +		for (i = 0; i<  VIRTNET_NUM_STATS; i++) {
>> +			*buf = VIRTNET_STAT(&sample, i);
>> +			VIRTNET_STAT(&total, i) += VIRTNET_STAT(stats, i);
>> +			buf++;
>> +		}
>> +	}
>> +
>> +	for (i = 0; i<  VIRTNET_NUM_STATS; i++) {
>> +		*buf = VIRTNET_STAT(&total, i);
>> +		buf++;
>> +	}
>> +}
>> +
>>   static const struct ethtool_ops virtnet_ethtool_ops = {
>>   	.get_drvinfo = virtnet_get_drvinfo,
>>   	.get_link = ethtool_op_get_link,
>>   	.get_ringparam = virtnet_get_ringparam,
>> +	.get_ethtool_stats = virtnet_get_ethtool_stats,
>> +	.get_strings = virtnet_get_strings,
>> +	.get_sset_count = virtnet_get_sset_count,
>>   };
>>
>>   #define MIN_MTU 68
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: Strange latency spikes/TX network stalls on Sun Fire X4150(x86) and e1000e
From: Eric Dumazet @ 2012-06-06  5:10 UTC (permalink / raw)
  To: Hiroaki SHIMODA
  Cc: Tom Herbert, Denys Fedoryshchenko, netdev, e1000-devel,
	jeffrey.t.kirsher, jesse.brandeburg, davem
In-Reply-To: <20120529232518.e5b41759.shimoda.hiroaki@gmail.com>

On Tue, 2012-05-29 at 23:25 +0900, Hiroaki SHIMODA wrote:

> If I understand the code and spec correctly, TX interrupts are
> generated when TXDCTL.WTHRESH descriptors have been accumulated
> and write backed.
> 
> I tentatively changed the TXDCTL.WTHRESH to 1, then it seems
> that latency spikes are disappear.
> 
> drivers/net/ethernet/intel/e1000e/e1000.h
> @@ -181,7 +181,7 @@ struct e1000_info;
>  #define E1000_TXDCTL_DMA_BURST_ENABLE                          \
>         (E1000_TXDCTL_GRAN | /* set descriptor granularity */  \
>          E1000_TXDCTL_COUNT_DESC |                             \
> -        (5 << 16) | /* wthresh must be +1 more than desired */\
> +        (1 << 16) | /* wthresh must be +1 more than desired */\
>          (1 << 8)  | /* hthresh */                             \
>          0x1f)       /* pthresh */
> 


Was this patch officially submitted ?

Thanks !

^ permalink raw reply

* Re: [PATCH] netdev: mv643xx_eth: Prevent build on PPC32
From: Andrew Lunn @ 2012-06-06  5:29 UTC (permalink / raw)
  To: Josh Boyer
  Cc: Ben Hutchings, Lennert Buytenhek, Andrew Lunn, Olof Johansson,
	netdev
In-Reply-To: <20120606023842.GD7683@zod.bos.redhat.com>

> The proper fix, from my minimal looking, was one of:
> 
> 1) revert the change for ARM that introduced the clk stuff
> 2) do a similar change as the original commit but with a bunch of
> #ifdef-ery
> 3) implement the clkdev API stuff for 32-bit ppc
> 
> Honestly, I'd go for either 1 or 2.  The commit that introduced it was
> broken to begin with, but that isn't my call.

I broke it. Sorry.

At the time, there was a push to remove all the #ifdefs. The following
patchset was doing this:

https://lkml.org/lkml/2012/4/21/94

It would provide dummy implementations for those systems without clk
support. However, it seems that patch set never made it in, and I did
not declare my dependency on it.

I'm happy to add #ifdef. However, I would first like to understand
what was 'broken to begin with'.

     Thanks
	Andrew

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox