Netdev List
 help / color / mirror / Atom feed
* [PATCH] net: IP_NODEFRAG option for IPv4 socket
From: Jiri Olsa @ 2010-06-15  6:53 UTC (permalink / raw)
  To: Jan Engelhardt; +Cc: Patrick McHardy, netdev, Netfilter Developer Mailing List
In-Reply-To: <20100611131038.GB1739@jolsa.Belkin>

hi,

I prepared the patch implementing IP_NODEFRAG option for IPv4 socket.
The reason is, there's no other way to send out the packet with user
customized header of the reassembly part.

wbr,
jirka


Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
diff --git a/include/linux/in.h b/include/linux/in.h
index 583c76f..41d88a4 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -85,6 +85,7 @@ struct in_addr {
 #define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
 
 #define IP_MINTTL       21
+#define IP_NODEFRAG     22
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1653de5..1989cfd 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -137,7 +137,8 @@ struct inet_sock {
 				hdrincl:1,
 				mc_loop:1,
 				transparent:1,
-				mc_all:1;
+				mc_all:1,
+				nodefrag:1;
 	int			mc_index;
 	__be32			mc_addr;
 	struct ip_mc_socklist	*mc_list;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 551ce56..84d2c8e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -355,6 +355,8 @@ lookup_protocol:
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
+	inet->nodefrag = 0;
+
 	if (SOCK_RAW == sock->type) {
 		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce23178..d8196e1 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -449,7 +449,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
 			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
-			     (1<<IP_MINTTL))) ||
+			     (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_ALL ||
 	    optname == IP_MULTICAST_LOOP ||
@@ -572,6 +572,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		}
 		inet->hdrincl = val ? 1 : 0;
 		break;
+	case IP_NODEFRAG:
+		if (sk->sk_type != SOCK_RAW) {
+			err = -ENOPROTOOPT;
+			break;
+		}
+		inet->nodefrag = val ? 1 : 0;
+		break;
 	case IP_MTU_DISCOVER:
 		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
 			goto e_inval;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index cb763ae..eab8de3 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
+	struct inet_sock *inet = inet_sk(skb->sk);
+
+	if (inet && inet->nodefrag)
+		return NF_ACCEPT;
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 	/* Previously seen (loopback)?  Ignore.  Do this before

^ permalink raw reply related

* Re: [PATCH] net: IP_NODEFRAG option for IPv4 socket
From: Eric Dumazet @ 2010-06-15  7:13 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Jan Engelhardt, Patrick McHardy, netdev,
	Netfilter Developer Mailing List
In-Reply-To: <20100615065335.GA8840@jolsa.Belkin>

Le mardi 15 juin 2010 à 08:53 +0200, Jiri Olsa a écrit :
> hi,
> 
> I prepared the patch implementing IP_NODEFRAG option for IPv4 socket.
> The reason is, there's no other way to send out the packet with user
> customized header of the reassembly part.
> 

Obviously, you need to update documentation and man pages as well.

MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
M:	Michael Kerrisk <mtk.manpages@gmail.com>
W:	http://www.kernel.org/doc/man-pages
L:	linux-man@vger.kernel.org
S:	Maintained


> wbr,
> jirka
> 
> 
> Signed-off-by: Jiri Olsa <jolsa@redhat.com>
> ---
> diff --git a/include/linux/in.h b/include/linux/in.h
> index 583c76f..41d88a4 100644
> --- a/include/linux/in.h
> +++ b/include/linux/in.h
> @@ -85,6 +85,7 @@ struct in_addr {
>  #define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
>  
>  #define IP_MINTTL       21
> +#define IP_NODEFRAG     22
>  
>  /* IP_MTU_DISCOVER values */
>  #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 1653de5..1989cfd 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -137,7 +137,8 @@ struct inet_sock {
>  				hdrincl:1,
>  				mc_loop:1,
>  				transparent:1,
> -				mc_all:1;
> +				mc_all:1,
> +				nodefrag:1;
>  	int			mc_index;
>  	__be32			mc_addr;
>  	struct ip_mc_socklist	*mc_list;
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 551ce56..84d2c8e 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -355,6 +355,8 @@ lookup_protocol:
>  	inet = inet_sk(sk);
>  	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
>  
> +	inet->nodefrag = 0;
> +

Hmm... what about cloning ?

>  	if (SOCK_RAW == sock->type) {
>  		inet->inet_num = protocol;
>  		if (IPPROTO_RAW == protocol)
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index ce23178..d8196e1 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -449,7 +449,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
>  			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
>  			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
>  			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
> -			     (1<<IP_MINTTL))) ||
> +			     (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
>  	    optname == IP_MULTICAST_TTL ||
>  	    optname == IP_MULTICAST_ALL ||
>  	    optname == IP_MULTICAST_LOOP ||
> @@ -572,6 +572,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
>  		}
>  		inet->hdrincl = val ? 1 : 0;
>  		break;
> +	case IP_NODEFRAG:
> +		if (sk->sk_type != SOCK_RAW) {
> +			err = -ENOPROTOOPT;
> +			break;
> +		}
> +		inet->nodefrag = val ? 1 : 0;
> +		break;
>  	case IP_MTU_DISCOVER:
>  		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
>  			goto e_inval;
> diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
> index cb763ae..eab8de3 100644
> --- a/net/ipv4/netfilter/nf_defrag_ipv4.c
> +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
> @@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
>  					  const struct net_device *out,
>  					  int (*okfn)(struct sk_buff *))
>  {
> +	struct inet_sock *inet = inet_sk(skb->sk);
> +
> +	if (inet && inet->nodefrag)
> +		return NF_ACCEPT;
> +
>  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
>  #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
>  	/* Previously seen (loopback)?  Ignore.  Do this before
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* ftp on udp and tcp .
From: ratheesh k @ 2010-06-15  7:20 UTC (permalink / raw)
  To: netdev, linux-net

Hi ,

       What is the real advantage of UDP over TCP, or of TCP over UDP?  To
put the question a different way: if I run an FTP application over UDP
and over TCP, what is the real difference / advantage?

Thanks,
Ratheesh

^ permalink raw reply

* Re: [PATCH net-next-2.6] loopback: Implement 64bit stats on 32bit arches
From: Eric Dumazet @ 2010-06-15  7:23 UTC (permalink / raw)
  To: Nick Piggin; +Cc: David Miller, netdev, bhutchings
In-Reply-To: <20100615064923.GF6138@laptop>

Le mardi 15 juin 2010 à 16:49 +1000, Nick Piggin a écrit :
> On Mon, Jun 14, 2010 at 11:14:12PM -0700, David Miller wrote:
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Mon, 14 Jun 2010 17:59:22 +0200
> > 
> > > Uses a seqcount_t to synchronize stat producer and consumer, for packets
> > > and bytes counter, now u64 types.
> > > 
> > > (dropped counter being rarely used, stay a native "unsigned long" type)
> > > 
> > > No noticeable performance impact on x86, as it only adds two increments
> > > per frame. It might be more expensive on arches where smp_wmb() is not
> > > free.
> > > 
> > > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> > 
> > Applied, but I suspect we might end up eventually needing to
> > abstract this kind of technique in a common place so other
> > spots can use it.
> 

David, I was planning adding similar stuff for SNMP, xt_RATEEST and txq
accounting, so yes, I'll probably move helpers to a common include file.

This first patch was a first shot to lay the groundwork.

> Check i_size stuff in include/linux/fs.h if you consider doing this.

Yes, I was aware of this stuff.

> And keep preempt in mind too. I assume you can't be preempted at this
> point, but if you're prone to change the locking, it might be worth
> the (small) cost of doing explicit preempt_disable() (and maybe to
> help the sanity of the -rt guys too).
> 

Yes, we cannot be preempted at this point, as ndo_start_xmit() handlers
are called with BH disabled (there is a comment for this assertion in
front of loopback_xmit())

dev_queue_xmit() 
  rcu_read_lock_bh();
  ...
  ndo_start_xmit();

I'll take care of preempt status for following patches, but I suspect
it's more a BH enable/disable question in the network stack anyway.

Thanks !



^ permalink raw reply

* Re: [PATCH 1/8] scm: Reorder scm_cookie.
From: Pavel Emelyanov @ 2010-06-15  8:00 UTC (permalink / raw)
  To: Eric W. Biederman, David Miller
  Cc: Serge Hallyn, Linux Containers, Daniel Lezcano, netdev
In-Reply-To: <m1d3vvhxlj.fsf@fess.ebiederm.org>

On 06/13/2010 05:27 PM, Eric W. Biederman wrote:
> 
> Reorder the fields in scm_cookie so they pack better on 64bit.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

Acked-by: Pavel Emelyanov <xemul@openvz.org>

^ permalink raw reply

* Re: [PATCH 2/8] user_ns: Introduce user_nsmap_uid and user_ns_map_gid.
From: Pavel Emelyanov @ 2010-06-15  8:02 UTC (permalink / raw)
  To: Eric W. Biederman, David Miller
  Cc: Linux Containers, Serge Hallyn, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <m17hm3hxjw.fsf_-_-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>

On 06/13/2010 05:28 PM, Eric W. Biederman wrote:
> 
> Define what happens when we view a uid from one user_namespace
> in another user_namespace.
> 
> - If the user namespaces are the same no mapping is necessary.
> 
> - For most cases of difference use overflowuid and overflowgid,
>   the uid and gid currently used for 16bit apis when we have a 32bit uid
>   that does not fit in 16bits.  Effectively the situation is the same,
>   we want to return a uid or gid that is not assigned to any user.
> 
> - For the case when we happen to be mapping the uid or gid of the
>   creator of the target user namespace use uid 0 and gid 0 as confusing
>   that user with root is not a problem.
> 
> Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

I suppose this one should go via Andrew, not Dave.

Anyway, Acked-by: Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>

^ permalink raw reply

* Re: [PATCH 3/8] sock: Introduce cred_to_ucred
From: Pavel Emelyanov @ 2010-06-15  8:03 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Serge Hallyn, Linux Containers, Daniel Lezcano,
	netdev
In-Reply-To: <m139wrhxic.fsf@fess.ebiederm.org>

On 06/13/2010 05:28 PM, Eric W. Biederman wrote:
> 
> To keep the coming code clear and to allow both the sock
> code and the scm code to share the logic introduce a
> function to translate from struct cred to struct ucred.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

Acked-by: Pavel Emelyanov <xemul@openvz.org>

^ permalink raw reply

* Re: [PATCH 4/8] af_unix: Allow SO_PEERCRED to work across namespaces.
From: Pavel Emelyanov @ 2010-06-15  8:04 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Serge Hallyn, Linux Containers, Daniel Lezcano,
	netdev
In-Reply-To: <m1r5kbgivt.fsf@fess.ebiederm.org>

On 06/13/2010 05:30 PM, Eric W. Biederman wrote:
> 
> Use struct pid and struct cred to store the peer credentials on struct
> sock.  This gives enough information to convert the peer credential
> information to a value relative to whatever namespace the socket is in
> at the time.
> 
> This removes nasty surprises when using SO_PEERCRED on socket
> connections where the processes on either side are in different pid and
> user namespaces.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

Acked-by: Pavel Emelyanov <xemul@openvz.org>

^ permalink raw reply

* Re: [PATCH 5/8] af_netlink: Add needed scm_destroy after scm_send.
From: Pavel Emelyanov @ 2010-06-15  8:06 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Serge Hallyn, Linux Containers, Daniel Lezcano,
	netdev
In-Reply-To: <m1ljajgiud.fsf@fess.ebiederm.org>

On 06/13/2010 05:31 PM, Eric W. Biederman wrote:
> 
> scm_send occasionally allocates state in the scm_cookie, so I have
> modified netlink_sendmsg to guarantee that when scm_send succeeds
> scm_destroy will be called to free that state.
> 
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>

Acked-by: Pavel Emelyanov <xemul@openvz.org>

^ permalink raw reply

* Re: [PATCH 6/8] scm: Capture the full credentials of the scm sender.
From: Pavel Emelyanov @ 2010-06-15  8:08 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: David Miller, Serge Hallyn, Linux Containers, Daniel Lezcano,
	netdev
In-Reply-To: <m1d3vvgirx.fsf@fess.ebiederm.org>

> +static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
> +{
> +	put_pid(scm->pid);
> +	scm->pid  = NULL;
> +
> +	if (scm->cred)
> +		put_cred(scm->cred);
> +	scm->cred = NULL;
> +}
> +
>  static __inline__ void scm_destroy(struct scm_cookie *scm)
>  {
> +	scm_destroy_cred(scm);
>  	if (scm && scm->fp)
>  		__scm_destroy(scm);
>  }

I'm a bit worried by the "if (scm" check. It makes me think scm can
be NULL here and thus scm_destroy_cred can oops.

^ permalink raw reply

* Re: [PATCH 7/8] af_unix: Allow credentials to work across user and pid namespaces.
From: Pavel Emelyanov @ 2010-06-15  8:11 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Serge Hallyn, netdev-u79uwXL29TY76Z2rM5mHXA,
	David Miller
In-Reply-To: <m17hm3giom.fsf-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>

On 06/13/2010 05:34 PM, Eric W. Biederman wrote:
> 
> In unix_skb_parms store pointers to struct pid and struct cred instead
> of raw uid, gid, and pid values, then translate the credentials on
> reception into values that are meaningful in the receiving processes
> namespaces.
> 
> Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>

^ permalink raw reply

* Re: [PATCH 8/8] af_unix: Allow connecting to sockets in other network namespaces.
From: Pavel Emelyanov @ 2010-06-15  8:12 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Linux Containers, Serge Hallyn, netdev-u79uwXL29TY76Z2rM5mHXA,
	David Miller
In-Reply-To: <m11vcbgimj.fsf-+imSwln9KH6u2/kzUuoCbdi2O/JbrIOy@public.gmane.org>

> [snip]
> 
> Signed-off-by: Eric W. Biederman <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>

Acked-by: Pavel Emelyanov <xemul-GEFAQzZX7r8dnm+yROfE0A@public.gmane.org>

^ permalink raw reply

* Re: [v2 Patch 1/2] s2io: add dynamic LRO disable support
From: Cong Wang @ 2010-06-15  8:26 UTC (permalink / raw)
  To: Ramkrishna Vepa
  Cc: Ben Hutchings, netdev@vger.kernel.org, nhorman@redhat.com,
	sgruszka@redhat.com, herbert.xu@redhat.com, davem@davemloft.net
In-Reply-To: <FCA91A92EE52B041906A0358FC28FCC38EF13E71B8@FRE1EXCH02.hq.exar.com>

On 06/10/10 02:52, Ramkrishna Vepa wrote:
>> On Wed, 2010-06-09 at 06:05 -0400, Amerigo Wang wrote:
>> [...]
>>> +static int s2io_ethtool_set_flags(struct net_device *dev, u32 data)
>>> +{
>>> +   struct s2io_nic *sp = netdev_priv(dev);
>>> +   int rc = 0;
>>> +   int changed = 0;
>>> +
>>> +   if (data&  ETH_FLAG_LRO) {
>>> +           if (lro_enable) {
>>> +                   if (!(dev->features&  NETIF_F_LRO)) {
>>> +                           dev->features |= NETIF_F_LRO;
>>> +                           changed = 1;
>>> +                   }
>>> +           } else
>>> +                   rc = -EINVAL;
>>> +   } else if (dev->features&  NETIF_F_LRO) {
>>> +           dev->features&= ~NETIF_F_LRO;
>>> +           changed = 1;
>>> +   }
>>> +
>>> +   if (changed&&  netif_running(dev)) {
>>> +           s2io_stop_all_tx_queue(sp);
>>> +           s2io_card_down(sp);
>>> +           sp->lro = dev->features&  NETIF_F_LRO;
>>> +           rc = s2io_card_up(sp);
>>> +           s2io_start_all_tx_queue(sp);
>> [...]
>>
>> Is it safe to call s2io_start_all_tx_queue() if s2io_card_up() failed?
> Ben,
> Good point. If s2io_card_up() fails the chip will not be accessed, so it's safe but all transmit skbs will be freed without the user knowing the reason for failing to transmit or receive for that matter.  The other option is to return with a failure and get the watchdog timer reset the adapter.
>

(Sorry for the delay, I was on vacation.)

So it seems the latter option is better?

Thanks.

^ permalink raw reply

* Re: [v2 Patch 2/2] mlx4: add dynamic LRO disable support
From: Cong Wang @ 2010-06-15  8:35 UTC (permalink / raw)
  To: Stanislaw Gruszka
  Cc: David Miller, netdev, nhorman, herbert.xu, bhutchings,
	Ramkrishna.Vepa
In-Reply-To: <20100609110556.GC2599@dhcp-lab-161.englab.brq.redhat.com>

On 06/09/10 19:05, Stanislaw Gruszka wrote:
> On Wed, Jun 09, 2010 at 06:05:34AM -0400, Amerigo Wang wrote:
>> diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c
>> index d5afd03..2c77805 100644
>> --- a/drivers/net/mlx4/en_ethtool.c
>> +++ b/drivers/net/mlx4/en_ethtool.c
>> @@ -387,6 +387,37 @@ static void mlx4_en_get_ringparam(struct net_device *dev,
>>   	param->tx_pending = mdev->profile.prof[priv->port].tx_ring_size;
>>   }
>>
>> +static int mlx4_ethtool_op_set_flags(struct net_device *dev, u32 data)
>> +{
>> +	struct mlx4_en_priv *priv = netdev_priv(dev);
>> +	struct mlx4_en_dev *mdev = priv->mdev;
>> +	int rc = 0;
>> +	int changed = 0;
>> +
>> +	if (data&  ETH_FLAG_LRO) {
>> +		if (!(dev->features&  NETIF_F_LRO)) {
>> +			dev->features |= NETIF_F_LRO;
>> +			changed = 1;
>> +			mdev->profile.num_lro = min_t(int, num_lro , MLX4_EN_MAX_LRO_DESCRIPTORS);
>
> I do not understand why you override mdev->profile.num_lro in v2 patch.
> If in Rx patch NETIF_F_LRO flag is used we do not need this IMHO.

Oh, I thought you meant this in our previous discussion. :-/
I will drop this line.

>
>> +		}
>> +	} else if (dev->features&  NETIF_F_LRO) {
>> +		dev->features&= ~NETIF_F_LRO;
>> +		changed = 1;
>> +		mdev->profile.num_lro = 0;
>> +	}
> [snip]
>>   const struct ethtool_ops mlx4_en_ethtool_ops = {
>>   	.get_drvinfo = mlx4_en_get_drvinfo,
>>   	.get_settings = mlx4_en_get_settings,
>> @@ -415,7 +446,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
>>   	.get_ringparam = mlx4_en_get_ringparam,
>>   	.set_ringparam = mlx4_en_set_ringparam,
>>   	.get_flags = ethtool_op_get_flags,
>> -	.set_flags = ethtool_op_set_flags,
>> +	.set_flags = mlx4_ethtool_op_set_flags,
>>   };
>
> Since we modify .set_flags, please ensure we return -EOPNOTSUPP
> if someone tries to set ETH_FLAG_NTUPLE or ETH_FLAG_RXHASH.


Yeah, good point!

>
> BTW: it seems the default ethtool_op_set_flags introduces a bug on many
> devices regarding ETH_FLAG_RXHASH. I think default should
> be EOPNOTSUPP, and these few devices that actually support RXHASH
> should have custom ethtool_ops->set_flags

Hmm, you mean this?

  if (data & ETH_FLAG_RXHASH)
+    if (!ops->set_flags)
+        return -EOPNOTSUPP;
....


^ permalink raw reply

* [PATCH 2/2] Driver core: reduce duplicated code
From: Uwe Kleine-König @ 2010-06-15  8:47 UTC (permalink / raw)
  To: linux-kernel
  Cc: Greg Kroah-Hartman, Magnus Damm, Rafael J. Wysocki, Paul Mundt,
	Dmitry Torokhov, Uwe Kleine-König, Eric Miao, netdev
In-Reply-To: <1276591677-4678-1-git-send-email-u.kleine-koenig@pengutronix.de>

This makes the two similar functions platform_device_register_simple
and platform_device_register_data one line inline functions using a new
generic function platform_device_register_resndata.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 drivers/base/platform.c         |  102 ++++++++++-----------------------------
 include/linux/platform_device.h |   62 ++++++++++++++++++++++--
 2 files changed, 83 insertions(+), 81 deletions(-)

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 26eb69d..23a5853 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -344,108 +344,56 @@ void platform_device_unregister(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(platform_device_unregister);
 
 /**
- * platform_device_register_simple - add a platform-level device and its resources
- * @name: base name of the device we're adding
- * @id: instance id
- * @res: set of resources that needs to be allocated for the device
- * @num: number of resources
- *
- * This function creates a simple platform device that requires minimal
- * resource and memory management. Canned release function freeing memory
- * allocated for the device allows drivers using such devices to be
- * unloaded without waiting for the last reference to the device to be
- * dropped.
+ * platform_device_register_resndata - add a platform-level device with
+ * resources and platform-specific data
  *
- * This interface is primarily intended for use with legacy drivers which
- * probe hardware directly.  Because such drivers create sysfs device nodes
- * themselves, rather than letting system infrastructure handle such device
- * enumeration tasks, they don't fully conform to the Linux driver model.
- * In particular, when such drivers are built as modules, they can't be
- * "hotplugged".
- *
- * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
- */
-struct platform_device *platform_device_register_simple(const char *name,
-							int id,
-							const struct resource *res,
-							unsigned int num)
-{
-	struct platform_device *pdev;
-	int retval;
-
-	pdev = platform_device_alloc(name, id);
-	if (!pdev) {
-		retval = -ENOMEM;
-		goto error;
-	}
-
-	if (num) {
-		retval = platform_device_add_resources(pdev, res, num);
-		if (retval)
-			goto error;
-	}
-
-	retval = platform_device_add(pdev);
-	if (retval)
-		goto error;
-
-	return pdev;
-
-error:
-	platform_device_put(pdev);
-	return ERR_PTR(retval);
-}
-EXPORT_SYMBOL_GPL(platform_device_register_simple);
-
-/**
- * platform_device_register_data - add a platform-level device with platform-specific data
  * @parent: parent device for the device we're adding
  * @name: base name of the device we're adding
  * @id: instance id
+ * @res: set of resources that needs to be allocated for the device
+ * @num: number of resources
  * @data: platform specific data for this platform device
  * @size: size of platform specific data
  *
- * This function creates a simple platform device that requires minimal
- * resource and memory management. Canned release function freeing memory
- * allocated for the device allows drivers using such devices to be
- * unloaded without waiting for the last reference to the device to be
- * dropped.
- *
  * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
  */
-struct platform_device *platform_device_register_data(
+struct platform_device *platform_device_register_resndata(
 		struct device *parent,
 		const char *name, int id,
+		const struct resource *res, unsigned int num,
 		const void *data, size_t size)
 {
+	int ret = -ENOMEM;
 	struct platform_device *pdev;
-	int retval;
 
 	pdev = platform_device_alloc(name, id);
-	if (!pdev) {
-		retval = -ENOMEM;
-		goto error;
-	}
+	if (!pdev)
+		goto err;
 
 	pdev->dev.parent = parent;
 
+	if (num) {
+		ret = platform_device_add_resources(pdev, res, num);
+		if (ret)
+			goto err;
+	}
+
 	if (size) {
-		retval = platform_device_add_data(pdev, data, size);
-		if (retval)
-			goto error;
+		ret = platform_device_add_data(pdev, data, size);
+		if (ret)
+			goto err;
 	}
 
-	retval = platform_device_add(pdev);
-	if (retval)
-		goto error;
+	ret = platform_device_add(pdev);
+	if (ret) {
+err:
+		platform_device_put(pdev);
+		return ERR_PTR(ret);
+	}
 
 	return pdev;
-
-error:
-	platform_device_put(pdev);
-	return ERR_PTR(retval);
 }
-EXPORT_SYMBOL_GPL(platform_device_register_data);
+EXPORT_SYMBOL_GPL(platform_device_register_resndata);
 
 static int platform_drv_probe(struct device *_dev)
 {
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 5417944..d7ecad0 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -43,10 +43,64 @@ extern struct resource *platform_get_resource_byname(struct platform_device *, u
 extern int platform_get_irq_byname(struct platform_device *, const char *);
 extern int platform_add_devices(struct platform_device **, int);
 
-extern struct platform_device *platform_device_register_simple(const char *, int id,
-					const struct resource *, unsigned int);
-extern struct platform_device *platform_device_register_data(struct device *,
-		const char *, int, const void *, size_t);
+extern struct platform_device *platform_device_register_resndata(
+		struct device *parent, const char *name, int id,
+		const struct resource *res, unsigned int num,
+		const void *data, size_t size);
+
+/**
+ * platform_device_register_simple - add a platform-level device and its resources
+ * @name: base name of the device we're adding
+ * @id: instance id
+ * @res: set of resources that needs to be allocated for the device
+ * @num: number of resources
+ *
+ * This function creates a simple platform device that requires minimal
+ * resource and memory management. Canned release function freeing memory
+ * allocated for the device allows drivers using such devices to be
+ * unloaded without waiting for the last reference to the device to be
+ * dropped.
+ *
+ * This interface is primarily intended for use with legacy drivers which
+ * probe hardware directly.  Because such drivers create sysfs device nodes
+ * themselves, rather than letting system infrastructure handle such device
+ * enumeration tasks, they don't fully conform to the Linux driver model.
+ * In particular, when such drivers are built as modules, they can't be
+ * "hotplugged".
+ *
+ * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
+ */
+static inline struct platform_device *platform_device_register_simple(
+		const char *name, int id,
+		const struct resource *res, unsigned int num)
+{
+	return platform_device_register_resndata(NULL, name, id,
+			res, num, NULL, 0);
+}
+
+/**
+ * platform_device_register_data - add a platform-level device with platform-specific data
+ * @parent: parent device for the device we're adding
+ * @name: base name of the device we're adding
+ * @id: instance id
+ * @data: platform specific data for this platform device
+ * @size: size of platform specific data
+ *
+ * This function creates a simple platform device that requires minimal
+ * resource and memory management. Canned release function freeing memory
+ * allocated for the device allows drivers using such devices to be
+ * unloaded without waiting for the last reference to the device to be
+ * dropped.
+ *
+ * Returns &struct platform_device pointer on success, or ERR_PTR() on error.
+ */
+static inline struct platform_device *platform_device_register_data(
+		struct device *parent, const char *name, int id,
+		const void *data, size_t size)
+{
+	return platform_device_register_resndata(parent, name, id,
+			NULL, 0, data, size);
+}
 
 extern struct platform_device *platform_device_alloc(const char *name, int id);
 extern int platform_device_add_resources(struct platform_device *pdev,
-- 
1.7.1

^ permalink raw reply related

* Re: [Patch 2/2] mlx4: add dynamic LRO disable support
From: Cong Wang @ 2010-06-15  8:53 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: Ben Hutchings, netdev, herbert.xu, nhorman, davem
In-Reply-To: <20100609104950.GB2599@dhcp-lab-161.englab.brq.redhat.com>

On 06/09/10 18:49, Stanislaw Gruszka wrote:
> Hi Amerigo
>
> Sorry for being silent in this thread before.
>
> On Wed, Jun 09, 2010 at 05:23:35PM +0800, Cong Wang wrote:
>>>>> Is that flag test actually unsafe - and if so, how is testing num_lro
>>>>> any better?  Perhaps access to net_device::features should be wrapped
>>>>> with ACCESS_ONCE() to ensure that reads and writes are atomic.
>>>>>
>>>>
>>>> At least, I don't find there is any race with 'num_lro', thus
>>>> no lock is needed.
>>>
>>> In both cases there is a race condition but it is harmless so long as
>>> the read and the write are atomic.  There is a general assumption in
>>> networking code that this is the case for int and long.  Personally I
>>> would prefer to see this made explicit using ACCESS_ONCE(), but I don't
>>> see any specific problem in mlx4 (not that I'm familiar with this driver
>>> either).
>>
>> I read this email again.
>>
>> I think you misunderstood the race condition here. Even if read and write
>> are atomic here, the race still exists. One can just set NETIF_F_LRO
>> asynchronously right before mlx4 checks this flag in mlx4_en_process_rx_cq()
>> which doesn't take rtnl_lock.
>
> If so, it's better to stop the device before modifying LRO settings. I suggest
> something like that in mlx4_ethtool_op_set_flags:
>
> if (!!(data&  ETH_FLAG_LRO) != !!(dev->features&  NETIF_F_LRO)) {


What does this line mean? This is to ignore all other flags, right?

> 	/* Need to toggle LRO */
>
> 	if (netdev_running(dev)) {
>                 mutex_lock(&mdev->state_lock);
>                 mlx4_en_stop_port(dev);
>                 rc = mlx4_en_start_port(dev);
>                 if (rc)
>                         en_err(priv, "Failed to restart port\n");
> 	}
>
> 	dev->features ^= NETIF_F_LRO;
>
> 	if (netdev_running(dev))
>                 mutex_unlock(&mdev->state_lock);
> }
>

I don't think mdev->state_lock is used to protect dev->features.
rtnl_lock is. I think switching to mlx4_ethtool_op_set_flags()
from the default one has already solved this.

Thanks!

^ permalink raw reply

* Re: [PATCH 2/2] Driver core: reduce duplicated code
From: Uwe Kleine-König @ 2010-06-15  9:05 UTC (permalink / raw)
  To: linux-kernel
  Cc: Greg Kroah-Hartman, Magnus Damm, Rafael J. Wysocki, Paul Mundt,
	Dmitry Torokhov, Eric Miao, netdev
In-Reply-To: <1276591677-4678-2-git-send-email-u.kleine-koenig@pengutronix.de>

Hello,

On Tue, Jun 15, 2010 at 10:47:56AM +0200, Uwe Kleine-König wrote:
> This makes the two similar functions platform_device_register_simple
> and platform_device_register_data one line inline functions using a new
> generic function platform_device_register_resndata.
I forgot to add some comments to this mail, ... sorry.

 - I'm not completely happy with the name of the new function.  If
   someone has a better name please tell me.
 - can platform_device_register_resndata be moved to __init_or_module?
 - I moved the kernel docs to the header but didn't test if they are
   picked up when generating docs.  Even if not, there is no better
   place, is there?

Best regards
Uwe

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |

^ permalink raw reply

* Re: [PATCH] net: IP_NODEFRAG option for IPv4 socket
From: Jiri Olsa @ 2010-06-15  9:18 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jan Engelhardt, Patrick McHardy, netdev,
	Netfilter Developer Mailing List
In-Reply-To: <1276586029.2541.12.camel@edumazet-laptop>

On Tue, Jun 15, 2010 at 09:13:49AM +0200, Eric Dumazet wrote:
> Le mardi 15 juin 2010 à 08:53 +0200, Jiri Olsa a écrit :
> > hi,
> > 
> > I prepared the patch implementing IP_NODEFRAG option for IPv4 socket.
> > The reason is, there's no other way to send out the packet with user
> > customized header of the reassembly part.
> > 
> 
> Obviously, you need to update documentation and man pages as well.
> 
> MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
> M:	Michael Kerrisk <mtk.manpages@gmail.com>
> W:	http://www.kernel.org/doc/man-pages
> L:	linux-man@vger.kernel.org
> S:	Maintained
hi,

I updated the man page, and will send it in the new post.

As for the in-tree documentation, do you mean any specific doc?
I haven't found any part related to the setsockopt options.

> 
> 
> > wbr,
> > jirka
> > 
> > 
> > Signed-off-by: Jiri Olsa <jolsa@redhat.com>
> > ---
> > diff --git a/include/linux/in.h b/include/linux/in.h
> > index 583c76f..41d88a4 100644
> > --- a/include/linux/in.h
> > +++ b/include/linux/in.h
> > @@ -85,6 +85,7 @@ struct in_addr {
> >  #define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
> >  
> >  #define IP_MINTTL       21
> > +#define IP_NODEFRAG     22
> >  
> >  /* IP_MTU_DISCOVER values */
> >  #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
> > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> > index 1653de5..1989cfd 100644
> > --- a/include/net/inet_sock.h
> > +++ b/include/net/inet_sock.h
> > @@ -137,7 +137,8 @@ struct inet_sock {
> >  				hdrincl:1,
> >  				mc_loop:1,
> >  				transparent:1,
> > -				mc_all:1;
> > +				mc_all:1,
> > +				nodefrag:1;
> >  	int			mc_index;
> >  	__be32			mc_addr;
> >  	struct ip_mc_socklist	*mc_list;
> > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> > index 551ce56..84d2c8e 100644
> > --- a/net/ipv4/af_inet.c
> > +++ b/net/ipv4/af_inet.c
> > @@ -355,6 +355,8 @@ lookup_protocol:
> >  	inet = inet_sk(sk);
> >  	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
> >  
> > +	inet->nodefrag = 0;
> > +
> 
> Hmm... what about cloning ?

I think that, as this is a property of the socket (not the skb),
it has no effect on cloning.


thanks,
jirka

> 
> >  	if (SOCK_RAW == sock->type) {
> >  		inet->inet_num = protocol;
> >  		if (IPPROTO_RAW == protocol)
> > diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> > index ce23178..d8196e1 100644
> > --- a/net/ipv4/ip_sockglue.c
> > +++ b/net/ipv4/ip_sockglue.c
> > @@ -449,7 +449,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
> >  			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
> >  			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
> >  			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
> > -			     (1<<IP_MINTTL))) ||
> > +			     (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
> >  	    optname == IP_MULTICAST_TTL ||
> >  	    optname == IP_MULTICAST_ALL ||
> >  	    optname == IP_MULTICAST_LOOP ||
> > @@ -572,6 +572,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
> >  		}
> >  		inet->hdrincl = val ? 1 : 0;
> >  		break;
> > +	case IP_NODEFRAG:
> > +		if (sk->sk_type != SOCK_RAW) {
> > +			err = -ENOPROTOOPT;
> > +			break;
> > +		}
> > +		inet->nodefrag = val ? 1 : 0;
> > +		break;
> >  	case IP_MTU_DISCOVER:
> >  		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
> >  			goto e_inval;
> > diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
> > index cb763ae..eab8de3 100644
> > --- a/net/ipv4/netfilter/nf_defrag_ipv4.c
> > +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
> > @@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
> >  					  const struct net_device *out,
> >  					  int (*okfn)(struct sk_buff *))
> >  {
> > +	struct inet_sock *inet = inet_sk(skb->sk);
> > +
> > +	if (inet && inet->nodefrag)
> > +		return NF_ACCEPT;
> > +
> >  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> >  #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
> >  	/* Previously seen (loopback)?  Ignore.  Do this before
> > --
> > To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH v5] netfilter: Xtables: idletimer target implementation
From: Luciano Coelho @ 2010-06-15  9:21 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, jengelh, Timo Teras

This patch implements an idletimer Xtables target that can be used to
identify when interfaces have been idle for a certain period of time.

Timers are identified by labels and are created when a rule is set with a new
label.  The rules also take a timeout value (in seconds) as an option.  If
more than one rule uses the same timer label, the timer will be restarted
whenever any of the rules get a hit.

One entry for each timer is created in sysfs.  This attribute contains the
time remaining for the timer to expire.  The attributes are located under
the xt_idletimer class:

/sys/class/xt_idletimer/timers/<label>

When the timer expires, the target module sends a sysfs notification to the
userspace, which can then decide what to do (eg. disconnect to save power).

Cc: Timo Teras <timo.teras@iki.fi>
Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
---
v2: Fixed according to Jan's comments
v3: Changed to a device class in the virtual bus in sysfs
    Removed unnecessary attribute group
    Fixed missing deallocation in some error cases
v4: Fixed according to Jan's and Patrick's comments to v3
    Changed to mutex locking instead of spin locks
    Save the timer in the target info struct to avoid extra reads
    Other small clean-ups
v5: Changed max label length to 28 to avoid waste in 64-bit archs
    Changed unnecessary atomic memory allocation
    Removed mutex locks from the target function

 include/linux/netfilter/xt_IDLETIMER.h |   45 +++++
 net/netfilter/Kconfig                  |   12 ++
 net/netfilter/Makefile                 |    1 +
 net/netfilter/xt_IDLETIMER.c           |  318 ++++++++++++++++++++++++++++++++
 4 files changed, 376 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_IDLETIMER.h
 create mode 100644 net/netfilter/xt_IDLETIMER.c

diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h
new file mode 100644
index 0000000..3e1aa1b
--- /dev/null
+++ b/include/linux/netfilter/xt_IDLETIMER.h
@@ -0,0 +1,45 @@
+/*
+ * linux/include/linux/netfilter/xt_IDLETIMER.h
+ *
+ * Header file for Xtables timer target module.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and forward-ported to 2.6.34
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _XT_IDLETIMER_H
+#define _XT_IDLETIMER_H
+
+#include <linux/types.h>
+
+#define MAX_IDLETIMER_LABEL_SIZE 28
+
+struct idletimer_tg_info {
+	__u32 timeout;
+
+	char label[MAX_IDLETIMER_LABEL_SIZE];
+
+	/* for kernel module internal use only */
+	struct idletimer_tg *timer __attribute((aligned(8)));
+};
+
+#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8593a77..413ed24 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -424,6 +424,18 @@ config NETFILTER_XT_TARGET_HL
 	since you can easily create immortal packets that loop
 	forever on the network.
 
+config NETFILTER_XT_TARGET_IDLETIMER
+	tristate  "IDLETIMER target support"
+	depends on NETFILTER_ADVANCED
+	help
+
+	  This option adds the `IDLETIMER' target.  Each matching packet
+	  resets the timer associated with label specified when the rule is
+	  added.  When the timer expires, it triggers a sysfs notification.
+	  The remaining time for expiration can be read via sysfs.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_LED
 	tristate '"LED" target support'
 	depends on LEDS_CLASS && LEDS_TRIGGERS
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 14e3a8f..e28420a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
new file mode 100644
index 0000000..de67154
--- /dev/null
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -0,0 +1,318 @@
+/*
+ * linux/net/netfilter/xt_IDLETIMER.c
+ *
+ * Netfilter module to trigger a timer when packet matches.
+ * After timer expires a kevent will be sent.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and reworked for upstream inclusion
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_IDLETIMER.h>
+#include <linux/kobject.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+
+struct idletimer_tg_attr {
+	struct attribute attr;
+	ssize_t	(*show)(struct kobject *kobj,
+			struct attribute *attr, char *buf);
+};
+
+struct idletimer_tg {
+	struct list_head entry;
+	struct timer_list timer;
+	struct work_struct work;
+
+	struct kobject *kobj;
+	struct idletimer_tg_attr attr;
+
+	unsigned int refcnt;
+};
+
+static LIST_HEAD(idletimer_tg_list);
+static DEFINE_MUTEX(list_mutex);
+
+static struct kobject *idletimer_tg_kobj;
+
+static
+struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
+{
+	struct idletimer_tg *entry;
+
+	BUG_ON(!label);
+
+	list_for_each_entry(entry, &idletimer_tg_list, entry) {
+		if (!strcmp(label, entry->attr.attr.name))
+			return entry;
+	}
+
+	return NULL;
+}
+
+static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
+				 char *buf)
+{
+	struct idletimer_tg *timer;
+	unsigned long expires = 0;
+
+	mutex_lock(&list_mutex);
+
+	timer =	__idletimer_tg_find_by_label(attr->name);
+	if (timer)
+		expires = timer->timer.expires;
+
+	mutex_unlock(&list_mutex);
+
+	if (time_after(expires, jiffies))
+		return sprintf(buf, "%u\n",
+			       jiffies_to_msecs(expires - jiffies) / 1000);
+
+	return sprintf(buf, "0\n");
+}
+
+static void idletimer_tg_work(struct work_struct *work)
+{
+	struct idletimer_tg *timer = container_of(work, struct idletimer_tg,
+						  work);
+
+	sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+}
+
+static void idletimer_tg_expired(unsigned long data)
+{
+	struct idletimer_tg *timer = (struct idletimer_tg *) data;
+
+	pr_debug("timer %s expired\n", timer->attr.attr.name);
+
+	schedule_work(&timer->work);
+}
+
+static int idletimer_tg_create(struct idletimer_tg_info *info)
+{
+	int ret;
+
+	info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+	if (!info->timer) {
+		pr_debug("couldn't alloc timer\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
+	if (!info->timer->attr.attr.name) {
+		pr_debug("couldn't alloc attribute name\n");
+		ret = -ENOMEM;
+		goto out_free_timer;
+	}
+	info->timer->attr.attr.mode = S_IRUGO;
+	info->timer->attr.show = idletimer_tg_show;
+
+	ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
+	if (ret < 0) {
+		pr_debug("couldn't add file to sysfs");
+		goto out_free_attr;
+	}
+
+	list_add(&info->timer->entry, &idletimer_tg_list);
+
+	setup_timer(&info->timer->timer, idletimer_tg_expired,
+		    (unsigned long) info->timer);
+	info->timer->refcnt = 1;
+
+	mod_timer(&info->timer->timer,
+		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+	INIT_WORK(&info->timer->work, idletimer_tg_work);
+
+	return 0;
+
+out_free_attr:
+	kfree(info->timer->attr.attr.name);
+out_free_timer:
+	kfree(info->timer);
+out:
+	return ret;
+}
+
+/*
+ * The actual xt_tables plugin.
+ */
+static unsigned int idletimer_tg_target(struct sk_buff *skb,
+					 const struct xt_action_param *par)
+{
+	const struct idletimer_tg_info *info = par->targinfo;
+
+	pr_debug("resetting timer %s, timeout period %u\n",
+		 info->label, info->timeout);
+
+	BUG_ON(!info->timer);
+
+	mod_timer(&info->timer->timer,
+		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+	return XT_CONTINUE;
+}
+
+static int idletimer_tg_checkentry(struct xt_tgchk_param *par)
+{
+	struct idletimer_tg_info *info = par->targinfo;
+	int ret;
+
+	pr_debug("checkentry targinfo%s\n", info->label);
+
+	if (info->timeout == 0) {
+		pr_debug("timeout value is zero\n");
+		return -EINVAL;
+	}
+
+	if (info->label[0] == '\0' ||
+	    strnlen(info->label,
+		    MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
+		pr_debug("label is empty or not nul-terminated\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&list_mutex);
+
+	info->timer = __idletimer_tg_find_by_label(info->label);
+	if (info->timer) {
+		info->timer->refcnt++;
+		mod_timer(&info->timer->timer,
+			  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+		pr_debug("increased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	} else {
+		ret = idletimer_tg_create(info);
+		if (ret < 0) {
+			pr_debug("failed to create timer\n");
+			mutex_unlock(&list_mutex);
+			return ret;
+		}
+	}
+
+	mutex_unlock(&list_mutex);
+	return 0;
+}
+
+static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct idletimer_tg_info *info = par->targinfo;
+
+	pr_debug("destroy targinfo %s\n", info->label);
+
+	mutex_lock(&list_mutex);
+	if (!info->timer) {
+		mutex_unlock(&list_mutex);
+		return;
+	}
+
+	if (--info->timer->refcnt == 0) {
+		pr_debug("deleting timer %s\n", info->label);
+
+		list_del(&info->timer->entry);
+		del_timer_sync(&info->timer->timer);
+		sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+		kfree(info->timer->attr.attr.name);
+		kfree(info->timer);
+	} else {
+		pr_debug("decreased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	}
+
+	mutex_unlock(&list_mutex);
+}
+
+static struct xt_target idletimer_tg __read_mostly = {
+	.name		= "IDLETIMER",
+	.family		= NFPROTO_UNSPEC,
+	.target		= idletimer_tg_target,
+	.targetsize     = sizeof(struct idletimer_tg_info),
+	.checkentry	= idletimer_tg_checkentry,
+	.destroy        = idletimer_tg_destroy,
+	.me		= THIS_MODULE,
+};
+
+static struct class *idletimer_tg_class;
+
+static struct device *idletimer_tg_device;
+
+static int __init idletimer_tg_init(void)
+{
+	int err;
+
+	idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer");
+	err = PTR_ERR(idletimer_tg_class);
+	if (IS_ERR(idletimer_tg_class)) {
+		pr_debug("couldn't register device class\n");
+		goto out;
+	}
+
+	idletimer_tg_device = device_create(idletimer_tg_class, NULL,
+					    MKDEV(0, 0), NULL, "timers");
+	err = PTR_ERR(idletimer_tg_device);
+	if (IS_ERR(idletimer_tg_device)) {
+		pr_debug("couldn't register system device\n");
+		goto out_class;
+	}
+
+	idletimer_tg_kobj = &idletimer_tg_device->kobj;
+
+	err =  xt_register_target(&idletimer_tg);
+	if (err < 0) {
+		pr_debug("couldn't register xt target\n");
+		goto out_dev;
+	}
+
+	return 0;
+out_dev:
+	device_destroy(idletimer_tg_class, MKDEV(0, 0));
+out_class:
+	class_destroy(idletimer_tg_class);
+out:
+	return err;
+}
+
+static void __exit idletimer_tg_exit(void)
+{
+	xt_unregister_target(&idletimer_tg);
+
+	device_destroy(idletimer_tg_class, MKDEV(0, 0));
+	class_destroy(idletimer_tg_class);
+}
+
+module_init(idletimer_tg_init);
+module_exit(idletimer_tg_exit);
+
+MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
+MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
+MODULE_DESCRIPTION("Xtables: idle time monitor");
+MODULE_LICENSE("GPL v2");
-- 
1.6.3.3


^ permalink raw reply related

* Re: [Patch 2/2] mlx4: add dynamic LRO disable support
From: Stanislaw Gruszka @ 2010-06-15  9:39 UTC (permalink / raw)
  To: Cong Wang; +Cc: Ben Hutchings, netdev, herbert.xu, nhorman, davem
In-Reply-To: <4C173F87.7000704@redhat.com>

On Tue, 15 Jun 2010 16:53:27 +0800
Cong Wang <amwang@redhat.com> wrote:

> > If so, it's better to stop device before modify LRO settings. I suggest
> > something like that in mlx4_ethtool_op_set_flags:
> >
> > if (!!(data&  ETH_FLAG_LRO) != !!(dev->features&  NETIF_F_LRO)) {
>
> What does this line mean? This is to ignore all other flags, right?

Yes, plus check if we are really changing current settings.
 
> > 	/* Need to toggle LRO */
> >
> > 	if (netdev_running(dev)) {
> >                 mutex_lock(&mdev->state_lock);
> >                 mlx4_en_stop_port(dev);
> >                 rc = mlx4_en_start_port(dev);
> >                 if (rc)
> >                         en_err(priv, "Failed to restart port\n");
> > 	}
> >
> > 	dev->features ^= NETIF_F_LRO;
> >
> > 	if (netdev_running(dev))
> >                 mutex_unlock(&mdev->state_lock);
> > }
> >
> 
> I don't think mdev->state_lock is used to protect dev->feature.
> rtnl_lock is. I think switching to mlx4_ethtool_op_set_flags()
> from the default one has already solved this.

Ahh, you are right, my intention was to use it to stop and start the
port. The code should rather look like below:

	if (netdev_running(dev)) {
		mutex_lock(&mdev->state_lock);
		mlx4_en_stop_port(dev);
	}

	dev->features ^= NETIF_F_LRO;

	if (netdev_running(dev)) {
		rc = mlx4_en_start_port(dev);
		mutex_unlock(&mdev->state_lock);
 		if (rc)
		en_err(priv, "Failed to restart port\n");
	}

Stanislaw

^ permalink raw reply

* Re: [PATCH] net: IP_NODEFRAG option for IPv4 socket
From: Eric Dumazet @ 2010-06-15  9:49 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Jan Engelhardt, Patrick McHardy, netdev,
	Netfilter Developer Mailing List
In-Reply-To: <20100615091715.GB8840@jolsa.Belkin>

Le mardi 15 juin 2010 à 11:18 +0200, Jiri Olsa a écrit :

> > Hmm... what about cloning ?
> 
> I think as this is the property of the socket (not skb),
> it has no affect for cloning
> 

Sorry, I was thinking of sk_clone(). I sometimes forget that sock_copy()
copies all fields.

This is a non issue for raw sockets, if IP_NODEFRAG is limited to RAW
sockets now and in the future.





--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 6/8] scm: Capture the full credentials of the scm sender.
From: Eric W. Biederman @ 2010-06-15  9:53 UTC (permalink / raw)
  To: Pavel Emelyanov
  Cc: David Miller, Serge Hallyn, Linux Containers, Daniel Lezcano,
	netdev
In-Reply-To: <4C173510.4030007@openvz.org>

Pavel Emelyanov <xemul@openvz.org> writes:

>> +static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
>> +{
>> +	put_pid(scm->pid);
>> +	scm->pid  = NULL;
>> +
>> +	if (scm->cred)
>> +		put_cred(scm->cred);
>> +	scm->cred = NULL;
>> +}
>> +
>>  static __inline__ void scm_destroy(struct scm_cookie *scm)
>>  {
>> +	scm_destroy_cred(scm);
>>  	if (scm && scm->fp)
>>  		__scm_destroy(scm);
>>  }
>
> I'm a bit worried by the "if (scm" check. It makes me think scm can
> be NULL here and thus scm_destroy_cred can oops.

Interesting point.  I just looked at all of the callers and scm
appears to always be valid.

Eric


^ permalink raw reply

* Re: [0/8] netpoll/bridge fixes
From: Cong Wang @ 2010-06-15 10:17 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Stephen Hemminger, Michael S. Tsirkin, Qianfeng Zhang,
	David S. Miller, netdev, Matt Mackall
In-Reply-To: <20100611021142.GA24490@gondor.apana.org.au>

On 06/11/10 10:11, Herbert Xu wrote:
> On Fri, Jun 11, 2010 at 08:48:39AM +1000, Herbert Xu wrote:
>> On Thu, Jun 10, 2010 at 02:59:15PM -0700, Stephen Hemminger wrote:
>>>
>>> Okay, then add a comment where in_irq is used?
>>
>> Actually let me put it into a wrapper.  I'll respin the patches.
>
> OK here is a repost.  And this time it really is 8 patches :)
> I've tested it lightly.
>

(Sorry for the delay, I was on vacation.)

Thanks much for your nice work!

Patches 1-7 look great to me, so,

Reviewed-by: WANG Cong <amwang@redhat.com>

I don't quite understand patch 8, will reply to it separately.

Thank you.


^ permalink raw reply

* Re: [v2 Patch 2/2] mlx4: add dynamic LRO disable support
From: Stanislaw Gruszka @ 2010-06-15 10:14 UTC (permalink / raw)
  To: Cong Wang
  Cc: David Miller, netdev, nhorman, herbert.xu, bhutchings,
	Ramkrishna.Vepa
In-Reply-To: <4C173B57.70601@redhat.com>

On Tue, 15 Jun 2010 16:35:35 +0800
Cong Wang <amwang@redhat.com> wrote:
> > BTW: seems default ethtool_op_set_flags introduce a bug on many
> > devices regarding ETH_FLAG_RXHASH. I think default should
> > be EOPNOTSUPP, and these few devices that actually support RXHASH
> > should have custom ethtool_ops->set_flags
> 
> Hmm, you mean this?
> 
>   if (data & ETH_FLAG_RXHASH)
> +    if (!ops->set_flags)
> +        return -EOPNOTSUPP;
> ....

Not really, but I do not have a good idea of how a patch with the fix
should look.

I dislike the fact that we set ->features flags that are not really supported
by a particular device instead of returning EOPNOTSUPP. This actually includes
both flags, NETIF_F_LRO and NETIF_F_RXHASH.

Perhaps ethtool_op_set_flags should be removed and drivers should use
only a custom version. In particular, it seems e1000e and sfc use this
function improperly and should have NULL as .set_flags.

I will think more about that and maybe cook some patches.

Stanislaw

^ permalink raw reply

* [PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure
From: Eric Dumazet @ 2010-06-15 10:14 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, bhutchings, Nick Piggin
In-Reply-To: <20100614.231412.39191304.davem@davemloft.net>

Le lundi 14 juin 2010 à 23:14 -0700, David Miller a écrit :
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Mon, 14 Jun 2010 17:59:22 +0200
> 
> > Uses a seqcount_t to synchronize stat producer and consumer, for packets
> > and bytes counter, now u64 types.
> > 
> > (dropped counter being rarely used, stay a native "unsigned long" type)
> > 
> > No noticeable performance impact on x86, as it only adds two increments
> > per frame. It might be more expensive on arches where smp_wmb() is not
> > free.
> > 
> > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> Applied, but I suspect we might end up eventually needing to
> abstract this kind of technique in a common place so other
> spots can use it.

Here is the followup patch to abstract things a bit, before upcoming
conversions.

Thanks !

[PATCH net-next-2.6] net: Introduce u64_stats_sync infrastructure

To properly implement 64bits network statistics on 32bit or 64bit hosts,
we provide one new type and four methods, to ease conversions.

Stats producers should use the following template, provided they already
have exclusive access to the counters (a lock is already held, or per-cpu
data is used in a non-preemptible context):

Let me repeat : stats producers must be serialized by other means before
using this template. Preemption must be disabled too.

u64_stats_update_begin(&stats->syncp);
stats->bytes += len;
stats->packets++;
u64_stats_update_end(&stats->syncp);

A consumer should use the following template to get a consistent
snapshot:

u64 tbytes, tpackets;
unsigned int start;

do {
	start = u64_stats_fetch_begin(&stats->syncp);
	tbytes = stats->bytes;
	tpackets = stats->packets;
} while (u64_stats_fetch_retry(&stats->syncp, start));

This patch uses this infrastructure in the net loopback driver, instead of
the specific one added in commit 6b10de38f0ef (loopback: Implement 64bit
stats on 32bit arches)

Suggested by David Miller

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Nick Piggin <npiggin@suse.de>
---
 drivers/net/loopback.c    |   61 ++++++++----------------------------
 include/linux/netdevice.h |   50 +++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 09334f8..f20b156 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -60,51 +60,12 @@
 #include <net/net_namespace.h>
 
 struct pcpu_lstats {
-	u64 packets;
-	u64 bytes;
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-	seqcount_t seq;
-#endif
-	unsigned long drops;
+	u64			packets;
+	u64			bytes;
+	struct u64_stats_sync	syncp;
+	unsigned long		drops;
 };
 
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-static void inline lstats_update_begin(struct pcpu_lstats *lstats)
-{
-	write_seqcount_begin(&lstats->seq);
-}
-static void inline lstats_update_end(struct pcpu_lstats *lstats)
-{
-	write_seqcount_end(&lstats->seq);
-}
-static void inline lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
-{
-	u64 tpackets, tbytes;
-	unsigned int seq;
-
-	do {
-		seq = read_seqcount_begin(&lstats->seq);
-		tpackets = lstats->packets;
-		tbytes = lstats->bytes;
-	} while (read_seqcount_retry(&lstats->seq, seq));
-
-	*packets += tpackets;
-	*bytes += tbytes;
-}
-#else
-static void inline lstats_update_begin(struct pcpu_lstats *lstats)
-{
-}
-static void inline lstats_update_end(struct pcpu_lstats *lstats)
-{
-}
-static void inline lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
-{
-	*packets += lstats->packets;
-	*bytes += lstats->bytes;
-}
-#endif
-
 /*
  * The higher levels take care of making this non-reentrant (it's
  * called with bh's disabled).
@@ -126,10 +87,10 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 
 	len = skb->len;
 	if (likely(netif_rx(skb) == NET_RX_SUCCESS)) {
-		lstats_update_begin(lb_stats);
+		u64_stats_update_begin(&lb_stats->syncp);
 		lb_stats->bytes += len;
 		lb_stats->packets++;
-		lstats_update_end(lb_stats);
+		u64_stats_update_end(&lb_stats->syncp);
 	} else
 		lb_stats->drops++;
 
@@ -148,10 +109,18 @@ static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev)
 	pcpu_lstats = (void __percpu __force *)dev->ml_priv;
 	for_each_possible_cpu(i) {
 		const struct pcpu_lstats *lb_stats;
+		u64 tbytes, tpackets;
+		unsigned int start;
 
 		lb_stats = per_cpu_ptr(pcpu_lstats, i);
-		lstats_fetch_and_add(&packets, &bytes, lb_stats);
+		do {
+			start = u64_stats_fetch_begin(&lb_stats->syncp);
+			tbytes = lb_stats->bytes;
+			tpackets = lb_stats->packets;
+		} while (u64_stats_fetch_retry(&lb_stats->syncp, start));
 		drops   += lb_stats->drops;
+		bytes   += tbytes;
+		packets += tpackets;
 	}
 	stats->rx_packets = packets;
 	stats->tx_packets = packets;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4fbccc5..dd1d93d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -174,6 +174,56 @@ static inline bool dev_xmit_complete(int rc)
 #define NET_DEVICE_STATS_DEFINE(name)	unsigned long pad_ ## name, name
 #endif
 
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+struct u64_stats_sync {
+	seqcount_t	seq;
+};
+
+static void inline u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+	write_seqcount_begin(&syncp->seq);
+}
+
+static void inline u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+	write_seqcount_end(&syncp->seq);
+}
+
+static unsigned int inline u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return read_seqcount_begin(&syncp->seq);
+}
+
+static bool inline u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return read_seqcount_retry(&syncp->seq, start);
+}
+
+#else
+struct u64_stats_sync {
+};
+
+static void inline u64_stats_update_begin(struct u64_stats_sync *syncp)
+{
+}
+
+static void inline u64_stats_update_end(struct u64_stats_sync *syncp)
+{
+}
+
+static unsigned int inline u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+	return 0;
+}
+
+static bool inline u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+	return false;
+}
+#endif
+
 struct net_device_stats {
 	NET_DEVICE_STATS_DEFINE(rx_packets);
 	NET_DEVICE_STATS_DEFINE(tx_packets);



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox