Netdev List
 help / color / mirror / Atom feed
* Re: [RFC v2] tcp: Export TCP Delayed ACK parameters to user
From: Daniel Baluta @ 2011-10-28 21:35 UTC (permalink / raw)
  To: David Miller
  Cc: eric.dumazet, kuznet, jmorris, yoshfuji, kaber, netdev, luto,
	rick.jones2
In-Reply-To: <20111028.171904.1635229691857703124.davem@davemloft.net>

On Sat, Oct 29, 2011 at 12:19 AM, David Miller <davem@davemloft.net> wrote:
> From: Daniel Baluta <dbaluta@ixiacom.com>
> Date: Sat, 29 Oct 2011 00:14:03 +0300
>
>> +static inline int tcp_delack_thresh(const struct sock *sk)
>> +{
>> +     return inet_csk(sk)->icsk_ack.rcv_mss * sysctl_tcp_delack_segs;
>> +}
>> +
>
> Please turn this into a shift or something, you're adding a multiply
> into a core code path.

Is there any generic API to do this? Default case is not
affected since tcp_delack_segs is 1.

Daniel.

^ permalink raw reply

* Re: [RFC v2] tcp: Export TCP Delayed ACK parameters to user
From: David Miller @ 2011-10-28 21:19 UTC (permalink / raw)
  To: dbaluta
  Cc: eric.dumazet, kuznet, jmorris, yoshfuji, kaber, netdev, luto,
	rick.jones2
In-Reply-To: <1319836443-4419-1-git-send-email-dbaluta@ixiacom.com>

From: Daniel Baluta <dbaluta@ixiacom.com>
Date: Sat, 29 Oct 2011 00:14:03 +0300

> +static inline int tcp_delack_thresh(const struct sock *sk)
> +{
> +	return inet_csk(sk)->icsk_ack.rcv_mss * sysctl_tcp_delack_segs;
> +}
> +

Please turn this into a shift or something, you're adding a multiply
into a core code path.

^ permalink raw reply

* [RFC v2] tcp: Export TCP Delayed ACK parameters to user
From: Daniel Baluta @ 2011-10-28 21:14 UTC (permalink / raw)
  To: davem, eric.dumazet
  Cc: kuznet, jmorris, yoshfuji, kaber, netdev, luto, rick.jones2,
	Daniel Baluta
In-Reply-To: <1319756841-2051-1-git-send-email-dbaluta@ixiacom.com>

RFC2581 (section 4.2) specifies when an ACK should be generated as follows:

" .. an ACK SHOULD be generated for at least every second
  full-sized segment, and MUST be generated within 500 ms
  of the arrival of the first unacknowledged packet.
"

We export the number of segments and the timeout limits
specified above, so that a user can tune them according
to their needs.

Specifically:
	* /proc/sys/net/ipv4/tcp_delack_segs, represents
	the threshold for the number of segments.
	* /proc/sys/net/ipv4/tcp_delack_min, specifies
	the minimum timeout value
	* /proc/sys/net/ipv4/tcp_delack_max, specifies
	the maximum timeout value.

Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
---
Changes since v1:
	* added documentation for newly introduced /proc entries.
	* exported symbols sysctl_tcp_delack_{min|max}.
	* removed TCP_DELACK_{MIN|MAX} and used directly 
	sysctl_tcp_delack{min|max}.
	* renamed tcp_snd_thresh to tcp_delack_thresh.
	* added const qualifier to struct sock *sk.
---
 Documentation/networking/ip-sysctl.txt |   13 +++++++++++++
 include/net/tcp.h                      |   18 +++++++++++++++---
 net/dccp/output.c                      |    2 +-
 net/dccp/timer.c                       |    2 +-
 net/ipv4/sysctl_net_ipv4.c             |   21 +++++++++++++++++++++
 net/ipv4/tcp.c                         |    5 +++--
 net/ipv4/tcp_input.c                   |    8 +++++---
 net/ipv4/tcp_output.c                  |   13 +++++++++----
 net/ipv4/tcp_timer.c                   |    3 ++-
 9 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index cb7f314..efbd1b4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -524,6 +524,19 @@ tcp_thin_dupack - BOOLEAN
 	Documentation/networking/tcp-thin.txt
 	Default: 0
 
+tcp_delack_segs - INTEGER
+	Sets the strict minimal number of full-sized TCP segments
+	received after which an ACK should be sent.
+	Default: 1 (as specified in RFC2581, S4.2)
+
+tcp_delack_min - INTEGER
+	Sets the minimum time (in milliseconds) to delay before sending an ACK.
+	Default: 40ms
+
+tcp_delack_max - INTEGER
+	Sets the maximum time (in milliseconds) to delay before sending an ACK.
+	Default: 200ms
+
 UDP variables:
 
 udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e147f42..9e29a9d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -111,14 +111,18 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 				  * TIME-WAIT timer.
 				  */
 
-#define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
+/* default maximum time to delay before sending an ACK */
+#define TCP_DELACK_MAX_DEFAULT	((unsigned)(HZ/5))
+
 #if HZ >= 100
-#define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */
+/* default minimum time to delay before sending an ACK */
+#define TCP_DELACK_MIN_DEFAULT	((unsigned)(HZ/25))
 #define TCP_ATO_MIN	((unsigned)(HZ/25))
 #else
-#define TCP_DELACK_MIN	4U
+#define TCP_DELACK_MIN_DEFAULT	4U
 #define TCP_ATO_MIN	4U
 #endif
+
 #define TCP_RTO_MAX	((unsigned)(120*HZ))
 #define TCP_RTO_MIN	((unsigned)(HZ/5))
 #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC2988bis initial RTO value	*/
@@ -251,6 +255,9 @@ extern int sysctl_tcp_max_ssthresh;
 extern int sysctl_tcp_cookie_size;
 extern int sysctl_tcp_thin_linear_timeouts;
 extern int sysctl_tcp_thin_dupack;
+extern int sysctl_tcp_delack_segs;
+extern int sysctl_tcp_delack_min;
+extern int sysctl_tcp_delack_max;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -1558,6 +1565,11 @@ static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
 	return (struct tcp_extend_values *)rvp;
 }
 
+static inline int tcp_delack_thresh(const struct sock *sk)
+{
+	return inet_csk(sk)->icsk_ack.rcv_mss * sysctl_tcp_delack_segs;
+}
+
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index dede3ed..9b5b0c4 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -577,7 +577,7 @@ void dccp_send_ack(struct sock *sk)
 			inet_csk_schedule_ack(sk);
 			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  TCP_DELACK_MAX,
+						  sysctl_tcp_delack_max,
 						  DCCP_RTO_MAX);
 			return;
 		}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 7587870..7bae11e 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -202,7 +202,7 @@ static void dccp_delack_timer(unsigned long data)
 		icsk->icsk_ack.blocked = 1;
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		sk_reset_timer(sk, &icsk->icsk_delack_timer,
-			       jiffies + TCP_DELACK_MIN);
+			       jiffies + sysctl_tcp_delack_min);
 		goto out;
 	}
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 69fd720..c22c4c5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -639,6 +639,27 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler   = proc_dointvec
 	},
 	{
+		.procname	= "tcp_delack_segs",
+		.data		= &sysctl_tcp_delack_segs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
+		.procname	= "tcp_delack_min",
+		.data		= &sysctl_tcp_delack_min,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies
+	},
+	{
+		.procname	= "tcp_delack_max",
+		.data		= &sysctl_tcp_delack_max,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies
+	},
+	{
 		.procname	= "udp_mem",
 		.data		= &sysctl_udp_mem,
 		.maxlen		= sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1..731e284 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1204,8 +1204,9 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		   /* Delayed ACKs frequently hit locked sockets during bulk
 		    * receive. */
 		if (icsk->icsk_ack.blocked ||
-		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
-		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
+		    /* More than once-per-tcp_delack_segs-segments ACK
+		     * was not sent by tcp_input.c */
+		    tp->rcv_nxt - tp->rcv_wup > tcp_delack_thresh(sk) ||
 		    /*
 		     * If this read emptied read buffer, we send ACK, if
 		     * connection is not bidirectional, user drained
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 52b5c2d..f2893a9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,6 +98,8 @@ int sysctl_tcp_thin_dupack __read_mostly;
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_abc __read_mostly;
 
+int sysctl_tcp_delack_segs __read_mostly = 1;
+
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
 #define FLAG_DATA_ACKED		0x04 /* This ACK acknowledged new data.		*/
@@ -4993,8 +4995,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	    /* More than one full frame received... */
-	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
+	    /* More than tcp_delack_segs full frame(s) received... */
+	if (((tp->rcv_nxt - tp->rcv_wup) > tcp_delack_thresh(sk) &&
 	     /* ... and right edge of window advances far enough.
 	      * (tcp_recvmsg() will send ACK otherwise). Or...
 	      */
@@ -5689,7 +5691,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_incr_quickack(sk);
 			tcp_enter_quickack_mode(sk);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  TCP_DELACK_MAX, TCP_RTO_MAX);
+						  sysctl_tcp_delack_max, TCP_RTO_MAX);
 
 discard:
 			__kfree_skb(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 980b98f..f4e7614 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -63,6 +63,11 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
 EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
 
+int sysctl_tcp_delack_min __read_mostly = TCP_DELACK_MIN_DEFAULT;
+EXPORT_SYMBOL(sysctl_tcp_delack_min);
+
+int sysctl_tcp_delack_max __read_mostly = TCP_DELACK_MAX_DEFAULT;
+EXPORT_SYMBOL(sysctl_tcp_delack_max);
 
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
@@ -2670,13 +2675,13 @@ void tcp_send_delayed_ack(struct sock *sk)
 	int ato = icsk->icsk_ack.ato;
 	unsigned long timeout;
 
-	if (ato > TCP_DELACK_MIN) {
+	if (ato > sysctl_tcp_delack_min) {
 		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ / 2;
 
 		if (icsk->icsk_ack.pingpong ||
 		    (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
-			max_ato = TCP_DELACK_MAX;
+			max_ato = sysctl_tcp_delack_max;
 
 		/* Slow path, intersegment interval is "high". */
 
@@ -2685,7 +2690,7 @@ void tcp_send_delayed_ack(struct sock *sk)
 		 * directly.
 		 */
 		if (tp->srtt) {
-			int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
+			int rtt = max_t(unsigned, tp->srtt >> 3, sysctl_tcp_delack_min);
 
 			if (rtt < max_ato)
 				max_ato = rtt;
@@ -2734,7 +2739,7 @@ void tcp_send_ack(struct sock *sk)
 		inet_csk_schedule_ack(sk);
 		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-					  TCP_DELACK_MAX, TCP_RTO_MAX);
+					  sysctl_tcp_delack_max, TCP_RTO_MAX);
 		return;
 	}
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2e0f0af..1bdc1c4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -219,7 +219,8 @@ static void tcp_delack_timer(unsigned long data)
 		/* Try again later. */
 		icsk->icsk_ack.blocked = 1;
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
-		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
+		sk_reset_timer(sk, &icsk->icsk_delack_timer,
+			       jiffies + sysctl_tcp_delack_min);
 		goto out_unlock;
 	}
 
-- 
1.7.2.5

^ permalink raw reply related

* Re: [PATCH] i825xx: Fix incorrect dependency for BVME6000_NET
From: David Miller @ 2011-10-28 21:07 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: geert, netdev, linux-kernel
In-Reply-To: <1319835584.10258.159.camel@jtkirshe-mobl>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Fri, 28 Oct 2011 13:59:43 -0700

> On Fri, 2011-10-28 at 13:53 -0700, Geert Uytterhoeven wrote:
>> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
>> ---
>>  drivers/net/ethernet/i825xx/Kconfig |    2 +-
>>  1 files changed, 1 insertions(+), 1 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/i825xx/Kconfig b/drivers/net/ethernet/i825xx/Kconfig
>> index 2be4698..ca1ae98 100644
>> --- a/drivers/net/ethernet/i825xx/Kconfig
>> +++ b/drivers/net/ethernet/i825xx/Kconfig
>> @@ -85,7 +85,7 @@ config APRICOT
>>  
>>  config BVME6000_NET
>>  	tristate "BVME6000 Ethernet support"
>> -	depends on BVME6000MVME16x
>> +	depends on BVME6000
>>  	---help---
>>  	  This is the driver for the Ethernet interface on BVME4000 and
>>  	  BVME6000 VME boards.  Say Y here to include the driver for this chip
> 
> Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied, thanks everyone.

^ permalink raw reply

* Re: -next: NET_VENDOR_8390 dependencies
From: Jeff Kirsher @ 2011-10-28 21:00 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <CAMuHMdU3H63vx8436fdm_WBuPBwqNxHtPiAO9j6RhMn0VCAy2w@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]

On Fri, 2011-10-28 at 13:35 -0700, Geert Uytterhoeven wrote:
> On Sun, Oct 23, 2011 at 23:21, Geert Uytterhoeven
> <geert@linux-m68k.org> wrote:
> > drivers/net/ethernet/8390/Kconfig:
> >
> > config NET_VENDOR_8390
> >        depends on NET_VENDOR_NATSEMI && (AMIGA_PCMCIA || PCI ||
> SUPERH || \
> >                   ISA || MCA || EISA || MAC || M32R || MACH_TX49XX
> || \
> >                   MCA_LEGACY || H8300 || ARM || MIPS || ZORRO ||
> PCMCIA || \
> >                   EXPERIMENTAL)
> 
> > So NET_VENDOR_8390 depends on NET_VENDOR_NATSEMI.
> 
> > config NET_VENDOR_NATSEMI
> >        depends on MCA || MAC || MACH_JAZZ || PCI ||
> XTENSA_PLATFORM_XT2000
> 
> > But NET_VENDOR_NATSEMI will never be true for several of the other
> > dependencies of NET_VENDOR_8390 (e.g. AMIGA_PCMCIA, EISA, H8300,
> ARM,
> > ZORRO, PCMCIA)?
> 
> There's a similar issue with:
> 
> config NET_VENDOR_I825XX
>         depends on NET_VENDOR_INTEL && (ISA || ISA_DMA_API || ARM || \
>                    ARCH_ACORN || MCA || MCA_LEGACY || SNI_RM || SUN3
> || \
>                    GSC || BVME6000 || MVME16x || EXPERIMENTAL)
> 
> But:
> 
> config NET_VENDOR_INTEL
>         depends on PCI || PCI_MSI
> 
> Gr{oetje,eeting}s,
> 
>                         Geert 

Thanks Geert, I will put together a patch to resolve both issues.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH] i825xx: Fix incorrect dependency for BVME6000_NET
From: Jeff Kirsher @ 2011-10-28 20:59 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <1319835212-19473-1-git-send-email-geert@linux-m68k.org>

[-- Attachment #1: Type: text/plain, Size: 830 bytes --]

On Fri, 2011-10-28 at 13:53 -0700, Geert Uytterhoeven wrote:
> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
> ---
>  drivers/net/ethernet/i825xx/Kconfig |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/net/ethernet/i825xx/Kconfig b/drivers/net/ethernet/i825xx/Kconfig
> index 2be4698..ca1ae98 100644
> --- a/drivers/net/ethernet/i825xx/Kconfig
> +++ b/drivers/net/ethernet/i825xx/Kconfig
> @@ -85,7 +85,7 @@ config APRICOT
>  
>  config BVME6000_NET
>  	tristate "BVME6000 Ethernet support"
> -	depends on BVME6000MVME16x
> +	depends on BVME6000
>  	---help---
>  	  This is the driver for the Ethernet interface on BVME4000 and
>  	  BVME6000 VME boards.  Say Y here to include the driver for this chip

Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* [PATCH] i825xx: Fix incorrect dependency for BVME6000_NET
From: Geert Uytterhoeven @ 2011-10-28 20:53 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: netdev, linux-kernel, Geert Uytterhoeven

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/net/ethernet/i825xx/Kconfig |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/i825xx/Kconfig b/drivers/net/ethernet/i825xx/Kconfig
index 2be4698..ca1ae98 100644
--- a/drivers/net/ethernet/i825xx/Kconfig
+++ b/drivers/net/ethernet/i825xx/Kconfig
@@ -85,7 +85,7 @@ config APRICOT
 
 config BVME6000_NET
 	tristate "BVME6000 Ethernet support"
-	depends on BVME6000MVME16x
+	depends on BVME6000
 	---help---
 	  This is the driver for the Ethernet interface on BVME4000 and
 	  BVME6000 VME boards.  Say Y here to include the driver for this chip
-- 
1.7.0.4

^ permalink raw reply related

* Re: [PATCH] ipv6: fix route error binding peer in func icmp6_dst_alloc
From: David Miller @ 2011-10-28 20:36 UTC (permalink / raw)
  To: omarapazanadi; +Cc: eric.dumazet, netdev, gaofeng
In-Reply-To: <1319806017-6131-1-git-send-email-omarapazanadi@gmail.com>

From: Gao feng <omarapazanadi@gmail.com>
Date: Fri, 28 Oct 2011 20:46:57 +0800

> in func icmp6_dst_alloc,dst_metric_set call ipv6_cow_metrics to set metric.
> ipv6_cow_metrics may will call rt6_bind_peer to set rt6_info->rt6i_peer.
> So,we should move ipv6_addr_copy before dst_metric_set to make sure rt6_bind_peer success.
> 
> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>

Applied, thanks.

^ permalink raw reply

* Re: -next: NET_VENDOR_8390 dependencies
From: Geert Uytterhoeven @ 2011-10-28 20:35 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: netdev, linux-kernel
In-Reply-To: <CAMuHMdW6hC1JchxVL9-4f58a8Vg7cF+97BbKePxjEDHY3H8wNw@mail.gmail.com>

On Sun, Oct 23, 2011 at 23:21, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> drivers/net/ethernet/8390/Kconfig:
>
> config NET_VENDOR_8390
>        depends on NET_VENDOR_NATSEMI && (AMIGA_PCMCIA || PCI || SUPERH || \
>                   ISA || MCA || EISA || MAC || M32R || MACH_TX49XX || \
>                   MCA_LEGACY || H8300 || ARM || MIPS || ZORRO || PCMCIA || \
>                   EXPERIMENTAL)

> So NET_VENDOR_8390 depends on NET_VENDOR_NATSEMI.

> config NET_VENDOR_NATSEMI
>        depends on MCA || MAC || MACH_JAZZ || PCI || XTENSA_PLATFORM_XT2000

> But NET_VENDOR_NATSEMI will never be true for several of the other
> dependencies of NET_VENDOR_8390 (e.g. AMIGA_PCMCIA, EISA, H8300, ARM,
> ZORRO, PCMCIA)?

There's a similar issue with:

config NET_VENDOR_I825XX
        depends on NET_VENDOR_INTEL && (ISA || ISA_DMA_API || ARM || \
                   ARCH_ACORN || MCA || MCA_LEGACY || SNI_RM || SUN3 || \
                   GSC || BVME6000 || MVME16x || EXPERIMENTAL)

But:

config NET_VENDOR_INTEL
        depends on PCI || PCI_MSI

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH 2/2 v3] net/smsc911x: Add regulator support
From: Sascha Hauer @ 2011-10-28 20:33 UTC (permalink / raw)
  To: Linus Walleij
  Cc: netdev, Steve Glendinning, Mathieu Poirer, Robert Marklund,
	Paul Mundt, linux-sh, Tony Lindgren, linux-omap, Mike Frysinger,
	uclinux-dist-devel, Linus Walleij
In-Reply-To: <1319719691-15799-1-git-send-email-linus.walleij@stericsson.com>

Hi Linus,

On Thu, Oct 27, 2011 at 02:48:11PM +0200, Linus Walleij wrote:
> From: Robert Marklund <robert.marklund@stericsson.com>
> 
> Add some basic regulator support for the power pins, as needed
> by the ST-Ericsson Snowball platform that powers up the SMSC911
> chip using an external regulator.
> 
> Platforms that use regulators and the smsc911x and have no defined
> regulator for the smsc911x and claim complete regulator
> constraints with no dummy regulators will need to provide it, for
> example using a fixed voltage regulator. It appears that this may
> affect (apart from Ux500 Snowball) possibly these archs/machines
> that from some grep:s appear to define both CONFIG_SMSC911X and
> CONFIG_REGULATOR:
> 
> - ARM Freescale mx3 and OMAP 2 plus, Raumfeld machines
> - Blackfin
> - Super-H
> 

...

>  
> +
> +/*
> + * Request or free resources, currently just regulators.
> + *
> + * The SMSC911x has two power pins: vddvario and vdd33a, in designs where
> + * these are not always-on we need to request regulators to be turned on
> + * before we can try to access the device registers.
> + */
> +static int smsc911x_request_free_resources(struct platform_device *pdev,
> +		bool request)

I had to look twice at this function name. First I thought "request the
free resources?", which other resources would you request if not the
free ones? I think it would be nicer to have two functions instead.
Just my 2 cents.

Sascha


-- 
Pengutronix e.K.                           |                             |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

^ permalink raw reply

* [net-next-2.6 PATCH 6/6 RFC v3] macvlan: Add support to get MAC/VLAN filter netdev ops
From: Roopa Prabhu @ 2011-10-29  2:34 UTC (permalink / raw)
  To: netdev
  Cc: sri, dragos.tatulea, kvm, arnd, mst, davem, gregory.v.rose, mchan,
	dwang2, shemminger, eric.dumazet, kaber, benve
In-Reply-To: <20111029023159.5198.60245.stgit@rhel6.1>

From: Roopa Prabhu <roprabhu@cisco.com>

This patch adds support to get MAC and VLAN filter netdev ops
on a macvlan interface. It adds support for get_rx_filter_addr_size,
get_rx_filter_vlan_size, fill_rx_filter_addr and fill_rx_filter_vlan
netdev ops

Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: Christian Benvenuti <benve@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
 drivers/net/macvlan.c |  158 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 158 insertions(+), 0 deletions(-)


diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9d8cbe3..15dd7de 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -616,6 +616,55 @@ static int macvlan_set_rx_filter_vlan(struct net_device *dev, int vf,
 	return 0;
 }
 
+static size_t macvlan_get_rx_filter_vlan_size(const struct net_device *dev,
+					      int vf)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct net_device *lowerdev = vlan->lowerdev;
+	const struct net_device_ops *ops = lowerdev->netdev_ops;
+
+	if (vf != SELF_VF)
+		return -EINVAL;
+
+	switch (vlan->mode) {
+	case MACVLAN_MODE_PASSTHRU:
+		if (ops->ndo_get_rx_filter_vlan_size)
+			return ops->ndo_get_rx_filter_vlan_size(dev, vf);
+		/* IFLA_RX_FILTER_VLAN_BITMAP */
+		return nla_total_size(VLAN_BITMAP_SIZE);
+	default:
+		return 0;
+	}
+}
+
+static int macvlan_get_rx_filter_vlan(const struct net_device *dev, int vf,
+				      struct sk_buff *skb)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct net_device *lowerdev = vlan->lowerdev;
+	const struct net_device_ops *ops = lowerdev->netdev_ops;
+
+	if (vf != SELF_VF)
+		return -EINVAL;
+
+	switch (vlan->mode) {
+	case MACVLAN_MODE_PASSTHRU:
+		if (ops->ndo_get_rx_filter_vlan)
+			return ops->ndo_get_rx_filter_vlan(dev, vf, skb);
+
+		NLA_PUT(skb, IFLA_RX_FILTER_VLAN_BITMAP, VLAN_BITMAP_SIZE,
+			vlan->vlan_filter);
+		break;
+	default:
+		return -ENODATA; /* No data to Fill */
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static int macvlan_addr_in_hw_list(struct netdev_hw_addr_list *list,
 				   u8 *addr, int addrlen)
 {
@@ -795,6 +844,111 @@ static int macvlan_set_rx_filter_addr(struct net_device *dev, int vf,
 	return 0;
 }
 
+static size_t macvlan_get_rx_filter_addr_passthru_size(
+			const struct net_device *dev, int vf)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct net_device *lowerdev = vlan->lowerdev;
+	const struct net_device_ops *ops = lowerdev->netdev_ops;
+	size_t size;
+
+	if (ops->ndo_get_rx_filter_addr_size)
+		return ops->ndo_get_rx_filter_addr_size(dev, vf);
+
+	/* IFLA_RX_FILTER_ADDR_FLAGS */
+	size = nla_total_size(sizeof(u32));
+
+	if (netdev_uc_count(dev))
+		/* IFLA_RX_FILTER_ADDR_UC_LIST */
+		size += nla_total_size(netdev_uc_count(dev) *
+				       ETH_ALEN * sizeof(struct nlattr));
+
+	if (netdev_mc_count(dev))
+		/* IFLA_RX_FILTER_ADDR_MC_LIST */
+		size += nla_total_size(netdev_mc_count(dev) *
+				       ETH_ALEN * sizeof(struct nlattr));
+
+	return size;
+}
+
+static size_t macvlan_get_rx_filter_addr_size(const struct net_device *dev,
+					      int vf)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	if (vf != SELF_VF)
+		return -EINVAL;
+
+	switch (vlan->mode) {
+	case MACVLAN_MODE_PASSTHRU:
+		return macvlan_get_rx_filter_addr_passthru_size(dev, vf);
+	default:
+		return 0;
+	}
+}
+
+static int macvlan_get_rx_filter_addr_passthru(const struct net_device *dev,
+					       int vf, struct sk_buff *skb)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct net_device *lowerdev = vlan->lowerdev;
+	const struct net_device_ops *ops = lowerdev->netdev_ops;
+	struct nlattr *uninitialized_var(uc_list), *mc_list;
+	struct netdev_hw_addr *ha;
+
+	if (ops->ndo_get_rx_filter_addr)
+		return ops->ndo_get_rx_filter_addr(dev, vf, skb);
+
+	NLA_PUT_U32(skb, IFLA_RX_FILTER_ADDR_FLAGS,
+		dev->flags & RX_FILTER_FLAGS);
+
+	if (netdev_uc_count(dev)) {
+		uc_list = nla_nest_start(skb, IFLA_RX_FILTER_ADDR_UC_LIST);
+		if (uc_list == NULL)
+			goto nla_put_failure;
+
+		netdev_for_each_uc_addr(ha, dev) {
+			NLA_PUT(skb, IFLA_ADDR_LIST_ENTRY, ETH_ALEN, ha->addr);
+		}
+		nla_nest_end(skb, uc_list);
+	}
+
+	if (netdev_mc_count(dev)) {
+		mc_list = nla_nest_start(skb, IFLA_RX_FILTER_ADDR_MC_LIST);
+		if (mc_list == NULL)
+			goto nla_uc_list_cancel;
+
+		netdev_for_each_mc_addr(ha, dev) {
+			NLA_PUT(skb, IFLA_ADDR_LIST_ENTRY, ETH_ALEN, ha->addr);
+		}
+		nla_nest_end(skb, mc_list);
+	}
+
+	return 0;
+
+nla_uc_list_cancel:
+	if (netdev_uc_count(dev))
+		nla_nest_cancel(skb, uc_list);
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int macvlan_get_rx_filter_addr(const struct net_device *dev, int vf,
+				      struct sk_buff *skb)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	if (vf != SELF_VF)
+		return -EINVAL;
+
+	switch (vlan->mode) {
+	case MACVLAN_MODE_PASSTHRU:
+		return macvlan_get_rx_filter_addr_passthru(dev, vf, skb);
+	default:
+		return -ENODATA; /* No data to Fill */
+	}
+}
+
 static void macvlan_ethtool_get_drvinfo(struct net_device *dev,
 					struct ethtool_drvinfo *drvinfo)
 {
@@ -831,7 +985,11 @@ static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_vlan_rx_add_vid		= macvlan_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid		= macvlan_vlan_rx_kill_vid,
 	.ndo_set_rx_filter_addr		= macvlan_set_rx_filter_addr,
+	.ndo_get_rx_filter_addr_size	= macvlan_get_rx_filter_addr_size,
+	.ndo_get_rx_filter_addr		= macvlan_get_rx_filter_addr,
 	.ndo_set_rx_filter_vlan		= macvlan_set_rx_filter_vlan,
+	.ndo_get_rx_filter_vlan_size	= macvlan_get_rx_filter_vlan_size,
+	.ndo_get_rx_filter_vlan		= macvlan_get_rx_filter_vlan,
 };
 
 void macvlan_common_setup(struct net_device *dev)

^ permalink raw reply related

* [net-next-2.6 PATCH 4/6 RFC v3] rtnetlink: Add support to get MAC/VLAN filters
From: Roopa Prabhu @ 2011-10-29  2:34 UTC (permalink / raw)
  To: netdev
  Cc: sri, dragos.tatulea, kvm, arnd, mst, davem, gregory.v.rose, mchan,
	dwang2, shemminger, eric.dumazet, kaber, benve
In-Reply-To: <20111029023159.5198.60245.stgit@rhel6.1>

From: Roopa Prabhu <roprabhu@cisco.com>

This patch adds support in rtnetlink for IFLA_VF_RX_FILTERS and
IFLA_RX_FILTER get. It gets the size of the filters using
netdev_ops->get_rx_filter_addr_size and netdev_ops->get_rx_filter_vlan_size
and uses netdev_ops->get_rx_filter_addr and netdev_ops->get_rx_filter_vlan.
In case of IFLA_VF_RX_FILTERS it loops through all VFs to get the filter
data.

Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: Christian Benvenuti <benve@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
 net/core/rtnetlink.c |  159 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 158 insertions(+), 1 deletions(-)


diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a042910..ea861b4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -475,6 +475,62 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev)
 	return size;
 }
 
+static size_t rtnl_vf_rx_filter_size(const struct net_device *dev, int vf)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	size_t size;
+
+	/* IFLA_RX_FILTER  or IFLA_VF_RX_FILTER */
+	size = nla_total_size(sizeof(struct nlattr));
+
+	if (vf != SELF_VF)
+		size = nla_total_size(4); /* IFLA_RX_FILTER_VF */
+
+	if (ops->ndo_get_rx_filter_addr_size) {
+		size_t rx_filter_addr_size =
+				ops->ndo_get_rx_filter_addr_size(dev, vf);
+
+		if (rx_filter_addr_size)
+			/* IFLA_RX_FILTER_ADDR */
+			size += nla_total_size(sizeof(struct nlattr)) +
+					rx_filter_addr_size;
+	}
+
+	if (ops->ndo_get_rx_filter_vlan_size) {
+		size_t rx_filter_vlan_size =
+				ops->ndo_get_rx_filter_vlan_size(dev, vf);
+
+		if (rx_filter_vlan_size)
+			/* IFLA_RX_FILTER_VLAN */
+			size += nla_total_size(sizeof(struct nlattr)) +
+					rx_filter_vlan_size;
+	}
+
+	return size;
+}
+
+static size_t rtnl_rx_filter_size(const struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int vf = SELF_VF;
+	size_t size;
+
+	if (!ops->ndo_get_rx_filter_addr_size &&
+	    !ops->ndo_get_rx_filter_vlan_size)
+		return 0;
+
+	size = rtnl_vf_rx_filter_size(dev, vf); /* SELF_VF */
+
+	if (dev->dev.parent && dev_num_vf(dev->dev.parent)) {
+		/* IFLA_VF_RX_FILTERS */
+		size = nla_total_size(sizeof(struct nlattr));
+		for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++)
+			size += rtnl_vf_rx_filter_size(dev, vf);
+	}
+
+	return size;
+}
+
 static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
 {
 	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
@@ -513,6 +569,102 @@ out:
 	return err;
 }
 
+static int rtnl_vf_rx_filter_fill(struct sk_buff *skb,
+				  const struct net_device *dev, int vf)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct nlattr *addr_filter = NULL, *vlan_filter = NULL;
+	struct nlattr *rx_filter;
+	int err = -EMSGSIZE;
+	int filter_attrtype =
+		(vf == SELF_VF ? IFLA_RX_FILTER : IFLA_VF_RX_FILTER);
+
+	rx_filter = nla_nest_start(skb, filter_attrtype);
+	if (rx_filter == NULL)
+		goto nla_put_failure;
+
+	if (vf != SELF_VF)
+		NLA_PUT_U32(skb, IFLA_RX_FILTER_VF, vf);
+
+	if (ops->ndo_get_rx_filter_addr) {
+		addr_filter = nla_nest_start(skb, IFLA_RX_FILTER_ADDR);
+		if (addr_filter == NULL)
+			goto err_cancel_rx_filter;
+		err = ops->ndo_get_rx_filter_addr(dev, vf, skb);
+		if (err == -ENODATA)
+			nla_nest_cancel(skb, addr_filter);
+		else if (err < 0)
+			goto err_cancel_addr_filter;
+		else
+			nla_nest_end(skb, addr_filter);
+	}
+
+	if (ops->ndo_get_rx_filter_vlan) {
+		vlan_filter = nla_nest_start(skb, IFLA_RX_FILTER_VLAN);
+		if (vlan_filter == NULL)
+			goto err_cancel_addr_filter;
+		err = ops->ndo_get_rx_filter_vlan(dev, vf, skb);
+		if (err == -ENODATA)
+			nla_nest_cancel(skb, vlan_filter);
+		else if (err)
+			goto err_cancel_vlan_filter;
+		else
+			nla_nest_end(skb, vlan_filter);
+	}
+	nla_nest_end(skb, rx_filter);
+
+	return 0;
+
+err_cancel_vlan_filter:
+	if (vlan_filter)
+		nla_nest_cancel(skb, vlan_filter);
+err_cancel_addr_filter:
+	if (addr_filter)
+		nla_nest_cancel(skb, addr_filter);
+err_cancel_rx_filter:
+	nla_nest_cancel(skb, rx_filter);
+nla_put_failure:
+	return err;
+}
+
+static int rtnl_rx_filter_fill(struct sk_buff *skb,
+			       const struct net_device *dev)
+{
+	struct nlattr *vf_rx_filters = NULL;
+	int vf = SELF_VF;
+	int err;
+
+	if (!dev->netdev_ops->ndo_get_rx_filter_addr &&
+	    !dev->netdev_ops->ndo_get_rx_filter_vlan)
+		return 0;
+
+	err = rtnl_vf_rx_filter_fill(skb, dev, vf); /* SELF_VF */
+	if (err)
+		return err;
+
+	if (dev->dev.parent && dev_num_vf(dev->dev.parent)) {
+		vf_rx_filters = nla_nest_start(skb, IFLA_VF_RX_FILTERS);
+		if (!vf_rx_filters)
+			return -EMSGSIZE;
+
+		for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
+			err = rtnl_vf_rx_filter_fill(skb, dev, vf);
+			if (err == -EMSGSIZE)
+				goto err_cancel_nest_vf_rx_filters;
+		}
+
+		nla_nest_end(skb, vf_rx_filters);
+	}
+
+	return 0;
+
+err_cancel_nest_vf_rx_filters:
+	if (vf_rx_filters)
+		nla_nest_cancel(skb, vf_rx_filters);
+
+	return err;
+}
+
 static const int rtm_min[RTM_NR_FAMILIES] =
 {
 	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
@@ -786,7 +938,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
-	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
+	       + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+		/* IFLA_VF_RX_FILTERS + IFLA_RX_FILTER */
+	       + rtnl_rx_filter_size(dev);
 }
 
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -996,6 +1150,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_port_fill(skb, dev))
 		goto nla_put_failure;
 
+	if (rtnl_rx_filter_fill(skb, dev) < 0)
+		goto nla_put_failure;
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;

^ permalink raw reply related

* [net-next-2.6 PATCH 1/6 RFC v3] rtnetlink: Netlink interface for setting MAC and VLAN filters
From: Roopa Prabhu @ 2011-10-29  2:34 UTC (permalink / raw)
  To: netdev
  Cc: sri, dragos.tatulea, kvm, arnd, mst, davem, gregory.v.rose, mchan,
	dwang2, shemminger, eric.dumazet, kaber, benve
In-Reply-To: <20111029023159.5198.60245.stgit@rhel6.1>

From: Roopa Prabhu <roprabhu@cisco.com>

This patch introduces the following netlink interface to set
MAC and VLAN filters on a network interface. It can be used to
set RX filters on any network interface (if supported by the driver) and
also on a SRIOV VF via its PF.

Interface to set RX filter on a SRIOV VF
[IFLA_VF_RX_FILTERS] = {
	[IFLA_VF_RX_FILTER] = {
		[IFLA_RX_FILTER_VF]
		[IFLA_RX_FILTER_ADDR] = {
			[IFLA_RX_FILTER_ADDR_FLAGS]
			[IFLA_RX_FILTER_ADDR_UC_LIST] = {
				[IFLA_ADDR_LIST_ENTRY]
			}
			[IFLA_RX_FILTER_ADDR_MC_LIST] = {
				[IFLA_ADDR_LIST_ENTRY]
			}
		}
		[IFLA_RX_FILTER_VLAN] = {
			[IFLA_RX_FILTER_VLAN_BITMAP]
		}
	}
	...
}

Interface to set RX filter on any network interface:
[IFLA_RX_FILTER] = {
	[IFLA_RX_FILTER_VF]
	[IFLA_RX_FILTER_ADDR] = {
		[IFLA_RX_FILTER_ADDR_FLAGS]
		[IFLA_RX_FILTER_ADDR_UC_LIST] = {
			[IFLA_ADDR_LIST_ENTRY]
		}
		[IFLA_RX_FILTER_ADDR_MC_LIST] = {
			[IFLA_ADDR_LIST_ENTRY]
		}
	}
	[IFLA_RX_FILTER_VLAN] = {
		[IFLA_RX_FILTER_VLAN_BITMAP]
	}
}

Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: Christian Benvenuti <benve@cisco.com>
Signed-off-by: David Wang <dwang2@cisco.com>
---
 include/linux/if_link.h |   61 +++++++++++++++++++++++++++++++++++++++++++++++
 net/core/rtnetlink.c    |   20 +++++++++++++++
 2 files changed, 81 insertions(+), 0 deletions(-)


diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c52d4b5..74a9f17 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -137,6 +137,8 @@ enum {
 	IFLA_AF_SPEC,
 	IFLA_GROUP,		/* Group the device belongs to */
 	IFLA_NET_NS_FD,
+	IFLA_VF_RX_FILTERS,
+	IFLA_RX_FILTER,
 	__IFLA_MAX
 };
 
@@ -390,4 +392,63 @@ struct ifla_port_vsi {
 	__u8 pad[3];
 };
 
+/* VF rx filters management section
+ *
+ *	Nested layout of set/get msg is:
+ *
+ *	[IFLA_VF_RX_FILTERS]
+ *		[IFLA_VF_RX_FILTER]
+ *			[IFLA_RX_FILTER_*], ...
+ *		[IFLA_VF_RX_FILTER]
+ *			[IFLA_RX_FILTER_*], ...
+ *		...
+ *	[IFLA_RX_FILTER]
+ *		[IFLA_RX_FILTER_*], ...
+ */
+enum {
+	IFLA_VF_RX_FILTER_UNSPEC,
+	IFLA_VF_RX_FILTER,			/* nest */
+	__IFLA_VF_RX_FILTER_MAX,
+};
+
+#define IFLA_VF_RX_FILTER_MAX (__IFLA_VF_RX_FILTER_MAX - 1)
+
+enum {
+	IFLA_RX_FILTER_UNSPEC,
+	IFLA_RX_FILTER_VF,		/* __u32 */
+	IFLA_RX_FILTER_ADDR,
+	IFLA_RX_FILTER_VLAN,
+	__IFLA_RX_FILTER_MAX,
+};
+#define IFLA_RX_FILTER_MAX (__IFLA_RX_FILTER_MAX - 1)
+
+enum {
+	IFLA_RX_FILTER_ADDR_UNSPEC,
+	IFLA_RX_FILTER_ADDR_FLAGS,
+	IFLA_RX_FILTER_ADDR_UC_LIST,
+	IFLA_RX_FILTER_ADDR_MC_LIST,
+	__IFLA_RX_FILTER_ADDR_MAX,
+};
+#define IFLA_RX_FILTER_ADDR_MAX (__IFLA_RX_FILTER_ADDR_MAX - 1)
+
+#define RX_FILTER_FLAGS (IFF_UP | IFF_BROADCAST | IFF_MULTICAST | \
+				IFF_PROMISC | IFF_ALLMULTI)
+
+enum {
+	IFLA_ADDR_LIST_UNSPEC,
+	IFLA_ADDR_LIST_ENTRY,
+	__IFLA_ADDR_LIST_MAX,
+};
+#define IFLA_ADDR_LIST_MAX (__IFLA_ADDR_LIST_MAX - 1)
+
+enum {
+	IFLA_RX_FILTER_VLAN_UNSPEC,
+	IFLA_RX_FILTER_VLAN_BITMAP,
+	__IFLA_RX_FILTER_VLAN_MAX,
+};
+#define IFLA_RX_FILTER_VLAN_MAX (__IFLA_RX_FILTER_VLAN_MAX - 1)
+
+#define VLAN_BITMAP_SPLIT_MAX 8
+#define VLAN_BITMAP_SIZE	(VLAN_N_VID/VLAN_BITMAP_SPLIT_MAX)
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9083e82..9eead8e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -42,6 +42,7 @@
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
+#include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/arp.h>
@@ -1097,6 +1098,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
 	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
+	[IFLA_VF_RX_FILTERS]	= { .type = NLA_NESTED },
+	[IFLA_RX_FILTER]	= { .type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1132,6 +1135,23 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
 	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },
 };
 
+static const struct nla_policy ifla_rx_filter_policy[IFLA_RX_FILTER_MAX+1] = {
+	[IFLA_RX_FILTER_VF]	= { .type = NLA_U32 },
+	[IFLA_RX_FILTER_ADDR]	= { .type = NLA_NESTED },
+	[IFLA_RX_FILTER_VLAN]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_addr_filter_policy[IFLA_RX_FILTER_ADDR_MAX+1] = {
+	[IFLA_RX_FILTER_ADDR_FLAGS]	= { .type = NLA_U32 },
+	[IFLA_RX_FILTER_ADDR_UC_LIST]	= { .type = NLA_NESTED },
+	[IFLA_RX_FILTER_ADDR_MC_LIST]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vlan_filter_policy[IFLA_RX_FILTER_VLAN_MAX+1] = {
+	[IFLA_RX_FILTER_VLAN_BITMAP]	= { .type = NLA_BINARY,
+					    .len = VLAN_BITMAP_SIZE },
+};
+
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
 	struct net *net;

^ permalink raw reply related

* Re: [PATCH V2 02/10] cxgb4: Common platform specific changes for DB Drop Recovery
From: Roland Dreier @ 2011-10-28 18:28 UTC (permalink / raw)
  To: Kumar Sanghvi
  Cc: Felix Marti, Vipul Pandya, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	Divy Le Ray, Dimitrios Michailidis, Steve Wise
In-Reply-To: <20111028182234.GA11554-ZuiPNEE88OINxtijsoNbcrBI9BrxbZE7QQ4Iyu8u01E@public.gmane.org>

> Vipul did a respin of this patch, and posted the V3 version.
> Its available here:
> http://www.mail-archive.com/linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org/msg09659.html

Got it, thanks.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [net-next-2.6 PATCH 8/8 RFC v2] macvtap: Add support to get MAC/VLAN filter rtnl link operations
From: Roopa Prabhu @ 2011-10-28 18:24 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, sri, dragos.tatulea, arnd, kvm, davem, mchan, dwang2,
	shemminger, eric.dumazet, kaber, benve
In-Reply-To: <20111024055633.GC24528@redhat.com>




On 10/23/11 10:56 PM, "Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Tue, Oct 18, 2011 at 11:26:36PM -0700, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roprabhu@cisco.com>
>> 
>> This patch adds support to get MAC and VLAN filter rtnl_link_ops
>> on a macvtap interface. It adds support for get_rx_addr_filter_size,
>> get_rx_vlan_filter_size, fill_rx_addr_filter and fill_rx_vlan_filter
>> rtnl link operations. Calls equivalent macvlan operations.
>> 
>> Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
>> Signed-off-by: Christian Benvenuti <benve@cisco.com>
>> Signed-off-by: David Wang <dwang2@cisco.com>
>> ---
>>  drivers/net/macvtap.c |   27 +++++++++++++++++++++++++++
>>  1 files changed, 27 insertions(+), 0 deletions(-)
>> 
>> 
>> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
>> index 8a2cb59..9b40de7 100644
>> --- a/drivers/net/macvtap.c
>> +++ b/drivers/net/macvtap.c
>> @@ -285,6 +285,29 @@ static int macvtap_set_rx_vlan_filter(struct net_device
>> *dev,
>> return macvlan_set_rx_vlan_filter(dev, tb);
>>  }
>>  
>> +static int macvtap_fill_rx_addr_filter(struct sk_buff *skb,
>> + const struct net_device *dev)
>> +{
>> + return macvlan_fill_rx_addr_filter(skb, dev);
>> +}
>> +
>> +static int macvtap_fill_rx_vlan_filter(struct sk_buff *skb,
>> + const struct net_device *dev)
>> +{
>> + return macvlan_fill_rx_vlan_filter(skb, dev);
>> +}
>> +
>> +static size_t macvtap_get_rx_addr_filter_size(const struct net_device *dev)
>> +{
>> + return macvlan_get_rx_addr_filter_size(dev);
>> +}
>> +
>> +static size_t macvtap_get_rx_vlan_filter_size(const struct net_device *dev)
>> +{
>> + return macvlan_get_rx_vlan_filter_size(dev);
>> +}
> 
> So why do we need the above wrappers? Can't use macvlanXXX directly?
> 

I had followed the existing macvtap rtnl_link_ops convention here.
It seems cleaner this way. You can define the macvtap ops static and
call the equivalent macvlan functions from them if required. It also gives
you flexibility in adding any macvtap specific stuff before or after you call
the macvlan equivalent function (like some of the macvtap rtnl link ops
already do today).

In any case, this part and the empty-line error below go away in the new
version.

Thanks,
Roopa


>> +
>> +
> 
> don't add double emoty lines pls.
> 
>>  static int macvtap_newlink(struct net *src_net,
>>   struct net_device *dev,
>>   struct nlattr *tb[],
>> @@ -335,6 +358,10 @@ static struct rtnl_link_ops macvtap_link_ops
>> __read_mostly = {
>> .dellink   = macvtap_dellink,
>> .set_rx_addr_filter  = macvtap_set_rx_addr_filter,
>> .set_rx_vlan_filter  = macvtap_set_rx_vlan_filter,
>> + .get_rx_addr_filter_size = macvtap_get_rx_addr_filter_size,
>> + .get_rx_vlan_filter_size = macvtap_get_rx_vlan_filter_size,
>> + .fill_rx_addr_filter  = macvtap_fill_rx_addr_filter,
>> + .fill_rx_vlan_filter  = macvtap_fill_rx_vlan_filter,
>>  };
>>  
>>  

^ permalink raw reply

* Re: [PATCH V2 02/10] cxgb4: Common platform specific changes for DB Drop Recovery
From: Kumar Sanghvi @ 2011-10-28 18:22 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Felix Marti, Vipul Pandya, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	Divy Le Ray, Dimitrios Michailidis, Steve Wise
In-Reply-To: <CAL1RGDXoeJNYz3R58bU-ixRyY1uKNgGPdUrWZ4nbcuyRDjL6Cg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Hi Roland,

On Fri, Oct 28, 2011 at 10:59:47 -0700, Roland Dreier wrote:
> On Thu, Oct 20, 2011 at 10:18 AM, Felix Marti <felix-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org> wrote:
> > Don't add stuff to the t4fw_interface.h, that is owned by firmware.
> 
> Vipul, do you plan to respin to handle Felix's comment?
> 
>  - R.
Vipul is on leave, so replying on his behalf.

Vipul did a respin of this patch, and posted the V3 version.
It's available here:
http://www.mail-archive.com/linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org/msg09659.html

Thanks,
Kumar.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH V2 02/10] cxgb4: Common platform specific changes for DB Drop Recovery
From: Roland Dreier @ 2011-10-28 17:59 UTC (permalink / raw)
  To: Felix Marti
  Cc: Vipul Pandya, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	Divy Le Ray, Dimitrios Michailidis, Kumar A S, Steve Wise
In-Reply-To: <8A71B368A89016469F72CD08050AD3340A3D3120-utt48SW1nMZEErodcbzraFjMPmZJtkid@public.gmane.org>

On Thu, Oct 20, 2011 at 10:18 AM, Felix Marti <felix-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org> wrote:
> Don't add stuff to the t4fw_interface.h, that is owned by firmware.

Vipul, do you plan to respin to handle Felix's comment?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC] tcp: Export TCP Delayed ACK parameters to user
From: Rick Jones @ 2011-10-28 16:38 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Daniel Baluta, davem, kuznet, jmorris, yoshfuji, kaber, netdev
In-Reply-To: <1319791441.23112.80.camel@edumazet-laptop>

>> On Solaris there is a global option tcp_deferred_acks_max [2],
>> which is similar with our tcp_delack_segs.
>>
>
> and also has tcp_deferred_ack_interval

And those have similar settings in HP-UX 11.X.

For the sake of completeness, the ACK avoidance heuristic in HP-UX, and 
I presume Solaris (as they share a common "Mentat" heritage) includes a 
mechanism to reduce the per-connection effective number of segments per 
ACKnowledgement.  I believe this is done to handle cases where the 
sender may have reduced her cwnd.  That would have deployment going back 
to 1997 in the case of HP-UX 11.0, and presumably a few years before 
that in the case of Solaris.  That mechanism in their ACK avoidance 
heuristics may be the reason neither have gone so far as to make the 
settings per-route or per-connection (though I could be wrong).  I 
believe that Solaris does though have two deferred ACK limits - one for 
perceived to be local connections and one (lower) for perceived to be 
remote connections.

There can be "fun" interactions with senders which increase cwnd per ACK 
rather than per bytes ACKed.

Still, I myself am somewhat fond of ACK avoidance heuristics.

rick jones

PS - when discussing the performance benefits of an ACK avoidance 
heuristic, feel free to use netperf and service demand numbers :)

^ permalink raw reply

* Re: bridge: HSR support
From: Arvid Brodin @ 2011-10-28 16:36 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <20111028175421.339b7c49@s6510.linuxnetplumber.net>

Stephen Hemminger wrote:
> On Fri, 28 Oct 2011 17:34:18 +0200
> Arvid Brodin <arvid.brodin@enea.com> wrote:
> 
>> Ok, so after a lot of reading and looking through code I have this idea of a
>> standalone solution:
>>
>> 1) Add ioctls to create (and remove) "hsr" netdevs which encapsulates two
>>    physical Ethernet interfaces each (somewhat like the bridge code does, but
>>    with precisely 2 interfaces slaved).
> 
> Please use the newer netlink interface and the master attribute for this
> rather than inventing yet another ioctl.

Ok, will do! Thanks!

-- 
Arvid Brodin
Enea Services Stockholm AB

^ permalink raw reply

* Re: bridge: HSR support
From: Stephen Hemminger @ 2011-10-28 15:54 UTC (permalink / raw)
  To: Arvid Brodin; +Cc: netdev
In-Reply-To: <4EAACB7A.4090207@enea.com>

On Fri, 28 Oct 2011 17:34:18 +0200
Arvid Brodin <arvid.brodin@enea.com> wrote:

> Ok, so after a lot of reading and looking through code I have this idea of a
> standalone solution:
> 
> 1) Add ioctls to create (and remove) "hsr" netdevs which encapsulates two
>    physical Ethernet interfaces each (somewhat like the bridge code does, but
>    with precisely 2 interfaces slaved).

Please use the newer netlink interface and the master attribute for this
rather than inventing yet another ioctl.

^ permalink raw reply

* Re: bridge: HSR support
From: Arvid Brodin @ 2011-10-28 15:34 UTC (permalink / raw)
  To: netdev; +Cc: Arvid Brodin
In-Reply-To: <4EA5738B.8080008@enea.com>

Arvid Brodin wrote:
> Stephen Hemminger wrote:
>> On Tue, 11 Oct 2011 20:25:08 +0200
>> Arvid Brodin <arvid.brodin@enea.com> wrote:
>>
>>> Hi,
>>>
>>> I want to add support for HSR ("High-availability Seamless Redundancy",
>>> IEC-62439-3) to the bridge code. With HSR, all connected units have two network
>>> ports and are connected in a ring. All new Ethernet packets are sent on both
>>> ports (or passed through if the current unit is not the originating unit). The
>>> same packet is never passed twice. Non-HSR units are not allowed in the ring.
>>>
>>> This gives instant, reconfiguration-free failover.
>>>
*snip*
> 
> I need to do two things:
> 
> 1) Bind two network interfaces into one (say, eth0 & eth1 => hsr0). Frames sent on
>    hsr0 should get an HSR tag (including the correct EtherType) and go out on both
>    eth0 and eth1.
> 
> 2) Ingress frames on eth0 & eth1, with EtherType 0x88fb, should be captured and 
>    handled specially (either received on hsr0 or forwarded to the other bound 
>    physical interface).
> 
> Any ideas on the best way to implement this -- what's the nicest place to "hook
> into" for this?
> 

Ok, so after a lot of reading and looking through code I have this idea of a
standalone solution:

1) Add ioctls to create (and remove) "hsr" netdevs which encapsulates two
   physical Ethernet interfaces each (somewhat like the bridge code does, but
   with precisely 2 interfaces slaved).

2) Use dev_add_pack() to register protocol ("EtherType") 0x88FB. The device
   that the frames come in on is checked for being a slave to a hsr netdev,
   and the frames are handled accordingly.

It would be great to get some input on the sanity of this solution before I get
too much time invested in it!


Thanks,
-- 
Arvid Brodin
Enea Services Stockholm AB

^ permalink raw reply

* [PATCH 7/7] SUNRPC: added debug messages to RPC pipefs
From: Stanislav Kinsbursky @ 2011-10-28 15:26 UTC (permalink / raw)
  To: Trond.Myklebust
  Cc: linux-nfs, xemul, neilb, netdev, linux-kernel, bfields, davem,
	devel
In-Reply-To: <20111028142245.5796.89937.stgit@localhost6.localdomain6>

This patch adds debug messages for notification events.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>

---
 net/sunrpc/rpc_pipe.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index cbf213e..42e4b6e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -33,6 +33,10 @@
 #include "netns.h"
 #include "sunrpc.h"
 
+#define RPCDBG_FACILITY RPCDBG_DEBUG
+
+#define NET_NAME(net)	((net == &init_net) ? " (init_net)" : "")
+
 static struct vfsmount *rpc_mnt __read_mostly;
 static int rpc_mount_count;
 
@@ -1095,6 +1099,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
 		return -ENOMEM;
+	dprintk("RPC:	sending pipefs MOUNT notification for net %p%s\n", net,
+								NET_NAME(net));
 	err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_MOUNT,
 					   sb);
@@ -1128,6 +1134,8 @@ void rpc_kill_sb(struct super_block *sb)
 	sn->pipefs_sb = NULL;
 	mutex_unlock(&sn->pipefs_sb_lock);
 	put_net(net);
+	dprintk("RPC:	sending pipefs UMOUNT notification for net %p%s\n", net,
+								NET_NAME(net));
 	blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_UMOUNT,
 					   sb);

^ permalink raw reply related

* [PATCH 6/7] SUNRPC: pipefs per-net operations helper introduced
From: Stanislav Kinsbursky @ 2011-10-28 15:26 UTC (permalink / raw)
  To: Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, xemul-bzQdu9zFT3WakBO8gow8eQ,
	neilb-l3A5Bk7waGM, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	bfields-uC3wQj2KruNg9hUCZPvPmw, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	devel-GEFAQzZX7r8dnm+yROfE0A
In-Reply-To: <20111028142245.5796.89937.stgit-bi+AKbBUZKagILUCTcTcHdKyNwTtLsGr@public.gmane.org>

During per-net pipe creation and destruction we have to make sure that the
pipefs sb exists for the whole creation/destruction cycle. This is done by
using a special mutex which controls the pipefs sb reference in the network
namespace context. The helper consists of two parts: the first
(rpc_get_sb_net) returns the pipefs superblock of the given network namespace
with the mutex taken on success. When pipe creation or destruction is
completed, the caller should release this mutex by calling rpc_put_sb_net.

Signed-off-by: Stanislav Kinsbursky <skinsbursky-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>

---
 include/linux/sunrpc/rpc_pipe_fs.h |    4 ++++
 net/sunrpc/netns.h                 |    1 +
 net/sunrpc/rpc_pipe.c              |   36 ++++++++++++++++++++++++++++++++++++
 net/sunrpc/sunrpc_syms.c           |    1 +
 4 files changed, 42 insertions(+), 0 deletions(-)

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 4a327ad..271e1b2 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -55,6 +55,10 @@ enum {
 extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
 				      const unsigned char *dir_name);
 
+extern void rpc_pipefs_init_net(struct net *net);
+extern struct super_block *rpc_get_sb_net(const struct net *net);
+extern void rpc_put_sb_net(const struct net *net);
+
 extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 
 struct rpc_clnt;
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index b384252..11d2f48 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -11,6 +11,7 @@ struct sunrpc_net {
 	struct cache_detail *ip_map_cache;
 
 	struct super_block *pipefs_sb;
+	struct mutex pipefs_sb_lock;
 };
 
 extern int sunrpc_net_id;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5c313d3..cbf213e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1036,6 +1036,40 @@ struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
 }
 EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
 
+void rpc_pipefs_init_net(struct net *net)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+	mutex_init(&sn->pipefs_sb_lock);
+}
+
+/*
+ * This call is used for per network namespace operations.
+ * Note: the function returns with pipefs_sb_lock taken if the superblock was
+ * found. This lock has to be released by rpc_put_sb_net() once all operations
+ * have completed.
+ */
+struct super_block *rpc_get_sb_net(const struct net *net)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+	mutex_lock(&sn->pipefs_sb_lock);
+	if (sn->pipefs_sb)
+		return sn->pipefs_sb;
+	mutex_unlock(&sn->pipefs_sb_lock);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(rpc_get_sb_net);
+
+void rpc_put_sb_net(const struct net *net)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+	BUG_ON(sn->pipefs_sb == NULL);
+	mutex_unlock(&sn->pipefs_sb_lock);
+}
+EXPORT_SYMBOL_GPL(rpc_put_sb_net);
+
 static int
 rpc_fill_super(struct super_block *sb, void *data, int silent)
 {
@@ -1090,7 +1124,9 @@ void rpc_kill_sb(struct super_block *sb)
 	struct net *net = sb->s_fs_info;
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
+	mutex_lock(&sn->pipefs_sb_lock);
 	sn->pipefs_sb = NULL;
+	mutex_unlock(&sn->pipefs_sb_lock);
 	put_net(net);
 	blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_UMOUNT,
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 9d08091..880de8b 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -38,6 +38,7 @@ static __net_init int sunrpc_init_net(struct net *net)
 	if (err)
 		goto err_ipmap;
 
+	rpc_pipefs_init_net(net);
 	return 0;
 
 err_ipmap:

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 5/7] SUNRPC: put pipefs superblock link on network namespace
From: Stanislav Kinsbursky @ 2011-10-28 15:25 UTC (permalink / raw)
  To: Trond.Myklebust-HgOvQuBEEgTQT0dZR+AlfA
  Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA, xemul-bzQdu9zFT3WakBO8gow8eQ,
	neilb-l3A5Bk7waGM, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	bfields-uC3wQj2KruNg9hUCZPvPmw, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	devel-GEFAQzZX7r8dnm+yROfE0A
In-Reply-To: <20111028142245.5796.89937.stgit-bi+AKbBUZKagILUCTcTcHdKyNwTtLsGr@public.gmane.org>

We have modules (like the pNFS blocklayout module) which create pipes on
rpc_pipefs. Thus we need per-net operations for them. To make this possible we
require the appropriate super block, so we have to put an sb link in the
network namespace context. Note that it is not strictly required to create
pipes in per-net operations. IOW, if pipefs wasn't mounted yet, then no sb
link reference will be present in the network namespace, and in this case we
just need to skip pipe creation. The pipe dentry will be created during the
pipefs mount notification.

Signed-off-by: Stanislav Kinsbursky <skinsbursky-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>

---
 net/sunrpc/netns.h    |    2 ++
 net/sunrpc/rpc_pipe.c |    4 ++++
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index d013bf2..b384252 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -9,6 +9,8 @@ struct cache_detail;
 struct sunrpc_net {
 	struct proc_dir_entry *proc_net_rpc;
 	struct cache_detail *ip_map_cache;
+
+	struct super_block *pipefs_sb;
 };
 
 extern int sunrpc_net_id;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 4860a56..5c313d3 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1042,6 +1042,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 	struct inode *inode;
 	struct dentry *root;
 	struct net *net = data;
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	int err;
 
 	sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -1066,6 +1067,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto err_depopulate;
 	sb->s_fs_info = get_net(net);
+	sn->pipefs_sb = sb;
 	return 0;
 
 err_depopulate:
@@ -1086,7 +1088,9 @@ rpc_mount(struct file_system_type *fs_type,
 void rpc_kill_sb(struct super_block *sb)
 {
 	struct net *net = sb->s_fs_info;
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
+	sn->pipefs_sb = NULL;
 	put_net(net);
 	blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
 					   RPC_PIPEFS_UMOUNT,

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 4/7] SUNRPC: pipefs dentry lookup helper introduced
From: Stanislav Kinsbursky @ 2011-10-28 15:25 UTC (permalink / raw)
  To: Trond.Myklebust
  Cc: linux-nfs, xemul, neilb, netdev, linux-kernel, bfields, davem,
	devel
In-Reply-To: <20111028142245.5796.89937.stgit@localhost6.localdomain6>

In all places where pipefs dentries are created, only the directory inode is
actually required to create a new dentry. And all these directories have the
root pipefs dentry as their parent. So we actually don't need the pipefs mount
point at all if some pipefs lookup method is provided.
IOW, all we really need is just the superblock and a simple lookup method to
find the root's child dentry with the appropriate name. This patch introduces
that method.
Note that no locking is implemented in rpc_d_lookup_sb(), so it can be used
only when the pipefs superblock is guaranteed to still exist. IOW, we can use
this method only in pipefs mount-umount notification subscriber callbacks.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>

---
 include/linux/sunrpc/rpc_pipe_fs.h |    3 +++
 net/sunrpc/rpc_pipe.c              |   16 ++++++++++++++++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index c1cdb2f..4a327ad 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -52,6 +52,9 @@ enum {
 	RPC_PIPEFS_UMOUNT,
 };
 
+extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
+				      const unsigned char *dir_name);
+
 extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 
 struct rpc_clnt;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 8abeb9d..4860a56 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1020,6 +1020,22 @@ static const struct rpc_filelist files[] = {
 	},
 };
 
+/*
+ * This call can be used only in RPC pipefs mount notification hooks.
+ */
+struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
+			       const unsigned char *dir_name)
+{
+	struct qstr dir = {
+		.name = dir_name,
+		.len = strlen(dir_name),
+		.hash = full_name_hash(dir_name, strlen(dir_name)),
+	};
+
+	return d_lookup(sb->s_root, &dir);
+}
+EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
+
 static int
 rpc_fill_super(struct super_block *sb, void *data, int silent)
 {

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox