Netdev List
 help / color / mirror / Atom feed
* [PATCH 3/3] tcp: use PRR to reduce cwin in CWR state
From: Yuchung Cheng @ 2012-09-03  3:38 UTC (permalink / raw)
  To: davem, ncardwell, nanditad; +Cc: mattmathis, edumazet, netdev, Yuchung Cheng
In-Reply-To: <1346643484-12947-1-git-send-email-ycheng@google.com>

Use proportional rate reduction (PRR) algorithm to reduce cwnd in CWR state,
in addition to Recovery state. Retire the current rate-halving in CWR.
When losses are detected via ACKs in CWR state, the sender enters Recovery
state but the cwnd reduction continues and does not restart.

Rename and refactor cwnd reduction functions since both CWR and Recovery
use the same algorithm:
tcp_init_cwnd_reduction() is new and initializes the reduction state variables.
tcp_cwnd_reduction() was previously tcp_update_cwnd_in_recovery().
tcp_end_cwnd_reduction() was previously tcp_complete_cwr().

The rate halving functions and logic such as tcp_cwnd_down(), tcp_cwnd_min(),
and the cwnd moderation inside tcp_enter_cwr() are removed. The unused
parameter, flag, in tcp_cwnd_reduction() is also removed.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 include/net/tcp.h     |   10 +++-
 net/ipv4/tcp_input.c  |  119 +++++++++++++++++--------------------------------
 net/ipv4/tcp_output.c |    6 +-
 3 files changed, 52 insertions(+), 83 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1421b02..a8cb00c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -913,15 +913,21 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
 	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
 }
 
+static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
+{
+	return (TCPF_CA_CWR | TCPF_CA_Recovery) &
+	       (1 << inet_csk(sk)->icsk_ca_state);
+}
+
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
- * The exception is rate halving phase, when cwnd is decreasing towards
+ * The exception is cwnd reduction phase, when cwnd is decreasing towards
  * ssthresh.
  */
 static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
+	if (tcp_in_cwnd_reduction(sk))
 		return tp->snd_ssthresh;
 	else
 		return max(tp->snd_ssthresh,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38589e4..e2bec81 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2470,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-	return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int decr = tp->snd_cwnd_cnt + 1;
-
-	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr & 1;
-		decr >>= 1;
-
-		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-			tp->snd_cwnd -= decr;
-
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-	}
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
@@ -2700,9 +2671,8 @@ static bool tcp_try_undo_loss(struct sock *sk)
 	return false;
 }
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
  * It computes the number of packets to send (sndcnt) based on packets newly
  * delivered:
  *   1) If the packets in flight is larger than ssthresh, PRR spreads the
@@ -2711,13 +2681,29 @@ static bool tcp_try_undo_loss(struct sock *sk)
  *	losses and/or application stalls), do not perform any further cwnd
  *	reductions, but instead slow start up to ssthresh.
  */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-					int fast_rexmit, int flag)
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->high_seq = tp->snd_nxt;
+	tp->bytes_acked = 0;
+	tp->snd_cwnd_cnt = 0;
+	tp->prior_cwnd = tp->snd_cwnd;
+	tp->prr_delivered = 0;
+	tp->prr_out = 0;
+	if (set_ssthresh)
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+	TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+			       int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int sndcnt = 0;
 	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
 
+	tp->prr_delivered += newly_acked_sacked;
 	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
 			       tp->prior_cwnd - 1;
@@ -2732,43 +2718,29 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
 	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
 }
 
-static inline void tcp_complete_cwr(struct sock *sk)
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
-	if (tp->undo_marker) {
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		} else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-			/* PRR algorithm. */
-			tp->snd_cwnd = tp->snd_ssthresh;
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		}
+	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-/* Set slow start threshold and cwnd not falling to slow start */
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
 void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
-	if (icsk->icsk_ca_state < TCP_CA_CWR) {
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		if (set_ssthresh)
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		tp->snd_cwnd = min(tp->snd_cwnd,
-				   tcp_packets_in_flight(tp) + 1U);
-		tp->snd_cwnd_cnt = 0;
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-		TCP_ECN_queue_cwr(tp);
-
+		tcp_init_cwnd_reduction(sk, set_ssthresh);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 	}
 }
@@ -2787,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2804,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_down(sk, flag);
+		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
 	}
 }
 
@@ -2898,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-	tp->high_seq = tp->snd_nxt;
 	tp->prior_ssthresh = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = tp->retrans_out;
@@ -2906,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-		TCP_ECN_queue_cwr(tp);
+		tcp_init_cwnd_reduction(sk, true);
 	}
-
-	tp->bytes_acked = 0;
-	tp->snd_cwnd_cnt = 0;
-	tp->prior_cwnd = tp->snd_cwnd;
-	tp->prr_delivered = 0;
-	tp->prr_out = 0;
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
@@ -2974,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			/* CWR is to be held something *above* high_seq
 			 * is ACKed for CWR bit to reach receiver. */
 			if (tp->snd_una != tp->high_seq) {
-				tcp_complete_cwr(sk);
+				tcp_end_cwnd_reduction(sk);
 				tcp_set_ca_state(sk, TCP_CA_Open);
 			}
 			break;
@@ -2984,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 				tcp_reset_reno_sack(tp);
 			if (tcp_try_undo_recovery(sk))
 				return;
-			tcp_complete_cwr(sk);
+			tcp_end_cwnd_reduction(sk);
 			break;
 		}
 	}
@@ -3025,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			tcp_try_undo_dsack(sk);
 
 		if (!tcp_time_to_recover(sk, flag)) {
-			tcp_try_to_open(sk, flag);
+			tcp_try_to_open(sk, flag, newly_acked_sacked);
 			return;
 		}
 
@@ -3047,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tp->prr_delivered += newly_acked_sacked;
-	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3394,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+		!tcp_in_cwnd_reduction(sk);
 }
 
 /* Check that window update is acceptable.
@@ -3462,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }
 
 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
 	tcp_enter_cwr(sk, 0);
 }
@@ -3472,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
 	if (flag & FLAG_ECE)
-		tcp_ratehalving_spur_to_response(sk);
+		tcp_cwr_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, true);
 }
@@ -3579,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 			tcp_conservative_spur_to_response(tp);
 			break;
 		default:
-			tcp_ratehalving_spur_to_response(sk);
+			tcp_cwr_spur_to_response(sk);
 			break;
 		}
 		tp->frto_counter = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9383b51..cfe6ffe 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2037,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (push_one)
 			break;
 	}
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
-		tp->prr_out += sent_pkts;
 
 	if (likely(sent_pkts)) {
+		if (tcp_in_cwnd_reduction(sk))
+			tp->prr_out += sent_pkts;
 		tcp_cwnd_validate(sk);
 		return false;
 	}
@@ -2542,7 +2542,7 @@ begin_fwd:
 		}
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
 		if (skb == tcp_write_queue_head(sk))
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 2/3] tcp: move tcp_update_cwnd_in_recovery
From: Yuchung Cheng @ 2012-09-03  3:38 UTC (permalink / raw)
  To: davem, ncardwell, nanditad; +Cc: mattmathis, edumazet, netdev, Yuchung Cheng
In-Reply-To: <1346643484-12947-1-git-send-email-ycheng@google.com>

Move tcp_update_cwnd_in_recovery() earlier in the file to prepare for
replacing rate halving with the PRR algorithm in CWR state.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
 net/ipv4/tcp_input.c |   64 +++++++++++++++++++++++++-------------------------
 1 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ab0c75..38589e4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2700,6 +2700,38 @@ static bool tcp_try_undo_loss(struct sock *sk)
 	return false;
 }
 
+/* This function implements the PRR algorithm, specifcally the PRR-SSRB
+ * (proportional rate reduction with slow start reduction bound) as described in
+ * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ *   1) If the packets in flight is larger than ssthresh, PRR spreads the
+ *	cwnd reductions across a full RTT.
+ *   2) If packets in flight is lower than ssthresh (such as due to excess
+ *	losses and/or application stalls), do not perform any further cwnd
+ *	reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
+					int fast_rexmit, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int sndcnt = 0;
+	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+
+	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+			       tp->prior_cwnd - 1;
+		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+	} else {
+		sndcnt = min_t(int, delta,
+			       max_t(int, tp->prr_delivered - tp->prr_out,
+				     newly_acked_sacked) + 1);
+	}
+
+	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2854,38 +2886,6 @@ void tcp_simple_retransmit(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_simple_retransmit);
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
- * It computes the number of packets to send (sndcnt) based on packets newly
- * delivered:
- *   1) If the packets in flight is larger than ssthresh, PRR spreads the
- *	cwnd reductions across a full RTT.
- *   2) If packets in flight is lower than ssthresh (such as due to excess
- *	losses and/or application stalls), do not perform any further cwnd
- *	reductions, but instead slow start up to ssthresh.
- */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-					int fast_rexmit, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int sndcnt = 0;
-	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
-
-	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
-		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
-			       tp->prior_cwnd - 1;
-		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
-	} else {
-		sndcnt = min_t(int, delta,
-			       max_t(int, tp->prr_delivered - tp->prr_out,
-				     newly_acked_sacked) + 1);
-	}
-
-	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
-	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
-}
-
 static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-- 
1.7.7.3

^ permalink raw reply related

* Re: [PATCH 1/3] tcp: move tcp_enter_cwr()
From: Neal Cardwell @ 2012-09-03  3:56 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: David Miller, Nandita Dukkipati, Matt Mathis, Eric Dumazet,
	Netdev
In-Reply-To: <1346643484-12947-1-git-send-email-ycheng@google.com>

On Sun, Sep 2, 2012 at 11:38 PM, Yuchung Cheng <ycheng@google.com> wrote:
> To prepare replacing rate halving with PRR algorithm in CWR state.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Neal Cardwell <ncardwell@google.com>

neal

^ permalink raw reply

* Re: [PATCH 2/3] tcp: move tcp_update_cwnd_in_recovery
From: Neal Cardwell @ 2012-09-03  3:57 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: David Miller, Nandita Dukkipati, Matt Mathis, Eric Dumazet,
	Netdev
In-Reply-To: <1346643484-12947-2-git-send-email-ycheng@google.com>

On Sun, Sep 2, 2012 at 11:38 PM, Yuchung Cheng <ycheng@google.com> wrote:
> To prepare replacing rate halving with PRR algorithm in CWR state.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Neal Cardwell <ncardwell@google.com>

neal

^ permalink raw reply

* Re: [PATCH 3/3] tcp: use PRR to reduce cwin in CWR state
From: Neal Cardwell @ 2012-09-03  4:00 UTC (permalink / raw)
  To: Yuchung Cheng
  Cc: David Miller, Nandita Dukkipati, Matt Mathis, Eric Dumazet,
	Netdev
In-Reply-To: <1346643484-12947-3-git-send-email-ycheng@google.com>

On Sun, Sep 2, 2012 at 11:38 PM, Yuchung Cheng <ycheng@google.com> wrote:
> Use proportional rate reduction (PRR) algorithm to reduce cwnd in CWR state,
> in addition to Recovery state. Retire the current rate-halving in CWR.
> When losses are detected via ACKs in CWR state, the sender enters Recovery
> state but the cwnd reduction continues and does not restart.
>
> Rename and refactor cwnd reduction functions since both CWR and Recovery
> use the same algorithm:
> tcp_init_cwnd_reduction() is new and initiates reduction state variables.
> tcp_cwnd_reduction() is previously tcp_update_cwnd_in_recovery().
> tcp_ends_cwnd_reduction() is previously  tcp_complete_cwr().
>
> The rate halving functions and logic such as tcp_cwnd_down(), tcp_min_cwnd(),
> and the cwnd moderation inside tcp_enter_cwr() are removed. The unused
> parameter, flag, in tcp_cwnd_reduction() is also removed.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Acked-by: Neal Cardwell <ncardwell@google.com>

neal

^ permalink raw reply

* Question: routing packets via specific router in LAN?
From: Yi Li @ 2012-09-03  6:04 UTC (permalink / raw)
  To: netdev

Hi All,
I have three machines, server --- router --- client,
and each of them has only one IP address, all in the same LAN.
I want the packets to flow through the router when the
server and client communicate.
I have done the following to set this up:
on the server:
# ip route add to unicast CLIENT_IP/32 via ROUTER_IP dev eth0
# echo 0 > /proc/sys/net/ipv4/conf/all/accept_redirects
# echo 0 > /proc/sys/net/ipv4/conf/eth0/accept_redirects

on the client:
/*modify route table*/
# ip route add to unicast SERVER_IP/32 via ROUTER_IP dev eth0
/*disable icmp-redirects accept*/
# echo 0 > /proc/sys/net/ipv4/conf/all/accept_redirects
# echo 0 > /proc/sys/net/ipv4/conf/eth0/accept_redirects

on the router:
/*enable forwarding*/
# echo 1 > /proc/sys/net/ipv4/ip_forwarding
/*disable icmp-redirects*/
# echo 0 > /proc/sys/net/ipv4/conf/all/send_redirects
# echo 0 > /proc/sys/net/ipv4/conf/eth0/send_redirects

BTW, I have disabled iptables on all of these three machines.

But I still can't capture any packets with tcpdump on the router, which
means no packets are flowing through the router. The server and client
communicate directly, bypassing the router!

So, what have I missed, or is it not possible to set up a
server-router-client topology in a LAN?
Thanks in advance.

^ permalink raw reply

* Support for draft-ietf-6man-impatient-nud-02
From: Kiran (Kiran Kumar) Kella @ 2012-09-03  6:19 UTC (permalink / raw)
  To: netdev@vger.kernel.org

Hi All,

   I would like to know whether a patch exists that supports the new NDP state UNREACHABLE as per "draft-ietf-6man-impatient-nud-02" (Neighbor Unreachability Detection is too impatient).
It is useful to have these changes supported as part of the initiatives to mitigate the effects of DoS attacks as mentioned in RFC 6583 (Operational Neighbor Discovery Problems).

I couldn't find any related emails in the netdev archives.

Appreciate your response.

Regards,
Kiran

^ permalink raw reply

* [PATCH] mISDN: fix possible memory leak in hfcmulti_init()
From: Wei Yongjun @ 2012-09-03  7:31 UTC (permalink / raw)
  To: isdn; +Cc: yongjun_wei, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

hc has been allocated in this function, but it is not freed before
returning from some of the error handling paths.

spatch with a semantic match was used to find this problem.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 5e402cf..f027942 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -5059,6 +5059,7 @@ hfcmulti_init(struct hm_map *m, struct pci_dev *pdev,
 				printk(KERN_INFO
 				       "HFC-E1 #%d has overlapping B-channels on fragment #%d\n",
 				       E1_cnt + 1, pt);
+				kfree(hc);
 				return -EINVAL;
 			}
 			maskcheck |= hc->bmask[pt];
@@ -5086,6 +5087,7 @@ hfcmulti_init(struct hm_map *m, struct pci_dev *pdev,
 	if ((poll >> 1) > sizeof(hc->silence_data)) {
 		printk(KERN_ERR "HFCMULTI error: silence_data too small, "
 		       "please fix\n");
+		kfree(hc);
 		return -EINVAL;
 	}
 	for (i = 0; i < (poll >> 1); i++)

^ permalink raw reply related

* [net-next.git 0/7] stmmac: remove dead code for STMMAC_TIMER and add new mitigation schema.
From: Giuseppe CAVALLARO @ 2012-09-03  7:46 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro

This patch series removes the STMMAC_TIMER option, which is no longer updated
and was never used, and adds a new mitigation schema.
Removing the Timer option has made the driver slimmer.
On top of this work, it has been easier to introduce the new
mitigation schema based on the HW RX-watchdog (available in new cores).
In fact, 3.50 and newer cores have a HW RX-Watchdog that can be used for
mitigating the Rx-interrupts, and first results look promising.

Running n-u-t-t-c-p with the following parameters:

 Throughput: 500Mbps
 UDP Buffer size: 1328bytes
 TCP Buffer size: 65536bytes

for example, I got on ST box (arm-based) these improvements:

--------------------------------------------------------------------
      Original                   |     With New Mitigation patch
--------------------------------------------------------------------
 Test        CPU usage  pkt/loss |        CPU usage     pkt/loss
 Type  Mbps        %         %   |Mbps      %              %
--------------------------------------------------------------------
UDP-RX 395.5065 95       20.89   |499.9966  25        0.00%
UDP-TX 499.5578 100     0.08915  |499.7156  100       0.06029%
TCP-RX 499.9221 77               |499.8648  41
TCP-TX 389.5719 99               |499.2802  79
--------------------------------------------------------------------

 ... no regression on ST boxes (SH based) I always test.

This is a brief explanation of the new mitigation schema although there
is a patch that updates the driver's documentation.

o On Rx-side I have:
  New GMACs will use the RX-watchdog timer; old ones will continue to
  use NAPI to mitigate the RX DMA interrupts.
  For the RX-watchdog, there is a parameter that is the RI Watchdog 
  Timer count. It indicates the number of system clock cycles and can be
  set via sysFS. Next step will be to tune it via ethtool.

o On Tx-side, the mitigation schema is based on a SW timer
  that calls the tx function (stmmac_tx) to reclaim the resource after
  transmitting the frames.
  Also there is another parameter (a threshold) used to program
  the descriptors so that the interrupt-on-completion bit is not set
  when the frame is sent (xmit). This means that stmmac_tx can be
  called by the ISR too. This parameter can also be tuned via sysFS, but
  not yet via ethtool.

Note1: there is a patch that updates the driver to August 2012.
I hope to also release the PTP support and update the driver in the next weeks.

Note2: next step will be to tune coalesce params via ethtool. I'll do that.

peppe

Giuseppe Cavallaro (7):
  stmmac: remove dead code for TIMER
  stmmac: manage tx clean out of rx_poll
  stmmac: add the initial tx coalesce schema
  stmmac: add Rx watchdog optimization to mitigate the DMA irqs
  stmmac: add sysFs support
  stmmac: add mitigation and sysfs info in the doc
  stmmac: update the driver version to August_2012

 Documentation/networking/stmmac.txt                |   34 ++-
 drivers/net/ethernet/stmicro/stmmac/Kconfig        |   25 --
 drivers/net/ethernet/stmicro/stmmac/Makefile       |    3 +-
 drivers/net/ethernet/stmicro/stmmac/common.h       |   30 ++-
 drivers/net/ethernet/stmicro/stmmac/dwmac1000.h    |    3 -
 .../net/ethernet/stmicro/stmmac/dwmac1000_dma.c    |    6 +
 drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h    |    3 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c    |    7 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac-sysfs.c |  157 +++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |   15 +-
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |    9 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  282 +++++++++-----------
 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c |  134 ---------
 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h |   46 ----
 14 files changed, 354 insertions(+), 400 deletions(-)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac-sysfs.c
 delete mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c
 delete mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h

-- 
1.7.4.4

^ permalink raw reply

* [net-next.git 1/7] stmmac: remove dead code for TIMER
From: Giuseppe CAVALLARO @ 2012-09-03  7:46 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1346658422-1925-1-git-send-email-peppe.cavallaro@st.com>

The TIMER option is no longer supported and this
code can be considered dead for this driver in
the new kernel series.
In fact, it was not updated at all and was never used.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig        |   25 ----
 drivers/net/ethernet/stmicro/stmmac/Makefile       |    1 -
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |    6 -
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  101 +--------------
 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c |  134 --------------------
 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h |   46 -------
 6 files changed, 3 insertions(+), 310 deletions(-)
 delete mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c
 delete mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 9f44827..1164930 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -54,31 +54,6 @@ config STMMAC_DA
 	  By default, the DMA arbitration scheme is based on Round-robin
 	  (rx:tx priority is 1:1).
 
-config STMMAC_TIMER
-	bool "STMMAC Timer optimisation"
-	default n
-	depends on RTC_HCTOSYS_DEVICE
-	---help---
-	  Use an external timer for mitigating the number of network
-	  interrupts. Currently, for SH architectures, it is possible
-	  to use the TMU channel 2 and the SH-RTC device.
-
-choice
-        prompt "Select Timer device"
-        depends on STMMAC_TIMER
-
-config STMMAC_TMU_TIMER
-        bool "TMU channel 2"
-        depends on CPU_SH4
-	---help---
-
-config STMMAC_RTC_TIMER
-        bool "Real time clock"
-        depends on RTC_CLASS
-	---help---
-
-endchoice
-
 choice
 	prompt "Select the DMA TX/RX descriptor operating modes"
 	depends on STMMAC_ETH
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index bc965ac..c8e8ea6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -1,5 +1,4 @@
 obj-$(CONFIG_STMMAC_ETH) += stmmac.o
-stmmac-$(CONFIG_STMMAC_TIMER) += stmmac_timer.o
 stmmac-$(CONFIG_STMMAC_RING) += ring_mode.o
 stmmac-$(CONFIG_STMMAC_CHAINED) += chain_mode.o
 stmmac-$(CONFIG_STMMAC_PLATFORM) += stmmac_platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e872e1d..9f35769 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -31,9 +31,6 @@
 #include <linux/phy.h>
 #include <linux/pci.h>
 #include "common.h"
-#ifdef CONFIG_STMMAC_TIMER
-#include "stmmac_timer.h"
-#endif
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
@@ -78,9 +75,6 @@ struct stmmac_priv {
 	spinlock_t tx_lock;
 	int wolopts;
 	int wol_irq;
-#ifdef CONFIG_STMMAC_TIMER
-	struct stmmac_timer *tm;
-#endif
 	struct plat_stmmacenet_data *plat;
 	struct stmmac_counters mmc;
 	struct dma_features dma_cap;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c136162..c8985f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -115,16 +115,6 @@ static int tc = TC_DEFAULT;
 module_param(tc, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(tc, "DMA threshold control value");
 
-/* Pay attention to tune this parameter; take care of both
- * hardware capability and network stabitily/performance impact.
- * Many tests showed that ~4ms latency seems to be good enough. */
-#ifdef CONFIG_STMMAC_TIMER
-#define DEFAULT_PERIODIC_RATE	256
-static int tmrate = DEFAULT_PERIODIC_RATE;
-module_param(tmrate, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(tmrate, "External timer freq. (default: 256Hz)");
-#endif
-
 #define DMA_BUFFER_SIZE	BUF_SIZE_2KiB
 static int buf_sz = DMA_BUFFER_SIZE;
 module_param(buf_sz, int, S_IRUGO | S_IWUSR);
@@ -536,12 +526,6 @@ static void init_dma_desc_rings(struct net_device *dev)
 	else
 		bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
 
-#ifdef CONFIG_STMMAC_TIMER
-	/* Disable interrupts on completion for the reception if timer is on */
-	if (likely(priv->tm->enable))
-		dis_ic = 1;
-#endif
-
 	DBG(probe, INFO, "stmmac: txsize %d, rxsize %d, bfsize %d\n",
 	    txsize, rxsize, bfsize);
 
@@ -786,22 +770,12 @@ static void stmmac_tx(struct stmmac_priv *priv)
 
 static inline void stmmac_enable_irq(struct stmmac_priv *priv)
 {
-#ifdef CONFIG_STMMAC_TIMER
-	if (likely(priv->tm->enable))
-		priv->tm->timer_start(tmrate);
-	else
-#endif
-		priv->hw->dma->enable_dma_irq(priv->ioaddr);
+	priv->hw->dma->enable_dma_irq(priv->ioaddr);
 }
 
 static inline void stmmac_disable_irq(struct stmmac_priv *priv)
 {
-#ifdef CONFIG_STMMAC_TIMER
-	if (likely(priv->tm->enable))
-		priv->tm->timer_stop();
-	else
-#endif
-		priv->hw->dma->disable_dma_irq(priv->ioaddr);
+	priv->hw->dma->disable_dma_irq(priv->ioaddr);
 }
 
 static int stmmac_has_work(struct stmmac_priv *priv)
@@ -829,25 +803,6 @@ static inline void _stmmac_schedule(struct stmmac_priv *priv)
 	}
 }
 
-#ifdef CONFIG_STMMAC_TIMER
-void stmmac_schedule(struct net_device *dev)
-{
-	struct stmmac_priv *priv = netdev_priv(dev);
-
-	priv->xstats.sched_timer_n++;
-
-	_stmmac_schedule(priv);
-}
-
-static void stmmac_no_timer_started(unsigned int x)
-{;
-};
-
-static void stmmac_no_timer_stopped(void)
-{;
-};
-#endif
-
 /**
  * stmmac_tx_err:
  * @priv: pointer to the private device structure
@@ -1049,23 +1004,6 @@ static int stmmac_open(struct net_device *dev)
 	struct stmmac_priv *priv = netdev_priv(dev);
 	int ret;
 
-#ifdef CONFIG_STMMAC_TIMER
-	priv->tm = kzalloc(sizeof(struct stmmac_timer *), GFP_KERNEL);
-	if (unlikely(priv->tm == NULL))
-		return -ENOMEM;
-
-	priv->tm->freq = tmrate;
-
-	/* Test if the external timer can be actually used.
-	 * In case of failure continue without timer. */
-	if (unlikely((stmmac_open_ext_timer(dev, priv->tm)) < 0)) {
-		pr_warning("stmmaceth: cannot attach the external timer.\n");
-		priv->tm->freq = 0;
-		priv->tm->timer_start = stmmac_no_timer_started;
-		priv->tm->timer_stop = stmmac_no_timer_stopped;
-	} else
-		priv->tm->enable = 1;
-#endif
 	clk_enable(priv->stmmac_clk);
 
 	stmmac_check_ether_addr(priv);
@@ -1152,10 +1090,6 @@ static int stmmac_open(struct net_device *dev)
 	priv->hw->dma->start_tx(priv->ioaddr);
 	priv->hw->dma->start_rx(priv->ioaddr);
 
-#ifdef CONFIG_STMMAC_TIMER
-	priv->tm->timer_start(tmrate);
-#endif
-
 	/* Dump DMA/MAC registers */
 	if (netif_msg_hw(priv)) {
 		priv->hw->mac->dump_regs(priv->ioaddr);
@@ -1182,9 +1116,6 @@ open_error_wolirq:
 	free_irq(dev->irq, dev);
 
 open_error:
-#ifdef CONFIG_STMMAC_TIMER
-	kfree(priv->tm);
-#endif
 	if (priv->phydev)
 		phy_disconnect(priv->phydev);
 
@@ -1215,12 +1146,6 @@ static int stmmac_release(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-#ifdef CONFIG_STMMAC_TIMER
-	/* Stop and release the timer */
-	stmmac_close_ext_timer();
-	if (priv->tm != NULL)
-		kfree(priv->tm);
-#endif
 	napi_disable(&priv->napi);
 	skb_queue_purge(&priv->rx_recycle);
 
@@ -1336,12 +1261,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Interrupt on completition only for the latest segment */
 	priv->hw->desc->close_tx_desc(desc);
 
-#ifdef CONFIG_STMMAC_TIMER
-	/* Clean IC while using timer */
-	if (likely(priv->tm->enable))
-		priv->hw->desc->clear_tx_ic(desc);
-#endif
-
 	wmb();
 
 	/* To avoid raise condition */
@@ -1539,7 +1458,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
  *  stmmac_tx_timeout
  *  @dev : Pointer to net device structure
  *  Description: this function is called when a packet transmission fails to
- *   complete within a reasonable tmrate. The driver will mark the error in the
+ *   complete within a reasonable time. The driver will mark the error in the
  *   netdev structure and arrange for the device to be reset to a sane state
  *   in order to transmit a new packet.
  */
@@ -2157,11 +2076,6 @@ int stmmac_suspend(struct net_device *ndev)
 	netif_device_detach(ndev);
 	netif_stop_queue(ndev);
 
-#ifdef CONFIG_STMMAC_TIMER
-	priv->tm->timer_stop();
-	if (likely(priv->tm->enable))
-		dis_ic = 1;
-#endif
 	napi_disable(&priv->napi);
 
 	/* Stop TX/RX DMA */
@@ -2212,10 +2126,6 @@ int stmmac_resume(struct net_device *ndev)
 	priv->hw->dma->start_tx(priv->ioaddr);
 	priv->hw->dma->start_rx(priv->ioaddr);
 
-#ifdef CONFIG_STMMAC_TIMER
-	if (likely(priv->tm->enable))
-		priv->tm->timer_start(tmrate);
-#endif
 	napi_enable(&priv->napi);
 
 	netif_start_queue(ndev);
@@ -2311,11 +2221,6 @@ static int __init stmmac_cmdline_opt(char *str)
 		} else if (!strncmp(opt, "eee_timer:", 6)) {
 			if (kstrtoint(opt + 10, 0, &eee_timer))
 				goto err;
-#ifdef CONFIG_STMMAC_TIMER
-		} else if (!strncmp(opt, "tmrate:", 7)) {
-			if (kstrtoint(opt + 7, 0, &tmrate))
-				goto err;
-#endif
 		}
 	}
 	return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c
deleted file mode 100644
index 2a0e1ab..0000000
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*******************************************************************************
-  STMMAC external timer support.
-
-  Copyright (C) 2007-2009  STMicroelectronics Ltd
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
-*******************************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/etherdevice.h>
-#include "stmmac_timer.h"
-
-static void stmmac_timer_handler(void *data)
-{
-	struct net_device *dev = (struct net_device *)data;
-
-	stmmac_schedule(dev);
-}
-
-#define STMMAC_TIMER_MSG(timer, freq) \
-printk(KERN_INFO "stmmac_timer: %s Timer ON (freq %dHz)\n", timer, freq);
-
-#if defined(CONFIG_STMMAC_RTC_TIMER)
-#include <linux/rtc.h>
-static struct rtc_device *stmmac_rtc;
-static rtc_task_t stmmac_task;
-
-static void stmmac_rtc_start(unsigned int new_freq)
-{
-	rtc_irq_set_freq(stmmac_rtc, &stmmac_task, new_freq);
-	rtc_irq_set_state(stmmac_rtc, &stmmac_task, 1);
-}
-
-static void stmmac_rtc_stop(void)
-{
-	rtc_irq_set_state(stmmac_rtc, &stmmac_task, 0);
-}
-
-int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm)
-{
-	stmmac_task.private_data = dev;
-	stmmac_task.func = stmmac_timer_handler;
-
-	stmmac_rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE);
-	if (stmmac_rtc == NULL) {
-		pr_err("open rtc device failed\n");
-		return -ENODEV;
-	}
-
-	rtc_irq_register(stmmac_rtc, &stmmac_task);
-
-	/* Periodic mode is not supported */
-	if ((rtc_irq_set_freq(stmmac_rtc, &stmmac_task, tm->freq) < 0)) {
-		pr_err("set periodic failed\n");
-		rtc_irq_unregister(stmmac_rtc, &stmmac_task);
-		rtc_class_close(stmmac_rtc);
-		return -1;
-	}
-
-	STMMAC_TIMER_MSG(CONFIG_RTC_HCTOSYS_DEVICE, tm->freq);
-
-	tm->timer_start = stmmac_rtc_start;
-	tm->timer_stop = stmmac_rtc_stop;
-
-	return 0;
-}
-
-int stmmac_close_ext_timer(void)
-{
-	rtc_irq_set_state(stmmac_rtc, &stmmac_task, 0);
-	rtc_irq_unregister(stmmac_rtc, &stmmac_task);
-	rtc_class_close(stmmac_rtc);
-	return 0;
-}
-
-#elif defined(CONFIG_STMMAC_TMU_TIMER)
-#include <linux/clk.h>
-#define TMU_CHANNEL "tmu2_clk"
-static struct clk *timer_clock;
-
-static void stmmac_tmu_start(unsigned int new_freq)
-{
-	clk_set_rate(timer_clock, new_freq);
-	clk_enable(timer_clock);
-}
-
-static void stmmac_tmu_stop(void)
-{
-	clk_disable(timer_clock);
-}
-
-int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm)
-{
-	timer_clock = clk_get(NULL, TMU_CHANNEL);
-
-	if (timer_clock == NULL)
-		return -1;
-
-	if (tmu2_register_user(stmmac_timer_handler, (void *)dev) < 0) {
-		timer_clock = NULL;
-		return -1;
-	}
-
-	STMMAC_TIMER_MSG("TMU2", tm->freq);
-	tm->timer_start = stmmac_tmu_start;
-	tm->timer_stop = stmmac_tmu_stop;
-
-	return 0;
-}
-
-int stmmac_close_ext_timer(void)
-{
-	clk_disable(timer_clock);
-	tmu2_unregister_user();
-	clk_put(timer_clock);
-	return 0;
-}
-#endif
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h
deleted file mode 100644
index aea9b14..0000000
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*******************************************************************************
-  STMMAC external timer Header File.
-
-  Copyright (C) 2007-2009  STMicroelectronics Ltd
-
-  This program is free software; you can redistribute it and/or modify it
-  under the terms and conditions of the GNU General Public License,
-  version 2, as published by the Free Software Foundation.
-
-  This program is distributed in the hope it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-  more details.
-
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-
-  The full GNU General Public License is included in this distribution in
-  the file called "COPYING".
-
-  Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
-*******************************************************************************/
-#ifndef __STMMAC_TIMER_H__
-#define __STMMAC_TIMER_H__
-
-struct stmmac_timer {
-	void (*timer_start) (unsigned int new_freq);
-	void (*timer_stop) (void);
-	unsigned int freq;
-	unsigned int enable;
-};
-
-/* Open the HW timer device and return 0 in case of success */
-int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm);
-/* Stop the timer and release it */
-int stmmac_close_ext_timer(void);
-/* Function used for scheduling task within the stmmac */
-void stmmac_schedule(struct net_device *dev);
-
-#if defined(CONFIG_STMMAC_TMU_TIMER)
-extern int tmu2_register_user(void *fnt, void *data);
-extern void tmu2_unregister_user(void);
-#endif
-
-#endif /* __STMMAC_TIMER_H__ */
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 2/7] stmmac: manage tx clean out of rx_poll
From: Giuseppe CAVALLARO @ 2012-09-03  7:46 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1346658422-1925-1-git-send-email-peppe.cavallaro@st.com>

This patch invokes stmmac_tx (the tx handler)
outside of the NAPI poll method.
This will make the next step, adding the new
mitigation scheme, easier.
The patch also enhances the ethtool support to report some
stats for normal TX and RX IRQs.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |   13 +++++++----
 drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c    |    7 +++--
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |    4 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   22 ++++++++++++++-----
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 719be39..bd32fe6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -95,7 +95,9 @@ struct stmmac_extra_stats {
 	unsigned long threshold;
 	unsigned long tx_pkt_n;
 	unsigned long rx_pkt_n;
-	unsigned long poll_n;
+	unsigned long rx_napi_poll;
+	unsigned long rx_normal_irq_n;
+	unsigned long tx_normal_irq_n;
 	unsigned long sched_timer_n;
 	unsigned long normal_irq_n;
 	unsigned long mmc_tx_irq_n;
@@ -169,10 +171,11 @@ enum rx_frame_status { /* IPC status */
 	llc_snap = 4,
 };
 
-enum tx_dma_irq_status {
-	tx_hard_error = 1,
-	tx_hard_error_bump_tc = 2,
-	handle_tx_rx = 3,
+enum dma_irq_status {
+	tx_hard_error = 0x1,
+	tx_hard_error_bump_tc = 0x2,
+	handle_rx = 0x4,
+	handle_tx = 0x8,
 };
 
 enum core_specific_irq_mask {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 4e0e18a..73766e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -206,9 +206,10 @@ int dwmac_dma_interrupt(void __iomem *ioaddr,
 	/* TX/RX NORMAL interrupts */
 	if (intr_status & DMA_STATUS_NIS) {
 		x->normal_irq_n++;
-		if (likely((intr_status & DMA_STATUS_RI) ||
-			 (intr_status & (DMA_STATUS_TI))))
-				ret = handle_tx_rx;
+		if (likely(intr_status & DMA_STATUS_RI))
+			ret |= handle_rx;
+		if (intr_status & (DMA_STATUS_TI))
+			ret |= handle_tx;
 	}
 	/* Optional hardware blocks, interrupts should be disabled */
 	if (unlikely(intr_status &
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 76fd61a..505fe71 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -90,7 +90,9 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(threshold),
 	STMMAC_STAT(tx_pkt_n),
 	STMMAC_STAT(rx_pkt_n),
-	STMMAC_STAT(poll_n),
+	STMMAC_STAT(rx_napi_poll),
+	STMMAC_STAT(rx_normal_irq_n),
+	STMMAC_STAT(tx_normal_irq_n),
 	STMMAC_STAT(sched_timer_n),
 	STMMAC_STAT(normal_irq_n),
 	STMMAC_STAT(normal_irq_n),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c8985f3..b247c39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -824,16 +824,27 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
 	netif_wake_queue(priv->dev);
 }
 
+static inline void stmmac_rx_schedule(struct stmmac_priv *priv)
+{
+	if (likely(napi_schedule_prep(&priv->napi))) {
+		stmmac_disable_irq(priv);
+		__napi_schedule(&priv->napi);
+	}
+}
 
 static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 {
 	int status;
 
 	status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
-	if (likely(status == handle_tx_rx))
-		_stmmac_schedule(priv);
-
-	else if (unlikely(status == tx_hard_error_bump_tc)) {
+	if (likely(status == handle_rx)) {
+		priv->xstats.rx_normal_irq_n++;
+		stmmac_rx_schedule(priv);
+	}
+	if (likely(status == handle_tx)) {
+		priv->xstats.tx_normal_irq_n++;
+		stmmac_tx(priv);
+	} else if (unlikely(status == tx_hard_error_bump_tc)) {
 		/* Try to bump up the dma threshold on this failure */
 		if (unlikely(tc != SF_DMA_MODE) && (tc <= 256)) {
 			tc += 64;
@@ -1443,8 +1454,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 	struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
 	int work_done = 0;
 
-	priv->xstats.poll_n++;
-	stmmac_tx(priv);
+	priv->xstats.rx_napi_poll++;
 	work_done = stmmac_rx(priv, budget);
 
 	if (work_done < budget) {
-- 
1.7.4.4

^ permalink raw reply related

* Re: [PATCH v2] netfilter: take care of timewait sockets
From: Florian Westphal @ 2012-09-03  7:47 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Florian Westphal, Sami Farin, netdev, e1000-devel
In-Reply-To: <1346629684.2563.78.camel@edumazet-glaptop>

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Sami Farin reported crashes in xt_LOG because it assumes skb->sk is a
> full blown socket.
> 
> But with TCP early demux, we can have skb->sk pointing to a timewait
> socket.
> 
> Same fix is needed in nfnetlink_log

Looks good, but IMHO it is very un-intuitive that
skb->sk might be a pointer to an object that is not struct sock (or
a compatible object).

^ permalink raw reply

* [net-next.git 4/7] stmmac: add Rx watchdog optimization to mitigate the DMA irqs
From: Giuseppe CAVALLARO @ 2012-09-03  7:46 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1346658422-1925-1-git-send-email-peppe.cavallaro@st.com>

New GMAC devices (3.50 and newer) have an embedded timer
that can be used for mitigating the number of interrupts.
So this patch adds this optimization.
Older MACs will continue to use NAPI.
In this implementation the rx timer stored in the Reg9 is fixed
to the max value.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |    7 ++
 drivers/net/ethernet/stmicro/stmmac/dwmac1000.h    |    3 -
 .../net/ethernet/stmicro/stmmac/dwmac1000_dma.c    |    6 ++
 drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h    |    3 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   67 +++++++++++++------
 5 files changed, 61 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1d6bd3e..63d4bad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -48,6 +48,10 @@
 #define CHIP_DBG(fmt, args...)  do { } while (0)
 #endif
 
+/* Synopsys Core versions */
+#define	DWMAC_CORE_3_40	0x34
+#define	DWMAC_CORE_3_50	0x35
+
 #undef FRAME_FILTER_DEBUG
 /* #define FRAME_FILTER_DEBUG */
 
@@ -165,6 +169,7 @@ struct stmmac_extra_stats {
 #define DMA_HW_FEAT_SAVLANINS	0x08000000 /* Source Addr or VLAN Insertion */
 #define DMA_HW_FEAT_ACTPHYIF	0x70000000 /* Active/selected PHY interface */
 #define DEFAULT_DMA_PBL		8
+#define DEFAULT_DMA_RIWT	0xff	/* Max RI Watchdog Timer count */
 
 enum rx_frame_status { /* IPC status */
 	good_frame = 0,
@@ -301,6 +306,8 @@ struct stmmac_dma_ops {
 			      struct stmmac_extra_stats *x);
 	/* If supported then get the optional core features */
 	unsigned int (*get_hw_feature) (void __iomem *ioaddr);
+	/* Manage HW RX Watchdog*/
+	void (*rx_watchdog) (void __iomem *ioaddr, u8 timer);
 };
 
 struct stmmac_ops {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 0e4cace..7ad56af 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -230,8 +230,5 @@ enum rtc_control {
 #define GMAC_MMC_TX_INTR   0x108
 #define GMAC_MMC_RX_CSUM_OFFLOAD   0x208
 
-/* Synopsys Core versions */
-#define	DWMAC_CORE_3_40	0x34
-
 extern const struct stmmac_dma_ops dwmac1000_dma_ops;
 #endif /* __DWMAC1000_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 0335000..e2c9431 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -174,6 +174,11 @@ static unsigned int dwmac1000_get_hw_feature(void __iomem *ioaddr)
 	return readl(ioaddr + DMA_HW_FEATURE);
 }
 
+static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u8 timer)
+{
+	writel(timer, ioaddr + DMA_RX_WATCHDOG);
+}
+
 const struct stmmac_dma_ops dwmac1000_dma_ops = {
 	.init = dwmac1000_dma_init,
 	.dump_regs = dwmac1000_dump_dma_regs,
@@ -187,4 +192,5 @@ const struct stmmac_dma_ops dwmac1000_dma_ops = {
 	.stop_rx = dwmac_dma_stop_rx,
 	.dma_interrupt = dwmac_dma_interrupt,
 	.get_hw_feature = dwmac1000_get_hw_feature,
+	.rx_watchdog = dwmac1000_rx_watchdog,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index e49c9a0..4eeff5d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -35,7 +35,8 @@
 #define DMA_CONTROL		0x00001018	/* Ctrl (Operational Mode) */
 #define DMA_INTR_ENA		0x0000101c	/* Interrupt Enable */
 #define DMA_MISSED_FRAME_CTR	0x00001020	/* Missed Frame Counter */
-#define DMA_AXI_BUS_MODE       0x00001028      /* AXI Bus Mode */
+#define DMA_RX_WATCHDOG		0x00001024	/* Receive Int Watchdog Timer */
+#define DMA_AXI_BUS_MODE	0x00001028      /* AXI Bus Mode */
 #define DMA_CUR_TX_BUF_ADDR	0x00001050	/* Current Host Tx Buffer */
 #define DMA_CUR_RX_BUF_ADDR	0x00001054	/* Current Host Rx Buffer */
 #define DMA_HW_FEATURE		0x00001058	/* HW Feature Register */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7f5482..bafe694 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -133,6 +133,7 @@ MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
 #define STMMAC_LPI_TIMER(x) (jiffies + msecs_to_jiffies(x))
 
 static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
+static int stmmac_rx(struct stmmac_priv *priv, int limit);
 
 #ifdef CONFIG_STMMAC_DEBUG_FS
 static int stmmac_init_fs(struct net_device *dev);
@@ -481,7 +482,6 @@ static void display_ring(struct dma_desc *p, int size)
 		       i, (unsigned int)virt_to_phys(&p[i]),
 		       (unsigned int)(x->a), (unsigned int)((x->a) >> 32),
 		       x->b, x->c);
-		pr_info("\n");
 	}
 }
 
@@ -516,7 +516,7 @@ static void init_dma_desc_rings(struct net_device *dev)
 	unsigned int txsize = priv->dma_tx_size;
 	unsigned int rxsize = priv->dma_rx_size;
 	unsigned int bfsize;
-	int dis_ic = 0;
+	int dis_ic = 1;
 	int des3_as_data_buf = 0;
 
 	/* Set the max buffer size according to the DESC mode
@@ -603,6 +603,8 @@ static void init_dma_desc_rings(struct net_device *dev)
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
 
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		dis_ic = 0;
 	/* Clear the Rx/Tx descriptors */
 	priv->hw->desc->init_rx_desc(priv->dma_rx, rxsize, dis_ic);
 	priv->hw->desc->init_tx_desc(priv->dma_tx, txsize);
@@ -746,7 +748,7 @@ static void stmmac_tx(struct stmmac_priv *priv)
 				skb_recycle_check(skb, priv->dma_buf_sz))
 				__skb_queue_head(&priv->rx_recycle, skb);
 			else
-				dev_kfree_skb(skb);
+				dev_kfree_skb_any(skb);
 
 			priv->tx_skbuff[entry] = NULL;
 		}
@@ -816,12 +818,15 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
 	netif_wake_queue(priv->dev);
 }
 
-static void stmmac_rx_schedule(struct stmmac_priv *priv)
+static void stmmac_rx_work(struct stmmac_priv *priv)
 {
-	if (likely(napi_schedule_prep(&priv->napi))) {
-		stmmac_disable_irq(priv);
-		__napi_schedule(&priv->napi);
-	}
+	if (priv->synopsys_id < DWMAC_CORE_3_50) {
+		if (likely(napi_schedule_prep(&priv->napi))) {
+			stmmac_disable_irq(priv);
+			__napi_schedule(&priv->napi);
+		}
+	} else
+		stmmac_rx(priv, priv->dma_rx_size);
 }
 
 static void stmmac_dma_interrupt(struct stmmac_priv *priv)
@@ -831,7 +836,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
 	if (likely(status == handle_rx)) {
 		priv->xstats.rx_normal_irq_n++;
-		stmmac_rx_schedule(priv);
+		stmmac_rx_work(priv);
 	}
 	if (likely(status == handle_tx)) {
 		priv->xstats.tx_normal_irq_n++;
@@ -1139,7 +1144,17 @@ static int stmmac_open(struct net_device *dev)
 	if (!ret)
 		add_timer(&priv->txtimer);
 
-	napi_enable(&priv->napi);
+	/* Enable NAPI on chip older than the 3.50 where the Rx watchdog
+	 * is not supported.
+	 */
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		napi_enable(&priv->napi);
+	else if (priv->hw->dma->rx_watchdog)
+		/* Program RX Watchdog register to the default values
+		 * FIXME: provide user value for RIWT
+		 */
+		priv->hw->dma->rx_watchdog(priv->ioaddr, DEFAULT_DMA_RIWT);
+
 	skb_queue_head_init(&priv->rx_recycle);
 	netif_start_queue(dev);
 
@@ -1183,7 +1198,8 @@ static int stmmac_release(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	napi_disable(&priv->napi);
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		napi_disable(&priv->napi);
 	skb_queue_purge(&priv->rx_recycle);
 
 	/* Free the IRQ lines */
@@ -1448,14 +1464,15 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 #endif
 			skb->protocol = eth_type_trans(skb, priv->dev);
 
-			if (unlikely(!priv->plat->rx_coe)) {
-				/* No RX COE for old mac10/100 devices */
+			if (unlikely(!priv->plat->rx_coe))
 				skb_checksum_none_assert(skb);
-				netif_receive_skb(skb);
-			} else {
+			else
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+			if (priv->synopsys_id < DWMAC_CORE_3_50)
 				napi_gro_receive(&priv->napi, skb);
-			}
+			else
+				netif_rx(skb);
 
 			priv->dev->stats.rx_packets++;
 			priv->dev->stats.rx_bytes += frame_len;
@@ -2025,7 +2042,10 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 	if (flow_ctrl)
 		priv->flow_ctrl = FLOW_AUTO;	/* RX/TX pause on */
 
-	netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
+	else
+		pr_info(" Enable Mitigation via HW RX_Watchdog Timer\n");
 
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->tx_lock);
@@ -2068,7 +2088,8 @@ error_mdio_register:
 error_clk_get:
 	unregister_netdev(ndev);
 error_netdev_register:
-	netif_napi_del(&priv->napi);
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		netif_napi_del(&priv->napi);
 	free_netdev(ndev);
 
 	return NULL;
@@ -2102,7 +2123,7 @@ int stmmac_dvr_remove(struct net_device *ndev)
 int stmmac_suspend(struct net_device *ndev)
 {
 	struct stmmac_priv *priv = netdev_priv(ndev);
-	int dis_ic = 0;
+	int dis_ic = 1;
 	unsigned long flags;
 
 	if (!ndev || !netif_running(ndev))
@@ -2116,11 +2137,14 @@ int stmmac_suspend(struct net_device *ndev)
 	netif_device_detach(ndev);
 	netif_stop_queue(ndev);
 
-	napi_disable(&priv->napi);
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		napi_disable(&priv->napi);
 
 	/* Stop TX/RX DMA */
 	priv->hw->dma->stop_tx(priv->ioaddr);
 	priv->hw->dma->stop_rx(priv->ioaddr);
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		dis_ic = 0;
 	/* Clear the Rx/Tx descriptors */
 	priv->hw->desc->init_rx_desc(priv->dma_rx, priv->dma_rx_size,
 				     dis_ic);
@@ -2166,7 +2190,8 @@ int stmmac_resume(struct net_device *ndev)
 	priv->hw->dma->start_tx(priv->ioaddr);
 	priv->hw->dma->start_rx(priv->ioaddr);
 
-	napi_enable(&priv->napi);
+	if (priv->synopsys_id < DWMAC_CORE_3_50)
+		napi_enable(&priv->napi);
 
 	netif_start_queue(ndev);
 
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 5/7] stmmac: add sysFs support
From: Giuseppe CAVALLARO @ 2012-09-03  7:47 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1346658422-1925-1-git-send-email-peppe.cavallaro@st.com>

This patch adds sysfs support.
Some internal driver parameters can be tuned by using
entries exposed via sysfs. These parameters currently are,
for example, for the internal timers used to mitigate the rx/tx
interrupts or for EEE.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/Makefile       |    2 +-
 drivers/net/ethernet/stmicro/stmmac/common.h       |    8 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac-sysfs.c |  157 ++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |    3 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   36 ++---
 5 files changed, 185 insertions(+), 21 deletions(-)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac-sysfs.c

diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index c8e8ea6..4450fc6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -6,4 +6,4 @@ stmmac-$(CONFIG_STMMAC_PCI) += stmmac_pci.o
 stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o	\
 	      dwmac_lib.o dwmac1000_core.o  dwmac1000_dma.o	\
 	      dwmac100_core.o dwmac100_dma.o enh_desc.o  norm_desc.o \
-	      mmc_core.o $(stmmac-y)
+	      mmc_core.o stmmac-sysfs.o $(stmmac-y)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 63d4bad..b0b08bc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -169,7 +169,13 @@ struct stmmac_extra_stats {
 #define DMA_HW_FEAT_SAVLANINS	0x08000000 /* Source Addr or VLAN Insertion */
 #define DMA_HW_FEAT_ACTPHYIF	0x70000000 /* Active/selected PHY interface */
 #define DEFAULT_DMA_PBL		8
-#define DEFAULT_DMA_RIWT	0xff	/* Max RI Watchdog Timer count */
+#define MAX_DMA_RIWT		0xff	/* Max RI Watchdog Timer count */
+
+#define	STMMAC_COAL_TX_TIMER		40000
+#define	STMMAC_MAX_COAL_TX_TIMER	100000
+#define STMMAC_TX_MAX_FRAMES		64
+#define STMMAC_DEFAULT_LPI_TIMER	1000
+#define STMMAC_MAX_LPI_TIMER		5000
 
 enum rx_frame_status { /* IPC status */
 	good_frame = 0,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac-sysfs.c b/drivers/net/ethernet/stmicro/stmmac/stmmac-sysfs.c
new file mode 100644
index 0000000..92537a0
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac-sysfs.c
@@ -0,0 +1,157 @@
+/*******************************************************************************
+  STMMAC sysfs module
+
+	Copyright(C) 2012 STMicroelectronics Ltd
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+*******************************************************************************/
+
+#include <linux/sysfs.h>
+
+#include "stmmac.h"
+
+/* EEE Timer attribute */
+static ssize_t eee_timer_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%u", (u32) priv->eee_timer);
+}
+
+static ssize_t eee_timer_store(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t count)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	int eee_timer;
+
+	sscanf(buf, "%u", &eee_timer);
+
+	if ((eee_timer <= 0) || (eee_timer > STMMAC_MAX_LPI_TIMER))
+		pr_err("stmmac: invalid EEE timer value\n");
+	else
+		priv->eee_timer = eee_timer;
+
+	return count;
+}
+
+/* TX coalesce parameters */
+static ssize_t tx_coal_timer_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%u", (u32) priv->tx_coal_timer);
+}
+
+static ssize_t tx_coal_timer_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	int tx_coal_timer;
+
+	sscanf(buf, "%u", &tx_coal_timer);
+
+	if ((tx_coal_timer <= 0) || (tx_coal_timer > STMMAC_MAX_COAL_TX_TIMER))
+		pr_err("stmmac: Tx coalesce timer value\n");
+	else
+		priv->tx_coal_timer = tx_coal_timer;
+
+	return count;
+}
+
+static ssize_t tx_coal_frames_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%u", (u32) priv->tx_coal_frames);
+}
+
+static ssize_t tx_coal_frames_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	int tx_coal_frames;
+
+	sscanf(buf, "%u", &tx_coal_frames);
+
+	if ((tx_coal_frames <= 0) || (tx_coal_frames >= STMMAC_TX_MAX_FRAMES))
+		pr_err("stmmac: invalid Tx coalesce value\n");
+	else
+		priv->tx_coal_frames = tx_coal_frames;
+
+	return count;
+}
+
+/* RX coalesce parameters */
+static ssize_t rx_riwt_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%u", (u16) priv->rx_riwt);
+}
+
+static ssize_t rx_riwt_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct stmmac_priv *priv = netdev_priv(to_net_dev(dev));
+
+	int rx_riwt;
+
+	sscanf(buf, "%u", &rx_riwt);
+
+	if ((rx_riwt <= 0) || (rx_riwt >= MAX_DMA_RIWT)) {
+		pr_err("stmmac: invalid RX WDT timer value\n");
+	} else {
+		priv->rx_riwt = rx_riwt;
+		priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt);
+	}
+
+	return count;
+}
+
+DEVICE_ATTR(eee_timer, 0644, eee_timer_show, eee_timer_store);
+DEVICE_ATTR(tx_coal_timer, 0644, tx_coal_timer_show,
+	    tx_coal_timer_store);
+DEVICE_ATTR(tx_coal_frames, 0644, tx_coal_frames_show,
+	    tx_coal_frames_store);
+DEVICE_ATTR(rx_riwt, 0644, rx_riwt_show, rx_riwt_store);
+
+void stmmac_create_sysfs(struct net_device *dev)
+{
+	int rc;
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	rc = device_create_file(&dev->dev, &dev_attr_tx_coal_timer);
+	rc |= device_create_file(&dev->dev, &dev_attr_tx_coal_frames);
+	if (priv->synopsys_id >= DWMAC_CORE_3_50)
+		rc |= device_create_file(&dev->dev, &dev_attr_rx_riwt);
+	if (priv->eee_enabled)
+		rc |= device_create_file(&dev->dev, &dev_attr_eee_timer);
+	if (rc)
+		pr_err("%s: failed to create the sysfs entries\n", __func__);
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 0f5ab28..05f17184 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -87,11 +87,13 @@ struct stmmac_priv {
 	int lpi_irq;
 	int eee_enabled;
 	int eee_active;
+	int eee_timer;
 	int tx_lpi_timer;
 	struct timer_list txtimer;
 	u32 tx_count_frames;
 	u32 tx_coal_frames;
 	u32 tx_coal_timer;
+	u16 rx_riwt;
 };
 
 extern int phyaddr;
@@ -111,6 +113,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 				     void __iomem *addr);
 void stmmac_disable_eee_mode(struct stmmac_priv *priv);
 bool stmmac_eee_init(struct stmmac_priv *priv);
+void stmmac_create_sysfs(struct net_device *dev);
 
 #ifdef CONFIG_STMMAC_PLATFORM
 extern struct platform_driver stmmac_pltfr_driver;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index bafe694..1895130 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -77,8 +77,6 @@
 
 #define STMMAC_ALIGN(x)	L1_CACHE_ALIGN(x)
 #define JUMBO_LEN	9000
-#define	STMMAC_TX_TM	40000
-#define STMMAC_TX_MAX_FRAMES	64	/* Max coalesced frame */
 
 /* Module parameters */
 #define TX_TIMEO 5000 /* default 5 seconds */
@@ -126,11 +124,8 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 				      NETIF_MSG_LINK | NETIF_MSG_IFUP |
 				      NETIF_MSG_IFDOWN | NETIF_MSG_TIMER);
 
-#define STMMAC_DEFAULT_LPI_TIMER	1000
-static int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
-module_param(eee_timer, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
 #define STMMAC_LPI_TIMER(x) (jiffies + msecs_to_jiffies(x))
+#define STMMAC_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x))
 
 static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
 static int stmmac_rx(struct stmmac_priv *priv, int limit);
@@ -161,8 +156,6 @@ static void stmmac_verify_args(void)
 		flow_ctrl = FLOW_OFF;
 	if (unlikely((pause < 0) || (pause > 0xffff)))
 		pause = PAUSE_TIME;
-	if (eee_timer < 0)
-		eee_timer = STMMAC_DEFAULT_LPI_TIMER;
 }
 
 static void stmmac_clk_csr_set(struct stmmac_priv *priv)
@@ -254,7 +247,7 @@ static void stmmac_eee_ctrl_timer(unsigned long arg)
 	struct stmmac_priv *priv = (struct stmmac_priv *)arg;
 
 	stmmac_enable_eee_mode(priv);
-	mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_TIMER(eee_timer));
+	mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_TIMER(priv->eee_timer));
 }
 
 /**
@@ -280,7 +273,8 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 		init_timer(&priv->eee_ctrl_timer);
 		priv->eee_ctrl_timer.function = stmmac_eee_ctrl_timer;
 		priv->eee_ctrl_timer.data = (unsigned long)priv;
-		priv->eee_ctrl_timer.expires = STMMAC_LPI_TIMER(eee_timer);
+		priv->eee_ctrl_timer.expires =
+			STMMAC_LPI_TIMER(priv->eee_timer);
 		add_timer(&priv->eee_ctrl_timer);
 
 		priv->hw->mac->set_eee_timer(priv->ioaddr,
@@ -770,7 +764,8 @@ static void stmmac_tx(struct stmmac_priv *priv)
 
 	if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
 		stmmac_enable_eee_mode(priv);
-		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_TIMER(eee_timer));
+		mod_timer(&priv->eee_ctrl_timer,
+			  STMMAC_LPI_TIMER(priv->eee_timer));
 	}
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
 }
@@ -1016,9 +1011,9 @@ static int stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 						 STMMAC_TX_MAX_FRAMES);
 	if (priv->tx_coal_frames) {
 		/* Set Tx coalesce parameters and timers */
-		priv->tx_coal_timer = jiffies + usecs_to_jiffies(STMMAC_TX_TM);
+		priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
 		init_timer(&priv->txtimer);
-		priv->txtimer.expires = priv->tx_coal_timer;
+		priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
 		priv->txtimer.data = (unsigned long)priv;
 		priv->txtimer.function = stmmac_txtimer;
 
@@ -1138,6 +1133,7 @@ static int stmmac_open(struct net_device *dev)
 		phy_start(priv->phydev);
 
 	priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS_TIMER;
+	priv->eee_timer = STMMAC_DEFAULT_LPI_TIMER;
 	priv->eee_enabled = stmmac_eee_init(priv);
 
 	ret = stmmac_init_tx_coalesce(priv);
@@ -1149,11 +1145,13 @@ static int stmmac_open(struct net_device *dev)
 	 */
 	if (priv->synopsys_id < DWMAC_CORE_3_50)
 		napi_enable(&priv->napi);
-	else if (priv->hw->dma->rx_watchdog)
+	else if (priv->hw->dma->rx_watchdog) {
+		priv->rx_riwt = MAX_DMA_RIWT;
 		/* Program RX Watchdog register to the default values
 		 * FIXME: provide user value for RIWT
 		 */
-		priv->hw->dma->rx_watchdog(priv->ioaddr, DEFAULT_DMA_RIWT);
+		priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt);
+	}
 
 	skb_queue_head_init(&priv->rx_recycle);
 	netif_start_queue(dev);
@@ -1317,7 +1315,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (priv->tx_coal_frames > priv->tx_count_frames) {
 		priv->hw->desc->clear_tx_ic(desc);
 		priv->xstats.tx_reset_ic_bit++;
-		mod_timer(&priv->txtimer, priv->tx_coal_timer);
+		mod_timer(&priv->txtimer,
+			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
 	} else
 		priv->tx_count_frames = 0;
 
@@ -2081,6 +2080,8 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 		goto error_mdio_register;
 	}
 
+	stmmac_create_sysfs(ndev);
+
 	return priv;
 
 error_mdio_register:
@@ -2283,9 +2284,6 @@ static int __init stmmac_cmdline_opt(char *str)
 		} else if (!strncmp(opt, "pause:", 6)) {
 			if (kstrtoint(opt + 6, 0, &pause))
 				goto err;
-		} else if (!strncmp(opt, "eee_timer:", 6)) {
-			if (kstrtoint(opt + 10, 0, &eee_timer))
-				goto err;
 		}
 	}
 	return 0;
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 6/7] stmmac: add mitigation and sysfs info in the doc
From: Giuseppe CAVALLARO @ 2012-09-03  7:47 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1346658422-1925-1-git-send-email-peppe.cavallaro@st.com>

This patch updates stmmac.txt, adding some information
about the new rx/tx mitigation schema and the sysfs support.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 Documentation/networking/stmmac.txt |   34 +++++++++++++++++++++-------------
 1 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index ef9ee71..67eaa35 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -29,11 +29,9 @@ The kernel configuration option is STMMAC_ETH:
 	dma_txsize: DMA tx ring size;
 	buf_sz: DMA buffer size;
 	tc: control the HW FIFO threshold;
-	tx_coe: Enable/Disable Tx Checksum Offload engine;
 	watchdog: transmit timeout (in milliseconds);
 	flow_ctrl: Flow control ability [on/off];
 	pause: Flow Control Pause Time;
-	tmrate: timer period (only if timer optimisation is configured).
 
 3) Command line options
 Driver parameters can be also passed in command line by using:
@@ -60,17 +58,21 @@ Then the poll method will be scheduled at some future point.
 The incoming packets are stored, by the DMA, in a list of pre-allocated socket
 buffers in order to avoid the memcpy (Zero-copy).
 
-4.3) Timer-Driver Interrupt
-Instead of having the device that asynchronously notifies the frame receptions,
-the driver configures a timer to generate an interrupt at regular intervals.
-Based on the granularity of the timer, the frames that are received by the
-device will experience different levels of latency. Some NICs have dedicated
-timer device to perform this task. STMMAC can use either the RTC device or the
-TMU channel 2  on STLinux platforms.
-The timers frequency can be passed to the driver as parameter; when change it,
-take care of both hardware capability and network stability/performance impact.
-Several performance tests on STM platforms showed this optimisation allows to
-spare the CPU while having the maximum throughput.
+4.3) Interrupt Mitigation
+The driver is able to mitigate the number of its DMA interrupts
+using NAPI for the reception on chips older than the 3.50.
+New chips have an HW RX-Watchdog used for this mitigation.
+
+User can tune (also via sysfs) a parameter that is the RI Watchdog
+Timer count. It indicates the number of system clock cycles.
+
+On Tx-side, the mitigation schema is based on a SW timer that calls the
+tx function (stmmac_tx) to reclaim the resource after transmitting the
+frames.
+Also there is another parameter (like a threshold) used to program
+the descriptors so that the interrupt on completion bit is not set
+when the frame is sent (xmit).
+These parameters can be tuned by sysfs entries.
 
 4.4) WOL
 Wake up on Lan feature through Magic and Unicast frames are supported for the
@@ -324,6 +326,12 @@ To enter in Tx LPI mode the driver needs to have a software timer
 that enable and disable the LPI mode when there is nothing to be
 transmitted.
 
+7) sysfs interface
+Some internal driver parameters can be tuned by using some
+entries exposed via sysfs. These parameters currently are,
+for example, for the internal timers used to mitigate the rx/tx
+interrupts or for EEE.
+
 7) TODO:
  o XGMAC is not supported.
  o Add the PTP - precision time protocol
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 7/7] stmmac: update the driver version to August_2012
From: Giuseppe CAVALLARO @ 2012-09-03  7:47 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1346658422-1925-1-git-send-email-peppe.cavallaro@st.com>

Many new features have been introduced in the driver:
sysFS, Rx HW watchdog... so this patch updates the
driver's version.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 05f17184..ad95f26 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -24,7 +24,7 @@
 #define __STMMAC_H__
 
 #define STMMAC_RESOURCE_NAME   "stmmaceth"
-#define DRV_MODULE_VERSION	"March_2012"
+#define DRV_MODULE_VERSION	"August_2012"
 
 #include <linux/clk.h>
 #include <linux/stmmac.h>
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 3/7] stmmac: add the initial tx coalesce schema
From: Giuseppe CAVALLARO @ 2012-09-03  7:46 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1346658422-1925-1-git-send-email-peppe.cavallaro@st.com>

This patch adds a new schema used for mitigating the
number of transmit interrupts.
It is based on a sw timer and a threshold value.
The timer is used to periodically call the stmmac_tx
function that can be invoked by the ISR but only for
the descriptors where the interrupt on completion
field has been set. This is tuned by a threshold.

Next step is to add the ability to tune these coalesce
values by ethtool.

Till now I have put a default that showed a real gain
on all the platforms ARM/SH4 where I performed benchmarks.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |    8 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |    4 +
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |    9 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   86 +++++++++++++-------
 4 files changed, 72 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index bd32fe6..1d6bd3e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -95,11 +95,13 @@ struct stmmac_extra_stats {
 	unsigned long threshold;
 	unsigned long tx_pkt_n;
 	unsigned long rx_pkt_n;
-	unsigned long rx_napi_poll;
+	unsigned long normal_irq_n;
 	unsigned long rx_normal_irq_n;
+	unsigned long rx_napi_poll;
 	unsigned long tx_normal_irq_n;
-	unsigned long sched_timer_n;
-	unsigned long normal_irq_n;
+	unsigned long txtimer;
+	unsigned long tx_clean;
+	unsigned long tx_reset_ic_bit;
 	unsigned long mmc_tx_irq_n;
 	unsigned long mmc_rx_irq_n;
 	unsigned long mmc_rx_csum_offload_irq_n;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 9f35769..0f5ab28 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -88,6 +88,10 @@ struct stmmac_priv {
 	int eee_enabled;
 	int eee_active;
 	int tx_lpi_timer;
+	struct timer_list txtimer;
+	u32 tx_count_frames;
+	u32 tx_coal_frames;
+	u32 tx_coal_timer;
 };
 
 extern int phyaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 505fe71..48ad0bc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -90,12 +90,13 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(threshold),
 	STMMAC_STAT(tx_pkt_n),
 	STMMAC_STAT(rx_pkt_n),
-	STMMAC_STAT(rx_napi_poll),
+	STMMAC_STAT(normal_irq_n),
 	STMMAC_STAT(rx_normal_irq_n),
+	STMMAC_STAT(rx_napi_poll),
 	STMMAC_STAT(tx_normal_irq_n),
-	STMMAC_STAT(sched_timer_n),
-	STMMAC_STAT(normal_irq_n),
-	STMMAC_STAT(normal_irq_n),
+	STMMAC_STAT(txtimer),
+	STMMAC_STAT(tx_clean),
+	STMMAC_STAT(tx_reset_ic_bit),
 	STMMAC_STAT(mmc_tx_irq_n),
 	STMMAC_STAT(mmc_rx_irq_n),
 	STMMAC_STAT(mmc_rx_csum_offload_irq_n),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b247c39..d7f5482 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -77,6 +77,8 @@
 
 #define STMMAC_ALIGN(x)	L1_CACHE_ALIGN(x)
 #define JUMBO_LEN	9000
+#define	STMMAC_TX_TM	40000
+#define STMMAC_TX_MAX_FRAMES	64	/* Max coalesced frame */
 
 /* Module parameters */
 #define TX_TIMEO 5000 /* default 5 seconds */
@@ -695,8 +697,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 static void stmmac_tx(struct stmmac_priv *priv)
 {
 	unsigned int txsize = priv->dma_tx_size;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
 
-	spin_lock(&priv->tx_lock);
+	priv->xstats.tx_clean++;
 
 	while (priv->dirty_tx != priv->cur_tx) {
 		int last;
@@ -765,7 +770,7 @@ static void stmmac_tx(struct stmmac_priv *priv)
 		stmmac_enable_eee_mode(priv);
 		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_TIMER(eee_timer));
 	}
-	spin_unlock(&priv->tx_lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 }
 
 static inline void stmmac_enable_irq(struct stmmac_priv *priv)
@@ -778,29 +783,16 @@ static inline void stmmac_disable_irq(struct stmmac_priv *priv)
 	priv->hw->dma->disable_dma_irq(priv->ioaddr);
 }
 
-static int stmmac_has_work(struct stmmac_priv *priv)
+static void stmmac_txtimer(unsigned long data)
 {
-	unsigned int has_work = 0;
-	int rxret, tx_work = 0;
+	struct stmmac_priv *priv = (struct stmmac_priv *)data;
 
-	rxret = priv->hw->desc->get_rx_owner(priv->dma_rx +
-		(priv->cur_rx % priv->dma_rx_size));
+	priv->xstats.txtimer++;
 
 	if (priv->dirty_tx != priv->cur_tx)
-		tx_work = 1;
-
-	if (likely(!rxret || tx_work))
-		has_work = 1;
+		stmmac_tx(priv);
 
-	return has_work;
-}
-
-static inline void _stmmac_schedule(struct stmmac_priv *priv)
-{
-	if (likely(stmmac_has_work(priv))) {
-		stmmac_disable_irq(priv);
-		napi_schedule(&priv->napi);
-	}
+	return;
 }
 
 /**
@@ -824,7 +816,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
 	netif_wake_queue(priv->dev);
 }
 
-static inline void stmmac_rx_schedule(struct stmmac_priv *priv)
+static void stmmac_rx_schedule(struct stmmac_priv *priv)
 {
 	if (likely(napi_schedule_prep(&priv->napi))) {
 		stmmac_disable_irq(priv);
@@ -1001,6 +993,36 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 				   priv->dma_rx_phy);
 }
 
+static int stmmac_check_coal(int size, int max_coal_frames)
+{
+	int ret = 0;
+
+	if (max_coal_frames >= size)
+		return ret;
+
+	return max_coal_frames;
+}
+
+static int stmmac_init_tx_coalesce(struct stmmac_priv *priv)
+{
+	int ret = -EOPNOTSUPP;
+
+	priv->tx_coal_frames = stmmac_check_coal(priv->dma_tx_size,
+						 STMMAC_TX_MAX_FRAMES);
+	if (priv->tx_coal_frames) {
+		/* Set Tx coalesce parameters and timers */
+		priv->tx_coal_timer = jiffies + usecs_to_jiffies(STMMAC_TX_TM);
+		init_timer(&priv->txtimer);
+		priv->txtimer.expires = priv->tx_coal_timer;
+		priv->txtimer.data = (unsigned long)priv;
+		priv->txtimer.function = stmmac_txtimer;
+
+		ret = 0;
+	}
+
+	return ret;
+}
+
 /**
  *  stmmac_open - open entry point of the driver
  *  @dev : pointer to the device structure.
@@ -1113,6 +1135,10 @@ static int stmmac_open(struct net_device *dev)
 	priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS_TIMER;
 	priv->eee_enabled = stmmac_eee_init(priv);
 
+	ret = stmmac_init_tx_coalesce(priv);
+	if (!ret)
+		add_timer(&priv->txtimer);
+
 	napi_enable(&priv->napi);
 	skb_queue_head_init(&priv->rx_recycle);
 	netif_start_queue(dev);
@@ -1202,6 +1228,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	struct dma_desc *desc, *first;
 	unsigned int nopaged_len = skb_headlen(skb);
+	unsigned long flags;
 
 	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
 		if (!netif_queue_stopped(dev)) {
@@ -1213,10 +1240,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_BUSY;
 	}
 
-	spin_lock(&priv->tx_lock);
-
-	if (priv->tx_path_in_lpi_mode)
-		stmmac_disable_eee_mode(priv);
+	spin_lock_irqsave(&priv->tx_lock, flags);
 
 	entry = priv->cur_tx % txsize;
 
@@ -1272,7 +1296,14 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Interrupt on completition only for the latest segment */
 	priv->hw->desc->close_tx_desc(desc);
 
-	wmb();
+	/* Do not set the IC according to the coalesce patameters */
+	priv->tx_count_frames++;
+	if (priv->tx_coal_frames > priv->tx_count_frames) {
+		priv->hw->desc->clear_tx_ic(desc);
+		priv->xstats.tx_reset_ic_bit++;
+		mod_timer(&priv->txtimer, priv->tx_coal_timer);
+	} else
+		priv->tx_count_frames = 0;
 
 	/* To avoid raise condition */
 	priv->hw->desc->set_tx_owner(first);
@@ -1302,7 +1333,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	priv->hw->dma->enable_dma_transmission(priv->ioaddr);
 
-	spin_unlock(&priv->tx_lock);
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	return NETDEV_TX_OK;
 }
@@ -1447,7 +1478,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
  *	      all interfaces.
  *  Description :
  *   This function implements the the reception process.
- *   Also it runs the TX completion thread
  */
 static int stmmac_poll(struct napi_struct *napi, int budget)
 {
-- 
1.7.4.4

^ permalink raw reply related

* Re: [PATCH v2 1/2] tcp: add generic netlink support for tcp_metrics
From: Julian Anastasov @ 2012-09-03  8:22 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, Stephen Hemminger, Paul E. McKenney
In-Reply-To: <1346633144.2563.97.camel@edumazet-glaptop>


	Hello,

On Mon, 3 Sep 2012, Eric Dumazet wrote:

> On Sun, 2012-09-02 at 08:36 +0300, Julian Anastasov wrote:
> > +
> > +static int tcp_metrics_flush_all(struct net *net)
> > +{
> > +	unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
> > +	struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash;
> > +	struct tcp_metrics_block *tm;
> > +	unsigned int sync_count = 0;
> > +	unsigned int row;
> > +
> > +	for (row = 0; row < max_rows; row++, hb++) {
> > +		spin_lock_bh(&tcp_metrics_lock);
> > +		tm = deref_locked_genl(hb->chain);
> > +		if (tm)
> > +			hb->chain = NULL;
> > +		spin_unlock_bh(&tcp_metrics_lock);
> > +		while (tm) {
> > +			struct tcp_metrics_block *next;
> > +
> > +			next = deref_genl(tm->tcpm_next);
> > +			kfree_rcu(tm, rcu_head);
> > +			if (!((++sync_count) & 2047))
> > +				synchronize_rcu();
> > +			tm = next;
> > +		}
> > +	}
> > +	return 0;
> > +}
> 
> It looks like the synchronize_rcu() call is not exactly what you wanted,
> but then net/ipv4/fib_trie.c has the same mistake.

	I used fib_trie as reference...

> What we want here is to force pending call_rcu() calls to complete, so
> that we dont consume too much memory. So it would probably better to
> call rcu_barrier() instead.

	I see

> If other cpus are idle or outside of rcu read lock sections,
> synchronize_rcu() should basically do nothing at all.
> 
> But I am not sure its worth the trouble ?
> 
> Commit c3059477fce2d956a0bb3e04357324780c5d8eeb (ipv4: Use
> synchronize_rcu() during trie_rebalance()) was needed because FIB TRIE
> can really use huge amounts of memory, thats hardly the case with
> tcp_metrics.

	I was worried about the case where
(TCP_METRICS_RECLAIM_DEPTH + 1) * tcpmhash_entries is
large, e.g. if some non-default value is configured. Maybe the
chance of the table being filled immediately is small. I'll remove it.

	BTW, is it appropriate to use kmem_cache for
metrics and as result call_rcu for freeing?

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* [PATCH net] net: usbnet: fix softirq storm on suspend
From: Bjørn Mork @ 2012-09-03  8:26 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA, Bjørn Mork, Ming Lei,
	Oliver Neukum

Suspending an open usbnet device results in constant
rescheduling of usbnet_bh.

commit 65841fd5 "usbnet: handle remote wakeup asap"
refactored the usbnet_bh code to allow sharing the
urb allocate and submit code with usbnet_resume. In
this process, a test for, and immediate return on,
ENOLINK from rx_submit was unintentionally dropped.

The rx queue will not grow if rx_submit fails,
making usbnet_bh reschedule itself.  This results
in a softirq storm if the error is persistent.
rx_submit translates the usb_submit_urb error
EHOSTUNREACH into ENOLINK, so this is an expected
and persistent error for a suspended device. The
old code tested for this condition and avoided
rescheduling.  Putting this test back.

Cc: <stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org> # v3.5
Cc: Ming Lei <ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
Cc: Oliver Neukum <oneukum-l3A5Bk7waGM@public.gmane.org>
Signed-off-by: Bjørn Mork <bjorn-yOkvZcmFvRU@public.gmane.org>
---
Sorry for not noticing this before, but commit 65841fd5
makes usbnet autosuspend completely unusable.  The device
is suspended fine, but burning one CPU core at full load
uses a tiny bit more power making the power saving 
negative...

I hope this can go into 3.6 and 3.5-stable ASAP. It is
a hard to notice regression, but all the same a serious
one.


Thanks,
Bjørn


 drivers/net/usb/usbnet.c |   16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index fd4b26d..fc9f578 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1201,19 +1201,26 @@ deferred:
 }
 EXPORT_SYMBOL_GPL(usbnet_start_xmit);
 
-static void rx_alloc_submit(struct usbnet *dev, gfp_t flags)
+static int rx_alloc_submit(struct usbnet *dev, gfp_t flags)
 {
 	struct urb	*urb;
 	int		i;
+	int		ret = 0;
 
 	/* don't refill the queue all at once */
 	for (i = 0; i < 10 && dev->rxq.qlen < RX_QLEN(dev); i++) {
 		urb = usb_alloc_urb(0, flags);
 		if (urb != NULL) {
-			if (rx_submit(dev, urb, flags) == -ENOLINK)
-				return;
+			ret = rx_submit(dev, urb, flags);
+			if (ret)
+				goto err;
+		} else {
+			ret = -ENOMEM;
+			goto err;
 		}
 	}
+err:
+	return ret;
 }
 
 /*-------------------------------------------------------------------------*/
@@ -1257,7 +1264,8 @@ static void usbnet_bh (unsigned long param)
 		int	temp = dev->rxq.qlen;
 
 		if (temp < RX_QLEN(dev)) {
-			rx_alloc_submit(dev, GFP_ATOMIC);
+			if (rx_alloc_submit(dev, GFP_ATOMIC) == -ENOLINK)
+				return;
 			if (temp != dev->rxq.qlen)
 				netif_dbg(dev, link, dev->net,
 					  "rxqlen %d --> %d\n",
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: Question: routing packets via specific router in LAN?
From: Cong Wang @ 2012-09-03  8:28 UTC (permalink / raw)
  To: netdev
In-Reply-To: <50444880.8080703@gmail.com>

On Mon, 03 Sep 2012 at 06:04 GMT, Yi Li <lovelylich@gmail.com> wrote:
> Hi All,
> I have server --- router ---client three machines,
> and they all have only one ip in the same LAN.
> I want to instruct the packets flowing through the router when the
> server and client communicates.
> I have do the following things to setup:
> on the server:
> # ip route add to unicast CLIENT_IP/32 via ROUTER_IP dev eth0
> # echo 0 > /proc/sys/net/ipv4/conf/all/accept_redirects
> # echo 0 > /proc/sys/net/ipv4/conf/eth0/accept_redirects
>
> on the client:
> /*modify route table*/
> # ip route add to unicast SERVER_IP/32 via ROUTER_IP dev eth0
> /*disable icmp-redirects accept*/
> # echo 0 > /proc/sys/net/ipv4/conf/all/accept_redirects
> # echo 0 > /proc/sys/net/ipv4/conf/eth0/accept_redirects
>
> on the router:
> /*enable forwarding*/
> # echo 1 > /proc/sys/net/ipv4/ip_forwarding
> /*disable icmp-redirects*/
> # echo 0 > /proc/sys/net/ipv4/conf/all/send_redirects
> # echo 0 > /proc/sys/net/ipv4/conf/eth0/send_redirects
>

Try to add some iptables rules like:

iptables -A FORWARD -j ACCEPT -s CLIENT_IP/xx -d  SERVER_IP/xx

^ permalink raw reply

* Re: [PATCH] net/can:  rename peak_usb dump_mem function
From: Marc Kleine-Budde @ 2012-09-03  9:02 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: netdev, Geert Uytterhoeven, linux-kernel, David Miller,
	Stephane Grosjean, Wolfgang Grandegger, linux-can
In-Reply-To: <504393A7.8040007@xenotime.net>

[-- Attachment #1: Type: text/plain, Size: 900 bytes --]

On 09/02/2012 07:13 PM, Randy Dunlap wrote:
> From: Randy Dunlap <rdunlap@xenotime.net>
> 
> Rename generic-sounding function dump_mem() to pcan_dump_mem()
> so that it does not conflict with the dump_mem() function in
> arch/sh/include/asm/kdebug.h.
> 
> drivers/net/can/usb/peak_usb/pcan_usb_core.c: error: conflicting types for 'dump_mem':  => 56:6
> drivers/net/can/usb/peak_usb/pcan_usb_core.h: error: conflicting types for 'dump_mem':  => 134:6
> 
> Not tested.

:) I've converted the users of peak's dump_mem() function, too. Now it
compiles. Should this go into v3.6, or is v3.7 early enough?

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 259 bytes --]

^ permalink raw reply

* [PATCH net-next 2/2] net: sierra_net: rx_urb_size is constant
From: Bjørn Mork @ 2012-09-03  9:20 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA, Bjørn Mork
In-Reply-To: <1346664033-30284-1-git-send-email-bjorn-yOkvZcmFvRU@public.gmane.org>

The rx_urb_size is set to the same value for every device
supported by this driver.  No need to keep a per-device
data structure to do that. Replacing with a macro constant.

This was the last device specific info, and removing it
allows us to delete the sierra_net_info_data struct.

Signed-off-by: Bjørn Mork <bjorn-yOkvZcmFvRU@public.gmane.org>
---
 drivers/net/usb/sierra_net.c |   17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 596ddaa..7ae70e9 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -68,9 +68,8 @@ static	atomic_t iface_counter = ATOMIC_INIT(0);
  */
 #define SIERRA_NET_USBCTL_BUF_LEN	1024
 
-struct sierra_net_info_data {
-	u16 rx_urb_size;
-};
+/* Overriding the default usbnet rx_urb_size */
+#define SIERRA_NET_RX_URB_SIZE		(8 * 1024)
 
 /* Private data structure */
 struct sierra_net_data {
@@ -678,9 +677,6 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf)
 	static const u8 shdwn_tmplate[sizeof(priv->shdwn_msg)] = {
 		0x00, 0x00, SIERRA_NET_HIP_SHUTD_ID, 0x00};
 
-	struct sierra_net_info_data *data =
-			(struct sierra_net_info_data *)dev->driver_info->data;
-
 	dev_dbg(&dev->udev->dev, "%s", __func__);
 
 	ifacenum = intf->cur_altsetting->desc.bInterfaceNumber;
@@ -725,9 +721,9 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf)
 	sierra_net_set_ctx_index(priv, 0);
 
 	/* decrease the rx_urb_size and max_tx_size to 4k on USB 1.1 */
-	dev->rx_urb_size  = data->rx_urb_size;
+	dev->rx_urb_size  = SIERRA_NET_RX_URB_SIZE;
 	if (dev->udev->speed != USB_SPEED_HIGH)
-		dev->rx_urb_size  = min_t(size_t, 4096, data->rx_urb_size);
+		dev->rx_urb_size  = min_t(size_t, 4096, SIERRA_NET_RX_URB_SIZE);
 
 	dev->net->hard_header_len += SIERRA_NET_HIP_EXT_HDR_LEN;
 	dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
@@ -918,10 +914,6 @@ static struct sk_buff *sierra_net_tx_fixup(struct usbnet *dev,
 	return NULL;
 }
 
-static const struct sierra_net_info_data sierra_net_info_data_direct_ip = {
-	.rx_urb_size = 8 * 1024,
-};

^ permalink raw reply related

* [PATCH net-next 1/2] net: sierra_net: make private symbols static
From: Bjørn Mork @ 2012-09-03  9:20 UTC (permalink / raw)
  To: netdev; +Cc: linux-usb, Bjørn Mork
In-Reply-To: <1346664033-30284-1-git-send-email-bjorn@mork.no>

Signed-off-by: Bjørn Mork <bjorn@mork.no>
---
 drivers/net/usb/sierra_net.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 7be49ea..596ddaa 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -560,7 +560,7 @@ static void sierra_net_defer_kevent(struct usbnet *dev, int work)
 /*
  * Sync Retransmit Timer Handler. On expiry, kick the work queue
  */
-void sierra_sync_timer(unsigned long syncdata)
+static void sierra_sync_timer(unsigned long syncdata)
 {
 	struct usbnet *dev = (struct usbnet *)syncdata;
 
@@ -866,8 +866,8 @@ static int sierra_net_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 }
 
 /* ---------------------------- Transmit data path ----------------------*/
-struct sk_buff *sierra_net_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
-		gfp_t flags)
+static struct sk_buff *sierra_net_tx_fixup(struct usbnet *dev,
+					   struct sk_buff *skb, gfp_t flags)
 {
 	struct sierra_net_data *priv = sierra_net_get_private(dev);
 	u16 len;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH net-next] net: cx82310_eth: use common match macro
From: Bjørn Mork @ 2012-09-03  9:20 UTC (permalink / raw)
  To: netdev; +Cc: linux-usb, Bjørn Mork

Signed-off-by: Bjørn Mork <bjorn@mork.no>
---
 drivers/net/usb/cx82310_eth.c |   11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index 49ab45e..1e207f0 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -302,18 +302,9 @@ static const struct driver_info	cx82310_info = {
 	.tx_fixup	= cx82310_tx_fixup,
 };
 
-#define USB_DEVICE_CLASS(vend, prod, cl, sc, pr) \
-	.match_flags = USB_DEVICE_ID_MATCH_DEVICE | \
-		       USB_DEVICE_ID_MATCH_DEV_INFO, \
-	.idVendor = (vend), \
-	.idProduct = (prod), \
-	.bDeviceClass = (cl), \
-	.bDeviceSubClass = (sc), \
-	.bDeviceProtocol = (pr)
-
 static const struct usb_device_id products[] = {
 	{
-		USB_DEVICE_CLASS(0x0572, 0xcb01, 0xff, 0, 0),
+		USB_DEVICE_AND_INTERFACE_INFO(0x0572, 0xcb01, 0xff, 0, 0),
 		.driver_info = (unsigned long) &cx82310_info
 	},
 	{ },
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH v2] netfilter: take care of timewait sockets
From: Eric Dumazet @ 2012-09-03  9:57 UTC (permalink / raw)
  To: Florian Westphal, David Miller; +Cc: Sami Farin, netdev, e1000-devel
In-Reply-To: <20120903074718.GA14750@breakpoint.cc>

From: Eric Dumazet <edumazet@google.com>

On Mon, 2012-09-03 at 09:47 +0200, Florian Westphal wrote:
> Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Sami Farin reported crashes in xt_LOG because it assumes skb->sk is a
> > full blown socket.
> > 
> > But with TCP early demux, we can have skb->sk pointing to a timewait
> > socket.
> > 
> > Same fix is needed in nfnetlink_log
> 
> Looks good, but IMHO it is very un-intuitive that
> skb->sk might be a pointer to an object that is not struct sock (or
> a compatible object).

It's kind of a compatible object, if all skb->sk users are aware of it.

You are totally right, this is messy, but TCP edemux is a layering
violation that helps performance a bit...

sock_edemux() should also be fixed.

David, tell me if you prefer to change TCP demux to avoid timewait,
as I have no strong opinion.

[PATCH] net: sock_edemux() should take care of timewait sockets

sock_edemux() can handle either a regular socket or a timewait socket

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/sock.c |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 8f67ced..7f64467 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1523,7 +1523,12 @@ EXPORT_SYMBOL(sock_rfree);
 
 void sock_edemux(struct sk_buff *skb)
 {
-	sock_put(skb->sk);
+	struct sock *sk = skb->sk;
+
+	if (sk->sk_state == TCP_TIME_WAIT)
+		inet_twsk_put(inet_twsk(sk));
+	else
+		sock_put(sk);
 }
 EXPORT_SYMBOL(sock_edemux);
 

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox