public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, David Howells <dhowells@redhat.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.19 40/42] rxrpc: Fix lockup due to no error backoff after ack transmit error
Date: Wed, 21 Nov 2018 20:06:16 +0100	[thread overview]
Message-ID: <20181121183149.704905588@linuxfoundation.org> (raw)
In-Reply-To: <20181121183147.869199006@linuxfoundation.org>

4.19-stable review patch.  If anyone has any objections, please let me know.

------------------

From: David Howells <dhowells@redhat.com>

[ Upstream commit c7e86acfcee30794dc99a0759924bf7b9d43f1ca ]

If the network becomes (partially) unavailable, say by disabling IPv6, the
background ACK transmission routine can get itself into a tizzy by
proposing immediate ACK retransmission.  Since we're in the call event
processor, that happens immediately without returning to the workqueue
manager.

The condition should clear after a while when either the network comes back
or the call times out.

Fix this by:

 (1) When re-proposing an ACK on failed Tx, don't schedule it immediately.
     This will allow a certain amount of time to elapse before we try
     again.

 (2) Enforce a return to the workqueue manager after a certain number of
     iterations of the call processing loop.

 (3) Add a backoff delay that increases the delay on deferred ACKs by a
     jiffy per failed transmission to a limit of HZ.  The backoff delay is
     cleared on a successful return from kernel_sendmsg().

 (4) Cancel calls immediately if the opening sendmsg fails.  The layer
     above can arrange retransmission or rotate to another server.

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rxrpc/ar-internal.h |    1 +
 net/rxrpc/call_event.c  |   18 ++++++++++++++----
 net/rxrpc/output.c      |   35 +++++++++++++++++++++++++++++++----
 3 files changed, 46 insertions(+), 8 deletions(-)

--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -611,6 +611,7 @@ struct rxrpc_call {
 						 * not hard-ACK'd packet follows this.
 						 */
 	rxrpc_seq_t		tx_top;		/* Highest Tx slot allocated. */
+	u16			tx_backoff;	/* Delay to insert due to Tx failure */
 
 	/* TCP-style slow-start congestion control [RFC5681].  Since the SMSS
 	 * is fixed, we keep these numbers in terms of segments (ie. DATA
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct r
 		else
 			ack_at = expiry;
 
+		ack_at += READ_ONCE(call->tx_backoff);
 		ack_at += now;
 		if (time_before(ack_at, call->ack_at)) {
 			WRITE_ONCE(call->ack_at, ack_at);
@@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_stru
 		container_of(work, struct rxrpc_call, processor);
 	rxrpc_serial_t *send_ack;
 	unsigned long now, next, t;
+	unsigned int iterations = 0;
 
 	rxrpc_see_call(call);
 
@@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_stru
 	       call->debug_id, rxrpc_call_states[call->state], call->events);
 
 recheck_state:
+	/* Limit the number of times we do this before returning to the manager */
+	iterations++;
+	if (iterations > 5)
+		goto requeue;
+
 	if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
 		rxrpc_send_abort_packet(call);
 		goto recheck_state;
@@ -447,13 +454,16 @@ recheck_state:
 	rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
 
 	/* other events may have been raised since we started checking */
-	if (call->events && call->state < RXRPC_CALL_COMPLETE) {
-		__rxrpc_queue_call(call);
-		goto out;
-	}
+	if (call->events && call->state < RXRPC_CALL_COMPLETE)
+		goto requeue;
 
 out_put:
 	rxrpc_put_call(call, rxrpc_call_put);
 out:
 	_leave("");
+	return;
+
+requeue:
+	__rxrpc_queue_call(call);
+	goto out;
 }
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,6 +35,21 @@ struct rxrpc_abort_buffer {
 static const char rxrpc_keepalive_string[] = "";
 
 /*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+	if (ret < 0) {
+		u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+		if (tx_backoff < HZ)
+			WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+	} else {
+		WRITE_ONCE(call->tx_backoff, 0);
+	}
+}
+
+/*
  * Arrange for a keepalive ping a certain time after we last transmitted.  This
  * lets the far side know we're still interested in this call and helps keep
  * the route through any intervening firewall open.
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_c
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
 				      rxrpc_tx_point_call_ack);
+	rxrpc_tx_backoff(call, ret);
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_c
 			rxrpc_propose_ACK(call, pkt->ack.reason,
 					  ntohs(pkt->ack.maxSkew),
 					  ntohl(pkt->ack.serial),
-					  true, true,
+					  false, true,
 					  rxrpc_propose_ack_retry_tx);
 		} else {
 			spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
 				      rxrpc_tx_point_call_abort);
-
+	rxrpc_tx_backoff(call, ret);
 
 	rxrpc_put_connection(conn);
 	return ret;
@@ -411,6 +427,7 @@ int rxrpc_send_data_packet(struct rxrpc_
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_nofrag);
+	rxrpc_tx_backoff(call, ret);
 	if (ret == -EMSGSIZE)
 		goto send_fragmentable;
 
@@ -445,9 +462,18 @@ done:
 			rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
 						rxrpc_timer_set_for_normal);
 		}
-	}
 
-	rxrpc_set_keepalive(call);
+		rxrpc_set_keepalive(call);
+	} else {
+		/* Cancel the call if the initial transmission fails,
+		 * particularly if that's due to network routing issues that
+		 * aren't going away anytime soon.  The layer above can arrange
+		 * the retransmission.
+		 */
+		if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+			rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+						  RX_USER_ABORT, ret);
+	}
 
 	_leave(" = %d [%u]", ret, call->peer->maxdata);
 	return ret;
@@ -506,6 +532,7 @@ send_fragmentable:
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_frag);
+	rxrpc_tx_backoff(call, ret);
 
 	up_write(&conn->params.local->defrag_sem);
 	goto done;



  parent reply	other threads:[~2018-11-21 19:08 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-21 19:05 [PATCH 4.19 00/42] 4.19.4-stable review Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 01/42] flow_dissector: do not dissect l4 ports for fragments Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 02/42] ibmvnic: fix accelerated VLAN handling Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 03/42] ip_tunnel: dont force DF when MTU is locked Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 04/42] ipv6: fix a dst leak when removing its exception Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 05/42] ipv6: Fix PMTU updates for UDP/raw sockets in presence of VRF Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 06/42] net: bcmgenet: protect stop from timeout Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 07/42] net-gro: reset skb->pkt_type in napi_reuse_skb() Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 08/42] sctp: not allow to set asoc prsctp_enable by sockopt Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 09/42] tcp: Fix SOF_TIMESTAMPING_RX_HARDWARE to use the latest timestamp during TCP coalescing Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 10/42] tg3: Add PHY reset for 5717/5719/5720 in change ring and flow control paths Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 11/42] tipc: dont assume linear buffer when reading ancillary data Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 12/42] tipc: fix lockdep warning when reinitilaizing sockets Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 13/42] tuntap: fix multiqueue rx Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 14/42] net: systemport: Protect stop from timeout Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 15/42] net/sched: act_pedit: fix memory leak when IDR allocation fails Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 16/42] net: sched: cls_flower: validate nested enc_opts_policy to avoid warning Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 17/42] tipc: fix link re-establish failure Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 18/42] net/mlx5e: Dont match on vlan non-existence if ethertype is wildcarded Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 19/42] net/mlx5e: Claim TC hw offloads support only under a proper build config Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 20/42] net/mlx5e: Adjust to max number of channles when re-attaching Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 21/42] net/mlx5e: RX, verify received packet size in Linear Striding RQ Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 22/42] Revert "sctp: remove sctp_transport_pmtu_check" Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 23/42] net/mlx5e: Always use the match level enum when parsing TC rule match Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 24/42] net/mlx5e: Fix selftest for small MTUs Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 25/42] net/mlx5e: Removed unnecessary warnings in FEC caps query Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 26/42] inet: frags: better deal with smp races Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 27/42] l2tp: fix a sock refcnt leak in l2tp_tunnel_register Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 28/42] net/mlx5: IPSec, Fix the SA context hash key Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 29/42] net/mlx5e: IPoIB, Reset QP after channels are closed Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 30/42] net: dsa: mv88e6xxx: Fix clearing of stats counters Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 31/42] net: phy: realtek: fix RTL8201F sysfs name Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 32/42] sctp: define SCTP_SS_DEFAULT for Stream schedulers Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 33/42] net: qualcomm: rmnet: Fix incorrect assignment of real_dev Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 34/42] net: dsa: microchip: initialize mutex before use Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 35/42] sctp: fix strchange_flags name for Stream Change Event Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 36/42] net: phy: mdio-gpio: Fix working over slow can_sleep GPIOs Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 37/42] sctp: not increase streams incnt before sending addstrm_in request Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 38/42] mlxsw: spectrum: Fix IP2ME CPU policer configuration Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 39/42] net: smsc95xx: Fix MTU range Greg Kroah-Hartman
2018-11-21 19:06 ` Greg Kroah-Hartman [this message]
2018-11-21 19:06 ` [PATCH 4.19 41/42] usbnet: smsc95xx: disable carrier check while suspending Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 42/42] Revert "x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation" Greg Kroah-Hartman
2018-11-22  5:26 ` [PATCH 4.19 00/42] 4.19.4-stable review kernelci.org bot
2018-11-22  7:15 ` Harsh Shandilya
2018-11-22  8:24   ` Greg Kroah-Hartman
2018-11-22 16:36 ` Guenter Roeck
2018-11-23  7:16   ` Greg Kroah-Hartman
2018-11-22 20:53 ` Thomas Voegtle
2018-11-22 22:01   ` Thomas Voegtle
2018-11-22 22:30     ` Guenter Roeck
2018-11-23  6:52       ` Greg Kroah-Hartman
2018-11-23  7:45       ` Greg Kroah-Hartman
2018-11-23 12:06         ` Guenter Roeck
2018-11-23  6:51   ` Greg Kroah-Hartman
2018-11-23 15:40     ` Thomas Voegtle
2018-11-23 15:44       ` David Laight
2018-11-23  8:13 ` Naresh Kamboju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181121183149.704905588@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=dhowells@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox