All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, David Howells <dhowells@redhat.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.19 40/42] rxrpc: Fix lockup due to no error backoff after ack transmit error
Date: Wed, 21 Nov 2018 20:06:16 +0100	[thread overview]
Message-ID: <20181121183149.704905588@linuxfoundation.org> (raw)
In-Reply-To: <20181121183147.869199006@linuxfoundation.org>

4.19-stable review patch.  If anyone has any objections, please let me know.

------------------

From: David Howells <dhowells@redhat.com>

[ Upstream commit c7e86acfcee30794dc99a0759924bf7b9d43f1ca ]

If the network becomes (partially) unavailable, say by disabling IPv6, the
background ACK transmission routine can get itself into a tizzy by
proposing immediate ACK retransmission.  Since we're in the call event
processor, that happens immediately without returning to the workqueue
manager.

The condition should clear after a while when either the network comes back
or the call times out.

Fix this by:

 (1) When re-proposing an ACK on failed Tx, don't schedule it immediately.
     This will allow a certain amount of time to elapse before we try
     again.

 (2) Enforce a return to the workqueue manager after a certain number of
     iterations of the call processing loop.

 (3) Add a backoff delay that increases the delay on deferred ACKs by a
     jiffy per failed transmission to a limit of HZ.  The backoff delay is
     cleared on a successful return from kernel_sendmsg().

 (4) Cancel calls immediately if the opening sendmsg fails.  The layer
     above can arrange retransmission or rotate to another server.

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rxrpc/ar-internal.h |    1 +
 net/rxrpc/call_event.c  |   18 ++++++++++++++----
 net/rxrpc/output.c      |   35 +++++++++++++++++++++++++++++++----
 3 files changed, 46 insertions(+), 8 deletions(-)

--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -611,6 +611,7 @@ struct rxrpc_call {
 						 * not hard-ACK'd packet follows this.
 						 */
 	rxrpc_seq_t		tx_top;		/* Highest Tx slot allocated. */
+	u16			tx_backoff;	/* Delay to insert due to Tx failure */
 
 	/* TCP-style slow-start congestion control [RFC5681].  Since the SMSS
 	 * is fixed, we keep these numbers in terms of segments (ie. DATA
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct r
 		else
 			ack_at = expiry;
 
+		ack_at += READ_ONCE(call->tx_backoff);
 		ack_at += now;
 		if (time_before(ack_at, call->ack_at)) {
 			WRITE_ONCE(call->ack_at, ack_at);
@@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_stru
 		container_of(work, struct rxrpc_call, processor);
 	rxrpc_serial_t *send_ack;
 	unsigned long now, next, t;
+	unsigned int iterations = 0;
 
 	rxrpc_see_call(call);
 
@@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_stru
 	       call->debug_id, rxrpc_call_states[call->state], call->events);
 
 recheck_state:
+	/* Limit the number of times we do this before returning to the manager */
+	iterations++;
+	if (iterations > 5)
+		goto requeue;
+
 	if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
 		rxrpc_send_abort_packet(call);
 		goto recheck_state;
@@ -447,13 +454,16 @@ recheck_state:
 	rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
 
 	/* other events may have been raised since we started checking */
-	if (call->events && call->state < RXRPC_CALL_COMPLETE) {
-		__rxrpc_queue_call(call);
-		goto out;
-	}
+	if (call->events && call->state < RXRPC_CALL_COMPLETE)
+		goto requeue;
 
 out_put:
 	rxrpc_put_call(call, rxrpc_call_put);
 out:
 	_leave("");
+	return;
+
+requeue:
+	__rxrpc_queue_call(call);
+	goto out;
 }
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,6 +35,21 @@ struct rxrpc_abort_buffer {
 static const char rxrpc_keepalive_string[] = "";
 
 /*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+	if (ret < 0) {
+		u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+		if (tx_backoff < HZ)
+			WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+	} else {
+		WRITE_ONCE(call->tx_backoff, 0);
+	}
+}
+
+/*
  * Arrange for a keepalive ping a certain time after we last transmitted.  This
  * lets the far side know we're still interested in this call and helps keep
  * the route through any intervening firewall open.
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_c
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
 				      rxrpc_tx_point_call_ack);
+	rxrpc_tx_backoff(call, ret);
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_c
 			rxrpc_propose_ACK(call, pkt->ack.reason,
 					  ntohs(pkt->ack.maxSkew),
 					  ntohl(pkt->ack.serial),
-					  true, true,
+					  false, true,
 					  rxrpc_propose_ack_retry_tx);
 		} else {
 			spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
 				      rxrpc_tx_point_call_abort);
-
+	rxrpc_tx_backoff(call, ret);
 
 	rxrpc_put_connection(conn);
 	return ret;
@@ -411,6 +427,7 @@ int rxrpc_send_data_packet(struct rxrpc_
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_nofrag);
+	rxrpc_tx_backoff(call, ret);
 	if (ret == -EMSGSIZE)
 		goto send_fragmentable;
 
@@ -445,9 +462,18 @@ done:
 			rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
 						rxrpc_timer_set_for_normal);
 		}
-	}
 
-	rxrpc_set_keepalive(call);
+		rxrpc_set_keepalive(call);
+	} else {
+		/* Cancel the call if the initial transmission fails,
+		 * particularly if that's due to network routing issues that
+		 * aren't going away anytime soon.  The layer above can arrange
+		 * the retransmission.
+		 */
+		if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+			rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+						  RX_USER_ABORT, ret);
+	}
 
 	_leave(" = %d [%u]", ret, call->peer->maxdata);
 	return ret;
@@ -506,6 +532,7 @@ send_fragmentable:
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_frag);
+	rxrpc_tx_backoff(call, ret);
 
 	up_write(&conn->params.local->defrag_sem);
 	goto done;



  parent reply	other threads:[~2018-11-21 19:08 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-21 19:05 [PATCH 4.19 00/42] 4.19.4-stable review Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 01/42] flow_dissector: do not dissect l4 ports for fragments Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 02/42] ibmvnic: fix accelerated VLAN handling Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 03/42] ip_tunnel: dont force DF when MTU is locked Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 04/42] ipv6: fix a dst leak when removing its exception Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 05/42] ipv6: Fix PMTU updates for UDP/raw sockets in presence of VRF Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 06/42] net: bcmgenet: protect stop from timeout Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 07/42] net-gro: reset skb->pkt_type in napi_reuse_skb() Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 08/42] sctp: not allow to set asoc prsctp_enable by sockopt Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 09/42] tcp: Fix SOF_TIMESTAMPING_RX_HARDWARE to use the latest timestamp during TCP coalescing Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 10/42] tg3: Add PHY reset for 5717/5719/5720 in change ring and flow control paths Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 11/42] tipc: dont assume linear buffer when reading ancillary data Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 12/42] tipc: fix lockdep warning when reinitilaizing sockets Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 13/42] tuntap: fix multiqueue rx Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 14/42] net: systemport: Protect stop from timeout Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 15/42] net/sched: act_pedit: fix memory leak when IDR allocation fails Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 16/42] net: sched: cls_flower: validate nested enc_opts_policy to avoid warning Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 17/42] tipc: fix link re-establish failure Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 18/42] net/mlx5e: Dont match on vlan non-existence if ethertype is wildcarded Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 19/42] net/mlx5e: Claim TC hw offloads support only under a proper build config Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 20/42] net/mlx5e: Adjust to max number of channles when re-attaching Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 21/42] net/mlx5e: RX, verify received packet size in Linear Striding RQ Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 22/42] Revert "sctp: remove sctp_transport_pmtu_check" Greg Kroah-Hartman
2018-11-21 19:05 ` [PATCH 4.19 23/42] net/mlx5e: Always use the match level enum when parsing TC rule match Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 24/42] net/mlx5e: Fix selftest for small MTUs Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 25/42] net/mlx5e: Removed unnecessary warnings in FEC caps query Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 26/42] inet: frags: better deal with smp races Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 27/42] l2tp: fix a sock refcnt leak in l2tp_tunnel_register Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 28/42] net/mlx5: IPSec, Fix the SA context hash key Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 29/42] net/mlx5e: IPoIB, Reset QP after channels are closed Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 30/42] net: dsa: mv88e6xxx: Fix clearing of stats counters Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 31/42] net: phy: realtek: fix RTL8201F sysfs name Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 32/42] sctp: define SCTP_SS_DEFAULT for Stream schedulers Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 33/42] net: qualcomm: rmnet: Fix incorrect assignment of real_dev Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 34/42] net: dsa: microchip: initialize mutex before use Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 35/42] sctp: fix strchange_flags name for Stream Change Event Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 36/42] net: phy: mdio-gpio: Fix working over slow can_sleep GPIOs Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 37/42] sctp: not increase streams incnt before sending addstrm_in request Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 38/42] mlxsw: spectrum: Fix IP2ME CPU policer configuration Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 39/42] net: smsc95xx: Fix MTU range Greg Kroah-Hartman
2018-11-21 19:06 ` Greg Kroah-Hartman [this message]
2018-11-21 19:06 ` [PATCH 4.19 41/42] usbnet: smsc95xx: disable carrier check while suspending Greg Kroah-Hartman
2018-11-21 19:06 ` [PATCH 4.19 42/42] Revert "x86/speculation: Enable cross-hyperthread spectre v2 STIBP mitigation" Greg Kroah-Hartman
2018-11-22  5:26 ` [PATCH 4.19 00/42] 4.19.4-stable review kernelci.org bot
2018-11-22  7:15 ` Harsh Shandilya
2018-11-22  8:24   ` Greg Kroah-Hartman
2018-11-22 16:36 ` Guenter Roeck
2018-11-23  7:16   ` Greg Kroah-Hartman
2018-11-22 20:53 ` Thomas Voegtle
2018-11-22 22:01   ` Thomas Voegtle
2018-11-22 22:30     ` Guenter Roeck
2018-11-23  6:52       ` Greg Kroah-Hartman
2018-11-23  7:45       ` Greg Kroah-Hartman
2018-11-23 12:06         ` Guenter Roeck
2018-11-23  6:51   ` Greg Kroah-Hartman
2018-11-23 15:40     ` Thomas Voegtle
2018-11-23 15:44       ` David Laight
2018-11-23  8:13 ` Naresh Kamboju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181121183149.704905588@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=dhowells@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.