Linux Documentation

Linux Documentation
 help / color / mirror / Atom feed

* [PATCH v2 net-next 3/6] net: ethernet: ti: cpsw: add MQPRIO Qdisc offload
From: Ivan Khoronzhuk @ 2018-06-14  7:36 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
	p-varis, spatton, francois.ozog, yogeshs, nsekhar, andrew,
	Ivan Khoronzhuk
In-Reply-To: <20180614073650.29659-1-ivan.khoronzhuk@linaro.org>

That's possible to offload vlan to tc priority mapping with
assumption sk_prio == L2 prio.

Example:
$ ethtool -L eth0 rx 1 tx 4

$ qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1

$ tc -g class show dev eth0
+---(100:ffe2) mqprio
|    +---(100:3) mqprio
|    +---(100:4) mqprio
|    
+---(100:ffe1) mqprio
|    +---(100:2) mqprio
|    
+---(100:ffe0) mqprio
     +---(100:1) mqprio

Here, 100:1 is txq0, 100:2 is txq1, 100:3 is txq2, 100:4 is txq3
txq0 belongs to tc0, txq1 to tc1, txq2 and txq3 to tc2
The offload part only maps L2 prio to classes of traffic, but not
to transmit queues, so to direct traffic to traffic class vlan has
to be created with appropriate egress map.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/cpsw.c | 82 ++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 406537d74ec1..edd14def98df 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -39,6 +39,7 @@
 #include <linux/sys_soc.h>
 
 #include <linux/pinctrl/consumer.h>
+#include <net/pkt_cls.h>
 
 #include "cpsw.h"
 #include "cpsw_ale.h"
@@ -153,6 +154,8 @@ do {								\
 #define IRQ_NUM			2
 #define CPSW_MAX_QUEUES		8
 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_TC_NUM			4
+#define CPSW_FIFO_SHAPERS_NUM		(CPSW_TC_NUM - 1)
 
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
 #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
@@ -453,6 +456,7 @@ struct cpsw_priv {
 	u8				mac_addr[ETH_ALEN];
 	bool				rx_pause;
 	bool				tx_pause;
+	bool				mqprio_hw;
 	u32 emac_port;
 	struct cpsw_common *cpsw;
 };
@@ -1577,6 +1581,14 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
 	soft_reset_slave(slave);
 }
 
+static int cpsw_tc_to_fifo(int tc, int num_tc)
+{
+	if (tc == num_tc - 1)
+		return 0;
+
+	return CPSW_FIFO_SHAPERS_NUM - tc;
+}
+
 static int cpsw_ndo_open(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
@@ -2190,6 +2202,75 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate)
 	return ret;
 }
 
+static int cpsw_set_mqprio(struct net_device *ndev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio = type_data;
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int fifo, num_tc, count, offset;
+	struct cpsw_slave *slave;
+	u32 tx_prio_map = 0;
+	int i, tc, ret;
+
+	num_tc = mqprio->qopt.num_tc;
+	if (num_tc > CPSW_TC_NUM)
+		return -EINVAL;
+
+	if (mqprio->mode != TC_MQPRIO_MODE_DCB)
+		return -EINVAL;
+
+	ret = pm_runtime_get_sync(cpsw->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(cpsw->dev);
+		return ret;
+	}
+
+	if (num_tc) {
+		for (i = 0; i < 8; i++) {
+			tc = mqprio->qopt.prio_tc_map[i];
+			fifo = cpsw_tc_to_fifo(tc, num_tc);
+			tx_prio_map |= fifo << (4 * i);
+		}
+
+		netdev_set_num_tc(ndev, num_tc);
+		for (i = 0; i < num_tc; i++) {
+			count = mqprio->qopt.count[i];
+			offset = mqprio->qopt.offset[i];
+			netdev_set_tc_queue(ndev, i, count, offset);
+		}
+	}
+
+	if (!mqprio->qopt.hw) {
+		/* restore default configuration */
+		netdev_reset_tc(ndev);
+		tx_prio_map = TX_PRIORITY_MAPPING;
+	}
+
+	priv->mqprio_hw = mqprio->qopt.hw;
+
+	offset = cpsw->version == CPSW_VERSION_1 ?
+		 CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+	slave_write(slave, tx_prio_map, offset);
+
+	pm_runtime_put_sync(cpsw->dev);
+
+	return 0;
+}
+
+static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+			     void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return cpsw_set_mqprio(ndev, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_open		= cpsw_ndo_open,
 	.ndo_stop		= cpsw_ndo_stop,
@@ -2205,6 +2286,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
 #endif
 	.ndo_vlan_rx_add_vid	= cpsw_ndo_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= cpsw_ndo_vlan_rx_kill_vid,
+	.ndo_setup_tc           = cpsw_ndo_setup_tc,
 };
 
 static int cpsw_get_regs_len(struct net_device *ndev)
-- 
2.17.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v2 net-next 2/6] net: ethernet: ti: cpdma: fit rated channels in backward order
From: Ivan Khoronzhuk @ 2018-06-14  7:36 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
	p-varis, spatton, francois.ozog, yogeshs, nsekhar, andrew,
	Ivan Khoronzhuk
In-Reply-To: <20180614073650.29659-1-ivan.khoronzhuk@linaro.org>

According to TRM tx rated channels should be in 7..0 order,
so correct it.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/davinci_cpdma.c | 31 ++++++++++++-------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index cdbddf16dd29..19bb63902997 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -406,37 +406,36 @@ static int cpdma_chan_fit_rate(struct cpdma_chan *ch, u32 rate,
 	struct cpdma_chan *chan;
 	u32 old_rate = ch->rate;
 	u32 new_rmask = 0;
-	int rlim = 1;
+	int rlim = 0;
 	int i;
 
-	*prio_mode = 0;
 	for (i = tx_chan_num(0); i < tx_chan_num(CPDMA_MAX_CHANNELS); i++) {
 		chan = ctlr->channels[i];
-		if (!chan) {
-			rlim = 0;
+		if (!chan)
 			continue;
-		}
 
 		if (chan == ch)
 			chan->rate = rate;
 
 		if (chan->rate) {
-			if (rlim) {
-				new_rmask |= chan->mask;
-			} else {
-				ch->rate = old_rate;
-				dev_err(ctlr->dev, "Prev channel of %dch is not rate limited\n",
-					chan->chan_num);
-				return -EINVAL;
-			}
-		} else {
-			*prio_mode = 1;
-			rlim = 0;
+			rlim = 1;
+			new_rmask |= chan->mask;
+			continue;
 		}
+
+		if (rlim)
+			goto err;
 	}
 
 	*rmask = new_rmask;
+	*prio_mode = rlim;
 	return 0;
+
+err:
+	ch->rate = old_rate;
+	dev_err(ctlr->dev, "Upper cpdma ch%d is not rate limited\n",
+		chan->chan_num);
+	return -EINVAL;
 }
 
 static u32 cpdma_chan_set_factors(struct cpdma_ctlr *ctlr,
-- 
2.17.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v2 net-next 0/6] net: ethernet: ti: cpsw: add MQPRIO and CBS Qdisc offload
From: Ivan Khoronzhuk @ 2018-06-14  7:36 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
	p-varis, spatton, francois.ozog, yogeshs, nsekhar, andrew,
	Ivan Khoronzhuk

This series adds MQPRIO and CBS Qdisc offload for TI cpsw driver.
It potentially can be used in audio video bridging (AVB) and time
sensitive networking (TSN).

Patchset was tested on AM572x EVM and BBB boards. Last patch from this
series adds detailed description of configuration with examples. For
consistency reasons, in role of talker and listener, tools from
patchset "TSN: Add qdisc based config interface for CBS" were used and
can be seen here: https://www.spinics.net/lists/netdev/msg460869.html

Based on net-next/master

v2..v1:
 - changed name cpsw.txt on ti-cpsw.txt
 - changed name cpsw_set_tc() on cpsw_set_mqprio()

Ivan Khoronzhuk (6):
  net: ethernet: ti: cpsw: use cpdma channels in backward order for txq
  net: ethernet: ti: cpdma: fit rated channels in backward order
  net: ethernet: ti: cpsw: add MQPRIO Qdisc offload
  net: ethernet: ti: cpsw: add CBS Qdisc offload
  net: ethernet: ti: cpsw: restore shaper configuration while down/up
  Documentation: networking: cpsw: add MQPRIO & CBS offload examples

 Documentation/networking/ti-cpsw.txt    | 540 ++++++++++++++++++++++++
 drivers/net/ethernet/ti/cpsw.c          | 364 +++++++++++++++-
 drivers/net/ethernet/ti/davinci_cpdma.c |  31 +-
 3 files changed, 913 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/networking/ti-cpsw.txt

-- 
2.17.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH v2 net-next 1/6] net: ethernet: ti: cpsw: use cpdma channels in backward order for txq
From: Ivan Khoronzhuk @ 2018-06-14  7:36 UTC (permalink / raw)
  To: grygorii.strashko, davem
  Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
	vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
	p-varis, spatton, francois.ozog, yogeshs, nsekhar, andrew,
	Ivan Khoronzhuk
In-Reply-To: <20180614073650.29659-1-ivan.khoronzhuk@linaro.org>

The cpdma channel highest priority is from hi to lo number.
The driver has limited number of descriptors that are shared between
number of cpdma channels. Number of queues can be tuned with ethtool,
that allows to not spend descriptors on not needed cpdma channels.
In AVB usually only 2 tx queues can be enough with rate limitation.
The rate limitation can be used only for hi priority queues. Thus, to
use only 2 queues the 8 has to be created. It's wasteful.

So, in order to allow using only needed number of rate limited
tx queues, save resources, and be able to set rate limitation for
them, let assign tx cpdma channels in backward order to queues.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
 drivers/net/ethernet/ti/cpsw.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 534596ce00d3..406537d74ec1 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -967,8 +967,8 @@ static int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget)
 
 	/* process every unprocessed channel */
 	ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
-	for (ch = 0, num_tx = 0; ch_map; ch_map >>= 1, ch++) {
-		if (!(ch_map & 0x01))
+	for (ch = 0, num_tx = 0; ch_map & 0xff; ch_map <<= 1, ch++) {
+		if (!(ch_map & 0x80))
 			continue;
 
 		txv = &cpsw->txv[ch];
@@ -2431,7 +2431,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
 	void (*handler)(void *, int, int);
 	struct netdev_queue *queue;
 	struct cpsw_vector *vec;
-	int ret, *ch;
+	int ret, *ch, vch;
 
 	if (rx) {
 		ch = &cpsw->rx_ch_num;
@@ -2444,7 +2444,8 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
 	}
 
 	while (*ch < ch_num) {
-		vec[*ch].ch = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+		vch = rx ? *ch : 7 - *ch;
+		vec[*ch].ch = cpdma_chan_create(cpsw->dma, vch, handler, rx);
 		queue = netdev_get_tx_queue(priv->ndev, *ch);
 		queue->tx_maxrate = 0;
 
@@ -2980,7 +2981,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	u32 slave_offset, sliver_offset, slave_size;
 	const struct soc_device_attribute *soc;
 	struct cpsw_common		*cpsw;
-	int ret = 0, i;
+	int ret = 0, i, ch;
 	int irq;
 
 	cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
@@ -3155,7 +3156,8 @@ static int cpsw_probe(struct platform_device *pdev)
 	if (soc)
 		cpsw->quirk_irq = 1;
 
-	cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+	ch = cpsw->quirk_irq ? 0 : 7;
+	cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, ch, cpsw_tx_handler, 0);
 	if (IS_ERR(cpsw->txv[0].ch)) {
 		dev_err(priv->dev, "error initializing tx dma channel\n");
 		ret = PTR_ERR(cpsw->txv[0].ch);
-- 
2.17.1

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH v2 net-next 4/6] net: ethernet: ti: cpsw: add CBS Qdisc offload
From: Ilias Apalodimas @ 2018-06-14  8:09 UTC (permalink / raw)
  To: Ivan Khoronzhuk
  Cc: grygorii.strashko, davem, corbet, akpm, netdev, linux-doc,
	linux-kernel, linux-omap, vinicius.gomes, henrik,
	jesus.sanchez-palencia, p-varis, spatton, francois.ozog, yogeshs,
	nsekhar, andrew
In-Reply-To: <20180614073650.29659-5-ivan.khoronzhuk@linaro.org>

On Thu, Jun 14, 2018 at 10:36:48AM +0300, Ivan Khoronzhuk wrote:
> The cpsw has up to 4 FIFOs per port and upper 3 FIFOs can feed rate
> limited queue with shaping. In order to set and enable shaping for
> those 3 FIFOs queues the network device with CBS qdisc attached is
> needed. The CBS configuration is added for dual-emac/single port mode
> only, but potentially can be used in switch mode also, based on
> switchdev for instance.
> 
> Despite the FIFO shapers can work w/o cpdma level shapers the base
> usage must be in combine with cpdma level shapers as described in TRM,
> that are set as maximum rates for interface queues with sysfs.
> 
> One of the possible configuration with txq shapers and CBS shapers:
> 
>                       Configured with echo RATE >
>                   /sys/class/net/eth0/queues/tx-0/tx_maxrate
>              /---------------------------------------------------
>             /
>            /            cpdma level shapers
>         +----+ +----+ +----+ +----+ +----+ +----+ +----+ +----+
>         | c7 | | c6 | | c5 | | c4 | | c3 | | c2 | | c1 | | c0 |
>         \    / \    / \    / \    / \    / \    / \    / \    /
>          \  /   \  /   \  /   \  /   \  /   \  /   \  /   \  /
>           \/     \/     \/     \/     \/     \/     \/     \/
> +---------|------|------|------|-------------------------------------+
> |    +----+      |      |  +---+                                     |
> |    |      +----+      |  |                                         |
> |    v      v           v  v                                         |
> | +----+ +----+ +----+ +----+ p        p+----+ +----+ +----+ +----+  |
> | |    | |    | |    | |    | o        o|    | |    | |    | |    |  |
> | | f3 | | f2 | | f1 | | f0 | r  CPSW  r| f3 | | f2 | | f1 | | f0 |  |
> | |    | |    | |    | |    | t        t|    | |    | |    | |    |  |
> | \    / \    / \    / \    / 0        1\    / \    / \    / \    /  |
> |  \  X   \  /   \  /   \  /             \  /   \  /   \  /   \  /   |
> |   \/ \   \/     \/     \/               \/     \/     \/     \/    |
> +-------\------------------------------------------------------------+
>          \
>           \ FIFO shaper, set with CBS offload added in this patch,
>            \ FIFO0 cannot be rate limited
>             ------------------------------------------------------
> 
> CBS shaper configuration is supposed to be used with root MQPRIO Qdisc
> offload allowing to add sk_prio->tc->txq maps that direct traffic to
> appropriate tx queue and maps L2 priority to FIFO shaper.
> 
> The CBS shaper is intended to be used for AVB where L2 priority
> (pcp field) is used to differentiate class of traffic. So additionally
> vlan needs to be created with appropriate egress sk_prio->l2 prio map.
> 
> If CBS has several tx queues assigned to it, the sum of their
> bandwidth has not overlap bandwidth set for CBS. It's recomended the
> CBS bandwidth to be a little bit more.
> 
> The CBS shaper is configured with CBS qdisc offload interface using tc
> tool from iproute2 packet.
> 
> For instance:
> 
> $ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
> map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
> 
> $ tc -g class show dev eth0
> +---(100:ffe2) mqprio
> |    +---(100:3) mqprio
> |    +---(100:4) mqprio
> |    
> +---(100:ffe1) mqprio
> |    +---(100:2) mqprio
> |    
> +---(100:ffe0) mqprio
>      +---(100:1) mqprio
> 
> $ tc qdisc add dev eth0 parent 100:1 cbs locredit -1440 \
> hicredit 60 sendslope -960000 idleslope 40000 offload 1
> 
> $ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
> hicredit 62 sendslope -980000 idleslope 20000 offload 1
> 
> The above code set CBS shapers for tc0 and tc1, for that txq0 and
> txq1 is used. Pay attention, the real set bandwidth can differ a bit
> due to discreteness of configuration parameters.
> 
> Here parameters like locredit, hicredit and sendslope are ignored
> internally and are supposed to be set with assumption that maximum
> frame size for frame - 1500.
> 
> It's supposed that interface speed is not changed while reconnection,
> not always is true, so inform user in case speed of interface was
> changed, as it can impact on dependent shapers configuration.
> 
> For more examples see Documentation.
> 
> Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
> ---
>  drivers/net/ethernet/ti/cpsw.c | 221 +++++++++++++++++++++++++++++++++
>  1 file changed, 221 insertions(+)
> 
> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> index edd14def98df..3623c2994ddf 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -46,6 +46,8 @@
>  #include "cpts.h"
>  #include "davinci_cpdma.h"
>  
> +#include <net/pkt_sched.h>
> +
>  #define CPSW_DEBUG	(NETIF_MSG_HW		| NETIF_MSG_WOL		| \
>  			 NETIF_MSG_DRV		| NETIF_MSG_LINK	| \
>  			 NETIF_MSG_IFUP		| NETIF_MSG_INTR	| \
> @@ -154,8 +156,12 @@ do {								\
>  #define IRQ_NUM			2
>  #define CPSW_MAX_QUEUES		8
>  #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
> +#define CPSW_FIFO_QUEUE_TYPE_SHIFT	16
> +#define CPSW_FIFO_SHAPE_EN_SHIFT	16
> +#define CPSW_FIFO_RATE_EN_SHIFT		20
>  #define CPSW_TC_NUM			4
>  #define CPSW_FIFO_SHAPERS_NUM		(CPSW_TC_NUM - 1)
> +#define CPSW_PCT_MASK			0x7f
>  
>  #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
>  #define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
> @@ -457,6 +463,8 @@ struct cpsw_priv {
>  	bool				rx_pause;
>  	bool				tx_pause;
>  	bool				mqprio_hw;
> +	int				fifo_bw[CPSW_TC_NUM];
> +	int				shp_cfg_speed;
>  	u32 emac_port;
>  	struct cpsw_common *cpsw;
>  };
> @@ -1081,6 +1089,38 @@ static void cpsw_set_slave_mac(struct cpsw_slave *slave,
>  	slave_write(slave, mac_lo(priv->mac_addr), SA_LO);
>  }
>  
> +static bool cpsw_shp_is_off(struct cpsw_priv *priv)
> +{
> +	struct cpsw_common *cpsw = priv->cpsw;
> +	struct cpsw_slave *slave;
> +	u32 shift, mask, val;
> +
> +	val = readl_relaxed(&cpsw->regs->ptype);
> +
> +	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
> +	shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
> +	mask = 7 << shift;
> +	val = val & mask;
> +
> +	return !val;
> +}
> +
> +static void cpsw_fifo_shp_on(struct cpsw_priv *priv, int fifo, int on)
> +{
> +	struct cpsw_common *cpsw = priv->cpsw;
> +	struct cpsw_slave *slave;
> +	u32 shift, mask, val;
> +
> +	val = readl_relaxed(&cpsw->regs->ptype);
> +
> +	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
> +	shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
> +	mask = (1 << --fifo) << shift;
> +	val = on ? val | mask : val & ~mask;
> +
> +	writel_relaxed(val, &cpsw->regs->ptype);
> +}
> +
>  static void _cpsw_adjust_link(struct cpsw_slave *slave,
>  			      struct cpsw_priv *priv, bool *link)
>  {
> @@ -1120,6 +1160,12 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
>  			mac_control |= BIT(4);
>  
>  		*link = true;
> +
> +		if (priv->shp_cfg_speed &&
> +		    priv->shp_cfg_speed != slave->phy->speed &&
> +		    !cpsw_shp_is_off(priv))
> +			dev_warn(priv->dev,
> +				 "Speed was changed, CBS sahper speeds are changed!");
typo here, should be shaper
>  	} else {
>  		mac_control = 0;
>  		/* disable forwarding */
> @@ -1589,6 +1635,178 @@ static int cpsw_tc_to_fifo(int tc, int num_tc)
>  	return CPSW_FIFO_SHAPERS_NUM - tc;
>  }
>  
> +static int cpsw_set_fifo_bw(struct cpsw_priv *priv, int fifo, int bw)
> +{
> +	struct cpsw_common *cpsw = priv->cpsw;
> +	u32 val = 0, send_pct, shift;
> +	struct cpsw_slave *slave;
> +	int pct = 0, i;
> +
> +	if (bw > priv->shp_cfg_speed * 1000)
> +		goto err;
> +
> +	/* shaping has to stay enabled for highest fifos linearly
> +	 * and fifo bw no more then interface can allow
> +	 */
> +	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
> +	send_pct = slave_read(slave, SEND_PERCENT);
> +	for (i = CPSW_FIFO_SHAPERS_NUM; i > 0; i--) {
> +		if (!bw) {
> +			if (i >= fifo || !priv->fifo_bw[i])
> +				continue;
> +
> +			dev_warn(priv->dev, "Prev FIFO%d is shaped", i);
> +			continue;
> +		}
> +
> +		if (!priv->fifo_bw[i] && i > fifo) {
> +			dev_err(priv->dev, "Upper FIFO%d is not shaped", i);
> +			return -EINVAL;
> +		}
> +
> +		shift = (i - 1) * 8;
> +		if (i == fifo) {
> +			send_pct &= ~(CPSW_PCT_MASK << shift);
> +			val = DIV_ROUND_UP(bw, priv->shp_cfg_speed * 10);
> +			if (!val)
> +				val = 1;
> +
> +			send_pct |= val << shift;
> +			pct += val;
> +			continue;
> +		}
> +
> +		if (priv->fifo_bw[i])
> +			pct += (send_pct >> shift) & CPSW_PCT_MASK;
> +	}
> +
> +	if (pct >= 100)
> +		goto err;
> +
> +	slave_write(slave, send_pct, SEND_PERCENT);
> +	priv->fifo_bw[fifo] = bw;
> +
> +	dev_warn(priv->dev, "set FIFO%d bw = %d\n", fifo,
> +		 DIV_ROUND_CLOSEST(val * priv->shp_cfg_speed, 100));
> +
> +	return 0;
> +err:
> +	dev_err(priv->dev, "Bandwidth doesn't fit in tc configuration");
> +	return -EINVAL;
> +}
> +
> +static int cpsw_set_fifo_rlimit(struct cpsw_priv *priv, int fifo, int bw)
> +{
> +	struct cpsw_common *cpsw = priv->cpsw;
> +	struct cpsw_slave *slave;
> +	u32 tx_in_ctl_rg, val;
> +	int ret;
> +
> +	ret = cpsw_set_fifo_bw(priv, fifo, bw);
> +	if (ret)
> +		return ret;
> +
> +	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
> +	tx_in_ctl_rg = cpsw->version == CPSW_VERSION_1 ?
> +		       CPSW1_TX_IN_CTL : CPSW2_TX_IN_CTL;
> +
> +	if (!bw)
> +		cpsw_fifo_shp_on(priv, fifo, bw);
> +
> +	val = slave_read(slave, tx_in_ctl_rg);
> +	if (cpsw_shp_is_off(priv)) {
> +		/* disable FIFOs rate limited queues */
> +		val &= ~(0xf << CPSW_FIFO_RATE_EN_SHIFT);
> +
> +		/* set type of FIFO queues to normal priority mode */
> +		val &= ~(3 << CPSW_FIFO_QUEUE_TYPE_SHIFT);
> +
> +		/* set type of FIFO queues to be rate limited */
> +		if (bw)
> +			val |= 2 << CPSW_FIFO_QUEUE_TYPE_SHIFT;
> +		else
> +			priv->shp_cfg_speed = 0;
> +	}
> +
> +	/* toggle a FIFO rate limited queue */
> +	if (bw)
> +		val |= BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
> +	else
> +		val &= ~BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
> +	slave_write(slave, val, tx_in_ctl_rg);
> +
> +	/* FIFO transmit shape enable */
> +	cpsw_fifo_shp_on(priv, fifo, bw);
> +	return 0;
> +}
> +
> +/* Defaults:
> + * class A - prio 3
> + * class B - prio 2
> + * shaping for class A should be set first
> + */
> +static int cpsw_set_cbs(struct net_device *ndev,
> +			struct tc_cbs_qopt_offload *qopt)
> +{
> +	struct cpsw_priv *priv = netdev_priv(ndev);
> +	struct cpsw_common *cpsw = priv->cpsw;
> +	struct cpsw_slave *slave;
> +	int prev_speed = 0;
> +	int tc, ret, fifo;
> +	u32 bw = 0;
> +
> +	tc = netdev_txq_to_tc(priv->ndev, qopt->queue);
> +
> +	/* enable channels in backward order, as highest FIFOs must be rate
> +	 * limited first and for compliance with CPDMA rate limited channels
> +	 * that also used in bacward order. FIFO0 cannot be rate limited.
> +	 */
> +	fifo = cpsw_tc_to_fifo(tc, ndev->num_tc);
> +	if (!fifo) {
> +		dev_err(priv->dev, "Last tc%d can't be rate limited", tc);
> +		return -EINVAL;
> +	}
> +
> +	/* do nothing, it's disabled anyway */
> +	if (!qopt->enable && !priv->fifo_bw[fifo])
> +		return 0;
> +
> +	/* shapers can be set if link speed is known */
> +	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
> +	if (slave->phy && slave->phy->link) {
> +		if (priv->shp_cfg_speed &&
> +		    priv->shp_cfg_speed != slave->phy->speed)
> +			prev_speed = priv->shp_cfg_speed;
> +
> +		priv->shp_cfg_speed = slave->phy->speed;
> +	}
> +
> +	if (!priv->shp_cfg_speed) {
> +		dev_err(priv->dev, "Link speed is not known");
> +		return -1;
> +	}
> +
> +	ret = pm_runtime_get_sync(cpsw->dev);
> +	if (ret < 0) {
> +		pm_runtime_put_noidle(cpsw->dev);
> +		return ret;
> +	}
> +
> +	bw = qopt->enable ? qopt->idleslope : 0;
> +	ret = cpsw_set_fifo_rlimit(priv, fifo, bw);
> +	if (ret) {
> +		priv->shp_cfg_speed = prev_speed;
> +		prev_speed = 0;
> +	}
> +
> +	if (bw && prev_speed)
> +		dev_warn(priv->dev,
> +			 "Speed was changed, CBS sahper speeds are changed!");
same c/p typo
> +
> +	pm_runtime_put_sync(cpsw->dev);
> +	return ret;
> +}
> +
>  static int cpsw_ndo_open(struct net_device *ndev)
>  {
>  	struct cpsw_priv *priv = netdev_priv(ndev);
> @@ -2263,6 +2481,9 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
>  			     void *type_data)
>  {
>  	switch (type) {
> +	case TC_SETUP_QDISC_CBS:
> +		return cpsw_set_cbs(ndev, type_data);
> +
>  	case TC_SETUP_QDISC_MQPRIO:
>  		return cpsw_set_mqprio(ndev, type_data);
>  
> -- 
> 2.17.1
> 
Other than that looks good 
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v4 01/10] i3c: Add core I3C infrastructure
From: Wolfram Sang @ 2018-06-14  8:15 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: linux-i2c, Jonathan Corbet, linux-doc, Greg Kroah-Hartman,
	Arnd Bergmann, Przemyslaw Sroka, Arkadiusz Golec, Alan Douglas,
	Bartosz Folta, Damian Kos, Alicja Jurasik-Urbaniak,
	Cyprian Wronka, Suresh Punnoose, Rafal Ciepiela, Thomas Petazzoni,
	Nishanth Menon, Rob Herring, Pawel Moll, Mark Rutland,
	Ian Campbell, Kumar Gala, devicetree, linux-kernel, Vitor Soares,
	Geert Uytterhoeven, Linus Walleij, Xiang Lin, linux-gpio
In-Reply-To: <20180614090758.17180594@bbrezillon>

[-- Attachment #1: Type: text/plain, Size: 913 bytes --]


> > I don't know if typical I3C transfers will be similar to I2C with
> > usually small payloads where DMA usually makes not much sense. Yet, I
> > think, that it might be a good idea to think about how this shall be
> > handled with I3C right away. Maybe simply enforcing buffers to be
> > DMA-safe. Or whatever.
> > 
> > A clear rule on that might save you hazzle later.
> 
> I completely agree. I'll clarify that and for people to pass DMA-able
> buffers to this struct (just as the SPI framework does). Note that we

Ok, cool!

> don't really have a way to ensure that the buffer is actually
> DMA-safe from the core, because this notion is architecture/SoC
> dependent.

True, we can't ensure it at the core. Still, a documented rule will make
it clear that everything else is considered a bug. Much better than a
gray area you have to deal with later.

Kind regards,

   Wolfram


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Andrea Parri @ 2018-06-14 10:38 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: dri-devel, linux-kernel, Peter Zijlstra, Ingo Molnar,
	Jonathan Corbet, Gustavo Padovan, Maarten Lankhorst, Sean Paul,
	David Airlie, Davidlohr Bueso, Paul E. McKenney, Josh Triplett,
	Thomas Gleixner, Kate Stewart, Philippe Ombredanne,
	Greg Kroah-Hartman, linux-doc, linux-media, linaro-mm-sig
In-Reply-To: <20180614072922.8114-2-thellstrom@vmware.com>

Hi Thomas,

On Thu, Jun 14, 2018 at 09:29:21AM +0200, Thomas Hellstrom wrote:
> The current Wound-Wait mutex algorithm is actually not Wound-Wait but
> Wait-Die. Implement also Wound-Wait as a per-ww-class choice. Wound-Wait
> is, contrary to Wait-Die a preemptive algorithm and is known to generate
> fewer backoffs. Testing reveals that this is true if the
> number of simultaneous contending transactions is small.
> As the number of simultaneous contending threads increases, Wait-Wound
> becomes inferior to Wait-Die in terms of elapsed time.
> Possibly due to the larger number of held locks of sleeping transactions.
> 
> Update documentation and callers.
> 
> Timings using git://people.freedesktop.org/~thomash/ww_mutex_test
> tag patch-18-06-14
> 
> Each thread runs 100000 batches of lock / unlock 800 ww mutexes randomly
> chosen out of 100000. Four core Intel x86_64:
> 
> Algorithm    #threads       Rollbacks  time
> Wound-Wait   4              ~100       ~17s.
> Wait-Die     4              ~150000    ~19s.
> Wound-Wait   16             ~360000    ~109s.
> Wait-Die     16             ~450000    ~82s.
> 
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: Gustavo Padovan <gustavo@padovan.org>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Sean Paul <seanpaul@chromium.org>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Davidlohr Bueso <dave@stgolabs.net>
> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> Cc: Josh Triplett <josh@joshtriplett.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Kate Stewart <kstewart@linuxfoundation.org>
> Cc: Philippe Ombredanne <pombredanne@nexb.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: linux-doc@vger.kernel.org
> Cc: linux-media@vger.kernel.org
> Cc: linaro-mm-sig@lists.linaro.org
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
> 
> ---
> v2:
> * Update API according to review comment by Greg Kroah-Hartman.
> * Address review comments by Peter Zijlstra:
>   - Avoid _Bool in composites
>   - Fix typo
>   - Use __mutex_owner() where applicable
>   - Rely on built-in barriers for the main loop exit condition,
>     struct ww_acquire_ctx::wounded. Update code comments.
>   - Explain unlocked use of list_empty().
> ---
>  Documentation/locking/ww-mutex-design.txt |  54 ++++++++++++----
>  drivers/dma-buf/reservation.c             |   2 +-
>  drivers/gpu/drm/drm_modeset_lock.c        |   2 +-
>  include/linux/ww_mutex.h                  |  19 ++++--
>  kernel/locking/locktorture.c              |   2 +-
>  kernel/locking/mutex.c                    | 103 +++++++++++++++++++++++++++---
>  kernel/locking/test-ww_mutex.c            |   2 +-
>  lib/locking-selftest.c                    |   2 +-
>  8 files changed, 156 insertions(+), 30 deletions(-)
> 
> diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
> index 34c3a1b50b9a..b9597def9581 100644
> --- a/Documentation/locking/ww-mutex-design.txt
> +++ b/Documentation/locking/ww-mutex-design.txt
> @@ -1,4 +1,4 @@
> -Wait/Wound Deadlock-Proof Mutex Design
> +Wound/Wait Deadlock-Proof Mutex Design
>  ======================================
>  
>  Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
> @@ -32,10 +32,23 @@ the oldest task) wins, and the one with the higher reservation id (i.e. the
>  younger task) unlocks all of the buffers that it has already locked, and then
>  tries again.
>  
> -In the RDBMS literature this deadlock handling approach is called wait/wound:
> -The older tasks waits until it can acquire the contended lock. The younger tasks
> -needs to back off and drop all the locks it is currently holding, i.e. the
> -younger task is wounded.
> +In the RDBMS literature, a reservation ticket is associated with a transaction.
> +and the deadlock handling approach is called Wait-Die. The name is based on
> +the actions of a locking thread when it encounters an already locked mutex.
> +If the transaction holding the lock is younger, the locking transaction waits.
> +If the transaction holding the lock is older, the locking transaction backs off
> +and dies. Hence Wait-Die.
> +There is also another algorithm called Wound-Wait:
> +If the transaction holding the lock is younger, the locking transaction
> +preempts the transaction holding the lock, requiring it to back off. It
> +Wounds the other transaction.
> +If the transaction holding the lock is older, it waits for the other
> +transaction. Hence Wound-Wait.
> +The two algorithms are both fair in that a transaction will eventually succeed.
> +However, the Wound-Wait algorithm is typically stated to generate fewer backoffs
> +compared to Wait-Die, but is, on the other hand, associated with more work than
> +Wait-Die when recovering from a backoff. Wound-Wait is also a preemptive
> +algorithm which requires a reliable way to preempt another transaction.
>  
>  Concepts
>  --------
> @@ -47,10 +60,12 @@ Acquire context: To ensure eventual forward progress it is important the a task
>  trying to acquire locks doesn't grab a new reservation id, but keeps the one it
>  acquired when starting the lock acquisition. This ticket is stored in the
>  acquire context. Furthermore the acquire context keeps track of debugging state
> -to catch w/w mutex interface abuse.
> +to catch w/w mutex interface abuse. An acquire context is representing a
> +transaction.
>  
>  W/w class: In contrast to normal mutexes the lock class needs to be explicit for
> -w/w mutexes, since it is required to initialize the acquire context.
> +w/w mutexes, since it is required to initialize the acquire context. The lock
> +class also specifies what algorithm to use, Wound-Wait or Wait-Die.
>  
>  Furthermore there are three different class of w/w lock acquire functions:
>  
> @@ -90,6 +105,12 @@ provided.
>  Usage
>  -----
>  
> +The algorithm (Wait-Die vs Wound-Wait) is chosen by using either
> +DEFINE_WW_CLASS_WDIE() for Wait-Die or DEFINE_WW_CLASS() for Wound-Wait.
> +As a rough rule of thumb, use Wound-Wait iff you typically expect the number
> +of simultaneous competing transactions to be small, and the rollback cost can
> +be substantial.
> +
>  Three different ways to acquire locks within the same w/w class. Common
>  definitions for methods #1 and #2:
>  
> @@ -312,12 +333,23 @@ Design:
>    We maintain the following invariants for the wait list:
>    (1) Waiters with an acquire context are sorted by stamp order; waiters
>        without an acquire context are interspersed in FIFO order.
> -  (2) Among waiters with contexts, only the first one can have other locks
> -      acquired already (ctx->acquired > 0). Note that this waiter may come
> -      after other waiters without contexts in the list.
> +  (2) For Wait-Die, among waiters with contexts, only the first one can have
> +      other locks acquired already (ctx->acquired > 0). Note that this waiter
> +      may come after other waiters without contexts in the list.
> +
> +  The Wound-Wait preemption is implemented with a lazy-preemption scheme:
> +  The wounded status of the transaction is checked only when there is
> +  contention for a new lock and hence a true chance of deadlock. In that
> +  situation, if the transaction is wounded, it backs off, clears the
> +  wounded status and retries. A great benefit of implementing preemption in
> +  this way is that the wounded transaction can identify a contending lock to
> +  wait for before restarting the transaction. Just blindly restarting the
> +  transaction would likely make the transaction end up in a situation where
> +  it would have to back off again.
>  
>    In general, not much contention is expected. The locks are typically used to
> -  serialize access to resources for devices.
> +  serialize access to resources for devices, and optimization focus should
> +  therefore be directed towards the uncontended cases.
>  
>  Lockdep:
>    Special care has been taken to warn for as many cases of api abuse
> diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
> index 314eb1071cce..b94a4bab2ecd 100644
> --- a/drivers/dma-buf/reservation.c
> +++ b/drivers/dma-buf/reservation.c
> @@ -46,7 +46,7 @@
>   * write-side updates.
>   */
>  
> -DEFINE_WW_CLASS(reservation_ww_class);
> +DEFINE_WW_CLASS_WDIE(reservation_ww_class);
>  EXPORT_SYMBOL(reservation_ww_class);
>  
>  struct lock_class_key reservation_seqcount_class;
> diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
> index 8a5100685875..ff00a814f617 100644
> --- a/drivers/gpu/drm/drm_modeset_lock.c
> +++ b/drivers/gpu/drm/drm_modeset_lock.c
> @@ -70,7 +70,7 @@
>   * lists and lookup data structures.
>   */
>  
> -static DEFINE_WW_CLASS(crtc_ww_class);
> +static DEFINE_WW_CLASS_WDIE(crtc_ww_class);
>  
>  /**
>   * drm_modeset_lock_all - take all modeset locks
> diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
> index 39fda195bf78..3880813b7db5 100644
> --- a/include/linux/ww_mutex.h
> +++ b/include/linux/ww_mutex.h
> @@ -8,6 +8,8 @@
>   *
>   * Wound/wait implementation:
>   *  Copyright (C) 2013 Canonical Ltd.
> + * Choice of algorithm:
> + *  Copyright (C) 2018 WMWare Inc.
>   *
>   * This file contains the main data structure and API definitions.
>   */
> @@ -23,15 +25,17 @@ struct ww_class {
>  	struct lock_class_key mutex_key;
>  	const char *acquire_name;
>  	const char *mutex_name;
> +	unsigned int is_wait_die;
>  };
>  
>  struct ww_acquire_ctx {
>  	struct task_struct *task;
>  	unsigned long stamp;
>  	unsigned acquired;
> +	unsigned int wounded;
> +	struct ww_class *ww_class;
>  #ifdef CONFIG_DEBUG_MUTEXES
>  	unsigned done_acquire;
> -	struct ww_class *ww_class;
>  	struct ww_mutex *contending_lock;
>  #endif
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
> @@ -58,17 +62,21 @@ struct ww_mutex {
>  # define __WW_CLASS_MUTEX_INITIALIZER(lockname, class)
>  #endif
>  
> -#define __WW_CLASS_INITIALIZER(ww_class) \
> +#define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die)	    \
>  		{ .stamp = ATOMIC_LONG_INIT(0) \
>  		, .acquire_name = #ww_class "_acquire" \
> -		, .mutex_name = #ww_class "_mutex" }
> +		, .mutex_name = #ww_class "_mutex" \
> +		, .is_wait_die = _is_wait_die }
>  
>  #define __WW_MUTEX_INITIALIZER(lockname, class) \
>  		{ .base =  __MUTEX_INITIALIZER(lockname.base) \
>  		__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
>  
>  #define DEFINE_WW_CLASS(classname) \
> -	struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
> +	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0)
> +
> +#define DEFINE_WW_CLASS_WDIE(classname)	\
> +	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1)
>  
>  #define DEFINE_WW_MUTEX(mutexname, ww_class) \
>  	struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)
> @@ -123,8 +131,9 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
>  	ctx->task = current;
>  	ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp);
>  	ctx->acquired = 0;
> -#ifdef CONFIG_DEBUG_MUTEXES
>  	ctx->ww_class = ww_class;
> +	ctx->wounded = false;
> +#ifdef CONFIG_DEBUG_MUTEXES
>  	ctx->done_acquire = 0;
>  	ctx->contending_lock = NULL;
>  #endif
> diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
> index 6850ffd69125..e861c1bf0e1e 100644
> --- a/kernel/locking/locktorture.c
> +++ b/kernel/locking/locktorture.c
> @@ -365,7 +365,7 @@ static struct lock_torture_ops mutex_lock_ops = {
>  };
>  
>  #include <linux/ww_mutex.h>
> -static DEFINE_WW_CLASS(torture_ww_class);
> +static DEFINE_WW_CLASS_WDIE(torture_ww_class);
>  static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class);
>  static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class);
>  static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class);
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index 2048359f33d2..ffa00b5aaf03 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -290,12 +290,49 @@ __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
>  	       (a->stamp != b->stamp || a > b);
>  }
>  
> +/*
> + * Wound the lock holder transaction if it's younger than the contending
> + * transaction, and there is a possibility of a deadlock.
> + * Also if the lock holder transaction isn't the current transaction,
> + * make sure it's woken up in case it's sleeping on another ww mutex.
> + */
> +static bool __ww_mutex_wound(struct mutex *lock,
> +			     struct ww_acquire_ctx *ww_ctx,
> +			     struct ww_acquire_ctx *hold_ctx)
> +{
> +	struct task_struct *owner = __mutex_owner(lock);
> +
> +	lockdep_assert_held(&lock->wait_lock);
> +
> +	if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) &&
> +	    ww_ctx->acquired > 0) {
> +		hold_ctx->wounded = 1;
> +
> +		/*
> +		 * wake_up_process() paired with set_current_state() inserts
> +		 * sufficient barriers to make sure @owner either sees it's
> +		 * wounded or has a wakeup pending to re-read the wounded
> +		 * state.

IIUC, "sufficient barriers" = full memory barriers (here).  (You may
want to be more specific.)

> +		 *
> +		 * The value of hold_ctx->wounded in
> +		 * __ww_mutex_lock_check_stamp();

Missing parts/incomplete sentence?

  Andrea


> +		 */
> +		if (owner != current)
> +			wake_up_process(owner);
> +
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
>  /*
>   * Wake up any waiters that may have to back off when the lock is held by the
>   * given context.
>   *
>   * Due to the invariants on the wait list, this can only affect the first
> - * waiter with a context.
> + * waiter with a context, unless the Wound-Wait algorithm is used where
> + * also subsequent waiters with a context main wound the lock holder.
>   *
>   * The current task must not be on the wait list.
>   */
> @@ -303,6 +340,7 @@ static void __sched
>  __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>  {
>  	struct mutex_waiter *cur;
> +	unsigned int is_wait_die = ww_ctx->ww_class->is_wait_die;
>  
>  	lockdep_assert_held(&lock->wait_lock);
>  
> @@ -310,13 +348,14 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>  		if (!cur->ww_ctx)
>  			continue;
>  
> -		if (cur->ww_ctx->acquired > 0 &&
> +		if (is_wait_die && cur->ww_ctx->acquired > 0 &&
>  		    __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
>  			debug_mutex_wake_waiter(lock, cur);
>  			wake_up_process(cur->task);
>  		}
>  
> -		break;
> +		if (is_wait_die || __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
> +			break;
>  	}
>  }
>  
> @@ -338,12 +377,18 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
>  	 * and keep spinning, or it will acquire wait_lock, add itself
>  	 * to waiter list and sleep.
>  	 */
> -	smp_mb(); /* ^^^ */
> +	smp_mb(); /* See comments above and below. */
>  
>  	/*
> -	 * Check if lock is contended, if not there is nobody to wake up
> +	 * Check if lock is contended, if not there is nobody to wake up.
> +	 * We can use list_empty() unlocked here since it only compares a
> +	 * list_head field pointer to the address of the list head
> +	 * itself, similarly to how list_empty() can be considered RCU-safe.
> +	 * The memory barrier above pairs with the memory barrier in
> +	 * __ww_mutex_add_waiter and makes sure lock->ctx is visible before
> +	 * we check for waiters.
>  	 */
> -	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
> +	if (likely(list_empty(&lock->base.wait_list)))
>  		return;
>  
>  	/*
> @@ -653,6 +698,13 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
>  	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
>  	struct mutex_waiter *cur;
>  
> +	if (!ctx->ww_class->is_wait_die) {
> +		if (ctx->wounded)
> +			goto deadlock;
> +		else
> +			return 0;
> +	}
> +
>  	if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
>  		goto deadlock;
>  
> @@ -683,16 +735,21 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
>  {
>  	struct mutex_waiter *cur;
>  	struct list_head *pos;
> +	unsigned int is_wait_die;
>  
>  	if (!ww_ctx) {
>  		list_add_tail(&waiter->list, &lock->wait_list);
>  		return 0;
>  	}
>  
> +	is_wait_die = ww_ctx->ww_class->is_wait_die;
> +
>  	/*
>  	 * Add the waiter before the first waiter with a higher stamp.
>  	 * Waiters without a context are skipped to avoid starving
> -	 * them.
> +	 * them. Wait-Die waiters may back off here. Wound-Wait waiters
> +	 * never back off here, but they are sorted in stamp order and
> +	 * may wound the lock holder.
>  	 */
>  	pos = &lock->wait_list;
>  	list_for_each_entry_reverse(cur, &lock->wait_list, list) {
> @@ -701,7 +758,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
>  
>  		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
>  			/* Back off immediately if necessary. */
> -			if (ww_ctx->acquired > 0) {
> +			if (is_wait_die && ww_ctx->acquired > 0) {
>  #ifdef CONFIG_DEBUG_MUTEXES
>  				struct ww_mutex *ww;
>  
> @@ -721,13 +778,28 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
>  		 * Wake up the waiter so that it gets a chance to back
>  		 * off.
>  		 */
> -		if (cur->ww_ctx->acquired > 0) {
> +		if (is_wait_die && cur->ww_ctx->acquired > 0) {
>  			debug_mutex_wake_waiter(lock, cur);
>  			wake_up_process(cur->task);
>  		}
>  	}
>  
>  	list_add_tail(&waiter->list, pos);
> +	if (!is_wait_die) {
> +		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
> +
> +		/*
> +		 * Make sure a racing lock taker sees a non-empty waiting list
> +		 * before we read ww->ctx, so that if we miss ww->ctx, the
> +		 * racing lock taker will see a non-empty list and call
> +		 * __ww_mutex_wake_up_for_backoff() and wound itself. The
> +		 * memory barrier pairs with the one in
> +		 * ww_mutex_set_context_fastpath().
> +		 */
> +		smp_mb();
> +		__ww_mutex_wound(lock, ww_ctx, ww->ctx);
> +	}
> +
>  	return 0;
>  }
>  
> @@ -750,6 +822,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  	if (use_ww_ctx && ww_ctx) {
>  		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
>  			return -EALREADY;
> +
> +		/*
> +		 * Reset the wounded flag after a backoff.
> +		 * No other process can race and wound us here since they
> +		 * can't have a valid owner pointer at this time
> +		 */
> +		if (ww_ctx->acquired == 0)
> +			ww_ctx->wounded = 0;
>  	}
>  
>  	preempt_disable();
> @@ -858,6 +938,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
>  acquired:
>  	__set_current_state(TASK_RUNNING);
>  
> +	/* We stole the lock. Need to check wounded status. */
> +	if (use_ww_ctx && ww_ctx && !ww_ctx->ww_class->is_wait_die &&
> +	    !__mutex_waiter_is_first(lock, &waiter))
> +		__ww_mutex_wakeup_for_backoff(lock, ww_ctx);
> +
>  	mutex_remove_waiter(lock, &waiter, current);
>  	if (likely(list_empty(&lock->wait_list)))
>  		__mutex_clear_flag(lock, MUTEX_FLAGS);
> diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
> index 0e4cd64ad2c0..3413430611d8 100644
> --- a/kernel/locking/test-ww_mutex.c
> +++ b/kernel/locking/test-ww_mutex.c
> @@ -26,7 +26,7 @@
>  #include <linux/slab.h>
>  #include <linux/ww_mutex.h>
>  
> -static DEFINE_WW_CLASS(ww_class);
> +static DEFINE_WW_CLASS_WDIE(ww_class);
>  struct workqueue_struct *wq;
>  
>  struct test_mutex {
> diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
> index b5c1293ce147..d0abf65ba9ad 100644
> --- a/lib/locking-selftest.c
> +++ b/lib/locking-selftest.c
> @@ -29,7 +29,7 @@
>   */
>  static unsigned int debug_locks_verbose;
>  
> -static DEFINE_WW_CLASS(ww_lockdep);
> +static DEFINE_WW_CLASS_WDIE(ww_lockdep);
>  
>  static int __init setup_debug_locks_verbose(char *str)
>  {
> -- 
> 2.14.3
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Peter Zijlstra @ 2018-06-14 10:51 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <9afd482d-7082-fa17-5e34-179a652376e5@vmware.com>

On Wed, Jun 13, 2018 at 04:05:43PM +0200, Thomas Hellstrom wrote:
> In short, with Wait-Die (before the patch) it's the process _taking_ the
> contended lock that backs off if necessary. No preemption required. With
> Wound-Wait, it's the process _holding_ the contended lock that gets wounded
> (preempted), and it needs to back off at its own discretion but no later
> than when it's going to sleep on another ww mutex. That point is where we
> intercept the preemption request. We're preempting the transaction rather
> than the process.

This:

  Wait-die:
    The newer transactions are killed when:
      It (= the newer transaction) makes a reqeust for a lock being held
      by an older transactions

  Wound-wait:
    The newer transactions are killed when:
      An older transaction makes a request for a lock being held by the
      newer transactions

Would make for an excellent comment somewhere. No talking about
preemption, although I think I know what you mean with it, that is not
how preemption is normally used.

In scheduling speak preemption is when we pick a runnable (but !running)
task to run instead of the current running task.  In this case however,
our T2 is blocked on a lock acquisition (one owned by our T1) and T1 is
the only runnable task. Only when T1's progress is inhibited by T2 (T1
wants a lock held by T2) do we wound/wake T2.

In any case, I had a little look at the current ww_mutex code and ended
up with the below patch that hopefully clarifies things a little.

---
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index f44f658ae629..a20c04619b2a 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -244,6 +244,10 @@ void __sched mutex_lock(struct mutex *lock)
 EXPORT_SYMBOL(mutex_lock);
 #endif
 
+/*
+ * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired
+ * it.
+ */
 static __always_inline void
 ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
 {
@@ -282,26 +286,36 @@ ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
 	DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
 #endif
 	ww_ctx->acquired++;
+	lock->ctx = ctx;
 }
 
+/*
+ * Determine if context @a is 'after' context @b. IOW, @a should be wounded in
+ * favour of @b.
+ */
 static inline bool __sched
 __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
 {
-	return a->stamp - b->stamp <= LONG_MAX &&
-	       (a->stamp != b->stamp || a > b);
+
+	return (signed long)(a->stamp - b->stamp) > 0;
 }
 
 /*
- * Wake up any waiters that may have to back off when the lock is held by the
- * given context.
+ * We just acquired @lock under @ww_ctx, if there are later contexts waiting
+ * behind us on the wait-list, wake them up so they can wound themselves.
  *
- * Due to the invariants on the wait list, this can only affect the first
- * waiter with a context.
+ * See __ww_mutex_add_waiter() for the list-order construction; basically the
+ * list is ordered by stamp smallest (oldest) first, so if there is a later
+ * (younger) stamp on the list behind us, wake it so it can wound itself.
+ *
+ * Because __ww_mutex_add_waiter() and __ww_mutex_check_stamp() wake any
+ * but the earliest context, this can only affect the first waiter (with a
+ * context).
  *
  * The current task must not be on the wait list.
  */
 static void __sched
-__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
+__ww_mutex_wakeup_for_wound(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 {
 	struct mutex_waiter *cur;
 
@@ -322,16 +336,14 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 }
 
 /*
- * After acquiring lock with fastpath or when we lost out in contested
- * slowpath, set ctx and wake up any waiters so they can recheck.
+ * After acquiring lock with fastpath, where we do not hold wait_lock, set ctx
+ * and wake up any waiters so they can recheck.
  */
 static __always_inline void
 ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
 	ww_mutex_lock_acquired(lock, ctx);
 
-	lock->ctx = ctx;
-
 	/*
 	 * The lock->ctx update should be visible on all cores before
 	 * the atomic read is done, otherwise contended waiters might be
@@ -352,25 +364,10 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 	 * so they can see the new lock->ctx.
 	 */
 	spin_lock(&lock->base.wait_lock);
-	__ww_mutex_wakeup_for_backoff(&lock->base, ctx);
+	__ww_mutex_wakeup_for_wound(&lock->base, ctx);
 	spin_unlock(&lock->base.wait_lock);
 }
 
-/*
- * After acquiring lock in the slowpath set ctx.
- *
- * Unlike for the fast path, the caller ensures that waiters are woken up where
- * necessary.
- *
- * Callers must hold the mutex wait_lock.
- */
-static __always_inline void
-ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
-{
-	ww_mutex_lock_acquired(lock, ctx);
-	lock->ctx = ctx;
-}
-
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 
 static inline
@@ -646,20 +643,30 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
 }
 EXPORT_SYMBOL(ww_mutex_unlock);
 
+/*
+ * Check the wound condition for the current lock acquire.  If we're trying to
+ * acquire a lock already held by an older context, wound ourselves.
+ *
+ * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to
+ * look at waiters before us in the wait-list.
+ */
 static inline int __sched
-__ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
+__ww_mutex_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
 			    struct ww_acquire_ctx *ctx)
 {
 	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
 	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 	struct mutex_waiter *cur;
 
+	if (ctx->acquired == 0)
+		return 0;
+
 	if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
 		goto deadlock;
 
 	/*
 	 * If there is a waiter in front of us that has a context, then its
-	 * stamp is earlier than ours and we must back off.
+	 * stamp is earlier than ours and we must wound ourself.
 	 */
 	cur = waiter;
 	list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) {
@@ -677,6 +684,14 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
 	return -EDEADLK;
 }
 
+/*
+ * Add @waiter to the wait-list, keep the wait-list ordered by stamp, smallest
+ * first. Such that older contexts are preferred to acquire the lock over
+ * younger contexts.
+ *
+ * Furthermore, wound ourself immediately when possible (there are older
+ * contexts already waiting) to avoid unnecessary waiting.
+ */
 static inline int __sched
 __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 		      struct mutex *lock,
@@ -700,8 +715,12 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 		if (!cur->ww_ctx)
 			continue;
 
+		/*
+		 * If we find an older context waiting, there is no point in
+		 * queueing behind it, as we'd have to wound ourselves the
+		 * moment it would acquire the lock.
+		 */
 		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
-			/* Back off immediately if necessary. */
 			if (ww_ctx->acquired > 0) {
 #ifdef CONFIG_DEBUG_MUTEXES
 				struct ww_mutex *ww;
@@ -719,8 +738,9 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 		pos = &cur->list;
 
 		/*
-		 * Wake up the waiter so that it gets a chance to back
-		 * off.
+		 * When we enqueued an older context, wake all younger
+		 * contexts such that they can wound themselves, see
+		 * __ww_mutex_check_stamp().
 		 */
 		if (cur->ww_ctx->acquired > 0) {
 			debug_mutex_wake_waiter(lock, cur);
@@ -772,7 +792,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 */
 	if (__mutex_trylock(lock)) {
 		if (use_ww_ctx && ww_ctx)
-			__ww_mutex_wakeup_for_backoff(lock, ww_ctx);
+			__ww_mutex_wakeup_for_wound(lock, ww_ctx);
 
 		goto skip_wait;
 	}
@@ -790,10 +810,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		waiter.ww_ctx = MUTEX_POISON_WW_CTX;
 #endif
 	} else {
-		/* Add in stamp order, waking up waiters that must back off. */
+		/* Add in stamp order, waking up waiters that must wound themselves. */
 		ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
 		if (ret)
-			goto err_early_backoff;
+			goto err_early_wound;
 
 		waiter.ww_ctx = ww_ctx;
 	}
@@ -824,8 +844,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 			goto err;
 		}
 
-		if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) {
-			ret = __ww_mutex_lock_check_stamp(lock, &waiter, ww_ctx);
+		if (use_ww_ctx && ww_ctx) {
+			ret = __ww_mutex_check_stamp(lock, &waiter, ww_ctx);
 			if (ret)
 				goto err;
 		}
@@ -870,7 +890,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	lock_acquired(&lock->dep_map, ip);
 
 	if (use_ww_ctx && ww_ctx)
-		ww_mutex_set_context_slowpath(ww, ww_ctx);
+		ww_mutex_lock_acquired(ww, ww_ctx);
 
 	spin_unlock(&lock->wait_lock);
 	preempt_enable();
@@ -879,7 +899,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 err:
 	__set_current_state(TASK_RUNNING);
 	mutex_remove_waiter(lock, &waiter, current);
-err_early_backoff:
+err_early_wound:
 	spin_unlock(&lock->wait_lock);
 	debug_mutex_free_waiter(&waiter);
 	mutex_release(&lock->dep_map, 1, ip);
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 11:10 UTC (permalink / raw)
  To: Andrea Parri
  Cc: dri-devel, linux-kernel, Peter Zijlstra, Ingo Molnar,
	Jonathan Corbet, Gustavo Padovan, Maarten Lankhorst, Sean Paul,
	David Airlie, Davidlohr Bueso, Paul E. McKenney, Josh Triplett,
	Thomas Gleixner, Kate Stewart, Philippe Ombredanne,
	Greg Kroah-Hartman, linux-doc, linux-media, linaro-mm-sig
In-Reply-To: <20180614103852.GA18216@andrea>

On 06/14/2018 12:38 PM, Andrea Parri wrote:
> Hi Thomas,
>
> On Thu, Jun 14, 2018 at 09:29:21AM +0200, Thomas Hellstrom wrote:
>> The current Wound-Wait mutex algorithm is actually not Wound-Wait but
>> Wait-Die. Implement also Wound-Wait as a per-ww-class choice. Wound-Wait
>> is, contrary to Wait-Die a preemptive algorithm and is known to generate
>> fewer backoffs. Testing reveals that this is true if the
>> number of simultaneous contending transactions is small.
>> As the number of simultaneous contending threads increases, Wait-Wound
>> becomes inferior to Wait-Die in terms of elapsed time.
>> Possibly due to the larger number of held locks of sleeping transactions.
>>
>> Update documentation and callers.
>>
>> Timings using git://people.freedesktop.org/~thomash/ww_mutex_test
>> tag patch-18-06-14
>>
>> Each thread runs 100000 batches of lock / unlock 800 ww mutexes randomly
>> chosen out of 100000. Four core Intel x86_64:
>>
>> Algorithm    #threads       Rollbacks  time
>> Wound-Wait   4              ~100       ~17s.
>> Wait-Die     4              ~150000    ~19s.
>> Wound-Wait   16             ~360000    ~109s.
>> Wait-Die     16             ~450000    ~82s.
>>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Jonathan Corbet <corbet@lwn.net>
>> Cc: Gustavo Padovan <gustavo@padovan.org>
>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> Cc: Sean Paul <seanpaul@chromium.org>
>> Cc: David Airlie <airlied@linux.ie>
>> Cc: Davidlohr Bueso <dave@stgolabs.net>
>> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
>> Cc: Josh Triplett <josh@joshtriplett.org>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Kate Stewart <kstewart@linuxfoundation.org>
>> Cc: Philippe Ombredanne <pombredanne@nexb.com>
>> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>> Cc: linux-doc@vger.kernel.org
>> Cc: linux-media@vger.kernel.org
>> Cc: linaro-mm-sig@lists.linaro.org
>> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
>>
>> ---
>> v2:
>> * Update API according to review comment by Greg Kroah-Hartman.
>> * Address review comments by Peter Zijlstra:
>>    - Avoid _Bool in composites
>>    - Fix typo
>>    - Use __mutex_owner() where applicable
>>    - Rely on built-in barriers for the main loop exit condition,
>>      struct ww_acquire_ctx::wounded. Update code comments.
>>    - Explain unlocked use of list_empty().
>> ---
>>   Documentation/locking/ww-mutex-design.txt |  54 ++++++++++++----
>>   drivers/dma-buf/reservation.c             |   2 +-
>>   drivers/gpu/drm/drm_modeset_lock.c        |   2 +-
>>   include/linux/ww_mutex.h                  |  19 ++++--
>>   kernel/locking/locktorture.c              |   2 +-
>>   kernel/locking/mutex.c                    | 103 +++++++++++++++++++++++++++---
>>   kernel/locking/test-ww_mutex.c            |   2 +-
>>   lib/locking-selftest.c                    |   2 +-
>>   8 files changed, 156 insertions(+), 30 deletions(-)
>>
>> diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
>> index 34c3a1b50b9a..b9597def9581 100644
>> --- a/Documentation/locking/ww-mutex-design.txt
>> +++ b/Documentation/locking/ww-mutex-design.txt
>> @@ -1,4 +1,4 @@
>> -Wait/Wound Deadlock-Proof Mutex Design
>> +Wound/Wait Deadlock-Proof Mutex Design
>>   ======================================
>>   
>>   Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
>> @@ -32,10 +32,23 @@ the oldest task) wins, and the one with the higher reservation id (i.e. the
>>   younger task) unlocks all of the buffers that it has already locked, and then
>>   tries again.
>>   
>> -In the RDBMS literature this deadlock handling approach is called wait/wound:
>> -The older tasks waits until it can acquire the contended lock. The younger tasks
>> -needs to back off and drop all the locks it is currently holding, i.e. the
>> -younger task is wounded.
>> +In the RDBMS literature, a reservation ticket is associated with a transaction.
>> +and the deadlock handling approach is called Wait-Die. The name is based on
>> +the actions of a locking thread when it encounters an already locked mutex.
>> +If the transaction holding the lock is younger, the locking transaction waits.
>> +If the transaction holding the lock is older, the locking transaction backs off
>> +and dies. Hence Wait-Die.
>> +There is also another algorithm called Wound-Wait:
>> +If the transaction holding the lock is younger, the locking transaction
>> +preempts the transaction holding the lock, requiring it to back off. It
>> +Wounds the other transaction.
>> +If the transaction holding the lock is older, it waits for the other
>> +transaction. Hence Wound-Wait.
>> +The two algorithms are both fair in that a transaction will eventually succeed.
>> +However, the Wound-Wait algorithm is typically stated to generate fewer backoffs
>> +compared to Wait-Die, but is, on the other hand, associated with more work than
>> +Wait-Die when recovering from a backoff. Wound-Wait is also a preemptive
>> +algorithm which requires a reliable way to preempt another transaction.
>>   
>>   Concepts
>>   --------
>> @@ -47,10 +60,12 @@ Acquire context: To ensure eventual forward progress it is important the a task
>>   trying to acquire locks doesn't grab a new reservation id, but keeps the one it
>>   acquired when starting the lock acquisition. This ticket is stored in the
>>   acquire context. Furthermore the acquire context keeps track of debugging state
>> -to catch w/w mutex interface abuse.
>> +to catch w/w mutex interface abuse. An acquire context is representing a
>> +transaction.
>>   
>>   W/w class: In contrast to normal mutexes the lock class needs to be explicit for
>> -w/w mutexes, since it is required to initialize the acquire context.
>> +w/w mutexes, since it is required to initialize the acquire context. The lock
>> +class also specifies what algorithm to use, Wound-Wait or Wait-Die.
>>   
>>   Furthermore there are three different class of w/w lock acquire functions:
>>   
>> @@ -90,6 +105,12 @@ provided.
>>   Usage
>>   -----
>>   
>> +The algorithm (Wait-Die vs Wound-Wait) is chosen by using either
>> +DEFINE_WW_CLASS_WDIE() for Wait-Die or DEFINE_WW_CLASS() for Wound-Wait.
>> +As a rough rule of thumb, use Wound-Wait iff you typically expect the number
>> +of simultaneous competing transactions to be small, and the rollback cost can
>> +be substantial.
>> +
>>   Three different ways to acquire locks within the same w/w class. Common
>>   definitions for methods #1 and #2:
>>   
>> @@ -312,12 +333,23 @@ Design:
>>     We maintain the following invariants for the wait list:
>>     (1) Waiters with an acquire context are sorted by stamp order; waiters
>>         without an acquire context are interspersed in FIFO order.
>> -  (2) Among waiters with contexts, only the first one can have other locks
>> -      acquired already (ctx->acquired > 0). Note that this waiter may come
>> -      after other waiters without contexts in the list.
>> +  (2) For Wait-Die, among waiters with contexts, only the first one can have
>> +      other locks acquired already (ctx->acquired > 0). Note that this waiter
>> +      may come after other waiters without contexts in the list.
>> +
>> +  The Wound-Wait preemption is implemented with a lazy-preemption scheme:
>> +  The wounded status of the transaction is checked only when there is
>> +  contention for a new lock and hence a true chance of deadlock. In that
>> +  situation, if the transaction is wounded, it backs off, clears the
>> +  wounded status and retries. A great benefit of implementing preemption in
>> +  this way is that the wounded transaction can identify a contending lock to
>> +  wait for before restarting the transaction. Just blindly restarting the
>> +  transaction would likely make the transaction end up in a situation where
>> +  it would have to back off again.
>>   
>>     In general, not much contention is expected. The locks are typically used to
>> -  serialize access to resources for devices.
>> +  serialize access to resources for devices, and optimization focus should
>> +  therefore be directed towards the uncontended cases.
>>   
>>   Lockdep:
>>     Special care has been taken to warn for as many cases of api abuse
>> diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
>> index 314eb1071cce..b94a4bab2ecd 100644
>> --- a/drivers/dma-buf/reservation.c
>> +++ b/drivers/dma-buf/reservation.c
>> @@ -46,7 +46,7 @@
>>    * write-side updates.
>>    */
>>   
>> -DEFINE_WW_CLASS(reservation_ww_class);
>> +DEFINE_WW_CLASS_WDIE(reservation_ww_class);
>>   EXPORT_SYMBOL(reservation_ww_class);
>>   
>>   struct lock_class_key reservation_seqcount_class;
>> diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
>> index 8a5100685875..ff00a814f617 100644
>> --- a/drivers/gpu/drm/drm_modeset_lock.c
>> +++ b/drivers/gpu/drm/drm_modeset_lock.c
>> @@ -70,7 +70,7 @@
>>    * lists and lookup data structures.
>>    */
>>   
>> -static DEFINE_WW_CLASS(crtc_ww_class);
>> +static DEFINE_WW_CLASS_WDIE(crtc_ww_class);
>>   
>>   /**
>>    * drm_modeset_lock_all - take all modeset locks
>> diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
>> index 39fda195bf78..3880813b7db5 100644
>> --- a/include/linux/ww_mutex.h
>> +++ b/include/linux/ww_mutex.h
>> @@ -8,6 +8,8 @@
>>    *
>>    * Wound/wait implementation:
>>    *  Copyright (C) 2013 Canonical Ltd.
>> + * Choice of algorithm:
>> + *  Copyright (C) 2018 WMWare Inc.
>>    *
>>    * This file contains the main data structure and API definitions.
>>    */
>> @@ -23,15 +25,17 @@ struct ww_class {
>>   	struct lock_class_key mutex_key;
>>   	const char *acquire_name;
>>   	const char *mutex_name;
>> +	unsigned int is_wait_die;
>>   };
>>   
>>   struct ww_acquire_ctx {
>>   	struct task_struct *task;
>>   	unsigned long stamp;
>>   	unsigned acquired;
>> +	unsigned int wounded;
>> +	struct ww_class *ww_class;
>>   #ifdef CONFIG_DEBUG_MUTEXES
>>   	unsigned done_acquire;
>> -	struct ww_class *ww_class;
>>   	struct ww_mutex *contending_lock;
>>   #endif
>>   #ifdef CONFIG_DEBUG_LOCK_ALLOC
>> @@ -58,17 +62,21 @@ struct ww_mutex {
>>   # define __WW_CLASS_MUTEX_INITIALIZER(lockname, class)
>>   #endif
>>   
>> -#define __WW_CLASS_INITIALIZER(ww_class) \
>> +#define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die)	    \
>>   		{ .stamp = ATOMIC_LONG_INIT(0) \
>>   		, .acquire_name = #ww_class "_acquire" \
>> -		, .mutex_name = #ww_class "_mutex" }
>> +		, .mutex_name = #ww_class "_mutex" \
>> +		, .is_wait_die = _is_wait_die }
>>   
>>   #define __WW_MUTEX_INITIALIZER(lockname, class) \
>>   		{ .base =  __MUTEX_INITIALIZER(lockname.base) \
>>   		__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
>>   
>>   #define DEFINE_WW_CLASS(classname) \
>> -	struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
>> +	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0)
>> +
>> +#define DEFINE_WW_CLASS_WDIE(classname)	\
>> +	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1)
>>   
>>   #define DEFINE_WW_MUTEX(mutexname, ww_class) \
>>   	struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)
>> @@ -123,8 +131,9 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
>>   	ctx->task = current;
>>   	ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp);
>>   	ctx->acquired = 0;
>> -#ifdef CONFIG_DEBUG_MUTEXES
>>   	ctx->ww_class = ww_class;
>> +	ctx->wounded = false;
>> +#ifdef CONFIG_DEBUG_MUTEXES
>>   	ctx->done_acquire = 0;
>>   	ctx->contending_lock = NULL;
>>   #endif
>> diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
>> index 6850ffd69125..e861c1bf0e1e 100644
>> --- a/kernel/locking/locktorture.c
>> +++ b/kernel/locking/locktorture.c
>> @@ -365,7 +365,7 @@ static struct lock_torture_ops mutex_lock_ops = {
>>   };
>>   
>>   #include <linux/ww_mutex.h>
>> -static DEFINE_WW_CLASS(torture_ww_class);
>> +static DEFINE_WW_CLASS_WDIE(torture_ww_class);
>>   static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class);
>>   static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class);
>>   static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class);
>> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
>> index 2048359f33d2..ffa00b5aaf03 100644
>> --- a/kernel/locking/mutex.c
>> +++ b/kernel/locking/mutex.c
>> @@ -290,12 +290,49 @@ __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
>>   	       (a->stamp != b->stamp || a > b);
>>   }
>>   
>> +/*
>> + * Wound the lock holder transaction if it's younger than the contending
>> + * transaction, and there is a possibility of a deadlock.
>> + * Also if the lock holder transaction isn't the current transaction,
>> + * make sure it's woken up in case it's sleeping on another ww mutex.
>> + */
>> +static bool __ww_mutex_wound(struct mutex *lock,
>> +			     struct ww_acquire_ctx *ww_ctx,
>> +			     struct ww_acquire_ctx *hold_ctx)
>> +{
>> +	struct task_struct *owner = __mutex_owner(lock);
>> +
>> +	lockdep_assert_held(&lock->wait_lock);
>> +
>> +	if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) &&
>> +	    ww_ctx->acquired > 0) {
>> +		hold_ctx->wounded = 1;
>> +
>> +		/*
>> +		 * wake_up_process() paired with set_current_state() inserts
>> +		 * sufficient barriers to make sure @owner either sees it's
>> +		 * wounded or has a wakeup pending to re-read the wounded
>> +		 * state.
> IIUC, "sufficient barriers" = full memory barriers (here).  (You may
> want to be more specific.)

Thanks for reviewing!
OK. What about if someone relaxes that in the future? I mean, what we 
care about in this code is just that we have sufficient barriers for 
that statement to be true, regardless what type of barriers those really 
are?
>
>> +		 *
>> +		 * The value of hold_ctx->wounded in
>> +		 * __ww_mutex_lock_check_stamp();
> Missing parts/incomplete sentence?

Oops. I'll fix in next version.


>
>    Andrea
Thanks,
Thomas


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Peter Zijlstra @ 2018-06-14 11:36 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <20180614072922.8114-2-thellstrom@vmware.com>

On Thu, Jun 14, 2018 at 09:29:21AM +0200, Thomas Hellstrom wrote:

>  __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>  {
>  	struct mutex_waiter *cur;
> +	unsigned int is_wait_die = ww_ctx->ww_class->is_wait_die;
>  
>  	lockdep_assert_held(&lock->wait_lock);
>  
> @@ -310,13 +348,14 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>  		if (!cur->ww_ctx)
>  			continue;
>  
> -		if (cur->ww_ctx->acquired > 0 &&
> +		if (is_wait_die && cur->ww_ctx->acquired > 0 &&
>  		    __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
>  			debug_mutex_wake_waiter(lock, cur);
>  			wake_up_process(cur->task);
>  		}
>  
> -		break;
> +		if (is_wait_die || __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
> +			break;
>  	}
>  }

I ended up with:


static void __sched
__ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
{
	bool is_wait_die = ww_ctx->ww_class->is_wait_die;
	struct mutex_waiter *cur;

	lockdep_assert_held(&lock->wait_lock);

	list_for_each_entry(cur, &lock->wait_list, list) {
		if (!cur->ww_ctx)
			continue;

		if (is_wait_die) {
			/*
			 * Because __ww_mutex_add_waiter() and
			 * __ww_mutex_check_stamp() wake any but the earliest
			 * context, this can only affect the first waiter (with
			 * a context).
			 */
			if (cur->ww_ctx->acquired > 0 &&
			    __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
				debug_mutex_wake_waiter(lock, cur);
				wake_up_process(cur->task);
			}

			break;
		}

		if (__ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
			break;
	}
}


Currently you don't allow mixing WD and WW contexts (which is not
immediately obvious from the above code), and the above hard relies on
that. Are there sensible use cases for mixing them? IOW will your
current restriction stand without hassle?
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 11:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <20180614105151.GY12198@hirez.programming.kicks-ass.net>

On 06/14/2018 12:51 PM, Peter Zijlstra wrote:
> On Wed, Jun 13, 2018 at 04:05:43PM +0200, Thomas Hellstrom wrote:
>> In short, with Wait-Die (before the patch) it's the process _taking_ the
>> contended lock that backs off if necessary. No preemption required. With
>> Wound-Wait, it's the process _holding_ the contended lock that gets wounded
>> (preempted), and it needs to back off at its own discretion but no later
>> than when it's going to sleep on another ww mutex. That point is where we
>> intercept the preemption request. We're preempting the transaction rather
>> than the process.
> This:
>
>    Wait-die:
>      The newer transactions are killed when:
>        It (= the newer transaction) makes a reqeust for a lock being held
>        by an older transactions
>
>    Wound-wait:
>      The newer transactions are killed when:
>        An older transaction makes a request for a lock being held by the
>        newer transactions
>
> Would make for an excellent comment somewhere. No talking about
> preemption, although I think I know what you mean with it, that is not
> how preemption is normally used.

Ok. I'll incorporate something along this line. Unfortunately that last 
statement is not fully true. It should read
"The newer transactions are wounded when:", not "killed" when.

The literature makes a distinction between "killed" and "wounded". In 
our context, "Killed" is when a transaction actually receives an 
-EDEADLK and needs to back off. "Wounded" is when someone (typically 
another transaction) requests a transaction to kill itself. A wound will 
often, but not always, lead to a kill. If the wounded transaction has 
finished its locking sequence, or has the opportunity to grab 
uncontended ww mutexes or steal contended (non-handoff) ww mutexes to 
finish its transaction it will do so and never kill itself.



>
> In scheduling speak preemption is when we pick a runnable (but !running)
> task to run instead of the current running task.  In this case however,
> our T2 is blocked on a lock acquisition (one owned by our T1) and T1 is
> the only runnable task. Only when T1's progress is inhibited by T2 (T1
> wants a lock held by T2) do we wound/wake T2.

Indeed. The preemption spoken about in the Wound-Wait litterature means 
that a transaction preempts another transaction when it wounds it. In 
distributed computing my understanding is that the preempted transaction 
is aborted instantly and restarted after a random delay. Of course, we 
have no means of mapping wounding to process preemption in the linux 
kernel, so that's why I referred to it as "lazy preemption". In process 
analogy "wounded" wound roughly correspond to (need_resched() == true), 
and returning -EDEADLK would correspond to voluntary preemption.



>
> In any case, I had a little look at the current ww_mutex code and ended
> up with the below patch that hopefully clarifies things a little.
>
> ---
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index f44f658ae629..a20c04619b2a 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -244,6 +244,10 @@ void __sched mutex_lock(struct mutex *lock)
>   EXPORT_SYMBOL(mutex_lock);
>   #endif
>   
> +/*
> + * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired
> + * it.
> + */

IMO use of "acquire_context" or "context" is a little unfortunate when 
the literature uses "transaction",
but otherwise fine.


>   static __always_inline void
>   ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
>   {
> @@ -282,26 +286,36 @@ ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
>   	DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
>   #endif
>   	ww_ctx->acquired++;
> +	lock->ctx = ctx;
>   }
>   
> +/*
> + * Determine if context @a is 'after' context @b. IOW, @a should be wounded in
> + * favour of @b.
> + */

So "wounded" should never really be used with Wait-Die
"Determine whether context @a represents a younger transaction than 
context @b"?

>   static inline bool __sched
>   __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
>   {
> -	return a->stamp - b->stamp <= LONG_MAX &&
> -	       (a->stamp != b->stamp || a > b);
> +
> +	return (signed long)(a->stamp - b->stamp) > 0;
>   }
>   
>   /*
> - * Wake up any waiters that may have to back off when the lock is held by the
> - * given context.
> + * We just acquired @lock under @ww_ctx, if there are later contexts waiting
> + * behind us on the wait-list, wake them up so they can wound themselves.

Actually for Wait-Die, Back off or "Die" is the correct terminology.

>    *
> - * Due to the invariants on the wait list, this can only affect the first
> - * waiter with a context.
> + * See __ww_mutex_add_waiter() for the list-order construction; basically the
> + * list is ordered by stamp smallest (oldest) first, so if there is a later
> + * (younger) stamp on the list behind us, wake it so it can wound itself.
> + *
> + * Because __ww_mutex_add_waiter() and __ww_mutex_check_stamp() wake any
> + * but the earliest context, this can only affect the first waiter (with a
> + * context).

The wait list invariants are stated in 
Documentation/locking/ww-mutex-design.txt.
Perhaps we could copy those into the code to make the comment more 
understandable:
"  We maintain the following invariants for the wait list:
   (1) Waiters with an acquire context are sorted by stamp order; waiters
       without an acquire context are interspersed in FIFO order.
   (2) For Wait-Die, among waiters with contexts, only the first one can 
have
       other locks acquired already (ctx->acquired > 0). Note that this 
waiter
       may come after other waiters without contexts in the list."

>    *
>    * The current task must not be on the wait list.
>    */
>   static void __sched
> -__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
> +__ww_mutex_wakeup_for_wound(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)

Again, "wound" is unsuitable for Wait-Die. + numerous additional places.

Thanks,
Thomas




--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Andrea Parri @ 2018-06-14 11:49 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: dri-devel, linux-kernel, Peter Zijlstra, Ingo Molnar,
	Jonathan Corbet, Gustavo Padovan, Maarten Lankhorst, Sean Paul,
	David Airlie, Davidlohr Bueso, Paul E. McKenney, Josh Triplett,
	Thomas Gleixner, Kate Stewart, Philippe Ombredanne,
	Greg Kroah-Hartman, linux-doc, linux-media, linaro-mm-sig
In-Reply-To: <b84a5ef9-6cd1-7dc9-a51d-cb195cdea83c@vmware.com>

[...]

> >>+		/*
> >>+		 * wake_up_process() paired with set_current_state() inserts
> >>+		 * sufficient barriers to make sure @owner either sees it's
> >>+		 * wounded or has a wakeup pending to re-read the wounded
> >>+		 * state.
> >IIUC, "sufficient barriers" = full memory barriers (here).  (You may
> >want to be more specific.)
> 
> Thanks for reviewing!
> OK. What about if someone relaxes that in the future?

This is actually one of my main concerns ;-)  as, IIUC, those barriers are
not only sufficient but also necessary: anything "less than a full barrier"
(in either wake_up_process() or set_current_state()) would _not_ guarantee
the "condition" above unless I'm misunderstanding it.

But am I misunderstanding it?  Which barriers/guarantee do you _need_ from
the above mentioned pairing? (hence my comment...)

  Andrea
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 11:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <20180614113604.GZ12198@hirez.programming.kicks-ass.net>

On 06/14/2018 01:36 PM, Peter Zijlstra wrote:
> On Thu, Jun 14, 2018 at 09:29:21AM +0200, Thomas Hellstrom wrote:
>
>>   __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>>   {
>>   	struct mutex_waiter *cur;
>> +	unsigned int is_wait_die = ww_ctx->ww_class->is_wait_die;
>>   
>>   	lockdep_assert_held(&lock->wait_lock);
>>   
>> @@ -310,13 +348,14 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
>>   		if (!cur->ww_ctx)
>>   			continue;
>>   
>> -		if (cur->ww_ctx->acquired > 0 &&
>> +		if (is_wait_die && cur->ww_ctx->acquired > 0 &&
>>   		    __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
>>   			debug_mutex_wake_waiter(lock, cur);
>>   			wake_up_process(cur->task);
>>   		}
>>   
>> -		break;
>> +		if (is_wait_die || __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
>> +			break;
>>   	}
>>   }
> I ended up with:
>
>
> static void __sched
> __ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
> {
> 	bool is_wait_die = ww_ctx->ww_class->is_wait_die;
> 	struct mutex_waiter *cur;
>
> 	lockdep_assert_held(&lock->wait_lock);
>
> 	list_for_each_entry(cur, &lock->wait_list, list) {
> 		if (!cur->ww_ctx)
> 			continue;
>
> 		if (is_wait_die) {
> 			/*
> 			 * Because __ww_mutex_add_waiter() and
> 			 * __ww_mutex_check_stamp() wake any but the earliest
> 			 * context, this can only affect the first waiter (with
> 			 * a context).
> 			 */
> 			if (cur->ww_ctx->acquired > 0 &&
> 			    __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
> 				debug_mutex_wake_waiter(lock, cur);
> 				wake_up_process(cur->task);
> 			}
>
> 			break;
> 		}
>
> 		if (__ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
> 			break;
> 	}
> }

Looks OK to me.

>
> Currently you don't allow mixing WD and WW contexts (which is not
> immediately obvious from the above code), and the above hard relies on
> that. Are there sensible use cases for mixing them? IOW will your
> current restriction stand without hassle?

Contexts _must_ agree on the algorithm used to resolve deadlocks. With 
Wait-Die, for example, older transactions will wait if a lock is held by 
a younger transaction and with Wound-Wait, younger transactions will 
wait if a lock is held by an older transaction so there is no way of 
mixing them.

Thanks,

/Thomas


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 12:04 UTC (permalink / raw)
  To: Andrea Parri
  Cc: dri-devel, linux-kernel, Peter Zijlstra, Ingo Molnar,
	Jonathan Corbet, Gustavo Padovan, Maarten Lankhorst, Sean Paul,
	David Airlie, Davidlohr Bueso, Paul E. McKenney, Josh Triplett,
	Thomas Gleixner, Kate Stewart, Philippe Ombredanne,
	Greg Kroah-Hartman, linux-doc, linux-media, linaro-mm-sig
In-Reply-To: <20180614114944.GA18651@andrea>

On 06/14/2018 01:49 PM, Andrea Parri wrote:
> [...]
>
>>>> +		/*
>>>> +		 * wake_up_process() paired with set_current_state() inserts
>>>> +		 * sufficient barriers to make sure @owner either sees it's
>>>> +		 * wounded or has a wakeup pending to re-read the wounded
>>>> +		 * state.
>>> IIUC, "sufficient barriers" = full memory barriers (here).  (You may
>>> want to be more specific.)
>> Thanks for reviewing!
>> OK. What about if someone relaxes that in the future?
> This is actually one of my main concerns ;-)  as, IIUC, those barriers are
> not only sufficient but also necessary: anything "less than a full barrier"
> (in either wake_up_process() or set_current_state()) would _not_ guarantee
> the "condition" above unless I'm misunderstanding it.
>
> But am I misunderstanding it?  Which barriers/guarantee do you _need_ from
> the above mentioned pairing? (hence my comment...)
>
>    Andrea

No you are probably not misunderstanding me at all. My comment 
originated from the reading of the kerneldoc of set_current_state()

* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
* actually sleep:
*
* for (;;) {
* set_current_state(TASK_UNINTERRUPTIBLE);
* if (!need_sleep)
* break;
*
* schedule();
* }
* __set_current_state(TASK_RUNNING);
*
* If the caller does not need such serialisation (because, for instance, the
* condition test and condition change and wakeup are under the same 
lock) then
* use __set_current_state().
*
* The above is typically ordered against the wakeup, which does:
*
* need_sleep = false;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
* Where wake_up_state() (and all other wakeup primitives) imply enough
* barriers to order the store of the variable against wakeup. And with 
ctx->wounded := !need_sleep this exactly matches what's happening in my 
code. So what I was trying to say in the comment was that this above 
contract is sufficient to guarantee the "condition" above, whitout me 
actually knowing exactly what barriers are required. Thanks, Thomas

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 12:08 UTC (permalink / raw)
  To: Andrea Parri
  Cc: dri-devel, linux-kernel, Peter Zijlstra, Ingo Molnar,
	Jonathan Corbet, Gustavo Padovan, Maarten Lankhorst, Sean Paul,
	David Airlie, Davidlohr Bueso, Paul E. McKenney, Josh Triplett,
	Thomas Gleixner, Kate Stewart, Philippe Ombredanne,
	Greg Kroah-Hartman, linux-doc, linux-media, linaro-mm-sig
In-Reply-To: <20180614114944.GA18651@andrea>

Resending hopefully better formatted..


On 06/14/2018 01:49 PM, Andrea Parri wrote:
> [...]
>
>>>> +		/*
>>>> +		 * wake_up_process() paired with set_current_state() inserts
>>>> +		 * sufficient barriers to make sure @owner either sees it's
>>>> +		 * wounded or has a wakeup pending to re-read the wounded
>>>> +		 * state.
>>> IIUC, "sufficient barriers" = full memory barriers (here).  (You may
>>> want to be more specific.)
>> Thanks for reviewing!
>> OK. What about if someone relaxes that in the future?
> This is actually one of my main concerns ;-)  as, IIUC, those barriers are
> not only sufficient but also necessary: anything "less than a full barrier"
> (in either wake_up_process() or set_current_state()) would _not_ guarantee
> the "condition" above unless I'm misunderstanding it.
>
> But am I misunderstanding it?  Which barriers/guarantee do you _need_ from
> the above mentioned pairing? (hence my comment...)
>
>    Andrea

No you are probably not misunderstanding me at all. My comment 
originated from the reading of the kerneldoc of set_current_state()

/*
* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
* actually sleep:
*
* for (;;) {
* set_current_state(TASK_UNINTERRUPTIBLE);
* if (!need_sleep)
* break;
*
* schedule();
* }
* __set_current_state(TASK_RUNNING);
*
* If the caller does not need such serialisation (because, for instance, the
* condition test and condition change and wakeup are under the same 
lock) then
* use __set_current_state().
*
* The above is typically ordered against the wakeup, which does:
*
* need_sleep = false;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
* Where wake_up_state() (and all other wakeup primitives) imply enough
* barriers to order the store of the variable against wakeup.
---
*/

And with ctx->wounded := !need_sleep this exactly matches what's 
happening in my code. So what I was trying to say in the comment was 
that this above contract is sufficient to guarantee the "condition" 
above, whitout me actually knowing exactly what barriers are required. 
Thanks, Thomas

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Peter Zijlstra @ 2018-06-14 12:41 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <20180614072922.8114-2-thellstrom@vmware.com>

On Thu, Jun 14, 2018 at 09:29:21AM +0200, Thomas Hellstrom wrote:
> +static bool __ww_mutex_wound(struct mutex *lock,
> +			     struct ww_acquire_ctx *ww_ctx,
> +			     struct ww_acquire_ctx *hold_ctx)
> +{
> +	struct task_struct *owner = __mutex_owner(lock);
> +
> +	lockdep_assert_held(&lock->wait_lock);
> +
> +	if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) &&
> +	    ww_ctx->acquired > 0) {
> +		hold_ctx->wounded = 1;
> +
> +		/*
> +		 * wake_up_process() paired with set_current_state() inserts
> +		 * sufficient barriers to make sure @owner either sees it's
> +		 * wounded or has a wakeup pending to re-read the wounded
> +		 * state.
> +		 *
> +		 * The value of hold_ctx->wounded in
> +		 * __ww_mutex_lock_check_stamp();
> +		 */
> +		if (owner != current)
> +			wake_up_process(owner);
> +
> +		return true;
> +	}
> +
> +	return false;
> +}

> @@ -338,12 +377,18 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
>  	 * and keep spinning, or it will acquire wait_lock, add itself
>  	 * to waiter list and sleep.
>  	 */
> -	smp_mb(); /* ^^^ */
> +	smp_mb(); /* See comments above and below. */
>  
>  	/*
> -	 * Check if lock is contended, if not there is nobody to wake up
> +	 * Check if lock is contended, if not there is nobody to wake up.
> +	 * We can use list_empty() unlocked here since it only compares a
> +	 * list_head field pointer to the address of the list head
> +	 * itself, similarly to how list_empty() can be considered RCU-safe.
> +	 * The memory barrier above pairs with the memory barrier in
> +	 * __ww_mutex_add_waiter and makes sure lock->ctx is visible before
> +	 * we check for waiters.
>  	 */
> -	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
> +	if (likely(list_empty(&lock->base.wait_list)))
>  		return;
>  

OK, so what happens is that if we see !empty list, we take wait_lock,
if we end up in __ww_mutex_wound() we must really have !empty wait-list.

It can however still see !owner because __mutex_unlock_slowpath() can
clear the owner field. But if owner is set, it must stay valid because
FLAG_WAITERS and we're holding wait_lock.

So the wake_up_process() is in fact safe.

Let me put that in a comment.
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 12:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <20180614124129.GA12198@hirez.programming.kicks-ass.net>

Hi, Peter,

On 06/14/2018 02:41 PM, Peter Zijlstra wrote:
> On Thu, Jun 14, 2018 at 09:29:21AM +0200, Thomas Hellstrom wrote:
>> +static bool __ww_mutex_wound(struct mutex *lock,
>> +			     struct ww_acquire_ctx *ww_ctx,
>> +			     struct ww_acquire_ctx *hold_ctx)
>> +{
>> +	struct task_struct *owner = __mutex_owner(lock);
>> +
>> +	lockdep_assert_held(&lock->wait_lock);
>> +
>> +	if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) &&
>> +	    ww_ctx->acquired > 0) {
>> +		hold_ctx->wounded = 1;
>> +
>> +		/*
>> +		 * wake_up_process() paired with set_current_state() inserts
>> +		 * sufficient barriers to make sure @owner either sees it's
>> +		 * wounded or has a wakeup pending to re-read the wounded
>> +		 * state.
>> +		 *
>> +		 * The value of hold_ctx->wounded in
>> +		 * __ww_mutex_lock_check_stamp();
>> +		 */
>> +		if (owner != current)
>> +			wake_up_process(owner);
>> +
>> +		return true;
>> +	}
>> +
>> +	return false;
>> +}
>> @@ -338,12 +377,18 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
>>   	 * and keep spinning, or it will acquire wait_lock, add itself
>>   	 * to waiter list and sleep.
>>   	 */
>> -	smp_mb(); /* ^^^ */
>> +	smp_mb(); /* See comments above and below. */
>>   
>>   	/*
>> -	 * Check if lock is contended, if not there is nobody to wake up
>> +	 * Check if lock is contended, if not there is nobody to wake up.
>> +	 * We can use list_empty() unlocked here since it only compares a
>> +	 * list_head field pointer to the address of the list head
>> +	 * itself, similarly to how list_empty() can be considered RCU-safe.
>> +	 * The memory barrier above pairs with the memory barrier in
>> +	 * __ww_mutex_add_waiter and makes sure lock->ctx is visible before
>> +	 * we check for waiters.
>>   	 */
>> -	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
>> +	if (likely(list_empty(&lock->base.wait_list)))
>>   		return;
>>   
> OK, so what happens is that if we see !empty list, we take wait_lock,
> if we end up in __ww_mutex_wound() we must really have !empty wait-list.
>
> It can however still see !owner because __mutex_unlock_slowpath() can
> clear the owner field. But if owner is set, it must stay valid because
> FLAG_WAITERS and we're holding wait_lock.

If __ww_mutex_wound() is called from ww_mutex_set_context_fastpath() 
owner is the current process so we can never see !owner. However if 
__ww_mutex_wound() is called from __ww_mutex_add_waiter() then the above 
is true.

>
> So the wake_up_process() is in fact safe.
>
> Let me put that in a comment.


Thanks,

Thomas


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 13:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <3d73590a-13de-9164-4b32-9d7da6a1055b@vmware.com>

On 06/14/2018 02:48 PM, Thomas Hellstrom wrote:
> Hi, Peter,
>
> On 06/14/2018 02:41 PM, Peter Zijlstra wrote:
>> On Thu, Jun 14, 2018 at 09:29:21AM +0200, Thomas Hellstrom wrote:
>>> +static bool __ww_mutex_wound(struct mutex *lock,
>>> +                 struct ww_acquire_ctx *ww_ctx,
>>> +                 struct ww_acquire_ctx *hold_ctx)
>>> +{
>>> +    struct task_struct *owner = __mutex_owner(lock);
>>> +
>>> +    lockdep_assert_held(&lock->wait_lock);
>>> +
>>> +    if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) &&
>>> +        ww_ctx->acquired > 0) {
>>> +        hold_ctx->wounded = 1;
>>> +
>>> +        /*
>>> +         * wake_up_process() paired with set_current_state() inserts
>>> +         * sufficient barriers to make sure @owner either sees it's
>>> +         * wounded or has a wakeup pending to re-read the wounded
>>> +         * state.
>>> +         *
>>> +         * The value of hold_ctx->wounded in
>>> +         * __ww_mutex_lock_check_stamp();
>>> +         */
>>> +        if (owner != current)
>>> +            wake_up_process(owner);
>>> +
>>> +        return true;
>>> +    }
>>> +
>>> +    return false;
>>> +}
>>> @@ -338,12 +377,18 @@ ww_mutex_set_context_fastpath(struct ww_mutex 
>>> *lock, struct ww_acquire_ctx *ctx)
>>>        * and keep spinning, or it will acquire wait_lock, add itself
>>>        * to waiter list and sleep.
>>>        */
>>> -    smp_mb(); /* ^^^ */
>>> +    smp_mb(); /* See comments above and below. */
>>>         /*
>>> -     * Check if lock is contended, if not there is nobody to wake up
>>> +     * Check if lock is contended, if not there is nobody to wake up.
>>> +     * We can use list_empty() unlocked here since it only compares a
>>> +     * list_head field pointer to the address of the list head
>>> +     * itself, similarly to how list_empty() can be considered 
>>> RCU-safe.
>>> +     * The memory barrier above pairs with the memory barrier in
>>> +     * __ww_mutex_add_waiter and makes sure lock->ctx is visible 
>>> before
>>> +     * we check for waiters.
>>>        */
>>> -    if (likely(!(atomic_long_read(&lock->base.owner) & 
>>> MUTEX_FLAG_WAITERS)))
>>> +    if (likely(list_empty(&lock->base.wait_list)))
>>>           return;
>> OK, so what happens is that if we see !empty list, we take wait_lock,
>> if we end up in __ww_mutex_wound() we must really have !empty wait-list.
>>
>> It can however still see !owner because __mutex_unlock_slowpath() can
>> clear the owner field. But if owner is set, it must stay valid because
>> FLAG_WAITERS and we're holding wait_lock.
>
> If __ww_mutex_wound() is called from ww_mutex_set_context_fastpath() 
> owner is the current process so we can never see !owner. However if 
> __ww_mutex_wound() is called from __ww_mutex_add_waiter() then the 
> above is true.

Or actually it was intended to be true, but FLAG_WAITERS is set too 
late. It needs to be moved to just after we actually add the waiter to 
the list.

Then the hunk that replaces a FLAG_WAITERS read with a lockless 
list_empty() can also be ditched.

/Thomas


>
>>
>> So the wake_up_process() is in fact safe.
>>
>> Let me put that in a comment.
>
>
> Thanks,
>
> Thomas
>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Matthew Wilcox @ 2018-06-14 13:29 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: Peter Zijlstra, dri-devel, linux-kernel, Ingo Molnar,
	Jonathan Corbet, Gustavo Padovan, Maarten Lankhorst, Sean Paul,
	David Airlie, Davidlohr Bueso, Paul E. McKenney, Josh Triplett,
	Thomas Gleixner, Kate Stewart, Philippe Ombredanne,
	Greg Kroah-Hartman, linux-doc, linux-media, linaro-mm-sig
In-Reply-To: <7eb10c22-57b3-1472-0a77-7f787f612217@vmware.com>

On Thu, Jun 14, 2018 at 01:54:15PM +0200, Thomas Hellstrom wrote:
> On 06/14/2018 01:36 PM, Peter Zijlstra wrote:
> > Currently you don't allow mixing WD and WW contexts (which is not
> > immediately obvious from the above code), and the above hard relies on
> > that. Are there sensible use cases for mixing them? IOW will your
> > current restriction stand without hassle?
> 
> Contexts _must_ agree on the algorithm used to resolve deadlocks. With
> Wait-Die, for example, older transactions will wait if a lock is held by a
> younger transaction and with Wound-Wait, younger transactions will wait if a
> lock is held by an older transaction so there is no way of mixing them.

Maybe the compiler should be enforcing that; ie make it a different type?
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Thomas Hellstrom @ 2018-06-14 13:43 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Peter Zijlstra, dri-devel, linux-kernel, Ingo Molnar,
	Jonathan Corbet, Gustavo Padovan, Maarten Lankhorst, Sean Paul,
	David Airlie, Davidlohr Bueso, Paul E. McKenney, Josh Triplett,
	Thomas Gleixner, Kate Stewart, Philippe Ombredanne,
	Greg Kroah-Hartman, linux-doc, linux-media, linaro-mm-sig
In-Reply-To: <20180614132905.GA7841@bombadil.infradead.org>

On 06/14/2018 03:29 PM, Matthew Wilcox wrote:
> On Thu, Jun 14, 2018 at 01:54:15PM +0200, Thomas Hellstrom wrote:
>> On 06/14/2018 01:36 PM, Peter Zijlstra wrote:
>>> Currently you don't allow mixing WD and WW contexts (which is not
>>> immediately obvious from the above code), and the above hard relies on
>>> that. Are there sensible use cases for mixing them? IOW will your
>>> current restriction stand without hassle?
>> Contexts _must_ agree on the algorithm used to resolve deadlocks. With
>> Wait-Die, for example, older transactions will wait if a lock is held by a
>> younger transaction and with Wound-Wait, younger transactions will wait if a
>> lock is held by an older transaction so there is no way of mixing them.
> Maybe the compiler should be enforcing that; ie make it a different type?

It's intended to be enforced by storing the algorithm choice in the 
WW_MUTEX_CLASS which must be common for an acquire context and the 
ww_mutexes it acquires. However, I don't think there is a check that 
that holds. I guess we could add it as a DEBUG_MUTEX test in 
ww_mutex_lock().

Thanks,

Thomas


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 09/11] docs: Fix some broken references
From: Mauro Carvalho Chehab @ 2018-06-14 14:23 UTC (permalink / raw)
  To: Andrea Parri
  Cc: Alan Stern, Linux Doc Mailing List, Mauro Carvalho Chehab,
	linux-kernel, Jonathan Corbet
In-Reply-To: <Pine.LNX.4.44L0.1805091509100.1391-100000@iolanthe.rowland.org>

Em Wed, 9 May 2018 15:11:07 -0400 (EDT)
Alan Stern <stern@rowland.harvard.edu> escreveu:

> On Wed, 9 May 2018, Mauro Carvalho Chehab wrote:
> 
> > Em Wed, 9 May 2018 19:15:01 +0200
> > Andrea Parri <parri.andrea@gmail.com> escreveu:  
> 
> > > >  tools/memory-model/README                     | 10 +++++-----    
> > > 
> > > As mentioned in the previous thread, I am for keeping the current
> > > references: the REAMDE is listing the doc files, as well as other
> > > files in tools/memory-model/, relatively to that directory.  
> > 
> > Yeah, at least this hunk deserves some rework, as now some
> > references are Documentation/.../foo, while others are just
> > bar.
> > 
> > As on (almost) all other places (except for tools/memory-model/README),
> > the references are always from the main directory, I would make all
> > patches there also relative to main dir. If you're afraid of
> > not being too clearer, we could prefix all of them with something
> > like:
> > 
> > 	${LINUX}/tools/memory-model/...
> > 
> > just like some DT binding files do:
> > 
> > 	Documentation/devicetree/bindings/sound/audio-graph-card.txt:see ${LINUX}/Documentation/devicetree/bindings/graph.txt
> > 
> > A bonus of doing that is that the broken reference detect script can
> > keep parsing it without changes (well, it wouldn't be hard to make
> > it also accept a relative file, but doing that just due to 
> > tools/memory-model/README seems overkill).
> > 
> > Another advantage is that it would allow to easily add references
> > there from the main kernel Documentation, if needed in the future,
> > without messing with local x non-local relative namespace.  
> 
> How about changing the relative references so that something like
> Documentation/recipes.txt becomes ./Documentation/recipes.txt?

Thanks,
Mauro

After thinking a little bit about that, it should be easy to fix the
tool to consider references relative to tools/.*/Documentation.

I intend to send a new patch series with the changes to 
tools/memory-model/README removed from this patch and with the enclosed
patch on it.

Regards,
Mauro

[PATCH] scripts/documentation-file-ref-check: check
 tools/*/Documentation

Some files, like tools/memory-model/README has references to
a Documentation file that is locale to it. Handle references
that are relative to them too.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>

diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check
index 047f463cdf4b..078999a3fdff 100755
--- a/scripts/documentation-file-ref-check
+++ b/scripts/documentation-file-ref-check
@@ -78,6 +78,13 @@ while (<IN>) {
 		# Check if exists, evaluating wildcards
 		next if (grep -e, glob("$ref $fulref"));
 
+		# Accept relative Documentation patches for tools/
+		if ($f =~ m/tools/) {
+			my $path = $f;
+			$path =~ s,(.*)/.*,$1,;
+			next if (grep -e, glob("$path/$ref $path/$fulref"));
+		}
+
 		if ($fix) {
 			if (!($ref =~ m/(scripts|Kconfig|Kbuild)/)) {
 				$broken_ref{$ref}++;
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 4.9 19/30] serial: sh-sci: Stop using printk format %pCr
From: Greg Kroah-Hartman @ 2018-06-14 14:05 UTC (permalink / raw)
  To: linux-kernel, Jia-Ju Bai, Jonathan Corbet, Michael Turquette,
	Stephen Boyd, Zhang Rui, Eduardo Valentin, Eric Anholt,
	Stefan Wahren
  Cc: Greg Kroah-Hartman, stable, Sergey Senozhatsky, Petr Mladek,
	Linus Torvalds, Steven Rostedt, linux-doc, linux-clk, linux-pm,
	linux-serial, linux-arm-kernel, linux-renesas-soc,
	Geert Uytterhoeven
In-Reply-To: <20180614132600.255515394@linuxfoundation.org>

4.9-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Geert Uytterhoeven <geert+renesas@glider.be>

commit d63c16f8e1ab761775275adcf54f4bef7c330295 upstream.

Printk format "%pCr" will be removed soon, as clk_get_rate() must not be
called in atomic context.

Replace it by open-coding the operation.  This is safe here, as the code
runs in task context.

Link: http://lkml.kernel.org/r/1527845302-12159-4-git-send-email-geert+renesas@glider.be
To: Jia-Ju Bai <baijiaju1990@gmail.com>
To: Jonathan Corbet <corbet@lwn.net>
To: Michael Turquette <mturquette@baylibre.com>
To: Stephen Boyd <sboyd@kernel.org>
To: Zhang Rui <rui.zhang@intel.com>
To: Eduardo Valentin <edubezval@gmail.com>
To: Eric Anholt <eric@anholt.net>
To: Stefan Wahren <stefan.wahren@i2se.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-doc@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-serial@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-renesas-soc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org # 4.5+
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/tty/serial/sh-sci.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2626,8 +2626,8 @@ found:
 			dev_dbg(dev, "failed to get %s (%ld)\n", clk_names[i],
 				PTR_ERR(clk));
 		else
-			dev_dbg(dev, "clk %s is %pC rate %pCr\n", clk_names[i],
-				clk, clk);
+			dev_dbg(dev, "clk %s is %pC rate %lu\n", clk_names[i],
+				clk, clk_get_rate(clk));
 		sci_port->clks[i] = IS_ERR(clk) ? NULL : clk;
 	}
 	return 0;


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH 4.14 18/36] serial: sh-sci: Stop using printk format %pCr
From: Greg Kroah-Hartman @ 2018-06-14 14:04 UTC (permalink / raw)
  To: linux-kernel, Jia-Ju Bai, Jonathan Corbet, Michael Turquette,
	Stephen Boyd, Zhang Rui, Eduardo Valentin, Eric Anholt,
	Stefan Wahren
  Cc: Greg Kroah-Hartman, stable, Sergey Senozhatsky, Petr Mladek,
	Linus Torvalds, Steven Rostedt, linux-doc, linux-clk, linux-pm,
	linux-serial, linux-arm-kernel, linux-renesas-soc,
	Geert Uytterhoeven
In-Reply-To: <20180614132157.333004166@linuxfoundation.org>

4.14-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Geert Uytterhoeven <geert+renesas@glider.be>

commit d63c16f8e1ab761775275adcf54f4bef7c330295 upstream.

Printk format "%pCr" will be removed soon, as clk_get_rate() must not be
called in atomic context.

Replace it by open-coding the operation.  This is safe here, as the code
runs in task context.

Link: http://lkml.kernel.org/r/1527845302-12159-4-git-send-email-geert+renesas@glider.be
To: Jia-Ju Bai <baijiaju1990@gmail.com>
To: Jonathan Corbet <corbet@lwn.net>
To: Michael Turquette <mturquette@baylibre.com>
To: Stephen Boyd <sboyd@kernel.org>
To: Zhang Rui <rui.zhang@intel.com>
To: Eduardo Valentin <edubezval@gmail.com>
To: Eric Anholt <eric@anholt.net>
To: Stefan Wahren <stefan.wahren@i2se.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-doc@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-serial@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-renesas-soc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org # 4.5+
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/tty/serial/sh-sci.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2669,8 +2669,8 @@ found:
 			dev_dbg(dev, "failed to get %s (%ld)\n", clk_names[i],
 				PTR_ERR(clk));
 		else
-			dev_dbg(dev, "clk %s is %pC rate %pCr\n", clk_names[i],
-				clk, clk);
+			dev_dbg(dev, "clk %s is %pC rate %lu\n", clk_names[i],
+				clk, clk_get_rate(clk));
 		sci_port->clks[i] = IS_ERR(clk) ? NULL : clk;
 	}
 	return 0;


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 1/2] locking: Implement an algorithm choice for Wound-Wait mutexes
From: Peter Zijlstra @ 2018-06-14 14:42 UTC (permalink / raw)
  To: Thomas Hellstrom
  Cc: dri-devel, linux-kernel, Ingo Molnar, Jonathan Corbet,
	Gustavo Padovan, Maarten Lankhorst, Sean Paul, David Airlie,
	Davidlohr Bueso, Paul E. McKenney, Josh Triplett, Thomas Gleixner,
	Kate Stewart, Philippe Ombredanne, Greg Kroah-Hartman, linux-doc,
	linux-media, linaro-mm-sig
In-Reply-To: <dd0c5e50-ac14-912c-d31c-c2341fdd2864@vmware.com>

On Thu, Jun 14, 2018 at 01:48:39PM +0200, Thomas Hellstrom wrote:
> The literature makes a distinction between "killed" and "wounded". In our
> context, "Killed" is when a transaction actually receives an -EDEADLK and
> needs to back off. "Wounded" is when someone (typically another transaction)
> requests a transaction to kill itself. A wound will often, but not always,
> lead to a kill. If the wounded transaction has finished its locking
> sequence, or has the opportunity to grab uncontended ww mutexes or steal
> contended (non-handoff) ww mutexes to finish its transaction it will do so
> and never kill itself.

Hopefully I got it all right this time; I folded your patch in and
mucked around with it a bit, but haven't done anything except compile
it.

I left the context/transaction thing because well, that's what we called
the thing.


diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 39fda195bf78..50ef5a10cfa0 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -8,6 +8,8 @@
  *
  * Wound/wait implementation:
  *  Copyright (C) 2013 Canonical Ltd.
+ * Choice of algorithm:
+ *  Copyright (C) 2018 WMWare Inc.
  *
  * This file contains the main data structure and API definitions.
  */
@@ -23,14 +25,17 @@ struct ww_class {
 	struct lock_class_key mutex_key;
 	const char *acquire_name;
 	const char *mutex_name;
+	unsigned int is_wait_die;
 };
 
 struct ww_acquire_ctx {
 	struct task_struct *task;
 	unsigned long stamp;
-	unsigned acquired;
+	unsigned int acquired;
+	unsigned short wounded;
+	unsigned short is_wait_die;
 #ifdef CONFIG_DEBUG_MUTEXES
-	unsigned done_acquire;
+	unsigned int done_acquire;
 	struct ww_class *ww_class;
 	struct ww_mutex *contending_lock;
 #endif
@@ -38,8 +43,8 @@ struct ww_acquire_ctx {
 	struct lockdep_map dep_map;
 #endif
 #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
-	unsigned deadlock_inject_interval;
-	unsigned deadlock_inject_countdown;
+	unsigned int deadlock_inject_interval;
+	unsigned int deadlock_inject_countdown;
 #endif
 };
 
@@ -58,17 +63,21 @@ struct ww_mutex {
 # define __WW_CLASS_MUTEX_INITIALIZER(lockname, class)
 #endif
 
-#define __WW_CLASS_INITIALIZER(ww_class) \
+#define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die)	    \
 		{ .stamp = ATOMIC_LONG_INIT(0) \
 		, .acquire_name = #ww_class "_acquire" \
-		, .mutex_name = #ww_class "_mutex" }
+		, .mutex_name = #ww_class "_mutex" \
+		, .is_wait_die = _is_wait_die }
 
 #define __WW_MUTEX_INITIALIZER(lockname, class) \
 		{ .base =  __MUTEX_INITIALIZER(lockname.base) \
 		__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
 
+#define DEFINE_WD_CLASS(classname) \
+	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1)
+
 #define DEFINE_WW_CLASS(classname) \
-	struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
+	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0)
 
 #define DEFINE_WW_MUTEX(mutexname, ww_class) \
 	struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)
@@ -123,6 +132,8 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
 	ctx->task = current;
 	ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp);
 	ctx->acquired = 0;
+	ctx->wounded = false;
+	ctx->is_wait_die = ww_class->is_wait_die;
 #ifdef CONFIG_DEBUG_MUTEXES
 	ctx->ww_class = ww_class;
 	ctx->done_acquire = 0;
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index f44f658ae629..9e244af4647d 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -244,6 +244,22 @@ void __sched mutex_lock(struct mutex *lock)
 EXPORT_SYMBOL(mutex_lock);
 #endif
 
+/*
+ * Wait-Die:
+ *   The newer transactions are killed when:
+ *     It (the new transaction) makes a request for a lock being held
+ *     by an older transaction.
+ *
+ * Wound-Wait:
+ *   The newer transactions are wounded when:
+ *     An older transaction makes a request for a lock being held by
+ *     the newer transaction.
+ */
+
+/*
+ * Associate the ww_mutex @ww with the context @ww_ctx under which we acquired
+ * it.
+ */
 static __always_inline void
 ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
 {
@@ -282,26 +298,96 @@ ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
 	DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
 #endif
 	ww_ctx->acquired++;
+	ww->ctx = ww_ctx;
 }
 
+/*
+ * Determine if context @a is 'after' context @b. IOW, @a should be wounded in
+ * favour of @b.
+ */
 static inline bool __sched
 __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
 {
-	return a->stamp - b->stamp <= LONG_MAX &&
-	       (a->stamp != b->stamp || a > b);
+
+	return (signed long)(a->stamp - b->stamp) > 0;
 }
 
 /*
- * Wake up any waiters that may have to back off when the lock is held by the
- * given context.
+ * Wait-Die; wake a younger waiter context (when locks held) such that it can die.
  *
- * Due to the invariants on the wait list, this can only affect the first
- * waiter with a context.
+ * Among waiters with context, only the first one can have other locks acquired
+ * already (ctx->acquired > 0), because __ww_mutex_add_waiter() and
+ * __ww_mutex_check_wound() wake any but the earliest context.
+ */
+static bool __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter,
+		           struct ww_acquire_ctx *ww_ctx)
+{
+	if (!ww_ctx->is_wait_die)
+		return false;
+
+	if (waiter->ww_ctx->acquired > 0 &&
+			__ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) {
+		debug_mutex_wake_waiter(lock, waiter);
+		wake_up_process(waiter->task);
+	}
+
+	return true;
+}
+
+/*
+ * Wound-Wait; wound a younger @hold_ctx (if it has locks held).
+ *
+ * XXX more; explain why we too only need to wake the first.
+ */
+static bool __ww_mutex_wound(struct mutex *lock,
+			     struct ww_acquire_ctx *ww_ctx,
+			     struct ww_acquire_ctx *hold_ctx)
+{
+	struct task_struct *owner = __mutex_owner(lock);
+
+	lockdep_assert_held(&lock->wait_lock);
+
+	/*
+	 * Possible through __ww_mutex_add_waiter() when we race with
+	 * ww_mutex_set_context_fastpath(). In that case we'll get here again
+	 * through __ww_mutex_check_waiters().
+	 */
+	if (!hold_ctx)
+		return false;
+
+	/*
+	 * Can have !owner because of __mutex_unlock_slowpath(), but if owner,
+	 * it cannot go away because we'll have FLAG_WAITERS set and hold
+	 * wait_lock.
+	 */
+	if (!owner)
+		return false;
+
+	if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) {
+		hold_ctx->wounded = 1;
+		if (owner != current)
+			wake_up_process(owner);
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * We just acquired @lock under @ww_ctx, if there are later contexts waiting
+ * behind us on the wait-list, check if they need wounding/killing.
+ *
+ * See __ww_mutex_add_waiter() for the list-order construction; basically the
+ * list is ordered by stamp, smallest (oldest) first.
+ *
+ * This relies on never mixing wait-die/wound-wait on the same wait-list; which is
+ * currently ensured by that being a ww_class property.
  *
  * The current task must not be on the wait list.
  */
 static void __sched
-__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
+__ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 {
 	struct mutex_waiter *cur;
 
@@ -311,66 +397,50 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 		if (!cur->ww_ctx)
 			continue;
 
-		if (cur->ww_ctx->acquired > 0 &&
-		    __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
-			debug_mutex_wake_waiter(lock, cur);
-			wake_up_process(cur->task);
-		}
-
-		break;
+		if (__ww_mutex_die(lock, cur, ww_ctx) ||
+		    __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
+			break;
 	}
 }
 
 /*
- * After acquiring lock with fastpath or when we lost out in contested
- * slowpath, set ctx and wake up any waiters so they can recheck.
+ * After acquiring lock with fastpath, where we do not hold wait_lock, set ctx
+ * and wake up any waiters so they can recheck.
  */
 static __always_inline void
 ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 {
 	ww_mutex_lock_acquired(lock, ctx);
 
-	lock->ctx = ctx;
-
 	/*
 	 * The lock->ctx update should be visible on all cores before
-	 * the atomic read is done, otherwise contended waiters might be
+	 * the list_empty check is done, otherwise contended waiters might be
 	 * missed. The contended waiters will either see ww_ctx == NULL
 	 * and keep spinning, or it will acquire wait_lock, add itself
 	 * to waiter list and sleep.
 	 */
-	smp_mb(); /* ^^^ */
+	smp_mb(); /* See comments above and below. */
 
 	/*
-	 * Check if lock is contended, if not there is nobody to wake up
+	 * [W] ww->ctx = ctx	[W] list_add_tail()
+	 *     MB		    MB
+	 * [R] list_empty()	[R] ww->ctx
+	 *
+	 * The memory barrier above pairs with the memory barrier in
+	 * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx
+	 * and/or !empty list.
 	 */
-	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
+	if (likely(list_empty(&lock->base.wait_list)))
 		return;
 
 	/*
-	 * Uh oh, we raced in fastpath, wake up everyone in this case,
-	 * so they can see the new lock->ctx.
+	 * Uh oh, we raced in fastpath, check if any of the waiters need wounding.
 	 */
 	spin_lock(&lock->base.wait_lock);
-	__ww_mutex_wakeup_for_backoff(&lock->base, ctx);
+	__ww_mutex_check_waiters(&lock->base, ctx);
 	spin_unlock(&lock->base.wait_lock);
 }
 
-/*
- * After acquiring lock in the slowpath set ctx.
- *
- * Unlike for the fast path, the caller ensures that waiters are woken up where
- * necessary.
- *
- * Callers must hold the mutex wait_lock.
- */
-static __always_inline void
-ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
-{
-	ww_mutex_lock_acquired(lock, ctx);
-	lock->ctx = ctx;
-}
-
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 
 static inline
@@ -646,37 +716,83 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
 }
 EXPORT_SYMBOL(ww_mutex_unlock);
 
+
+static __always_inline int __sched
+__ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
+{
+	if (ww_ctx->acquired > 0) {
+#ifdef CONFIG_DEBUG_MUTEXES
+		struct ww_mutex *ww;
+
+		ww = container_of(lock, struct ww_mutex, base);
+		DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
+		ww_ctx->contending_lock = ww;
+#endif
+		return -EDEADLK;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Check the wound condition for the current lock acquire.
+ *
+ * Wound-Wait: If we're wounded, kill ourself.
+ *
+ * Wait-Die: If we're trying to acquire a lock already held by an older
+ *           context, kill ourselves.
+ *
+ * Since __ww_mutex_add_waiter() orders the wait-list on stamp, we only have to
+ * look at waiters before us in the wait-list.
+ */
 static inline int __sched
-__ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
+__ww_mutex_check_wound(struct mutex *lock, struct mutex_waiter *waiter,
 			    struct ww_acquire_ctx *ctx)
 {
 	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
 	struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 	struct mutex_waiter *cur;
 
+	if (ctx->acquired == 0)
+		return 0;
+
+	if (!ctx->is_wait_die) {
+		if (ctx->wounded)
+			return __ww_mutex_kill(lock, ctx);
+
+		return 0;
+	}
+
 	if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
-		goto deadlock;
+		return __ww_mutex_kill(lock, ctx);
 
 	/*
 	 * If there is a waiter in front of us that has a context, then its
-	 * stamp is earlier than ours and we must back off.
+	 * stamp is earlier than ours and we must wound ourself.
 	 */
 	cur = waiter;
 	list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) {
-		if (cur->ww_ctx)
-			goto deadlock;
+		if (!cur->ww_ctx)
+			continue;
+
+		return __ww_mutex_kill(lock, ctx);
 	}
 
 	return 0;
-
-deadlock:
-#ifdef CONFIG_DEBUG_MUTEXES
-	DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
-	ctx->contending_lock = ww;
-#endif
-	return -EDEADLK;
 }
 
+/*
+ * Add @waiter to the wait-list, keep the wait-list ordered by stamp, smallest
+ * first. Such that older contexts are preferred to acquire the lock over
+ * younger contexts.
+ *
+ * Waiters without context are interspersed in FIFO order.
+ *
+ * Furthermore, for Wait-Die kill ourself immediately when possible (there are
+ * older contexts already waiting) to avoid unnecessary waiting and for
+ * Wound-Wait ensure we wound the owning context when it is younger.
+ */
 static inline int __sched
 __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 		      struct mutex *lock,
@@ -684,16 +800,21 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 {
 	struct mutex_waiter *cur;
 	struct list_head *pos;
+	bool is_wait_die;
 
 	if (!ww_ctx) {
 		list_add_tail(&waiter->list, &lock->wait_list);
 		return 0;
 	}
 
+	is_wait_die = ww_ctx->is_wait_die;
+
 	/*
 	 * Add the waiter before the first waiter with a higher stamp.
 	 * Waiters without a context are skipped to avoid starving
-	 * them.
+	 * them. Wait-Die waiters may back off here. Wound-Wait waiters
+	 * never back off here, but they are sorted in stamp order and
+	 * may wound the lock holder.
 	 */
 	pos = &lock->wait_list;
 	list_for_each_entry_reverse(cur, &lock->wait_list, list) {
@@ -701,16 +822,16 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 			continue;
 
 		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
-			/* Back off immediately if necessary. */
-			if (ww_ctx->acquired > 0) {
-#ifdef CONFIG_DEBUG_MUTEXES
-				struct ww_mutex *ww;
-
-				ww = container_of(lock, struct ww_mutex, base);
-				DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
-				ww_ctx->contending_lock = ww;
-#endif
-				return -EDEADLK;
+			/*
+			 * Wait-Die: if we find an older context waiting, there
+			 * is no point in queueing behind it, as we'd have to
+			 * wound ourselves the moment it would acquire the
+			 * lock.
+			 */
+			if (is_wait_die) {
+				int ret = __ww_mutex_kill(lock, ww_ctx);
+				if (ret)
+					return ret;
 			}
 
 			break;
@@ -718,17 +839,29 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 
 		pos = &cur->list;
 
+		/* Wait-Die: ensure younger waiters die. */
+		__ww_mutex_die(lock, cur, ww_ctx);
+	}
+
+	list_add_tail(&waiter->list, pos);
+
+	/*
+	 * Wound-Wait: if we're blocking on a mutex owned by a younger context,
+	 * wound that such that we might proceed.
+	 */
+	if (!is_wait_die) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+
 		/*
-		 * Wake up the waiter so that it gets a chance to back
-		 * off.
+		 * See ww_mutex_set_context_fastpath(). Orders the
+		 * list_add_tail() vs the ww->ctx load, such that either we
+		 * or the fastpath will wound @ww->ctx.
 		 */
-		if (cur->ww_ctx->acquired > 0) {
-			debug_mutex_wake_waiter(lock, cur);
-			wake_up_process(cur->task);
-		}
+		smp_mb();
+
+		__ww_mutex_wound(lock, ww_ctx, ww->ctx);
 	}
 
-	list_add_tail(&waiter->list, pos);
 	return 0;
 }
 
@@ -751,6 +884,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	if (use_ww_ctx && ww_ctx) {
 		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
 			return -EALREADY;
+
+		/*
+		 * Reset the wounded flag after a kill.  No other process can
+		 * race and wound us here since they can't have a valid owner
+		 * pointer at this time.
+		 */
+		if (ww_ctx->acquired == 0)
+			ww_ctx->wounded = 0;
 	}
 
 	preempt_disable();
@@ -772,7 +913,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 */
 	if (__mutex_trylock(lock)) {
 		if (use_ww_ctx && ww_ctx)
-			__ww_mutex_wakeup_for_backoff(lock, ww_ctx);
+			__ww_mutex_check_waiters(lock, ww_ctx);
 
 		goto skip_wait;
 	}
@@ -790,10 +931,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		waiter.ww_ctx = MUTEX_POISON_WW_CTX;
 #endif
 	} else {
-		/* Add in stamp order, waking up waiters that must back off. */
+		/* Add in stamp order, waking up waiters that must wound themselves. */
 		ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
 		if (ret)
-			goto err_early_backoff;
+			goto err_early_kill;
 
 		waiter.ww_ctx = ww_ctx;
 	}
@@ -824,8 +965,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 			goto err;
 		}
 
-		if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) {
-			ret = __ww_mutex_lock_check_stamp(lock, &waiter, ww_ctx);
+		if (use_ww_ctx && ww_ctx) {
+			ret = __ww_mutex_check_wound(lock, &waiter, ww_ctx);
 			if (ret)
 				goto err;
 		}
@@ -859,6 +1000,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 acquired:
 	__set_current_state(TASK_RUNNING);
 
+	if (use_ww_ctx && ww_ctx) {
+		/*
+		 * Wound-Wait; we stole the lock (!first_waiter), check the
+		 * waiters. This, together with XXX, ensures __ww_mutex_wound()
+		 * only needs to check the first waiter (with context).
+		 */
+		if (!ww_ctx->is_wait_die && !__mutex_waiter_is_first(lock, &waiter))
+			__ww_mutex_check_waiters(lock, ww_ctx);
+	}
+
 	mutex_remove_waiter(lock, &waiter, current);
 	if (likely(list_empty(&lock->wait_list)))
 		__mutex_clear_flag(lock, MUTEX_FLAGS);
@@ -870,7 +1021,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	lock_acquired(&lock->dep_map, ip);
 
 	if (use_ww_ctx && ww_ctx)
-		ww_mutex_set_context_slowpath(ww, ww_ctx);
+		ww_mutex_lock_acquired(ww, ww_ctx);
 
 	spin_unlock(&lock->wait_lock);
 	preempt_enable();
@@ -879,7 +1030,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 err:
 	__set_current_state(TASK_RUNNING);
 	mutex_remove_waiter(lock, &waiter, current);
-err_early_backoff:
+err_early_kill:
 	spin_unlock(&lock->wait_lock);
 	debug_mutex_free_waiter(&waiter);
 	mutex_release(&lock->dep_map, 1, ip);
--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH 4.16 25/43] serial: sh-sci: Stop using printk format %pCr
From: Greg Kroah-Hartman @ 2018-06-14 14:04 UTC (permalink / raw)
  To: linux-kernel, Jia-Ju Bai, Jonathan Corbet, Michael Turquette,
	Stephen Boyd, Zhang Rui, Eduardo Valentin, Eric Anholt,
	Stefan Wahren
  Cc: Greg Kroah-Hartman, stable, Sergey Senozhatsky, Petr Mladek,
	Linus Torvalds, Steven Rostedt, linux-doc, linux-clk, linux-pm,
	linux-serial, linux-arm-kernel, linux-renesas-soc,
	Geert Uytterhoeven
In-Reply-To: <20180614132135.111973468@linuxfoundation.org>

4.16-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Geert Uytterhoeven <geert+renesas@glider.be>

commit d63c16f8e1ab761775275adcf54f4bef7c330295 upstream.

Printk format "%pCr" will be removed soon, as clk_get_rate() must not be
called in atomic context.

Replace it by open-coding the operation.  This is safe here, as the code
runs in task context.

Link: http://lkml.kernel.org/r/1527845302-12159-4-git-send-email-geert+renesas@glider.be
To: Jia-Ju Bai <baijiaju1990@gmail.com>
To: Jonathan Corbet <corbet@lwn.net>
To: Michael Turquette <mturquette@baylibre.com>
To: Stephen Boyd <sboyd@kernel.org>
To: Zhang Rui <rui.zhang@intel.com>
To: Eduardo Valentin <edubezval@gmail.com>
To: Eric Anholt <eric@anholt.net>
To: Stefan Wahren <stefan.wahren@i2se.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-doc@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-serial@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-renesas-soc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org # 4.5+
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/tty/serial/sh-sci.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2691,8 +2691,8 @@ found:
 			dev_dbg(dev, "failed to get %s (%ld)\n", clk_names[i],
 				PTR_ERR(clk));
 		else
-			dev_dbg(dev, "clk %s is %pC rate %pCr\n", clk_names[i],
-				clk, clk);
+			dev_dbg(dev, "clk %s is %pC rate %lu\n", clk_names[i],
+				clk, clk_get_rate(clk));
 		sci_port->clks[i] = IS_ERR(clk) ? NULL : clk;
 	}
 	return 0;


--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox