LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 15/15] crypto: caam: Replace in_irq() usage.
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

The driver uses in_irq() + in_serving_softirq() magic to decide if NAPI
scheduling is required or packet processing.

The usage of in_*() in drivers is phased out and Linus clearly requested
that code which changes behaviour depending on context should either be
seperated or the context be conveyed in an argument passed by the caller,
which usually knows the context.

Use the `napi' argument passed by the callback. It is set true if
called from the interrupt handler and NAPI should be scheduled.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: "Horia Geantă" <horia.geanta@nxp.com>
Cc: Aymen Sghaier <aymen.sghaier@nxp.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Madalin Bucur <madalin.bucur@nxp.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Li Yang <leoyang.li@nxp.com>
Cc: linux-crypto@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-arm-kernel@lists.infradead.org
---
 drivers/crypto/caam/qi.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 09ea398304c8b..79dbd90887f8a 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -545,14 +545,10 @@ static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
 	}
 }
 
-static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np)
+static int caam_qi_napi_schedule(struct qman_portal *p, struct caam_napi *np,
+				 bool napi)
 {
-	/*
-	 * In case of threaded ISR, for RT kernels in_irq() does not return
-	 * appropriate value, so use in_serving_softirq to distinguish between
-	 * softirq and irq contexts.
-	 */
-	if (unlikely(in_irq() || !in_serving_softirq())) {
+	if (napi) {
 		/* Disable QMan IRQ source and invoke NAPI */
 		qman_p_irqsource_remove(p, QM_PIRQ_DQRI);
 		np->p = p;
@@ -574,7 +570,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
 	struct caam_drv_private *priv = dev_get_drvdata(qidev);
 	u32 status;
 
-	if (caam_qi_napi_schedule(p, caam_napi))
+	if (caam_qi_napi_schedule(p, caam_napi, napi))
 		return qman_cb_dqrr_stop;
 
 	fd = &dqrr->fd;
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 08/15] net: airo: Replace in_atomic() usage.
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

issuecommand() is using in_atomic() to decide if it is safe to invoke
schedule() while waiting for the command to be accepted.

Usage of in_atomic() for this is only half correct as it can not detect all
condition where it is not allowed to schedule(). Also Linus clearly
requested that code which changes behaviour depending on context should
either be seperated or the context be conveyed in an argument passed by the
caller, which usually knows the context.

Add an may_sleep argument to issuecommand() indicating when it is save to
sleep and change schedule() to cond_resched() because it's pointless to
invoke schedule() if there is no request to reschedule.

Pass the may_sleep condition through the various call chains leading to
issuecommand().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 drivers/net/wireless/cisco/airo.c | 86 +++++++++++++++++--------------
 1 file changed, 47 insertions(+), 39 deletions(-)

diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 369a6ca44d1ff..74acf9af2adb1 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -1115,7 +1115,8 @@ static int enable_MAC(struct airo_info *ai, int lock);
 static void disable_MAC(struct airo_info *ai, int lock);
 static void enable_interrupts(struct airo_info*);
 static void disable_interrupts(struct airo_info*);
-static u16 issuecommand(struct airo_info*, Cmd *pCmd, Resp *pRsp);
+static u16 issuecommand(struct airo_info*, Cmd *pCmd, Resp *pRsp,
+			bool may_sleep);
 static int bap_setup(struct airo_info*, u16 rid, u16 offset, int whichbap);
 static int aux_bap_read(struct airo_info*, __le16 *pu16Dst, int bytelen,
 			int whichbap);
@@ -1130,8 +1131,10 @@ static int PC4500_writerid(struct airo_info*, u16 rid, const void
 static int do_writerid(struct airo_info*, u16 rid, const void *rid_data,
 			int len, int dummy);
 static u16 transmit_allocate(struct airo_info*, int lenPayload, int raw);
-static int transmit_802_3_packet(struct airo_info*, int len, char *pPacket);
-static int transmit_802_11_packet(struct airo_info*, int len, char *pPacket);
+static int transmit_802_3_packet(struct airo_info*, int len, char *pPacket,
+				 bool may_sleep);
+static int transmit_802_11_packet(struct airo_info*, int len, char *pPacket,
+				  bool may_sleep);
 
 static int mpi_send_packet(struct net_device *dev);
 static void mpi_unmap_card(struct pci_dev *pci);
@@ -1753,7 +1756,7 @@ static int readBSSListRid(struct airo_info *ai, int first,
 		if (down_interruptible(&ai->sem))
 			return -ERESTARTSYS;
 		ai->list_bss_task = current;
-		issuecommand(ai, &cmd, &rsp);
+		issuecommand(ai, &cmd, &rsp, true);
 		up(&ai->sem);
 		/* Let the command take effect */
 		schedule_timeout_uninterruptible(3 * HZ);
@@ -2096,7 +2099,7 @@ static void get_tx_error(struct airo_info *ai, s32 fid)
 	}
 }
 
-static void airo_end_xmit(struct net_device *dev)
+static void airo_end_xmit(struct net_device *dev, bool may_sleep)
 {
 	u16 status;
 	int i;
@@ -2107,7 +2110,7 @@ static void airo_end_xmit(struct net_device *dev)
 
 	clear_bit(JOB_XMIT, &priv->jobs);
 	clear_bit(FLAG_PENDING_XMIT, &priv->flags);
-	status = transmit_802_3_packet (priv, fids[fid], skb->data);
+	status = transmit_802_3_packet(priv, fids[fid], skb->data, may_sleep);
 	up(&priv->sem);
 
 	i = 0;
@@ -2164,11 +2167,11 @@ static netdev_tx_t airo_start_xmit(struct sk_buff *skb,
 		set_bit(JOB_XMIT, &priv->jobs);
 		wake_up_interruptible(&priv->thr_wait);
 	} else
-		airo_end_xmit(dev);
+		airo_end_xmit(dev, false);
 	return NETDEV_TX_OK;
 }
 
-static void airo_end_xmit11(struct net_device *dev)
+static void airo_end_xmit11(struct net_device *dev, bool may_sleep)
 {
 	u16 status;
 	int i;
@@ -2179,7 +2182,7 @@ static void airo_end_xmit11(struct net_device *dev)
 
 	clear_bit(JOB_XMIT11, &priv->jobs);
 	clear_bit(FLAG_PENDING_XMIT11, &priv->flags);
-	status = transmit_802_11_packet (priv, fids[fid], skb->data);
+	status = transmit_802_11_packet(priv, fids[fid], skb->data, may_sleep);
 	up(&priv->sem);
 
 	i = MAX_FIDS / 2;
@@ -2243,7 +2246,7 @@ static netdev_tx_t airo_start_xmit11(struct sk_buff *skb,
 		set_bit(JOB_XMIT11, &priv->jobs);
 		wake_up_interruptible(&priv->thr_wait);
 	} else
-		airo_end_xmit11(dev);
+		airo_end_xmit11(dev, false);
 	return NETDEV_TX_OK;
 }
 
@@ -2293,7 +2296,7 @@ static struct net_device_stats *airo_get_stats(struct net_device *dev)
 	return &dev->stats;
 }
 
-static void airo_set_promisc(struct airo_info *ai)
+static void airo_set_promisc(struct airo_info *ai, bool may_sleep)
 {
 	Cmd cmd;
 	Resp rsp;
@@ -2302,7 +2305,7 @@ static void airo_set_promisc(struct airo_info *ai)
 	cmd.cmd = CMD_SETMODE;
 	clear_bit(JOB_PROMISC, &ai->jobs);
 	cmd.parm0=(ai->flags&IFF_PROMISC) ? PROMISC : NOPROMISC;
-	issuecommand(ai, &cmd, &rsp);
+	issuecommand(ai, &cmd, &rsp, may_sleep);
 	up(&ai->sem);
 }
 
@@ -2316,7 +2319,7 @@ static void airo_set_multicast_list(struct net_device *dev)
 			set_bit(JOB_PROMISC, &ai->jobs);
 			wake_up_interruptible(&ai->thr_wait);
 		} else
-			airo_set_promisc(ai);
+			airo_set_promisc(ai, false);
 	}
 
 	if ((dev->flags&IFF_ALLMULTI) || !netdev_mc_empty(dev)) {
@@ -2476,7 +2479,7 @@ static int mpi_init_descriptors (struct airo_info *ai)
 	cmd.parm0 = FID_RX;
 	cmd.parm1 = (ai->rxfids[0].card_ram_off - ai->pciaux);
 	cmd.parm2 = MPI_MAX_FIDS;
-	rc = issuecommand(ai, &cmd, &rsp);
+	rc = issuecommand(ai, &cmd, &rsp, true);
 	if (rc != SUCCESS) {
 		airo_print_err(ai->dev->name, "Couldn't allocate RX FID");
 		return rc;
@@ -2504,7 +2507,7 @@ static int mpi_init_descriptors (struct airo_info *ai)
 	}
 	ai->txfids[i-1].tx_desc.eoc = 1; /* Last descriptor has EOC set */
 
-	rc = issuecommand(ai, &cmd, &rsp);
+	rc = issuecommand(ai, &cmd, &rsp, true);
 	if (rc != SUCCESS) {
 		airo_print_err(ai->dev->name, "Couldn't allocate TX FID");
 		return rc;
@@ -2518,7 +2521,7 @@ static int mpi_init_descriptors (struct airo_info *ai)
 	cmd.parm0 = RID_RW;
 	cmd.parm1 = (ai->config_desc.card_ram_off - ai->pciaux);
 	cmd.parm2 = 1; /* Magic number... */
-	rc = issuecommand(ai, &cmd, &rsp);
+	rc = issuecommand(ai, &cmd, &rsp, true);
 	if (rc != SUCCESS) {
 		airo_print_err(ai->dev->name, "Couldn't allocate RID");
 		return rc;
@@ -3144,13 +3147,13 @@ static int airo_thread(void *data)
 		}
 
 		if (test_bit(JOB_XMIT, &ai->jobs))
-			airo_end_xmit(dev);
+			airo_end_xmit(dev, true);
 		else if (test_bit(JOB_XMIT11, &ai->jobs))
-			airo_end_xmit11(dev);
+			airo_end_xmit11(dev, true);
 		else if (test_bit(JOB_STATS, &ai->jobs))
 			airo_read_stats(dev);
 		else if (test_bit(JOB_PROMISC, &ai->jobs))
-			airo_set_promisc(ai);
+			airo_set_promisc(ai, true);
 		else if (test_bit(JOB_MIC, &ai->jobs))
 			micinit(ai);
 		else if (test_bit(JOB_EVENT, &ai->jobs))
@@ -3599,7 +3602,7 @@ static int enable_MAC(struct airo_info *ai, int lock)
 	if (!test_bit(FLAG_ENABLED, &ai->flags)) {
 		memset(&cmd, 0, sizeof(cmd));
 		cmd.cmd = MAC_ENABLE;
-		rc = issuecommand(ai, &cmd, &rsp);
+		rc = issuecommand(ai, &cmd, &rsp, true);
 		if (rc == SUCCESS)
 			set_bit(FLAG_ENABLED, &ai->flags);
 	} else
@@ -3631,7 +3634,7 @@ static void disable_MAC(struct airo_info *ai, int lock)
 			netif_carrier_off(ai->dev);
 		memset(&cmd, 0, sizeof(cmd));
 		cmd.cmd = MAC_DISABLE; // disable in case already enabled
-		issuecommand(ai, &cmd, &rsp);
+		issuecommand(ai, &cmd, &rsp, true);
 		clear_bit(FLAG_ENABLED, &ai->flags);
 	}
 	if (lock == 1)
@@ -3834,7 +3837,7 @@ static u16 setup_card(struct airo_info *ai, u8 *mac, int lock)
 	cmd.parm0 = cmd.parm1 = cmd.parm2 = 0;
 	if (lock && down_interruptible(&ai->sem))
 		return ERROR;
-	if (issuecommand(ai, &cmd, &rsp) != SUCCESS) {
+	if (issuecommand(ai, &cmd, &rsp, true) != SUCCESS) {
 		if (lock)
 			up(&ai->sem);
 		return ERROR;
@@ -3844,7 +3847,7 @@ static u16 setup_card(struct airo_info *ai, u8 *mac, int lock)
 	// Let's figure out if we need to use the AUX port
 	if (!test_bit(FLAG_MPI,&ai->flags)) {
 		cmd.cmd = CMD_ENABLEAUX;
-		if (issuecommand(ai, &cmd, &rsp) != SUCCESS) {
+		if (issuecommand(ai, &cmd, &rsp, true) != SUCCESS) {
 			if (lock)
 				up(&ai->sem);
 			airo_print_err(ai->dev->name, "Error checking for AUX port");
@@ -3956,7 +3959,8 @@ static u16 setup_card(struct airo_info *ai, u8 *mac, int lock)
 	return SUCCESS;
 }
 
-static u16 issuecommand(struct airo_info *ai, Cmd *pCmd, Resp *pRsp)
+static u16 issuecommand(struct airo_info *ai, Cmd *pCmd, Resp *pRsp,
+			bool may_sleep)
 {
         // Im really paranoid about letting it run forever!
 	int max_tries = 600000;
@@ -3973,8 +3977,8 @@ static u16 issuecommand(struct airo_info *ai, Cmd *pCmd, Resp *pRsp)
 		if ((IN4500(ai, COMMAND)) == pCmd->cmd)
 			// PC4500 didn't notice command, try again
 			OUT4500(ai, COMMAND, pCmd->cmd);
-		if (!in_atomic() && (max_tries & 255) == 0)
-			schedule();
+		if (may_sleep && (max_tries & 255) == 0)
+			cond_resched();
 	}
 
 	if (max_tries == -1) {
@@ -4131,7 +4135,7 @@ static int PC4500_accessrid(struct airo_info *ai, u16 rid, u16 accmd)
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd = accmd;
 	cmd.parm0 = rid;
-	status = issuecommand(ai, &cmd, &rsp);
+	status = issuecommand(ai, &cmd, &rsp, true);
 	if (status != 0) return status;
 	if ((rsp.status & 0x7F00) != 0) {
 		return (accmd << 8) + (rsp.rsp0 & 0xFF);
@@ -4167,7 +4171,7 @@ static int PC4500_readrid(struct airo_info *ai, u16 rid, void *pBuf, int len, in
 		memcpy_toio(ai->config_desc.card_ram_off,
 			&ai->config_desc.rid_desc, sizeof(Rid));
 
-		rc = issuecommand(ai, &cmd, &rsp);
+		rc = issuecommand(ai, &cmd, &rsp, true);
 
 		if (rsp.status & 0x7f00)
 			rc = rsp.rsp0;
@@ -4246,7 +4250,7 @@ static int PC4500_writerid(struct airo_info *ai, u16 rid,
 			memcpy(ai->config_desc.virtual_host_addr,
 				pBuf, len);
 
-			rc = issuecommand(ai, &cmd, &rsp);
+			rc = issuecommand(ai, &cmd, &rsp, true);
 			if ((rc & 0xff00) != 0) {
 				airo_print_err(ai->dev->name, "%s: Write rid Error %d",
 						__func__, rc);
@@ -4292,7 +4296,7 @@ static u16 transmit_allocate(struct airo_info *ai, int lenPayload, int raw)
 	cmd.parm0 = lenPayload;
 	if (down_interruptible(&ai->sem))
 		return ERROR;
-	if (issuecommand(ai, &cmd, &rsp) != SUCCESS) {
+	if (issuecommand(ai, &cmd, &rsp, true) != SUCCESS) {
 		txFid = ERROR;
 		goto done;
 	}
@@ -4338,7 +4342,8 @@ static u16 transmit_allocate(struct airo_info *ai, int lenPayload, int raw)
 /* In general BAP1 is dedicated to transmiting packets.  However,
    since we need a BAP when accessing RIDs, we also use BAP1 for that.
    Make sure the BAP1 spinlock is held when this is called. */
-static int transmit_802_3_packet(struct airo_info *ai, int len, char *pPacket)
+static int transmit_802_3_packet(struct airo_info *ai, int len, char *pPacket,
+				 bool may_sleep)
 {
 	__le16 payloadLen;
 	Cmd cmd;
@@ -4376,12 +4381,14 @@ static int transmit_802_3_packet(struct airo_info *ai, int len, char *pPacket)
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd = CMD_TRANSMIT;
 	cmd.parm0 = txFid;
-	if (issuecommand(ai, &cmd, &rsp) != SUCCESS) return ERROR;
+	if (issuecommand(ai, &cmd, &rsp, may_sleep) != SUCCESS)
+		return ERROR;
 	if ((rsp.status & 0xFF00) != 0) return ERROR;
 	return SUCCESS;
 }
 
-static int transmit_802_11_packet(struct airo_info *ai, int len, char *pPacket)
+static int transmit_802_11_packet(struct airo_info *ai, int len, char *pPacket,
+				  bool may_sleep)
 {
 	__le16 fc, payloadLen;
 	Cmd cmd;
@@ -4416,7 +4423,8 @@ static int transmit_802_11_packet(struct airo_info *ai, int len, char *pPacket)
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd = CMD_TRANSMIT;
 	cmd.parm0 = txFid;
-	if (issuecommand(ai, &cmd, &rsp) != SUCCESS) return ERROR;
+	if (issuecommand(ai, &cmd, &rsp, may_sleep) != SUCCESS)
+		return ERROR;
 	if ((rsp.status & 0xFF00) != 0) return ERROR;
 	return SUCCESS;
 }
@@ -5480,7 +5488,7 @@ static int proc_BSSList_open(struct inode *inode, struct file *file)
 				kfree(file->private_data);
 				return -ERESTARTSYS;
 			}
-			issuecommand(ai, &cmd, &rsp);
+			issuecommand(ai, &cmd, &rsp, true);
 			up(&ai->sem);
 			data->readlen = 0;
 			return 0;
@@ -5617,7 +5625,7 @@ static int __maybe_unused airo_pci_suspend(struct device *dev_d)
 	netif_device_detach(dev);
 	ai->power = PMSG_SUSPEND;
 	cmd.cmd = HOSTSLEEP;
-	issuecommand(ai, &cmd, &rsp);
+	issuecommand(ai, &cmd, &rsp, true);
 
 	device_wakeup_enable(dev_d);
 	return 0;
@@ -5960,7 +5968,7 @@ static int airo_set_wap(struct net_device *dev,
 		cmd.cmd = CMD_LOSE_SYNC;
 		if (down_interruptible(&local->sem))
 			return -ERESTARTSYS;
-		issuecommand(local, &cmd, &rsp);
+		issuecommand(local, &cmd, &rsp, true);
 		up(&local->sem);
 	} else {
 		memset(APList_rid, 0, sizeof(*APList_rid));
@@ -7258,7 +7266,7 @@ static int airo_set_scan(struct net_device *dev,
 	ai->scan_timeout = RUN_AT(3*HZ);
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd = CMD_LISTBSS;
-	issuecommand(ai, &cmd, &rsp);
+	issuecommand(ai, &cmd, &rsp, true);
 	wake = 1;
 
 out:
@@ -7525,7 +7533,7 @@ static int airo_config_commit(struct net_device *dev,
 	writeConfigRid(local, 0);
 	enable_MAC(local, 0);
 	if (test_bit (FLAG_RESET, &local->flags))
-		airo_set_promisc(local);
+		airo_set_promisc(local, true);
 	else
 		up(&local->sem);
 
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 11/15] net: rtlwifi: Remove in_interrupt() usage in is_any_client_connect_to_ap().
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

is_any_client_connect_to_ap() is using in_interrupt() to determine whether
it should acquire the lock prior accessing the list.

The usage of in_interrupt() in drivers is phased out and Linus clearly
requested that code which changes behaviour depending on context should
either be separated or the context be conveyed in an argument passed by the
caller, which usually knows the context.

The function is called from:

    - halbtc_get()

    - halbtc_get()
        halbtc_get_wifi_link_status()

    - halbtc_display_dbg_msg()
	halbtc_display_wifi_status()
          halbtc_get_wifi_link_status()

All top level callers are part of the btc_coexist callback inferface and
are never invoked from a context which can hold the lock already.

The contexts which hold the lock are either protecting list add/del
operations or list walks which never call into any of the btc_coexist
interfaces.

In fact the conditional is outright dangerous because if this function
would be invoked from a BH disabled context the check would avoid taking
the lock while on another CPU the list could be manipulated under the lock.

Remove the in_interrupt() check and always acquire the lock.

To simplify the code further use list_empty() instead of walking the list
and counting the entries just to check the count for > 0 at the end.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Ping-Ke Shih <pkshih@realtek.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.c  | 25 +++++--------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 2c05369b79e4d..2155a6699ef8d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -47,30 +47,17 @@ static bool is_any_client_connect_to_ap(struct btc_coexist *btcoexist)
 {
 	struct rtl_priv *rtlpriv = btcoexist->adapter;
 	struct rtl_mac *mac = rtl_mac(rtlpriv);
-	struct rtl_sta_info *drv_priv;
-	u8 cnt = 0;
+	bool ret = false;
 
 	if (mac->opmode == NL80211_IFTYPE_ADHOC ||
 	    mac->opmode == NL80211_IFTYPE_MESH_POINT ||
 	    mac->opmode == NL80211_IFTYPE_AP) {
-		if (in_interrupt() > 0) {
-			list_for_each_entry(drv_priv, &rtlpriv->entry_list,
-					    list) {
-				cnt++;
-			}
-		} else {
-			spin_lock_bh(&rtlpriv->locks.entry_list_lock);
-			list_for_each_entry(drv_priv, &rtlpriv->entry_list,
-					    list) {
-				cnt++;
-			}
-			spin_unlock_bh(&rtlpriv->locks.entry_list_lock);
-		}
+		spin_lock_bh(&rtlpriv->locks.entry_list_lock);
+		if (!list_empty(&rtlpriv->entry_list))
+			ret = true;
+		spin_unlock_bh(&rtlpriv->locks.entry_list_lock);
 	}
-	if (cnt > 0)
-		return true;
-	else
-		return false;
+	return ret;
 }
 
 static bool halbtc_legacy(struct rtl_priv *adapter)
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 09/15] net: hostap: Remove in_atomic() check.
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

hostap_get_wireless_stats() is the iw_handler_if::get_wireless_stats()
callback of this driver. This callback was not allowed to sleep until
commit a160ee69c6a46 ("wext: let get_wireless_stats() sleep") in v2.6.32.

Remove the therefore pointless in_atomic() check.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Jouni Malinen <j@w1.fi>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 drivers/net/wireless/intersil/hostap/hostap_ioctl.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
index 514c7b01dbf6f..49766b285230c 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
@@ -44,19 +44,8 @@ static struct iw_statistics *hostap_get_wireless_stats(struct net_device *dev)
 
 	if (local->iw_mode != IW_MODE_MASTER &&
 	    local->iw_mode != IW_MODE_REPEAT) {
-		int update = 1;
-#ifdef in_atomic
-		/* RID reading might sleep and it must not be called in
-		 * interrupt context or while atomic. However, this
-		 * function seems to be called while atomic (at least in Linux
-		 * 2.5.59). Update signal quality values only if in suitable
-		 * context. Otherwise, previous values read from tick timer
-		 * will be used. */
-		if (in_atomic())
-			update = 0;
-#endif /* in_atomic */
 
-		if (update && prism2_update_comms_qual(dev) == 0)
+		if (prism2_update_comms_qual(dev) == 0)
 			wstats->qual.updated = IW_QUAL_ALL_UPDATED |
 				IW_QUAL_DBM;
 
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 10/15] net: zd1211rw: Remove in_atomic() usage.
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

The usage of in_atomic() in driver code is deprecated as it can not
always detect all states where it is not allowed to sleep.

All callers are in premptible thread context and all functions invoke core
functions which have checks for invalid calling contexts already.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Daniel Drake <dsd@gentoo.org>
Cc: Ulrich Kunitz <kune@deine-taler.de>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
index 66367ab7e4c1e..5c4cd0e1adebb 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
@@ -1711,11 +1711,6 @@ int zd_usb_ioread16v(struct zd_usb *usb, u16 *values,
 			 count, USB_MAX_IOREAD16_COUNT);
 		return -EINVAL;
 	}
-	if (in_atomic()) {
-		dev_dbg_f(zd_usb_dev(usb),
-			 "error: io in atomic context not supported\n");
-		return -EWOULDBLOCK;
-	}
 	if (!usb_int_enabled(usb)) {
 		dev_dbg_f(zd_usb_dev(usb),
 			  "error: usb interrupt not enabled\n");
@@ -1882,11 +1877,6 @@ int zd_usb_iowrite16v_async(struct zd_usb *usb, const struct zd_ioreq16 *ioreqs,
 			count, USB_MAX_IOWRITE16_COUNT);
 		return -EINVAL;
 	}
-	if (in_atomic()) {
-		dev_dbg_f(zd_usb_dev(usb),
-			"error: io in atomic context not supported\n");
-		return -EWOULDBLOCK;
-	}
 
 	udev = zd_usb_to_usbdev(usb);
 
@@ -1966,11 +1956,6 @@ int zd_usb_rfwrite(struct zd_usb *usb, u32 value, u8 bits)
 	int i, req_len, actual_req_len;
 	u16 bit_value_template;
 
-	if (in_atomic()) {
-		dev_dbg_f(zd_usb_dev(usb),
-			"error: io in atomic context not supported\n");
-		return -EWOULDBLOCK;
-	}
 	if (bits < USB_MIN_RFWRITE_BIT_COUNT) {
 		dev_dbg_f(zd_usb_dev(usb),
 			"error: bits %d are smaller than"
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 07/15] net: airo: Always use JOB_STATS and JOB_EVENT
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

issuecommand() is using in_atomic() to decide if it is safe to invoke
schedule() while waiting for the command to be accepted.

Usage of in_atomic() for this is only half correct as it can not detect all
condition where it is not allowed to schedule(). Also Linus clearly
requested that code which changes behaviour depending on context should
either be seperated or the context be conveyed in an argument passed by the
caller, which usually knows the context.

Chasing the call chains leading up to issuecommand() is straight forward,
but airo_link() and airo_get_stats() would require to pass the context
through a quite large amount of functions.

As this is ancient hardware, avoid the churn and enforce the invocation of
those functions through the JOB machinery.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 drivers/net/wireless/cisco/airo.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index ca423f3b6b3ea..369a6ca44d1ff 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -2286,12 +2286,8 @@ static struct net_device_stats *airo_get_stats(struct net_device *dev)
 	struct airo_info *local =  dev->ml_priv;
 
 	if (!test_bit(JOB_STATS, &local->jobs)) {
-		/* Get stats out of the card if available */
-		if (down_trylock(&local->sem) != 0) {
-			set_bit(JOB_STATS, &local->jobs);
-			wake_up_interruptible(&local->thr_wait);
-		} else
-			airo_read_stats(dev);
+		set_bit(JOB_STATS, &local->jobs);
+		wake_up_interruptible(&local->thr_wait);
 	}
 
 	return &dev->stats;
@@ -3277,11 +3273,9 @@ static void airo_handle_link(struct airo_info *ai)
 		set_bit(FLAG_UPDATE_UNI, &ai->flags);
 		set_bit(FLAG_UPDATE_MULTI, &ai->flags);
 
-		if (down_trylock(&ai->sem) != 0) {
-			set_bit(JOB_EVENT, &ai->jobs);
-			wake_up_interruptible(&ai->thr_wait);
-		} else
-			airo_send_event(ai->dev);
+		set_bit(JOB_EVENT, &ai->jobs);
+		wake_up_interruptible(&ai->thr_wait);
+
 		netif_carrier_on(ai->dev);
 	} else if (!scan_forceloss) {
 		if (auto_wep && !ai->expires) {
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 04/15] net: mlx5: Replace in_irq() usage.
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

mlx5_eq_async_int() uses in_irq() to decide whether eq::lock needs to be
acquired and released with spin_[un]lock() or the irq saving/restoring
variants.

The usage of in_*() in drivers is phased out and Linus clearly requested
that code which changes behaviour depending on context should either be
seperated or the context be conveyed in an argument passed by the caller,
which usually knows the context.

mlx5_eq_async_int() knows the context via the action argument already so
using it for the lock variant decision is a straight forward replacement
for in_irq().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-rdma@vger.kernel.org
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 8ebfe782f95e5..3800e9415158b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -189,19 +189,21 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
 	return count_eqe;
 }
 
-static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags)
+static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery,
+				   unsigned long *flags)
 	__acquires(&eq->lock)
 {
-	if (in_irq())
+	if (!recovery)
 		spin_lock(&eq->lock);
 	else
 		spin_lock_irqsave(&eq->lock, *flags);
 }
 
-static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags)
+static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery,
+				     unsigned long *flags)
 	__releases(&eq->lock)
 {
-	if (in_irq())
+	if (!recovery)
 		spin_unlock(&eq->lock);
 	else
 		spin_unlock_irqrestore(&eq->lock, *flags);
@@ -222,12 +224,14 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
 	struct mlx5_core_dev *dev;
 	struct mlx5_eqe *eqe;
 	unsigned long flags;
+	bool recovery;
 	int num_eqes = 0;
 
 	dev = eq->dev;
 	eqt = dev->priv.eq_table;
 
-	mlx5_eq_async_int_lock(eq_async, &flags);
+	recovery = action == ASYNC_EQ_RECOVER;
+	mlx5_eq_async_int_lock(eq_async, recovery, &flags);
 
 	eqe = next_eqe_sw(eq);
 	if (!eqe)
@@ -249,9 +253,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
 
 out:
 	eq_update_ci(eq, 1);
-	mlx5_eq_async_int_unlock(eq_async, &flags);
+	mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
 
-	return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0;
+	return unlikely(recovery) ? num_eqes : 0;
 }
 
 void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 06/15] net: airo: Invoke airo_read_wireless_stats() directly
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

airo_get_wireless_stats() is the iw_handler_if::get_wireless_stats()
callback of this driver. This callback was not allowed to sleep until
commit a160ee69c6a46 ("wext: let get_wireless_stats() sleep") in v2.6.32.

airo still delegates the readout to a thread, which is not longer
necessary.

Invoke airo_read_wireless_stats() directly from the callback and remove
the now unused JOB_WSTATS handling.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 drivers/net/wireless/cisco/airo.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 87b9398b03fd4..ca423f3b6b3ea 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -1144,7 +1144,6 @@ static int airo_thread(void *data);
 static void timer_func(struct net_device *dev);
 static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static struct iw_statistics *airo_get_wireless_stats(struct net_device *dev);
-static void airo_read_wireless_stats(struct airo_info *local);
 #ifdef CISCO_EXT
 static int readrids(struct net_device *dev, aironet_ioctl *comp);
 static int writerids(struct net_device *dev, aironet_ioctl *comp);
@@ -1200,7 +1199,6 @@ struct airo_info {
 #define JOB_MIC	5
 #define JOB_EVENT	6
 #define JOB_AUTOWEP	7
-#define JOB_WSTATS	8
 #define JOB_SCAN_RESULTS  9
 	unsigned long jobs;
 	int (*bap_read)(struct airo_info*, __le16 *pu16Dst, int bytelen,
@@ -3155,8 +3153,6 @@ static int airo_thread(void *data)
 			airo_end_xmit11(dev);
 		else if (test_bit(JOB_STATS, &ai->jobs))
 			airo_read_stats(dev);
-		else if (test_bit(JOB_WSTATS, &ai->jobs))
-			airo_read_wireless_stats(ai);
 		else if (test_bit(JOB_PROMISC, &ai->jobs))
 			airo_set_promisc(ai);
 		else if (test_bit(JOB_MIC, &ai->jobs))
@@ -7732,15 +7728,12 @@ static void airo_read_wireless_stats(struct airo_info *local)
 	__le32 *vals = stats_rid.vals;
 
 	/* Get stats out of the card */
-	clear_bit(JOB_WSTATS, &local->jobs);
-	if (local->power.event) {
-		up(&local->sem);
+	if (local->power.event)
 		return;
-	}
+
 	readCapabilityRid(local, &cap_rid, 0);
 	readStatusRid(local, &status_rid, 0);
 	readStatsRid(local, &stats_rid, RID_STATS, 0);
-	up(&local->sem);
 
 	/* The status */
 	local->wstats.status = le16_to_cpu(status_rid.mode);
@@ -7783,15 +7776,10 @@ static struct iw_statistics *airo_get_wireless_stats(struct net_device *dev)
 {
 	struct airo_info *local =  dev->ml_priv;
 
-	if (!test_bit(JOB_WSTATS, &local->jobs)) {
-		/* Get stats out of the card if available */
-		if (down_trylock(&local->sem) != 0) {
-			set_bit(JOB_WSTATS, &local->jobs);
-			wake_up_interruptible(&local->thr_wait);
-		} else
-			airo_read_wireless_stats(local);
+	if (!down_interruptible(&local->sem)) {
+		airo_read_wireless_stats(local);
+		up(&local->sem);
 	}
-
 	return &local->wstats;
 }
 
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 03/15] net: forcedeth: Replace context and lock check with a lockdep_assert()
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

nv_update_stats() triggers a WARN_ON() when invoked from hard interrupt
context because the locks in use are not hard interrupt safe. It also has
an assert_spin_locked() which was the lock check before the lockdep era.

Lockdep has way broader locking correctness checks and covers both issues,
so replace the warning and the lock assert with lockdep_assert_held().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Rain River <rain.1986.08.12@gmail.com>
Cc: Zhu Yanjun <zyjzyj2000@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
---
 drivers/net/ethernet/nvidia/forcedeth.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 2fc10a36afa4a..7e85cf943be11 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1666,11 +1666,7 @@ static void nv_update_stats(struct net_device *dev)
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
-	/* If it happens that this is run in top-half context, then
-	 * replace the spin_lock of hwstats_lock with
-	 * spin_lock_irqsave() in calling functions. */
-	WARN_ONCE(in_irq(), "forcedeth: estats spin_lock(_bh) from top-half");
-	assert_spin_locked(&np->hwstats_lock);
+	lockdep_assert_held(&np->hwstats_lock);
 
 	/* query hardware */
 	np->estats.tx_bytes += readl(base + NvRegTxCnt);
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 05/15] net: tlan: Replace in_irq() usage
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

The driver uses in_irq() to determine if the tlan_priv::lock has to be
acquired in tlan_mii_read_reg() and tlan_mii_write_reg().

The interrupt handler acquires the lock outside of these functions so the
in_irq() check is meant to prevent a lock recursion deadlock. But this
check is incorrect when interrupt force threading is enabled because then
the handler runs in thread context and in_irq() correctly returns false.

The usage of in_*() in drivers is phased out and Linus clearly requested
that code which changes behaviour depending on context should either be
seperated or the context be conveyed in an argument passed by the caller,
which usually knows the context.

tlan_set_timer() has this conditional as well, but this function is only
invoked from task context or the timer callback itself. So it always has to
lock and the check can be removed.

tlan_mii_read_reg(), tlan_mii_write_reg() and tlan_phy_print() are invoked
from interrupt and other contexts.

Split out the actual function body into helper variants which are called
from interrupt context and make the original functions wrappers which
acquire tlan_priv::lock unconditionally.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Samuel Chessman <chessman@tux.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
---
 drivers/net/ethernet/ti/tlan.c | 98 ++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index 267c080ee084b..0b2ce4bdc2c3d 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -186,6 +186,7 @@ static void	tlan_reset_adapter(struct net_device *);
 static void	tlan_finish_reset(struct net_device *);
 static void	tlan_set_mac(struct net_device *, int areg, char *mac);
 
+static void	__tlan_phy_print(struct net_device *);
 static void	tlan_phy_print(struct net_device *);
 static void	tlan_phy_detect(struct net_device *);
 static void	tlan_phy_power_down(struct net_device *);
@@ -201,9 +202,11 @@ static void	tlan_phy_finish_auto_neg(struct net_device *);
   static int	tlan_phy_dp83840a_check(struct net_device *);
 */
 
-static bool	tlan_mii_read_reg(struct net_device *, u16, u16, u16 *);
+static bool	__tlan_mii_read_reg(struct net_device *, u16, u16, u16 *);
+static void	tlan_mii_read_reg(struct net_device *, u16, u16, u16 *);
 static void	tlan_mii_send_data(u16, u32, unsigned);
 static void	tlan_mii_sync(u16);
+static void	__tlan_mii_write_reg(struct net_device *, u16, u16, u16);
 static void	tlan_mii_write_reg(struct net_device *, u16, u16, u16);
 
 static void	tlan_ee_send_start(u16);
@@ -242,23 +245,20 @@ static u32
 	tlan_handle_rx_eoc
 };
 
-static inline void
+static void
 tlan_set_timer(struct net_device *dev, u32 ticks, u32 type)
 {
 	struct tlan_priv *priv = netdev_priv(dev);
 	unsigned long flags = 0;
 
-	if (!in_irq())
-		spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irqsave(&priv->lock, flags);
 	if (priv->timer.function != NULL &&
 	    priv->timer_type != TLAN_TIMER_ACTIVITY) {
-		if (!in_irq())
-			spin_unlock_irqrestore(&priv->lock, flags);
+		spin_unlock_irqrestore(&priv->lock, flags);
 		return;
 	}
 	priv->timer.function = tlan_timer;
-	if (!in_irq())
-		spin_unlock_irqrestore(&priv->lock, flags);
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	priv->timer_set_at = jiffies;
 	priv->timer_type = type;
@@ -1703,22 +1703,22 @@ static u32 tlan_handle_status_check(struct net_device *dev, u16 host_int)
 				 dev->name, (unsigned) net_sts);
 		}
 		if ((net_sts & TLAN_NET_STS_MIRQ) &&  (priv->phy_num == 0)) {
-			tlan_mii_read_reg(dev, phy, TLAN_TLPHY_STS, &tlphy_sts);
-			tlan_mii_read_reg(dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl);
+			__tlan_mii_read_reg(dev, phy, TLAN_TLPHY_STS, &tlphy_sts);
+			__tlan_mii_read_reg(dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl);
 			if (!(tlphy_sts & TLAN_TS_POLOK) &&
 			    !(tlphy_ctl & TLAN_TC_SWAPOL)) {
 				tlphy_ctl |= TLAN_TC_SWAPOL;
-				tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL,
-						   tlphy_ctl);
+				__tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL,
+						     tlphy_ctl);
 			} else if ((tlphy_sts & TLAN_TS_POLOK) &&
 				   (tlphy_ctl & TLAN_TC_SWAPOL)) {
 				tlphy_ctl &= ~TLAN_TC_SWAPOL;
-				tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL,
-						   tlphy_ctl);
+				__tlan_mii_write_reg(dev, phy, TLAN_TLPHY_CTL,
+						     tlphy_ctl);
 			}
 
 			if (debug)
-				tlan_phy_print(dev);
+				__tlan_phy_print(dev);
 		}
 	}
 
@@ -2379,7 +2379,7 @@ ThunderLAN driver PHY layer routines
 
 
 /*********************************************************************
- *	tlan_phy_print
+ *	__tlan_phy_print
  *
  *	Returns:
  *		Nothing
@@ -2391,11 +2391,13 @@ ThunderLAN driver PHY layer routines
  *
  ********************************************************************/
 
-static void tlan_phy_print(struct net_device *dev)
+static void __tlan_phy_print(struct net_device *dev)
 {
 	struct tlan_priv *priv = netdev_priv(dev);
 	u16 i, data0, data1, data2, data3, phy;
 
+	lockdep_assert_held(&priv->lock);
+
 	phy = priv->phy[priv->phy_num];
 
 	if (priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY) {
@@ -2404,10 +2406,10 @@ static void tlan_phy_print(struct net_device *dev)
 		netdev_info(dev, "PHY 0x%02x\n", phy);
 		pr_info("   Off.  +0     +1     +2     +3\n");
 		for (i = 0; i < 0x20; i += 4) {
-			tlan_mii_read_reg(dev, phy, i, &data0);
-			tlan_mii_read_reg(dev, phy, i + 1, &data1);
-			tlan_mii_read_reg(dev, phy, i + 2, &data2);
-			tlan_mii_read_reg(dev, phy, i + 3, &data3);
+			__tlan_mii_read_reg(dev, phy, i, &data0);
+			__tlan_mii_read_reg(dev, phy, i + 1, &data1);
+			__tlan_mii_read_reg(dev, phy, i + 2, &data2);
+			__tlan_mii_read_reg(dev, phy, i + 3, &data3);
 			pr_info("   0x%02x 0x%04hx 0x%04hx 0x%04hx 0x%04hx\n",
 				i, data0, data1, data2, data3);
 		}
@@ -2417,7 +2419,15 @@ static void tlan_phy_print(struct net_device *dev)
 
 }
 
+static void tlan_phy_print(struct net_device *dev)
+{
+	struct tlan_priv *priv = netdev_priv(dev);
+	unsigned long flags;
 
+	spin_lock_irqsave(&priv->lock, flags);
+	__tlan_phy_print(dev);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
 
 
 /*********************************************************************
@@ -2795,7 +2805,7 @@ these routines are based on the information in chap. 2 of the
 
 
 /***************************************************************
- *	tlan_mii_read_reg
+ *	__tlan_mii_read_reg
  *
  *	Returns:
  *		false	if ack received ok
@@ -2819,7 +2829,7 @@ these routines are based on the information in chap. 2 of the
  **************************************************************/
 
 static bool
-tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val)
+__tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val)
 {
 	u8	nack;
 	u16	sio, tmp;
@@ -2827,15 +2837,13 @@ tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val)
 	bool	err;
 	int	minten;
 	struct tlan_priv *priv = netdev_priv(dev);
-	unsigned long flags = 0;
+
+	lockdep_assert_held(&priv->lock);
 
 	err = false;
 	outw(TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR);
 	sio = dev->base_addr + TLAN_DIO_DATA + TLAN_NET_SIO;
 
-	if (!in_irq())
-		spin_lock_irqsave(&priv->lock, flags);
-
 	tlan_mii_sync(dev->base_addr);
 
 	minten = tlan_get_bit(TLAN_NET_SIO_MINTEN, sio);
@@ -2881,15 +2889,19 @@ tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg, u16 *val)
 
 	*val = tmp;
 
-	if (!in_irq())
-		spin_unlock_irqrestore(&priv->lock, flags);
-
 	return err;
-
 }
 
+static void tlan_mii_read_reg(struct net_device *dev, u16 phy, u16 reg,
+			      u16 *val)
+{
+	struct tlan_priv *priv = netdev_priv(dev);
+	unsigned long flags;
 
-
+	spin_lock_irqsave(&priv->lock, flags);
+	__tlan_mii_read_reg(dev, phy, reg, val);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
 
 /***************************************************************
  *	tlan_mii_send_data
@@ -2971,7 +2983,7 @@ static void tlan_mii_sync(u16 base_port)
 
 
 /***************************************************************
- *	tlan_mii_write_reg
+ *	__tlan_mii_write_reg
  *
  *	Returns:
  *		Nothing
@@ -2991,19 +3003,17 @@ static void tlan_mii_sync(u16 base_port)
  **************************************************************/
 
 static void
-tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val)
+__tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val)
 {
 	u16	sio;
 	int	minten;
-	unsigned long flags = 0;
 	struct tlan_priv *priv = netdev_priv(dev);
 
+	lockdep_assert_held(&priv->lock);
+
 	outw(TLAN_NET_SIO, dev->base_addr + TLAN_DIO_ADR);
 	sio = dev->base_addr + TLAN_DIO_DATA + TLAN_NET_SIO;
 
-	if (!in_irq())
-		spin_lock_irqsave(&priv->lock, flags);
-
 	tlan_mii_sync(dev->base_addr);
 
 	minten = tlan_get_bit(TLAN_NET_SIO_MINTEN, sio);
@@ -3024,12 +3034,18 @@ tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val)
 	if (minten)
 		tlan_set_bit(TLAN_NET_SIO_MINTEN, sio);
 
-	if (!in_irq())
-		spin_unlock_irqrestore(&priv->lock, flags);
-
 }
 
+static void
+tlan_mii_write_reg(struct net_device *dev, u16 phy, u16 reg, u16 val)
+{
+	struct tlan_priv *priv = netdev_priv(dev);
+	unsigned long flags;
 
+	spin_lock_irqsave(&priv->lock, flags);
+	__tlan_mii_write_reg(dev, phy, reg, val);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
 
 
 /*****************************************************************************
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 02/15] net: neterion: s2io: Replace in_interrupt() for context detection
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

wait_for_cmd_complete() uses in_interrupt() to detect whether it is safe to
sleep or not.

The usage of in_interrupt() in drivers is phased out and Linus clearly
requested that code which changes behaviour depending on context should
either be seperated or the context be conveyed in an argument passed by the
caller, which usually knows the context.

in_interrupt() also is only partially correct because it fails to chose the
correct code path when just preemption or interrupts are disabled.

Add an argument 'may_block' to both functions and adjust the callers to
pass the context information.

The following call chains which end up invoking wait_for_cmd_complete()
were analyzed to be safe to sleep:

 s2io_card_up()
   s2io_set_multicast()

 init_nic()
   init_tti()

 s2io_close()
   do_s2io_delete_unicast_mc()
     do_s2io_add_mac()

 s2io_set_mac_addr()
   do_s2io_prog_unicast()
     do_s2io_add_mac()

 s2io_reset()
   do_s2io_restore_unicast_mc()
     do_s2io_add_mc()
       do_s2io_add_mac()

 s2io_open()
   do_s2io_prog_unicast()
     do_s2io_add_mac()

The following call chains which end up invoking wait_for_cmd_complete()
were analyzed to be safe to sleep:

 __dev_set_rx_mode()
    s2io_set_multicast()

 s2io_txpic_intr_handle()
   s2io_link()
     init_tti()

Add a may_sleep argument to wait_for_cmd_complete(), s2io_set_multicast()
and init_tti() and hand the context information in from the call sites.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Jon Mason <jdmason@kudzu.us>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
---
 drivers/net/ethernet/neterion/s2io.c | 41 ++++++++++++++++------------
 drivers/net/ethernet/neterion/s2io.h |  4 +--
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index d13d92bf74478..8f2f091bce899 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -1106,7 +1106,7 @@ static int s2io_print_pci_mode(struct s2io_nic *nic)
  *  '-1' on failure
  */
 
-static int init_tti(struct s2io_nic *nic, int link)
+static int init_tti(struct s2io_nic *nic, int link, bool may_sleep)
 {
 	struct XENA_dev_config __iomem *bar0 = nic->bar0;
 	register u64 val64 = 0;
@@ -1166,7 +1166,7 @@ static int init_tti(struct s2io_nic *nic, int link)
 
 		if (wait_for_cmd_complete(&bar0->tti_command_mem,
 					  TTI_CMD_MEM_STROBE_NEW_CMD,
-					  S2IO_BIT_RESET) != SUCCESS)
+					  S2IO_BIT_RESET, may_sleep) != SUCCESS)
 			return FAILURE;
 	}
 
@@ -1659,7 +1659,7 @@ static int init_nic(struct s2io_nic *nic)
 	 */
 
 	/* Initialize TTI */
-	if (SUCCESS != init_tti(nic, nic->last_link_state))
+	if (SUCCESS != init_tti(nic, nic->last_link_state, true))
 		return -ENODEV;
 
 	/* RTI Initialization */
@@ -3331,7 +3331,7 @@ static void s2io_updt_xpak_counter(struct net_device *dev)
  */
 
 static int wait_for_cmd_complete(void __iomem *addr, u64 busy_bit,
-				 int bit_state)
+				 int bit_state, bool may_sleep)
 {
 	int ret = FAILURE, cnt = 0, delay = 1;
 	u64 val64;
@@ -3353,7 +3353,7 @@ static int wait_for_cmd_complete(void __iomem *addr, u64 busy_bit,
 			}
 		}
 
-		if (in_interrupt())
+		if (!may_sleep)
 			mdelay(delay);
 		else
 			msleep(delay);
@@ -4877,8 +4877,7 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev)
  *  Return value:
  *  void.
  */
-
-static void s2io_set_multicast(struct net_device *dev)
+static void s2io_set_multicast(struct net_device *dev, bool may_sleep)
 {
 	int i, j, prev_cnt;
 	struct netdev_hw_addr *ha;
@@ -4903,7 +4902,7 @@ static void s2io_set_multicast(struct net_device *dev)
 		/* Wait till command completes */
 		wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem,
 				      RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING,
-				      S2IO_BIT_RESET);
+				      S2IO_BIT_RESET, may_sleep);
 
 		sp->m_cast_flg = 1;
 		sp->all_multi_pos = config->max_mc_addr - 1;
@@ -4920,7 +4919,7 @@ static void s2io_set_multicast(struct net_device *dev)
 		/* Wait till command completes */
 		wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem,
 				      RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING,
-				      S2IO_BIT_RESET);
+				      S2IO_BIT_RESET, may_sleep);
 
 		sp->m_cast_flg = 0;
 		sp->all_multi_pos = 0;
@@ -5000,7 +4999,7 @@ static void s2io_set_multicast(struct net_device *dev)
 			/* Wait for command completes */
 			if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem,
 						  RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING,
-						  S2IO_BIT_RESET)) {
+						  S2IO_BIT_RESET, may_sleep)) {
 				DBG_PRINT(ERR_DBG,
 					  "%s: Adding Multicasts failed\n",
 					  dev->name);
@@ -5030,7 +5029,7 @@ static void s2io_set_multicast(struct net_device *dev)
 			/* Wait for command completes */
 			if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem,
 						  RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING,
-						  S2IO_BIT_RESET)) {
+						  S2IO_BIT_RESET, may_sleep)) {
 				DBG_PRINT(ERR_DBG,
 					  "%s: Adding Multicasts failed\n",
 					  dev->name);
@@ -5041,6 +5040,12 @@ static void s2io_set_multicast(struct net_device *dev)
 	}
 }
 
+/* NDO wrapper for s2io_set_multicast */
+static void s2io_ndo_set_multicast(struct net_device *dev)
+{
+	s2io_set_multicast(dev, false);
+}
+
 /* read from CAM unicast & multicast addresses and store it in
  * def_mac_addr structure
  */
@@ -5127,7 +5132,7 @@ static int do_s2io_add_mac(struct s2io_nic *sp, u64 addr, int off)
 	/* Wait till command completes */
 	if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem,
 				  RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING,
-				  S2IO_BIT_RESET)) {
+				  S2IO_BIT_RESET, true)) {
 		DBG_PRINT(INFO_DBG, "do_s2io_add_mac failed\n");
 		return FAILURE;
 	}
@@ -5171,7 +5176,7 @@ static u64 do_s2io_read_unicast_mc(struct s2io_nic *sp, int offset)
 	/* Wait till command completes */
 	if (wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem,
 				  RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING,
-				  S2IO_BIT_RESET)) {
+				  S2IO_BIT_RESET, true)) {
 		DBG_PRINT(INFO_DBG, "do_s2io_read_unicast_mc failed\n");
 		return FAILURE;
 	}
@@ -7141,7 +7146,7 @@ static int s2io_card_up(struct s2io_nic *sp)
 	}
 
 	/* Setting its receive mode */
-	s2io_set_multicast(dev);
+	s2io_set_multicast(dev, true);
 
 	if (dev->features & NETIF_F_LRO) {
 		/* Initialize max aggregatable pkts per session based on MTU */
@@ -7447,7 +7452,7 @@ static void s2io_link(struct s2io_nic *sp, int link)
 	struct swStat *swstats = &sp->mac_control.stats_info->sw_stat;
 
 	if (link != sp->last_link_state) {
-		init_tti(sp, link);
+		init_tti(sp, link, false);
 		if (link == LINK_DOWN) {
 			DBG_PRINT(ERR_DBG, "%s: Link down\n", dev->name);
 			s2io_stop_all_tx_queue(sp);
@@ -7604,7 +7609,7 @@ static int rts_ds_steer(struct s2io_nic *nic, u8 ds_codepoint, u8 ring)
 
 	return wait_for_cmd_complete(&bar0->rts_ds_mem_ctrl,
 				     RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED,
-				     S2IO_BIT_RESET);
+				     S2IO_BIT_RESET, true);
 }
 
 static const struct net_device_ops s2io_netdev_ops = {
@@ -7613,7 +7618,7 @@ static const struct net_device_ops s2io_netdev_ops = {
 	.ndo_get_stats	        = s2io_get_stats,
 	.ndo_start_xmit    	= s2io_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_rx_mode	= s2io_set_multicast,
+	.ndo_set_rx_mode	= s2io_ndo_set_multicast,
 	.ndo_do_ioctl	   	= s2io_ioctl,
 	.ndo_set_mac_address    = s2io_set_mac_addr,
 	.ndo_change_mtu	   	= s2io_change_mtu,
@@ -7929,7 +7934,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 	writeq(val64, &bar0->rmac_addr_cmd_mem);
 	wait_for_cmd_complete(&bar0->rmac_addr_cmd_mem,
 			      RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING,
-			      S2IO_BIT_RESET);
+			      S2IO_BIT_RESET, true);
 	tmp64 = readq(&bar0->rmac_addr_data0_mem);
 	mac_down = (u32)tmp64;
 	mac_up = (u32) (tmp64 >> 32);
diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h
index 6fa3159a977fd..5a6032212c19d 100644
--- a/drivers/net/ethernet/neterion/s2io.h
+++ b/drivers/net/ethernet/neterion/s2io.h
@@ -1066,7 +1066,7 @@ static void tx_intr_handler(struct fifo_info *fifo_data);
 static void s2io_handle_errors(void * dev_id);
 
 static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue);
-static void s2io_set_multicast(struct net_device *dev);
+static void s2io_set_multicast(struct net_device *dev, bool may_sleep);
 static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp);
 static void s2io_link(struct s2io_nic * sp, int link);
 static void s2io_reset(struct s2io_nic * sp);
@@ -1087,7 +1087,7 @@ static int s2io_set_swapper(struct s2io_nic * sp);
 static void s2io_card_down(struct s2io_nic *nic);
 static int s2io_card_up(struct s2io_nic *nic);
 static int wait_for_cmd_complete(void __iomem *addr, u64 busy_bit,
-					int bit_state);
+				 int bit_state, bool may_sleep);
 static int s2io_add_isr(struct s2io_nic * sp);
 static void s2io_rem_isr(struct s2io_nic * sp);
 
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 01/15] net: orinoco: Remove BUG_ON(in_interrupt/irq())
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller
In-Reply-To: <20201027225454.3492351-1-bigeasy@linutronix.de>

The usage of in_irq()/in_interrupt() in drivers is phased out and the
BUG_ON()'s based on those are not covering all contexts in which these
functions cannot be called.

Aside of that BUG_ON() should only be used as last resort, which is clearly
not the case here.

A broad variety of checks in the invoked functions (always enabled or debug
option dependent) cover these conditions already, so the BUG_ON()'s do not
really provide additional value.

Just remove them.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
---
 drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
index b849d27bd741e..046f2453ad5d9 100644
--- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
@@ -859,8 +859,6 @@ static int ezusb_access_ltv(struct ezusb_priv *upriv,
 	int retval = 0;
 	enum ezusb_state state;
 
-	BUG_ON(in_irq());
-
 	if (!upriv->udev) {
 		retval = -ENODEV;
 		goto exit;
@@ -1349,7 +1347,6 @@ static int ezusb_init(struct hermes *hw)
 	struct ezusb_priv *upriv = hw->priv;
 	int retval;
 
-	BUG_ON(in_interrupt());
 	if (!upriv)
 		return -EINVAL;
 
@@ -1448,7 +1445,6 @@ static inline void ezusb_delete(struct ezusb_priv *upriv)
 	struct list_head *tmp_item;
 	unsigned long flags;
 
-	BUG_ON(in_interrupt());
 	BUG_ON(!upriv);
 
 	mutex_lock(&upriv->mtx);
-- 
2.28.0


^ permalink raw reply related

* [PATCH net-next 00/15] in_interrupt() cleanup, part 2
From: Sebastian Andrzej Siewior @ 2020-10-27 22:54 UTC (permalink / raw)
  To: netdev
  Cc: Aymen Sghaier, Madalin Bucur, Sebastian Andrzej Siewior,
	Zhu Yanjun, Samuel Chessman, Ping-Ke Shih, Herbert Xu,
	Horia Geantă, linux-rdma, Rain River, Kalle Valo,
	Ulrich Kunitz, Jouni Malinen, Daniel Drake, Jakub Kicinski,
	Thomas Gleixner, linux-arm-kernel, Leon Romanovsky, linuxppc-dev,
	linux-wireless, Li Yang, linux-crypto, Jon Mason, Saeed Mahameed,
	David S. Miller

Folks,

in the discussion about preempt count consistency across kernel configurations:

  https://lore.kernel.org/r/20200914204209.256266093@linutronix.de/

Linus clearly requested that code in drivers and libraries which changes
behaviour based on execution context should either be split up so that
e.g. task context invocations and BH invocations have different interfaces
or if that's not possible the context information has to be provided by the
caller which knows in which context it is executing.

This includes conditional locking, allocation mode (GFP_*) decisions and
avoidance of code paths which might sleep.

In the long run, usage of 'preemptible, in_*irq etc.' should be banned from
driver code completely.

This is part two addressing remaining drivers except for orinoco-usb.

Sebastian

^ permalink raw reply

* Re: [PATCH 2/4] PM: hibernate: improve robustness of mapping pages in the direct map
From: Edgecombe, Rick P @ 2020-10-27 22:44 UTC (permalink / raw)
  To: rppt@kernel.org
  Cc: david@redhat.com, peterz@infradead.org, catalin.marinas@arm.com,
	dave.hansen@linux.intel.com, linux-mm@kvack.org, paulus@samba.org,
	pavel@ucw.cz, hpa@zytor.com, sparclinux@vger.kernel.org,
	cl@linux.com, will@kernel.org, linux-riscv@lists.infradead.org,
	linux-s390@vger.kernel.org, x86@kernel.org, rppt@linux.ibm.com,
	borntraeger@de.ibm.com, mingo@redhat.com, rientjes@google.com,
	Brown, Len, aou@eecs.berkeley.edu, gor@linux.ibm.com,
	linux-pm@vger.kernel.org, hca@linux.ibm.com, bp@alien8.de,
	luto@kernel.org, paul.walmsley@sifive.com, kirill@shutemov.name,
	tglx@linutronix.de, iamjoonsoo.kim@lge.com,
	linux-arm-kernel@lists.infradead.org, rjw@rjwysocki.net,
	linux-kernel@vger.kernel.org, penberg@kernel.org,
	palmer@dabbelt.com, akpm@linux-foundation.org,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <20201027084902.GH1154158@kernel.org>

On Tue, 2020-10-27 at 10:49 +0200, Mike Rapoport wrote:
> On Mon, Oct 26, 2020 at 06:57:32PM +0000, Edgecombe, Rick P wrote:
> > On Mon, 2020-10-26 at 11:15 +0200, Mike Rapoport wrote:
> > > On Mon, Oct 26, 2020 at 12:38:32AM +0000, Edgecombe, Rick P
> > > wrote:
> > > > On Sun, 2020-10-25 at 12:15 +0200, Mike Rapoport wrote:
> > > > > From: Mike Rapoport <rppt@linux.ibm.com>
> > > > > 
> > > > > When DEBUG_PAGEALLOC or ARCH_HAS_SET_DIRECT_MAP is enabled a
> > > > > page
> > > > > may
> > > > > be
> > > > > not present in the direct map and has to be explicitly mapped
> > > > > before
> > > > > it
> > > > > could be copied.
> > > > > 
> > > > > On arm64 it is possible that a page would be removed from the
> > > > > direct
> > > > > map
> > > > > using set_direct_map_invalid_noflush() but
> > > > > __kernel_map_pages()
> > > > > will
> > > > > refuse
> > > > > to map this page back if DEBUG_PAGEALLOC is disabled.
> > > > 
> > > > It looks to me that arm64 __kernel_map_pages() will still
> > > > attempt
> > > > to
> > > > map it if rodata_full is true, how does this happen?
> > > 
> > > Unless I misread the code, arm64 requires both rodata_full and
> > > debug_pagealloc_enabled() to be true for __kernel_map_pages() to
> > > do
> > > anything.
> > > But rodata_full condition applies to set_direct_map_*_noflush()
> > > as
> > > well,
> > > so with !rodata_full the linear map won't be ever changed.
> > 
> > Hmm, looks to me that __kernel_map_pages() will only skip it if
> > both
> > debug pagealloc and rodata_full are false.
> > 
> > But now I'm wondering if maybe we could simplify things by just
> > moving
> > the hibernate unmapped page logic off of the direct map. On x86,
> > text_poke() used to use this reserved fixmap pte thing that it
> > could
> > rely on to remap memory with. If hibernate had some separate pte
> > for
> > remapping like that, then we could not have any direct map
> > restrictions
> > caused by it/kernel_map_pages(), and it wouldn't have to worry
> > about
> > relying on anything else.
> 
> Well, there is map_kernel_range() that can be used by hibernation as
> there is no requirement for particular virtual address, but that
> would
> be quite costly if done for every page.
> 
> Maybe we can do somthing like
> 
> 	if (kernel_page_present(s_page)) {
> 		do_copy_page(dst, page_address(s_page));
> 	} else {
> 		map_kernel_range_noflush(page_address(page), PAGE_SIZE,
> 					 PROT_READ, &page);
> 		do_copy_page(dst, page_address(s_page));
> 		unmap_kernel_range_noflush(page_address(page),
> PAGE_SIZE);
> 	}
> 
> But it seems that a prerequisite for changing the way a page is
> mapped
> in safe_copy_page() would be to teach hibernation that a mapping here
> may fail.
> 
Yea that is what I meant, the direct map could still be used for mapped
pages.

But for the unmapped case it could have a pre-setup 4k pte for some non
direct map address. Then just change the pte to point to any unmapped
direct map page that was encountered. The point would be to give
hibernate some 4k pte of its own to manipulate so that it can't fail.

Yet another option would be have hibernate_map_page() just map large
pages if it finds them.

So we could teach hibernate to handle mapping failures, OR we could
change it so it doesn't rely on direct map page sizes in order to
succeed. The latter seems better to me since there isn't a reason why
it should have to fail and the resulting logic might be simpler. Both
seem like improvements in robustness though.


^ permalink raw reply

* Re: [PATCH] ibmvfc: add new fields for version 2 of several MADs
From: Tyrel Datwyler @ 2020-10-27 21:52 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: james.bottomley, brking, linuxppc-dev, linux-kernel, linux-scsi
In-Reply-To: <yq1v9ew4ekf.fsf@ca-mkp.ca.oracle.com>

On 10/26/20 6:56 PM, Martin K. Petersen wrote:
> 
> Tyrel,
> 
>> Introduce a targetWWPN field to several MADs. Its possible that a scsi
>> ID of a target can change due to some fabric changes. The WWPN of the
>> scsi target provides a better way to identify the target. Also, add
>> flags for receiving MAD versioning information and advertising client
>> support for targetWWPN with the VIOS. This latter capability flag will
>> be required for future clients capable of requesting multiple hardware
>> queues from the host adapter.
> 
> Applied to 5.11/scsi-staging, thanks!
> 

Hi Martin,

I'm going to have to ask that this patch be unstaged.

After some clarification from our VIOS folks I made the assumption that the MAD
size was staying the same and new fields just used up existing reserved padding.
Turns out they chose to keep the same amount of padding increasing the size of
those structures. So, this patch needs to be reworked.

Sorry about that,

-Tyrel

^ permalink raw reply

* Re: [PATCH kernel v2 1/2] dma: Allow mixing bypass and normal IOMMU operation
From: Christoph Hellwig @ 2020-10-27 16:48 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: iommu, linuxppc-dev, Christoph Hellwig, linux-kernel
In-Reply-To: <20201027101841.96056-2-aik@ozlabs.ru>

> +static inline bool dma_handle_direct(struct device *dev, dma_addr_t dma_handle)
> +{
> +       return dma_handle >= dev->archdata.dma_offset;
> +}

This won't compile except for powerpc, and directly accesing arch members
in common code is a bad idea.  Maybe both your helpers need to be
supplied by arch code to better abstract this out.

>  	if (dma_map_direct(dev, ops))
>  		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	else if (dev->bus_dma_limit &&
> +		 can_map_direct(dev, (phys_addr_t) page_to_phys(page) + offset + size))
> +		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
> +#endif

I don't think page_to_phys needs a phys_addr_t on the return value.
I'd also much prefer if we make this a little more beautiful, here
are a few suggestions:

 - hide the bus_dma_limit check inside can_map_direct, and provide a
   stub so that we can avoid the ifdef
 - use a better name for can_map_direct, and maybe also a better calling
   convention by passing the page (the sg code also has the page), and
   maybe even hide the dma_map_direct inside it.

	if (dma_map_direct(dev, ops) ||
	    arch_dma_map_page_direct(dev, page, offset, size))
		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);

>  	BUG_ON(!valid_dma_direction(dir));
>  	if (dma_map_direct(dev, ops))
>  		dma_direct_unmap_page(dev, addr, size, dir, attrs);
> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	else if (dev->bus_dma_limit && dma_handle_direct(dev, addr + size))
> +		dma_direct_unmap_page(dev, addr, size, dir, attrs);
> +#endif

Same here.

>  	if (dma_map_direct(dev, ops))
>  		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	else if (dev->bus_dma_limit) {
> +		struct scatterlist *s;
> +		bool direct = true;
> +		int i;
> +
> +		for_each_sg(sg, s, nents, i) {
> +			direct = can_map_direct(dev, sg_phys(s) + s->offset + s->length);
> +			if (!direct)
> +				break;
> +		}
> +		if (direct)
> +			ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
> +		else
> +			ents = ops->map_sg(dev, sg, nents, dir, attrs);
> +	}
> +#endif

This needs to go into a helper as well.  I think the same style as
above would work pretty nicely as well:

 	if (dma_map_direct(dev, ops) ||
	    arch_dma_map_sg_direct(dev, sg, nents))
 		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
 	else
 		ents = ops->map_sg(dev, sg, nents, dir, attrs);

> +#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
> +	if (dev->bus_dma_limit) {
> +		struct scatterlist *s;
> +		bool direct = true;
> +		int i;
> +
> +		for_each_sg(sg, s, nents, i) {
> +			direct = dma_handle_direct(dev, s->dma_address + s->length);
> +			if (!direct)
> +				break;
> +		}
> +		if (direct) {
> +			dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
> +			return;
> +		}
> +	}
> +#endif

One more time here..

^ permalink raw reply

* Re: [RFC PATCH kernel 1/2] irq: Add reference counting to IRQ mappings
From: Marc Zyngier @ 2020-10-27 16:09 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: Rob Herring, linux-kernel, Qian Cai, Cédric Le Goater,
	Frederic Barrat, Oliver O'Halloran, Thomas Gleixner,
	Michal Suchánek, linuxppc-dev
In-Reply-To: <20201027090655.14118-2-aik@ozlabs.ru>

Hi Alexey,

On 2020-10-27 09:06, Alexey Kardashevskiy wrote:
> PCI devices share 4 legacy INTx interrupts from the same PCI host 
> bridge.
> Device drivers map/unmap hardware interrupts via irq_create_mapping()/
> irq_dispose_mapping(). The problem with that these interrupts are
> shared and when performing hot unplug, we need to unmap the interrupt
> only when the last device is released.
> 
> This reuses already existing irq_desc::kobj for this purpose.
> The refcounter is naturally 1 when the descriptor is allocated already;
> this adds kobject_get() in places where already existing mapped virq
> is returned.

That's quite interesting, as I was about to revive a patch series that
rework the irqdomain subsystem to directly cache irq_desc instead of
raw interrupt numbers. And for that, I needed some form of 
refcounting...

> 
> This reorganizes irq_dispose_mapping() to release the kobj and let
> the release callback do the cleanup.
> 
> If some driver or platform does its own reference counting, this 
> expects
> those parties to call irq_find_mapping() and call irq_dispose_mapping()
> for every irq_create_fwspec_mapping()/irq_create_mapping().
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  kernel/irq/irqdesc.c   | 35 +++++++++++++++++++++++------------
>  kernel/irq/irqdomain.c | 27 +++++++++++++--------------
>  2 files changed, 36 insertions(+), 26 deletions(-)
> 
> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
> index 1a7723604399..dae096238500 100644
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -419,20 +419,39 @@ static struct irq_desc *alloc_desc(int irq, int
> node, unsigned int flags,
>  	return NULL;
>  }
> 
> +static void delayed_free_desc(struct rcu_head *rhp);
>  static void irq_kobj_release(struct kobject *kobj)
>  {
>  	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
> +	struct irq_domain *domain;
> +	unsigned int virq = desc->irq_data.irq;
> 
> -	free_masks(desc);
> -	free_percpu(desc->kstat_irqs);
> -	kfree(desc);
> +	domain = desc->irq_data.domain;
> +	if (domain) {
> +		if (irq_domain_is_hierarchy(domain)) {
> +			irq_domain_free_irqs(virq, 1);

How does this work with hierarchical domains? Each domain should
contribute as a reference on the irq_desc. But if you got here,
it means the refcount has already dropped to 0.

So either there is nothing to free here, or you don't track the
references implied by the hierarchy. I suspect the latter.

> +		} else {
> +			irq_domain_disassociate(domain, virq);
> +			irq_free_desc(virq);
> +		}
> +	}
> +
> +	/*
> +	 * We free the descriptor, masks and stat fields via RCU. That
> +	 * allows demultiplex interrupts to do rcu based management of
> +	 * the child interrupts.
> +	 * This also allows us to use rcu in kstat_irqs_usr().
> +	 */
> +	call_rcu(&desc->rcu, delayed_free_desc);
>  }
> 
>  static void delayed_free_desc(struct rcu_head *rhp)
>  {
>  	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
> 
> -	kobject_put(&desc->kobj);
> +	free_masks(desc);
> +	free_percpu(desc->kstat_irqs);
> +	kfree(desc);
>  }
> 
>  static void free_desc(unsigned int irq)
> @@ -453,14 +472,6 @@ static void free_desc(unsigned int irq)
>  	 */
>  	irq_sysfs_del(desc);
>  	delete_irq_desc(irq);
> -
> -	/*
> -	 * We free the descriptor, masks and stat fields via RCU. That
> -	 * allows demultiplex interrupts to do rcu based management of
> -	 * the child interrupts.
> -	 * This also allows us to use rcu in kstat_irqs_usr().
> -	 */
> -	call_rcu(&desc->rcu, delayed_free_desc);
>  }
> 
>  static int alloc_descs(unsigned int start, unsigned int cnt, int node,
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index cf8b374b892d..02733ddc321f 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -638,6 +638,7 @@ unsigned int irq_create_mapping(struct irq_domain 
> *domain,
>  {
>  	struct device_node *of_node;
>  	int virq;
> +	struct irq_desc *desc;
> 
>  	pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
> 
> @@ -655,7 +656,9 @@ unsigned int irq_create_mapping(struct irq_domain 
> *domain,
>  	/* Check if mapping already exists */
>  	virq = irq_find_mapping(domain, hwirq);
>  	if (virq) {
> +		desc = irq_to_desc(virq);
>  		pr_debug("-> existing mapping on virq %d\n", virq);
> +		kobject_get(&desc->kobj);

My worry with this is that there is probably a significant amount of
code out there that relies on multiple calls to irq_create_mapping()
with the same parameters not to have any side effects. They would
expect a subsequent irq_dispose_mapping() to drop the translation
altogether, and that's obviously not the case here.

Have you audited the various call sites to see what could break?

>  		return virq;
>  	}
> 
> @@ -751,6 +754,7 @@ unsigned int irq_create_fwspec_mapping(struct
> irq_fwspec *fwspec)
>  	irq_hw_number_t hwirq;
>  	unsigned int type = IRQ_TYPE_NONE;
>  	int virq;
> +	struct irq_desc *desc;
> 
>  	if (fwspec->fwnode) {
>  		domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED);
> @@ -787,8 +791,11 @@ unsigned int irq_create_fwspec_mapping(struct
> irq_fwspec *fwspec)
>  		 * current trigger type then we are done so return the
>  		 * interrupt number.
>  		 */
> -		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
> +		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) {
> +			desc = irq_to_desc(virq);
> +			kobject_get(&desc->kobj);
>  			return virq;
> +		}
> 
>  		/*
>  		 * If the trigger type has not been set yet, then set
> @@ -800,6 +807,8 @@ unsigned int irq_create_fwspec_mapping(struct
> irq_fwspec *fwspec)
>  				return 0;
> 
>  			irqd_set_trigger_type(irq_data, type);
> +			desc = irq_to_desc(virq);
> +			kobject_get(&desc->kobj);
>  			return virq;
>  		}
> 
> @@ -852,22 +861,12 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping);
>   */
>  void irq_dispose_mapping(unsigned int virq)
>  {
> -	struct irq_data *irq_data = irq_get_irq_data(virq);
> -	struct irq_domain *domain;
> +	struct irq_desc *desc = irq_to_desc(virq);
> 
> -	if (!virq || !irq_data)
> +	if (!virq || !desc)
>  		return;
> 
> -	domain = irq_data->domain;
> -	if (WARN_ON(domain == NULL))
> -		return;
> -
> -	if (irq_domain_is_hierarchy(domain)) {
> -		irq_domain_free_irqs(virq, 1);
> -	} else {
> -		irq_domain_disassociate(domain, virq);
> -		irq_free_desc(virq);
> -	}
> +	kobject_put(&desc->kobj);
>  }
>  EXPORT_SYMBOL_GPL(irq_dispose_mapping);

Thanks,

         M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply

* Re: [PATCH 2/2] ASoC: fsl_aud2htx: Add aud2htx module driver
From: Shengjiu Wang @ 2020-10-27 13:17 UTC (permalink / raw)
  To: Mark Brown
  Cc: open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS,
	alsa-devel, Timur Tabi, Xiubo Li, Fabio Estevam, Shengjiu Wang,
	Liam Girdwood, Takashi Iwai, Nicolin Chen, Rob Herring,
	linuxppc-dev, linux-kernel
In-Reply-To: <20201026133003.GD7402@sirena.org.uk>

On Mon, Oct 26, 2020 at 9:31 PM Mark Brown <broonie@kernel.org> wrote:
>
> On Mon, Oct 26, 2020 at 06:40:55PM +0800, Shengjiu Wang wrote:
>
> > +static int fsl_aud2htx_hw_params(struct snd_pcm_substream *substream,
> > +                              struct snd_pcm_hw_params *params,
> > +                              struct snd_soc_dai *cpu_dai)
> > +{
> > +     struct fsl_aud2htx *aud2htx = snd_soc_dai_get_drvdata(cpu_dai);
> > +
> > +     /* DMA request when number of entries < WTMK_LOW */
> > +     regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
> > +                        AUD2HTX_CTRE_DT_MASK, 0);
> > +
> > +     /* Disable interrupts*/
> > +     regmap_update_bits(aud2htx->regmap, AUD2HTX_IRQ_MASK,
> > +                        AUD2HTX_WM_HIGH_IRQ_MASK |
> > +                        AUD2HTX_WM_LOW_IRQ_MASK |
> > +                        AUD2HTX_OVF_MASK,
> > +                        AUD2HTX_WM_HIGH_IRQ_MASK |
> > +                        AUD2HTX_WM_LOW_IRQ_MASK |
> > +                        AUD2HTX_OVF_MASK);
> > +
> > +     /* Configur watermark */
> > +     regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
> > +                        AUD2HTX_CTRE_WL_MASK,
> > +                        AUD2HTX_WTMK_LOW << AUD2HTX_CTRE_WL_SHIFT);
> > +     regmap_update_bits(aud2htx->regmap, AUD2HTX_CTRL_EXT,
> > +                        AUD2HTX_CTRE_WH_MASK,
> > +                        AUD2HTX_WTMK_HIGH << AUD2HTX_CTRE_WH_SHIFT);
> > +     return 0;
> > +}
>
> This doesn't look like a hw_params operation - it doesn't appear to
> reference the params at all, or even containt any conditional
> statements.  Shouldn't this be configured just once at driver load?

Ok, I will update it.

best regards
wang shengjiu

^ permalink raw reply

* Re: [PATCH 1/2] ASoC: dt-bindings: fsl_aud2htx: Add binding doc for aud2htx module
From: Krzysztof Kozlowski @ 2020-10-27 11:08 UTC (permalink / raw)
  To: Shengjiu Wang
  Cc: devicetree, alsa-devel, timur, Xiubo.Lee, lgirdwood, linuxppc-dev,
	tiwai, robh+dt, perex, nicoleotsuka, broonie, festevam,
	linux-kernel
In-Reply-To: <1603708855-2663-1-git-send-email-shengjiu.wang@nxp.com>

On Mon, Oct 26, 2020 at 06:40:54PM +0800, Shengjiu Wang wrote:
> AUD2HTX (Audio Subsystem TO HDMI TX Subsystem) is a new
> IP module found on i.MX8MP.
> 
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> ---
>  .../bindings/sound/fsl,aud2htx.yaml           | 67 +++++++++++++++++++
>  1 file changed, 67 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml
> 
> diff --git a/Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml b/Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml
> new file mode 100644
> index 000000000000..18548d0889a8
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/sound/fsl,aud2htx.yaml
> @@ -0,0 +1,67 @@
> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/sound/fsl,aud2htx.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: NXP Audio Subsystem to HDMI RTX Subsystem Controller
> +
> +maintainers:
> +  - Shengjiu Wang <shengjiu.wang@nxp.com>
> +
> +properties:
> +  $nodename:
> +    pattern: "^aud2htx@.*"

aud2htx is not a generic class of a device so it should not be enforced.

> +
> +  compatible:
> +    const: fsl,imx8mp-aud2htx
> +
> +  reg:
> +    maxItems: 1
> +
> +  interrupts:
> +    maxItems: 1
> +
> +  clocks:
> +    items:
> +      - description: Peripheral clock
> +
> +  clock-names:
> +    items:
> +      - const: bus
> +
> +  dmas:
> +    items:
> +      - description: DMA controller phandle and request line for TX
> +
> +  dma-names:
> +    items:
> +      - const: tx
> +
> +  power-domains:
> +    maxItems: 1
> +
> +required:
> +  - compatible
> +  - reg
> +  - interrupts
> +  - clocks
> +  - clock-names
> +  - dmas
> +  - dma-names
> +
> +examples:
> +  - |
> +    #include <dt-bindings/interrupt-controller/arm-gic.h>
> +    #include <dt-bindings/clock/imx8mp-clock.h>
> +
> +    aud2htx: aud2htx@30cb0000 {
> +             compatible = "fsl,imx8mp-aud2htx";

Wrong indentation. Most of examples are indented with 4 spaces.

Best regards,
Krzysztof

^ permalink raw reply

* Re: [PATCH 02/10] fs: don't allow splice read/write without explicit ops
From: David Howells @ 2020-10-27 10:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-arch, linuxppc-dev, Kees Cook, x86, linux-kernel, dhowells,
	Al Viro, linux-fsdevel, Linus Torvalds
In-Reply-To: <20201027095455.GA30298@lst.de>

Christoph Hellwig <hch@lst.de> wrote:

> > That said, for afs at least, the fix seems to be just this:
> 
> And that is the correct fix, I was about to send it to you.

Thanks.

David


^ permalink raw reply

* Re: [PATCH 0/4] arch, mm: improve robustness of direct map manipulation
From: David Hildenbrand @ 2020-10-27 10:34 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: peterz@infradead.org, catalin.marinas@arm.com,
	dave.hansen@linux.intel.com, linux-mm@kvack.org, paulus@samba.org,
	pavel@ucw.cz, hpa@zytor.com, sparclinux@vger.kernel.org,
	cl@linux.com, will@kernel.org, linux-riscv@lists.infradead.org,
	linux-s390@vger.kernel.org, x86@kernel.org, rppt@linux.ibm.com,
	borntraeger@de.ibm.com, mingo@redhat.com, rientjes@google.com,
	Brown, Len, aou@eecs.berkeley.edu, gor@linux.ibm.com,
	linux-pm@vger.kernel.org, hca@linux.ibm.com, bp@alien8.de,
	luto@kernel.org, paul.walmsley@sifive.com, kirill@shutemov.name,
	tglx@linutronix.de, iamjoonsoo.kim@lge.com,
	linux-arm-kernel@lists.infradead.org, rjw@rjwysocki.net,
	linux-kernel@vger.kernel.org, penberg@kernel.org,
	palmer@dabbelt.com, akpm@linux-foundation.org, Edgecombe, Rick P,
	linuxppc-dev@lists.ozlabs.org, davem@davemloft.net
In-Reply-To: <20201027094714.GI1154158@kernel.org>

On 27.10.20 10:47, Mike Rapoport wrote:
> On Tue, Oct 27, 2020 at 09:46:35AM +0100, David Hildenbrand wrote:
>> On 27.10.20 09:38, Mike Rapoport wrote:
>>> On Mon, Oct 26, 2020 at 06:05:30PM +0000, Edgecombe, Rick P wrote:
>>>> On Mon, 2020-10-26 at 11:05 +0200, Mike Rapoport wrote:
>>>>> On Mon, Oct 26, 2020 at 01:13:52AM +0000, Edgecombe, Rick P wrote:
>>>>>> On Sun, 2020-10-25 at 12:15 +0200, Mike Rapoport wrote:
>>>>>>> Indeed, for architectures that define
>>>>>>> CONFIG_ARCH_HAS_SET_DIRECT_MAP
>>>>>>> it is
>>>>>>> possible that __kernel_map_pages() would fail, but since this
>>>>>>> function is
>>>>>>> void, the failure will go unnoticed.
>>>>>>
>>>>>> Could you elaborate on how this could happen? Do you mean during
>>>>>> runtime today or if something new was introduced?
>>>>>
>>>>> A failure in__kernel_map_pages() may happen today. For instance, on
>>>>> x86
>>>>> if the kernel is built with DEBUG_PAGEALLOC.
>>>>>
>>>>>           __kernel_map_pages(page, 1, 0);
>>>>>
>>>>> will need to split, say, 2M page and during the split an allocation
>>>>> of
>>>>> page table could fail.
>>>>
>>>> On x86 at least, DEBUG_PAGEALLOC expects to never have to break a page
>>>> on the direct map and even disables locking in cpa because it assumes
>>>> this. If this is happening somehow anyway then we should probably fix
>>>> that. Even if it's a debug feature, it will not be as useful if it is
>>>> causing its own crashes.
>>>>
>>>> I'm still wondering if there is something I'm missing here. It seems
>>>> like you are saying there is a bug in some arch's, so let's add a WARN
>>>> in cross-arch code to log it as it crashes. A warn and making things
>>>> clearer seem like good ideas, but if there is a bug we should fix it.
>>>> The code around the callers still functionally assume re-mapping can't
>>>> fail.
>>>
>>> Oh, I've meant x86 kernel *without* DEBUG_PAGEALLOC, and indeed the call
>>> that unmaps pages back in safe_copy_page will just reset a 4K page to
>>> NP because whatever made it NP at the first place already did the split.
>>>
>>> Still, on arm64 with DEBUG_PAGEALLOC=n there is a possibility of a race
>>> between map/unmap dance in __vunmap() and safe_copy_page() that may
>>> cause access to unmapped memory:
>>>
>>> __vunmap()
>>>       vm_remove_mappings()
>>>           set_direct_map_invalid()
>>> 					safe_copy_page()	
>>> 					    __kernel_map_pages()
>>> 					    	return
>>> 					    do_copy_page() -> fault
>>> 					   	
>>> This is a theoretical bug, but it is still not nice :) 							
>>>
>>>>> Currently, the only user of __kernel_map_pages() outside
>>>>> DEBUG_PAGEALLOC
>>>>> is hibernation, but I think it would be safer to entirely prevent
>>>>> usage
>>>>> of __kernel_map_pages() when DEBUG_PAGEALLOC=n.
>>>>
>>>> I totally agree it's error prone FWIW. On x86, my mental model of how
>>>> it is supposed to work is: If a page is 4k and NP it cannot fail to be
>>>> remapped. set_direct_map_invalid_noflush() should result in 4k NP
>>>> pages, and DEBUG_PAGEALLOC should result in all 4k pages on the direct
>>>> map. Are you seeing this violated or do I have wrong assumptions?
>>>
>>> You are right, there is a set of assumptions about the remapping of the
>>> direct map pages that make it all work, at least on x86.
>>> But this is very subtle and it's not easy to wrap one's head around
>>> this.
>>>
>>> That's why putting __kernel_map_pages() out of "common" use and
>>> keep it only for DEBUG_PAGEALLOC would make things clearer.
>>>
>>>> Beyond whatever you are seeing, for the latter case of new things
>>>> getting introduced to an interface with hidden dependencies... Another
>>>> edge case could be a new caller to set_memory_np() could result in
>>>> large NP pages. None of the callers today should cause this AFAICT, but
>>>> it's not great to rely on the callers to know these details.
>>> A caller of set_memory_*() or set_direct_map_*() should expect a failure
>>> and be ready for that. So adding a WARN to safe_copy_page() is the first
>>> step in that direction :)
>>>
>>
>> I am probably missing something important, but why are we saving/restoring
>> the content of pages that were explicitly removed from the identity mapping
>> such that nobody will access them?
>>
>> Pages that are not allocated should contain garbage or be zero
>> (init_on_free). That should be easy to handle without ever reading the page
>> content.
> 
> I'm not familiar with hibernation to say anything smart here, but the
> help text of DEBUG_PAGEALLOC in Kconfig says:
> 
> 	... this option cannot be enabled in combination with
> 	hibernation as that would result in incorrect warnings of memory
> 	corruption after a resume because free pages are not saved to
> 	the suspend image.
> 
> Probably you are right and free pages need to be handled differently,
> but it does not seem the case now.
> 
>> The other user seems to be vm_remove_mappings(), where we only *temporarily*
>> remove the mapping - while hibernating, that code shouldn't be active
>> anymore I guess - or we could protect it from happening.
> 
> Hmm, I _think_ vm_remove_mappings() shouldn't be active while
> hibernating, but I'm not 100% sure.
> 
>> As I expressed in another mail, secretmem pages should rather not be saved
>> when hibernating - hibernation should be rather be disabled.
> 
> Agree.
> 
>> What am I missing?
> 
> I think I miscommunicated the purpose of this set, which was to hide
> __kernel_map_pages() under DEBUG_PAGEALLOC and make hibernation use
> set_direct_map_*() explictly without major rework of free pages handling
> during hibernation.
> 
> Does it help?
> 

Heh, as always, once you touch questionable code, people will beg for 
proper cleanups instead :)


-- 
Thanks,

David / dhildenb


^ permalink raw reply

* Re: [PATCH kernel 0/2] powerpc/dma: Fallback to dma_ops when persistent memory present
From: Christoph Hellwig @ 2020-10-27  8:36 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev, Christoph Hellwig, linux-kernel
In-Reply-To: <20201021032026.45030-1-aik@ozlabs.ru>

On Wed, Oct 21, 2020 at 02:20:24PM +1100, Alexey Kardashevskiy wrote:
> This allows mixing direct DMA (to/from RAM) and
> IOMMU (to/from apersistent memory) on the PPC64/pseries
> platform. This was supposed to be a single patch but
> unexpected move of direct DMA functions happened.
> 
> This is based on sha1
> 7cf726a59435 Linus Torvalds "Merge tag 'linux-kselftest-kunit-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest".
> 
> Please comment. Thanks.

I really don't like your revert.  I'm almost ready to kill of
dma-direct.h, and I really want it private in kernel/dma/, as people
keep adding abuses to drivers.

We have two options here:

 (1) duplicate the code in arch/powerpc/
 (2) add a hook to kernel/dma/

I've not been a fan of (2) in the past, but now that the code is out
of line, and we could make it dependent on a config option only set by
powerpc, I see it as the lesser evil now.

^ permalink raw reply

* [PATCH kernel v2 1/2] dma: Allow mixing bypass and normal IOMMU operation
From: Alexey Kardashevskiy @ 2020-10-27 10:18 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alexey Kardashevskiy, iommu, Christoph Hellwig, linux-kernel
In-Reply-To: <20201027101841.96056-1-aik@ozlabs.ru>

At the moment we allow bypassing DMA ops only when we can do this for
the entire RAM. However there are configs with mixed type memory
where we could still allow bypassing IOMMU in most cases;
POWERPC with persistent memory is one example.

This adds another check for the bus limit to determine where bypass
can still work and we invoke direct DMA API; when DMA handle is outside
that limit, we fall back to DMA ops.

This adds a CONFIG_DMA_OPS_BYPASS_BUS_LIMIT config option which is off
by default and will be enable for PPC_PSERIES in the following patch.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 kernel/dma/mapping.c | 61 ++++++++++++++++++++++++++++++++++++++++++--
 kernel/dma/Kconfig   |  4 +++
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 51bb8fa8eb89..0f4f998e6c72 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -137,6 +137,18 @@ static inline bool dma_map_direct(struct device *dev,
 	return dma_go_direct(dev, *dev->dma_mask, ops);
 }
 
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+static inline bool can_map_direct(struct device *dev, phys_addr_t addr)
+{
+	return dev->bus_dma_limit >= phys_to_dma(dev, addr);
+}
+
+static inline bool dma_handle_direct(struct device *dev, dma_addr_t dma_handle)
+{
+	return dma_handle >= dev->archdata.dma_offset;
+}
+#endif
+
 dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
 		size_t offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)
@@ -151,6 +163,11 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
 
 	if (dma_map_direct(dev, ops))
 		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	else if (dev->bus_dma_limit &&
+		 can_map_direct(dev, (phys_addr_t) page_to_phys(page) + offset + size))
+		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+#endif
 	else
 		addr = ops->map_page(dev, page, offset, size, dir, attrs);
 	debug_dma_map_page(dev, page, offset, size, dir, addr);
@@ -167,6 +184,10 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
 	BUG_ON(!valid_dma_direction(dir));
 	if (dma_map_direct(dev, ops))
 		dma_direct_unmap_page(dev, addr, size, dir, attrs);
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	else if (dev->bus_dma_limit && dma_handle_direct(dev, addr + size))
+		dma_direct_unmap_page(dev, addr, size, dir, attrs);
+#endif
 	else if (ops->unmap_page)
 		ops->unmap_page(dev, addr, size, dir, attrs);
 	debug_dma_unmap_page(dev, addr, size, dir);
@@ -190,6 +211,23 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
 
 	if (dma_map_direct(dev, ops))
 		ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	else if (dev->bus_dma_limit) {
+		struct scatterlist *s;
+		bool direct = true;
+		int i;
+
+		for_each_sg(sg, s, nents, i) {
+			direct = can_map_direct(dev, sg_phys(s) + s->offset + s->length);
+			if (!direct)
+				break;
+		}
+		if (direct)
+			ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
+		else
+			ents = ops->map_sg(dev, sg, nents, dir, attrs);
+	}
+#endif
 	else
 		ents = ops->map_sg(dev, sg, nents, dir, attrs);
 	BUG_ON(ents < 0);
@@ -207,9 +245,28 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
 
 	BUG_ON(!valid_dma_direction(dir));
 	debug_dma_unmap_sg(dev, sg, nents, dir);
-	if (dma_map_direct(dev, ops))
+	if (dma_map_direct(dev, ops)) {
 		dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
-	else if (ops->unmap_sg)
+		return;
+	}
+#ifdef CONFIG_DMA_OPS_BYPASS_BUS_LIMIT
+	if (dev->bus_dma_limit) {
+		struct scatterlist *s;
+		bool direct = true;
+		int i;
+
+		for_each_sg(sg, s, nents, i) {
+			direct = dma_handle_direct(dev, s->dma_address + s->length);
+			if (!direct)
+				break;
+		}
+		if (direct) {
+			dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
+			return;
+		}
+	}
+#endif
+	if (ops->unmap_sg)
 		ops->unmap_sg(dev, sg, nents, dir, attrs);
 }
 EXPORT_SYMBOL(dma_unmap_sg_attrs);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index c99de4a21458..02fa174fbdec 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -20,6 +20,10 @@ config DMA_OPS
 config DMA_OPS_BYPASS
 	bool
 
+# IOMMU driver limited by a DMA window size may switch between bypass and window
+config DMA_OPS_BYPASS_BUS_LIMIT
+	bool
+
 config NEED_SG_DMA_LENGTH
 	bool
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v2 0/2] DMA, powerpc/dma: Fallback to dma_ops when persistent memory present
From: Alexey Kardashevskiy @ 2020-10-27 10:18 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alexey Kardashevskiy, iommu, Christoph Hellwig, linux-kernel

This allows mixing direct DMA (to/from RAM) and
IOMMU (to/from apersistent memory) on the PPC64/pseries
platform.

This replaces this: https://lkml.org/lkml/2020/10/20/1085
A lesser evil this is :)

This is based on sha1
4525c8781ec0 Linus Torvalds "scsi: qla2xxx: remove incorrect sparse #ifdef".

Please comment. Thanks.



Alexey Kardashevskiy (2):
  dma: Allow mixing bypass and normal IOMMU operation
  powerpc/dma: Fallback to dma_ops when persistent memory present

 arch/powerpc/kernel/dma-iommu.c        | 12 ++++-
 arch/powerpc/platforms/pseries/iommu.c | 44 ++++++++++++++-----
 kernel/dma/mapping.c                   | 61 +++++++++++++++++++++++++-
 arch/powerpc/Kconfig                   |  1 +
 kernel/dma/Kconfig                     |  4 ++
 5 files changed, 108 insertions(+), 14 deletions(-)

-- 
2.17.1


^ permalink raw reply

* [PATCH kernel v2 2/2] powerpc/dma: Fallback to dma_ops when persistent memory present
From: Alexey Kardashevskiy @ 2020-10-27 10:18 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alexey Kardashevskiy, iommu, Christoph Hellwig, linux-kernel
In-Reply-To: <20201027101841.96056-1-aik@ozlabs.ru>

So far we have been using huge DMA windows to map all the RAM available.
The RAM is normally mapped to the VM address space contiguously, and
there is always a reasonable upper limit for possible future hot plugged
RAM which makes it easy to map all RAM via IOMMU.

Now there is persistent memory ("ibm,pmemory" in the FDT) which (unlike
normal RAM) can map anywhere in the VM space beyond the maximum RAM size
and since it can be used for DMA, it requires extending the huge window
up to MAX_PHYSMEM_BITS which requires hypervisor support for:
1. huge TCE tables;
2. multilevel TCE tables;
3. huge IOMMU pages.

Certain hypervisors cannot do either so the only option left is
restricting the huge DMA window to include only RAM and fallback to
the default DMA window for persistent memory.

This checks if the system has persistent memory. If it does not,
the DMA bypass mode is selected, i.e.
* dev->bus_dma_limit = 0
* dev->dma_ops_bypass = true <- this avoid calling dma_ops for mapping.

If there is such memory, this creates identity mapping only for RAM and
sets the dev->bus_dma_limit to let the generic code decide whether to
call into the direct DMA or the indirect DMA ops.

This should not change the existing behaviour when no persistent memory
as dev->dma_ops_bypass is expected to be set.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kernel/dma-iommu.c        | 12 +++++--
 arch/powerpc/platforms/pseries/iommu.c | 44 ++++++++++++++++++++------
 arch/powerpc/Kconfig                   |  1 +
 3 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index a1c744194018..d123b7205f76 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -90,8 +90,16 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
 	struct iommu_table *tbl = get_iommu_table_base(dev);
 
 	if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
-		dev->dma_ops_bypass = true;
-		dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+		/*
+		 * dma_iommu_bypass_supported() sets dma_max when there is
+		 * 1:1 mapping but it is somehow limited.
+		 * ibm,pmemory is one example.
+		 */
+		dev->dma_ops_bypass = dev->bus_dma_limit == 0;
+		if (!dev->dma_ops_bypass)
+			dev_warn(dev, "iommu: 64-bit OK but using default ops\n");
+		else
+			dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
 		return 1;
 	}
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index e4198700ed1a..91112e748491 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -839,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
 			np, ret);
 }
 
-static u64 find_existing_ddw(struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
 {
 	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
@@ -851,6 +851,7 @@ static u64 find_existing_ddw(struct device_node *pdn)
 		if (window->device == pdn) {
 			direct64 = window->prop;
 			dma_addr = be64_to_cpu(direct64->dma_base);
+			*window_shift = be32_to_cpu(direct64->window_shift);
 			break;
 		}
 	}
@@ -1111,11 +1112,13 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
  */
 static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 {
-	int len, ret;
+	int len = 0, ret;
+	bool pmem_present = of_find_node_by_type(NULL, "ibm,pmemory") != NULL;
+	int max_ram_len = order_base_2(ddw_memory_hotplug_max());
 	struct ddw_query_response query;
 	struct ddw_create_response create;
 	int page_shift;
-	u64 dma_addr, max_addr;
+	u64 dma_addr;
 	struct device_node *dn;
 	u32 ddw_avail[DDW_APPLICABLE_SIZE];
 	struct direct_window *window;
@@ -1126,7 +1129,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 
 	mutex_lock(&direct_window_init_mutex);
 
-	dma_addr = find_existing_ddw(pdn);
+	dma_addr = find_existing_ddw(pdn, &len);
 	if (dma_addr != 0)
 		goto out_unlock;
 
@@ -1212,14 +1215,26 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	}
 	/* verify the window * number of ptes will map the partition */
 	/* check largest block * page size > max memory hotplug addr */
-	max_addr = ddw_memory_hotplug_max();
-	if (query.largest_available_block < (max_addr >> page_shift)) {
-		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
-			  "%llu-sized pages\n", max_addr,  query.largest_available_block,
-			  1ULL << page_shift);
+	/*
+	 * The "ibm,pmemory" can appear anywhere in the address space.
+	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
+	 * for the upper limit and fallback to max RAM otherwise but this
+	 * disables device::dma_ops_bypass.
+	 */
+	len = max_ram_len;
+	if (pmem_present) {
+		if (query.largest_available_block >=
+		    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
+			len = MAX_PHYSMEM_BITS - page_shift;
+		else
+			dev_info(&dev->dev, "Skipping ibm,pmemory");
+	}
+
+	if (query.largest_available_block < (1ULL << (len - page_shift))) {
+		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu %llu-sized pages\n",
+			1ULL << len, query.largest_available_block, 1ULL << page_shift);
 		goto out_failed;
 	}
-	len = order_base_2(max_addr);
 	win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
 	if (!win64) {
 		dev_info(&dev->dev,
@@ -1299,6 +1314,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 
 out_unlock:
 	mutex_unlock(&direct_window_init_mutex);
+
+	/*
+	 * If we have persistent memory and the window size is only as big
+	 * as RAM, then we failed to create a window to cover persistent
+	 * memory and need to set the DMA limit.
+	 */
+	if (pmem_present && dma_addr && (len == max_ram_len))
+		dev->dev.bus_dma_limit = dma_addr + (1ULL << len);
+
 	return dma_addr;
 }
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..5a8881bf140e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -159,6 +159,7 @@ config PPC
 	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
 	select DMA_OPS				if PPC64
 	select DMA_OPS_BYPASS			if PPC64
+	select DMA_OPS_BYPASS_BUS_LIMIT		if PPC64 && PPC_PSERIES
 	select DYNAMIC_FTRACE			if FUNCTION_TRACER
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
-- 
2.17.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox