LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 03/18] ibmvfc: add Subordinate CRQ definitions
From: Tyrel Datwyler @ 2020-12-03  2:07 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Subordinate Command Response Queues (Sub CRQ) are used in conjunction
with the primary CRQ when more than one queue is needed by the virtual
IO adapter. Recent phyp firmware versions support Sub CRQ's with ibmvfc
adapters. This feature is a prerequisite for supporting multiple
hardware backed submission queues in the vfc adapter.

The Sub CRQ command element differs from the standard CRQ in that it is
32bytes long as opposed to 16bytes for the latter. Despite this extra
16bytes the ibmvfc protocol will use the original CRQ command element
mapped to the first 16bytes of the Sub CRQ element initially.

Add definitions for the Sub CRQ command element and queue.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index e095daada70e..b3cd35cbf067 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -656,6 +656,29 @@ struct ibmvfc_crq_queue {
 	dma_addr_t msg_token;
 };

+struct ibmvfc_sub_crq {
+	struct ibmvfc_crq crq;
+	__be64 reserved[2];
+} __packed __aligned(8);
+
+struct ibmvfc_sub_queue {
+	struct ibmvfc_sub_crq *msgs;
+	dma_addr_t msg_token;
+	int size, cur;
+	struct ibmvfc_host *vhost;
+	unsigned long cookie;
+	unsigned long vios_cookie;
+	unsigned long hw_irq;
+	unsigned long irq;
+	unsigned long hwq_id;
+	char name[32];
+};
+
+struct ibmvfc_scsi_channels {
+	struct ibmvfc_sub_queue *scrqs;
+	unsigned int active_queues;
+};
+
 enum ibmvfc_ae_link_state {
 	IBMVFC_AE_LS_LINK_UP		= 0x01,
 	IBMVFC_AE_LS_LINK_BOUNCED	= 0x02,
-- 
2.27.0

^ permalink raw reply related

* [PATCH v3 01/18] ibmvfc: add vhost fields and defaults for MQ enablement
From: Tyrel Datwyler @ 2020-12-03  2:07 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
	linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Introduce several new vhost fields for managing MQ state of the adapter
as well as initial defaults for MQ enablement.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c |  9 ++++++++-
 drivers/scsi/ibmvscsi/ibmvfc.h | 13 +++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 42e4d35e0d35..f1d677a7423d 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5161,12 +5161,13 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	}
 
 	shost->transportt = ibmvfc_transport_template;
-	shost->can_queue = max_requests;
+	shost->can_queue = (max_requests / IBMVFC_SCSI_HW_QUEUES);
 	shost->max_lun = max_lun;
 	shost->max_id = max_targets;
 	shost->max_sectors = IBMVFC_MAX_SECTORS;
 	shost->max_cmd_len = IBMVFC_MAX_CDB_LEN;
 	shost->unique_id = shost->host_no;
+	shost->nr_hw_queues = IBMVFC_SCSI_HW_QUEUES;
 
 	vhost = shost_priv(shost);
 	INIT_LIST_HEAD(&vhost->sent);
@@ -5178,6 +5179,12 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	vhost->partition_number = -1;
 	vhost->log_level = log_level;
 	vhost->task_set = 1;
+
+	vhost->mq_enabled = IBMVFC_MQ;
+	vhost->client_scsi_channels = IBMVFC_SCSI_CHANNELS;
+	vhost->using_channels = 0;
+	vhost->do_enquiry = 1;
+
 	strcpy(vhost->partition_name, "UNKNOWN");
 	init_waitqueue_head(&vhost->work_wait_q);
 	init_waitqueue_head(&vhost->init_wait_q);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 9d58cfd774d3..e095daada70e 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -41,16 +41,21 @@
 #define IBMVFC_DEFAULT_LOG_LEVEL	2
 #define IBMVFC_MAX_CDB_LEN		16
 #define IBMVFC_CLS3_ERROR		0
+#define IBMVFC_MQ			0
+#define IBMVFC_SCSI_CHANNELS		0
+#define IBMVFC_SCSI_HW_QUEUES		1
+#define IBMVFC_MIG_NO_SUB_TO_CRQ	0
+#define IBMVFC_MIG_NO_N_TO_M		0
 
 /*
  * Ensure we have resources for ERP and initialization:
- * 1 for ERP
  * 1 for initialization
  * 1 for NPIV Logout
  * 2 for BSG passthru
  * 2 for each discovery thread
+ * 1 ERP for each possible HW Queue
  */
-#define IBMVFC_NUM_INTERNAL_REQ	(1 + 1 + 1 + 2 + (disc_threads * 2))
+#define IBMVFC_NUM_INTERNAL_REQ	(1 + 1 + 2 + (disc_threads * 2) + IBMVFC_SCSI_HW_QUEUES)
 
 #define IBMVFC_MAD_SUCCESS		0x00
 #define IBMVFC_MAD_NOT_SUPPORTED	0xF1
@@ -826,6 +831,10 @@ struct ibmvfc_host {
 	int delay_init;
 	int scan_complete;
 	int logged_in;
+	int mq_enabled;
+	int using_channels;
+	int do_enquiry;
+	int client_scsi_channels;
 	int aborting_passthru;
 	int events_to_log;
 #define IBMVFC_AE_LINKUP	0x0001
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 07/18] ibmvfc: define Sub-CRQ interrupt handler routine
From: Tyrel Datwyler @ 2020-12-03  2:07 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Simple handler that calls Sub-CRQ drain routine directly.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index b61ae1df21e5..649268996a5c 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3461,6 +3461,16 @@ static void ibmvfc_drain_sub_crq(struct ibmvfc_sub_queue *scrq)
 	spin_unlock_irqrestore(scrq->vhost->host->host_lock, flags);
 }
 
+static irqreturn_t ibmvfc_interrupt_scsi(int irq, void *scrq_instance)
+{
+	struct ibmvfc_sub_queue *scrq = (struct ibmvfc_sub_queue *)scrq_instance;
+
+	ibmvfc_toggle_scrq_irq(scrq, 0);
+	ibmvfc_drain_sub_crq(scrq);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * ibmvfc_init_tgt - Set the next init job step for the target
  * @tgt:		ibmvfc target struct
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 08/18] ibmvfc: map/request irq and register Sub-CRQ interrupt handler
From: Tyrel Datwyler @ 2020-12-03  2:07 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Create an irq mapping for the hw_irq number provided from phyp firmware.
Request an irq assigned our Sub-CRQ interrupt handler.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 649268996a5c..6b299df786fc 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5134,12 +5134,34 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
 		goto reg_failed;
 	}
 
+	scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
+
+	if (!scrq->irq) {
+		rc = -EINVAL;
+		dev_err(dev, "Error mapping sub-crq[%d] irq\n", index);
+		goto irq_failed;
+	}
+
+	snprintf(scrq->name, sizeof(scrq->name), "ibmvfc-%x-scsi%d",
+		 vdev->unit_address, index);
+	rc = request_irq(scrq->irq, ibmvfc_interrupt_scsi, 0, scrq->name, scrq);
+
+	if (rc) {
+		dev_err(dev, "Couldn't register sub-crq[%d] irq\n", index);
+		irq_dispose_mapping(scrq->irq);
+		goto irq_failed;
+	}
+
 	scrq->hwq_id = index;
 	scrq->vhost = vhost;
 
 	LEAVE;
 	return 0;
 
+irq_failed:
+	do {
+		plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
+	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 reg_failed:
 	dma_unmap_single(dev, scrq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
 dma_map_failed:
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 09/18] ibmvfc: implement channel enquiry and setup commands
From: Tyrel Datwyler @ 2020-12-03  2:07 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

New NPIV_ENQUIRY_CHANNEL and NPIV_SETUP_CHANNEL management datagrams
(MADs) were defined in a previous patchset. If the client advertises a
desire to use channels and the partner VIOS is channel capable then the
client must proceed with channel enquiry to determine the maximum number
of channels the VIOS is capable of providing, and registering SubCRQs
via channel setup with the VIOS immediately following NPIV Login. This
handshaking should not be performed for subsequent NPIV Logins unless
the CRQ connection has been reset.

Implement these two new MADs and issue them following a successful NPIV
login where the VIOS has set the SUPPORT_CHANNELS capability bit in the
NPIV Login response.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 135 ++++++++++++++++++++++++++++++++-
 drivers/scsi/ibmvscsi/ibmvfc.h |   3 +
 2 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 6b299df786fc..d584a4ce0682 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -806,6 +806,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
 	spin_lock_irqsave(vhost->host->host_lock, flags);
 	vhost->state = IBMVFC_NO_CRQ;
 	vhost->logged_in = 0;
+	vhost->do_enquiry = 1;
+	vhost->using_channels = 0;
 
 	/* Clean out the queue */
 	memset(crq->msgs, 0, PAGE_SIZE);
@@ -4476,6 +4478,118 @@ static void ibmvfc_discover_targets(struct ibmvfc_host *vhost)
 		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
 }
 
+static void ibmvfc_channel_setup_done(struct ibmvfc_event *evt)
+{
+	struct ibmvfc_host *vhost = evt->vhost;
+	u32 mad_status = be16_to_cpu(evt->xfer_iu->channel_setup.common.status);
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
+
+	ibmvfc_free_event(evt);
+
+	switch (mad_status) {
+	case IBMVFC_MAD_SUCCESS:
+		ibmvfc_dbg(vhost, "Channel Setup succeded\n");
+		vhost->do_enquiry = 0;
+		break;
+	case IBMVFC_MAD_FAILED:
+		level += ibmvfc_retry_host_init(vhost);
+		ibmvfc_log(vhost, level, "Channel Setup failed\n");
+		fallthrough;
+	case IBMVFC_MAD_DRIVER_FAILED:
+		return;
+	default:
+		dev_err(vhost->dev, "Invalid Channel Setup response: 0x%x\n",
+			mad_status);
+		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+		return;
+	}
+
+	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
+	wake_up(&vhost->work_wait_q);
+}
+
+static void ibmvfc_channel_setup(struct ibmvfc_host *vhost)
+{
+	struct ibmvfc_channel_setup_mad *mad;
+	struct ibmvfc_channel_setup *setup_buf = vhost->channel_setup_buf;
+	struct ibmvfc_event *evt = ibmvfc_get_event(vhost);
+
+	memset(setup_buf, 0, sizeof(*setup_buf));
+	setup_buf->flags = cpu_to_be32(IBMVFC_CANCEL_CHANNELS);
+
+	ibmvfc_init_event(evt, ibmvfc_channel_setup_done, IBMVFC_MAD_FORMAT);
+	mad = &evt->iu.channel_setup;
+	memset(mad, 0, sizeof(*mad));
+	mad->common.version = cpu_to_be32(1);
+	mad->common.opcode = cpu_to_be32(IBMVFC_CHANNEL_SETUP);
+	mad->common.length = cpu_to_be16(sizeof(*mad));
+	mad->buffer.va = cpu_to_be64(vhost->channel_setup_dma);
+	mad->buffer.len = cpu_to_be32(sizeof(*vhost->channel_setup_buf));
+
+	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT_WAIT);
+
+	if (!ibmvfc_send_event(evt, vhost, default_timeout))
+		ibmvfc_dbg(vhost, "Sent channel setup\n");
+	else
+		ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
+}
+
+static void ibmvfc_channel_enquiry_done(struct ibmvfc_event *evt)
+{
+	struct ibmvfc_host *vhost = evt->vhost;
+	struct ibmvfc_channel_enquiry *rsp = &evt->xfer_iu->channel_enquiry;
+	u32 mad_status = be16_to_cpu(rsp->common.status);
+	int level = IBMVFC_DEFAULT_LOG_LEVEL;
+
+	switch (mad_status) {
+	case IBMVFC_MAD_SUCCESS:
+		ibmvfc_dbg(vhost, "Channel Enquiry succeeded\n");
+		vhost->max_vios_scsi_channels = be32_to_cpu(rsp->num_scsi_subq_channels);
+		ibmvfc_free_event(evt);
+		break;
+	case IBMVFC_MAD_FAILED:
+		level += ibmvfc_retry_host_init(vhost);
+		ibmvfc_log(vhost, level, "Channel Enquiry failed\n");
+		fallthrough;
+	case IBMVFC_MAD_DRIVER_FAILED:
+		ibmvfc_free_event(evt);
+		return;
+	default:
+		dev_err(vhost->dev, "Invalid Channel Enquiry response: 0x%x\n",
+			mad_status);
+		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+		ibmvfc_free_event(evt);
+		return;
+	}
+
+	ibmvfc_channel_setup(vhost);
+}
+
+static void ibmvfc_channel_enquiry(struct ibmvfc_host *vhost)
+{
+	struct ibmvfc_channel_enquiry *mad;
+	struct ibmvfc_event *evt = ibmvfc_get_event(vhost);
+
+	ibmvfc_init_event(evt, ibmvfc_channel_enquiry_done, IBMVFC_MAD_FORMAT);
+	mad = &evt->iu.channel_enquiry;
+	memset(mad, 0, sizeof(*mad));
+	mad->common.version = cpu_to_be32(1);
+	mad->common.opcode = cpu_to_be32(IBMVFC_CHANNEL_ENQUIRY);
+	mad->common.length = cpu_to_be16(sizeof(*mad));
+
+	if (IBMVFC_MIG_NO_SUB_TO_CRQ)
+		mad->flags |= cpu_to_be32(IBMVFC_NO_CHANNELS_TO_CRQ_SUPPORT);
+	if (IBMVFC_MIG_NO_N_TO_M)
+		mad->flags |= cpu_to_be32(IBMVFC_NO_N_TO_M_CHANNELS_SUPPORT);
+
+	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT_WAIT);
+
+	if (!ibmvfc_send_event(evt, vhost, default_timeout))
+		ibmvfc_dbg(vhost, "Send channel enquiry\n");
+	else
+		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
+}
+
 /**
  * ibmvfc_npiv_login_done - Completion handler for NPIV Login
  * @evt:	ibmvfc event struct
@@ -4557,8 +4671,14 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
 
 	vhost->host->can_queue = be32_to_cpu(rsp->max_cmds) - IBMVFC_NUM_INTERNAL_REQ;
 	vhost->host->max_sectors = npiv_max_sectors;
-	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
-	wake_up(&vhost->work_wait_q);
+
+	if (ibmvfc_check_caps(vhost, IBMVFC_CAN_SUPPORT_CHANNELS) && vhost->do_enquiry) {
+		ibmvfc_channel_enquiry(vhost);
+	} else {
+		vhost->do_enquiry = 0;
+		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
+		wake_up(&vhost->work_wait_q);
+	}
 }
 
 /**
@@ -5331,9 +5451,20 @@ static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost)
 		goto free_trace;
 	}
 
+	vhost->channel_setup_buf = dma_alloc_coherent(dev, sizeof(*vhost->channel_setup_buf),
+						      &vhost->channel_setup_dma,
+						      GFP_KERNEL);
+
+	if (!vhost->channel_setup_buf) {
+		dev_err(dev, "Couldn't allocate Channel Setup buffer\n");
+		goto free_tgt_pool;
+	}
+
 	LEAVE;
 	return 0;
 
+free_tgt_pool:
+	mempool_destroy(vhost->tgt_pool);
 free_trace:
 	kfree(vhost->trace);
 free_disc_buffer:
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 986ce4530382..dff26dbd912c 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -842,10 +842,13 @@ struct ibmvfc_host {
 	struct ibmvfc_npiv_login login_info;
 	union ibmvfc_npiv_login_data *login_buf;
 	dma_addr_t login_buf_dma;
+	struct ibmvfc_channel_setup *channel_setup_buf;
+	dma_addr_t channel_setup_dma;
 	int disc_buf_sz;
 	int log_level;
 	struct ibmvfc_discover_targets_entry *disc_buf;
 	struct mutex passthru_mutex;
+	int max_vios_scsi_channels;
 	int task_set;
 	int init_retries;
 	int discovery_threads;
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 10/18] ibmvfc: advertise client support for using hardware channels
From: Tyrel Datwyler @ 2020-12-03  2:07 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Previous patches have plumbed the necessary Sub-CRQ interface and
channel negotiation MADs to fully channelized hardware queues.

Advertise client support via NPIV Login capability
IBMVFC_CAN_USE_CHANNELS when the client bits have MQ enabled via
vhost->mq_enabled, or when channels were already in use during a
subsequent NPIV Login. The later is required because channel support is
only renegotiated after a CRQ pair is broken. Simple NPIV Logout/Logins
require the client to continue to advertise the channel capability until
the CRQ pair between the client is broken.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d584a4ce0682..0eb91ac86d96 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1282,6 +1282,10 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
 
 	login_info->max_cmds = cpu_to_be32(max_requests + IBMVFC_NUM_INTERNAL_REQ);
 	login_info->capabilities = cpu_to_be64(IBMVFC_CAN_MIGRATE | IBMVFC_CAN_SEND_VF_WWPN);
+
+	if (vhost->mq_enabled || vhost->using_channels)
+		login_info->capabilities |= cpu_to_be64(IBMVFC_CAN_USE_CHANNELS);
+
 	login_info->async.va = cpu_to_be64(vhost->async_crq.msg_token);
 	login_info->async.len = cpu_to_be32(vhost->async_crq.size * sizeof(*vhost->async_crq.msgs));
 	strncpy(login_info->partition_name, vhost->partition_name, IBMVFC_MAX_NAME);
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 11/18] ibmvfc: set and track hw queue in ibmvfc_event struct
From: Tyrel Datwyler @ 2020-12-03  2:07 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Extract the hwq id from a SCSI command and store it in the ibmvfc_event
structure to identify which Sub-CRQ to send the command down when
channels are being utilized.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 5 +++++
 drivers/scsi/ibmvscsi/ibmvfc.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 0eb91ac86d96..b51ae17883b7 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1397,6 +1397,7 @@ static void ibmvfc_init_event(struct ibmvfc_event *evt,
 	evt->crq.format = format;
 	evt->done = done;
 	evt->eh_comp = NULL;
+	evt->hwq = 0;
 }
 
 /**
@@ -1748,6 +1749,8 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
 	struct ibmvfc_cmd *vfc_cmd;
 	struct ibmvfc_fcp_cmd_iu *iu;
 	struct ibmvfc_event *evt;
+	u32 tag_and_hwq = blk_mq_unique_tag(cmnd->request);
+	u16 hwq = blk_mq_unique_tag_to_hwq(tag_and_hwq);
 	int rc;
 
 	if (unlikely((rc = fc_remote_port_chkready(rport))) ||
@@ -1775,6 +1778,8 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
 	}
 
 	vfc_cmd->correlation = cpu_to_be64(evt);
+	if (vhost->using_channels)
+		evt->hwq = hwq % vhost->scsi_scrqs.active_queues;
 
 	if (likely(!(rc = ibmvfc_map_sg_data(cmnd, evt, vfc_cmd, vhost->dev))))
 		return ibmvfc_send_event(evt, vhost, 0);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index dff26dbd912c..e0ffb0416223 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -781,6 +781,7 @@ struct ibmvfc_event {
 	struct completion comp;
 	struct completion *eh_comp;
 	struct timer_list timer;
+	u16 hwq;
 };
 
 /* a pool of event structs for use */
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 13/18] ibmvfc: register Sub-CRQ handles with VIOS during channel setup
From: Tyrel Datwyler @ 2020-12-03  2:08 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

If the ibmvfc client adapter requests channels it must submit a number
of Sub-CRQ handles matching the number of channels being requested. The
VIOS in its response will overwrite the actual number of channel
resources allocated which may be less than what was requested. The
client then must store the VIOS Sub-CRQ handle for each queue. This VIOS
handle is needed as a parameter with  h_send_sub_crq().

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d94db73d4a15..d65de320252e 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -4512,15 +4512,35 @@ static void ibmvfc_discover_targets(struct ibmvfc_host *vhost)
 static void ibmvfc_channel_setup_done(struct ibmvfc_event *evt)
 {
 	struct ibmvfc_host *vhost = evt->vhost;
+	struct ibmvfc_channel_setup *setup = vhost->channel_setup_buf;
+	struct ibmvfc_scsi_channels *scrqs = &vhost->scsi_scrqs;
 	u32 mad_status = be16_to_cpu(evt->xfer_iu->channel_setup.common.status);
 	int level = IBMVFC_DEFAULT_LOG_LEVEL;
+	int flags, active_queues, i;
 
 	ibmvfc_free_event(evt);
 
 	switch (mad_status) {
 	case IBMVFC_MAD_SUCCESS:
 		ibmvfc_dbg(vhost, "Channel Setup succeded\n");
+		flags = be32_to_cpu(setup->flags);
 		vhost->do_enquiry = 0;
+		active_queues = be32_to_cpu(setup->num_scsi_subq_channels);
+		scrqs->active_queues = active_queues;
+
+		if (flags & IBMVFC_CHANNELS_CANCELED) {
+			ibmvfc_dbg(vhost, "Channels Canceled\n");
+			vhost->using_channels = 0;
+		} else {
+			if (active_queues)
+				vhost->using_channels = 1;
+			for (i = 0; i < active_queues; i++)
+				scrqs->scrqs[i].vios_cookie =
+					be64_to_cpu(setup->channel_handles[i]);
+
+			ibmvfc_dbg(vhost, "Using %u channels\n",
+				   vhost->scsi_scrqs.active_queues);
+		}
 		break;
 	case IBMVFC_MAD_FAILED:
 		level += ibmvfc_retry_host_init(vhost);
@@ -4544,9 +4564,19 @@ static void ibmvfc_channel_setup(struct ibmvfc_host *vhost)
 	struct ibmvfc_channel_setup_mad *mad;
 	struct ibmvfc_channel_setup *setup_buf = vhost->channel_setup_buf;
 	struct ibmvfc_event *evt = ibmvfc_get_event(vhost);
+	struct ibmvfc_scsi_channels *scrqs = &vhost->scsi_scrqs;
+	unsigned int num_channels =
+		min(vhost->client_scsi_channels, vhost->max_vios_scsi_channels);
+	int i;
 
 	memset(setup_buf, 0, sizeof(*setup_buf));
-	setup_buf->flags = cpu_to_be32(IBMVFC_CANCEL_CHANNELS);
+	if (num_channels == 0)
+		setup_buf->flags = cpu_to_be32(IBMVFC_CANCEL_CHANNELS);
+	else {
+		setup_buf->num_scsi_subq_channels = cpu_to_be32(num_channels);
+		for (i = 0; i < num_channels; i++)
+			setup_buf->channel_handles[i] = cpu_to_be64(scrqs->scrqs[i].cookie);
+	}
 
 	ibmvfc_init_event(evt, ibmvfc_channel_setup_done, IBMVFC_MAD_FORMAT);
 	mad = &evt->iu.channel_setup;
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 14/18] ibmvfc: add cancel mad initialization helper
From: Tyrel Datwyler @ 2020-12-03  2:08 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Add a helper routine for initializing a Cancel MAD. This will be useful
for a channelized client that needs to send a Cancel commands down every
channel commands were sent for a particular LUN.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 67 ++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d65de320252e..ec3db5a6baf3 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -2286,6 +2286,44 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
 	return SUCCESS;
 }
 
+static struct ibmvfc_event *ibmvfc_init_tmf(struct ibmvfc_host *vhost,
+				     struct scsi_device *sdev,
+				     int type)
+{
+	struct scsi_target *starget = scsi_target(sdev);
+	struct fc_rport *rport = starget_to_rport(starget);
+	struct ibmvfc_event *evt;
+	struct ibmvfc_tmf *tmf;
+
+	evt = ibmvfc_get_event(vhost);
+	ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT);
+
+	tmf = &evt->iu.tmf;
+	memset(tmf, 0, sizeof(*tmf));
+	if (ibmvfc_check_caps(vhost, IBMVFC_HANDLE_VF_WWPN)) {
+		tmf->common.version = cpu_to_be32(2);
+		tmf->target_wwpn = cpu_to_be64(rport->port_name);
+	} else {
+		tmf->common.version = cpu_to_be32(1);
+	}
+	tmf->common.opcode = cpu_to_be32(IBMVFC_TMF_MAD);
+	tmf->common.length = cpu_to_be16(sizeof(*tmf));
+	tmf->scsi_id = cpu_to_be64(rport->port_id);
+	int_to_scsilun(sdev->lun, &tmf->lun);
+	if (!ibmvfc_check_caps(vhost, IBMVFC_CAN_SUPPRESS_ABTS))
+		type &= ~IBMVFC_TMF_SUPPRESS_ABTS;
+	if (vhost->state == IBMVFC_ACTIVE)
+		tmf->flags = cpu_to_be32((type | IBMVFC_TMF_LUA_VALID));
+	else
+		tmf->flags = cpu_to_be32(((type & IBMVFC_TMF_SUPPRESS_ABTS) | IBMVFC_TMF_LUA_VALID));
+	tmf->cancel_key = cpu_to_be32((unsigned long)sdev->hostdata);
+	tmf->my_cancel_key = cpu_to_be32((unsigned long)starget->hostdata);
+
+	init_completion(&evt->comp);
+
+	return evt;
+}
+
 /**
  * ibmvfc_cancel_all - Cancel all outstanding commands to the device
  * @sdev:	scsi device to cancel commands
@@ -2300,9 +2338,6 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
 static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
 {
 	struct ibmvfc_host *vhost = shost_priv(sdev->host);
-	struct scsi_target *starget = scsi_target(sdev);
-	struct fc_rport *rport = starget_to_rport(starget);
-	struct ibmvfc_tmf *tmf;
 	struct ibmvfc_event *evt, *found_evt;
 	union ibmvfc_iu rsp;
 	int rsp_rc = -EBUSY;
@@ -2327,32 +2362,8 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
 	}
 
 	if (vhost->logged_in) {
-		evt = ibmvfc_get_event(vhost);
-		ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT);
-
-		tmf = &evt->iu.tmf;
-		memset(tmf, 0, sizeof(*tmf));
-		if (ibmvfc_check_caps(vhost, IBMVFC_HANDLE_VF_WWPN)) {
-			tmf->common.version = cpu_to_be32(2);
-			tmf->target_wwpn = cpu_to_be64(rport->port_name);
-		} else {
-			tmf->common.version = cpu_to_be32(1);
-		}
-		tmf->common.opcode = cpu_to_be32(IBMVFC_TMF_MAD);
-		tmf->common.length = cpu_to_be16(sizeof(*tmf));
-		tmf->scsi_id = cpu_to_be64(rport->port_id);
-		int_to_scsilun(sdev->lun, &tmf->lun);
-		if (!ibmvfc_check_caps(vhost, IBMVFC_CAN_SUPPRESS_ABTS))
-			type &= ~IBMVFC_TMF_SUPPRESS_ABTS;
-		if (vhost->state == IBMVFC_ACTIVE)
-			tmf->flags = cpu_to_be32((type | IBMVFC_TMF_LUA_VALID));
-		else
-			tmf->flags = cpu_to_be32(((type & IBMVFC_TMF_SUPPRESS_ABTS) | IBMVFC_TMF_LUA_VALID));
-		tmf->cancel_key = cpu_to_be32((unsigned long)sdev->hostdata);
-		tmf->my_cancel_key = cpu_to_be32((unsigned long)starget->hostdata);
-
+		evt = ibmvfc_init_tmf(vhost, sdev, type);
 		evt->sync_iu = &rsp;
-		init_completion(&evt->comp);
 		rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout);
 	}
 
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 12/18] ibmvfc: send commands down HW Sub-CRQ when channelized
From: Tyrel Datwyler @ 2020-12-03  2:08 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

When the client has negotiated the use of channels all vfcFrames are
required to go down a Sub-CRQ channel or it is a protocoal violation. If
the adapter state is channelized submit vfcFrames to the appropriate
Sub-CRQ via the h_send_sub_crq() helper.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index b51ae17883b7..d94db73d4a15 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -701,6 +701,15 @@ static int ibmvfc_send_crq(struct ibmvfc_host *vhost, u64 word1, u64 word2)
 	return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, word1, word2);
 }
 
+static int ibmvfc_send_sub_crq(struct ibmvfc_host *vhost, u64 cookie, u64 word1,
+			       u64 word2, u64 word3, u64 word4)
+{
+	struct vio_dev *vdev = to_vio_dev(vhost->dev);
+
+	return plpar_hcall_norets(H_SEND_SUB_CRQ, vdev->unit_address, cookie,
+				  word1, word2, word3, word4);
+}
+
 /**
  * ibmvfc_send_crq_init - Send a CRQ init message
  * @vhost:	ibmvfc host struct
@@ -1513,15 +1522,19 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
 			     struct ibmvfc_host *vhost, unsigned long timeout)
 {
 	__be64 *crq_as_u64 = (__be64 *) &evt->crq;
+	int channel_cmd = 0;
 	int rc;
 
 	/* Copy the IU into the transfer area */
 	*evt->xfer_iu = evt->iu;
-	if (evt->crq.format == IBMVFC_CMD_FORMAT)
+	if (evt->crq.format == IBMVFC_CMD_FORMAT) {
 		evt->xfer_iu->cmd.tag = cpu_to_be64((u64)evt);
-	else if (evt->crq.format == IBMVFC_MAD_FORMAT)
+		channel_cmd = 1;
+	} else if (evt->crq.format == IBMVFC_MAD_FORMAT) {
 		evt->xfer_iu->mad_common.tag = cpu_to_be64((u64)evt);
-	else
+		if (evt->xfer_iu->mad_common.opcode == IBMVFC_TMF_MAD)
+			channel_cmd = 1;
+	} else
 		BUG();
 
 	list_add_tail(&evt->queue, &vhost->sent);
@@ -1534,8 +1547,17 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
 
 	mb();
 
-	if ((rc = ibmvfc_send_crq(vhost, be64_to_cpu(crq_as_u64[0]),
-				  be64_to_cpu(crq_as_u64[1])))) {
+	if (vhost->using_channels && channel_cmd)
+		rc = ibmvfc_send_sub_crq(vhost,
+					 vhost->scsi_scrqs.scrqs[evt->hwq].vios_cookie,
+					 be64_to_cpu(crq_as_u64[0]),
+					 be64_to_cpu(crq_as_u64[1]),
+					 0, 0);
+	else
+		rc = ibmvfc_send_crq(vhost, be64_to_cpu(crq_as_u64[0]),
+				     be64_to_cpu(crq_as_u64[1]));
+
+	if (rc) {
 		list_del(&evt->queue);
 		del_timer(&evt->timer);
 
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 15/18] ibmvfc: send Cancel MAD down each hw scsi channel
From: Tyrel Datwyler @ 2020-12-03  2:08 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
	linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

In general the client needs to send Cancel MADs and task management
commands down the same channel as the command(s) intended to cancel or
abort. The client assigns cancel keys per LUN and thus must send a
Cancel down each channel commands were submitted for that LUN. Further,
the client then must wait for those cancel completions prior to
submitting a LUN RESET or ABORT TASK SET.

Add a cancel event pointer and cancel rsp iu storage to the
ibmvfc_sub_queue struct such that the cancel routine can assign a cancel
event to each applicable queue. When in legacy CRQ mode we fake treating
it as a subqueue by using a subqueue struct allocated on the stack. Wait
for completion of each submitted cancel.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 104 ++++++++++++++++++++++-----------
 drivers/scsi/ibmvscsi/ibmvfc.h |  38 ++++++------
 2 files changed, 90 insertions(+), 52 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index ec3db5a6baf3..e353b9e88104 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -2339,67 +2339,103 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
 {
 	struct ibmvfc_host *vhost = shost_priv(sdev->host);
 	struct ibmvfc_event *evt, *found_evt;
-	union ibmvfc_iu rsp;
-	int rsp_rc = -EBUSY;
+	struct ibmvfc_sub_queue *scrqs;
+	struct ibmvfc_sub_queue legacy_crq;
+	int rsp_rc = 0;
 	unsigned long flags;
 	u16 status;
+	int cancel_cnt = 0;
+	int num_hwq;
+	int ret = 0;
+	int i;
 
 	ENTER;
 	spin_lock_irqsave(vhost->host->host_lock, flags);
-	found_evt = NULL;
-	list_for_each_entry(evt, &vhost->sent, queue) {
-		if (evt->cmnd && evt->cmnd->device == sdev) {
-			found_evt = evt;
+	if (vhost->using_channels && vhost->scsi_scrqs.active_queues) {
+		num_hwq = vhost->scsi_scrqs.active_queues;
+		scrqs = vhost->scsi_scrqs.scrqs;
+	} else {
+		/* Use ibmvfc_sub_queue on the stack to fake legacy CRQ as a subqueue */
+		num_hwq = 1;
+		scrqs = &legacy_crq;
+	}
+
+	for (i = 0; i < num_hwq; i++) {
+		scrqs[i].cancel_event = NULL;
+		found_evt = NULL;
+		list_for_each_entry(evt, &vhost->sent, queue) {
+			if (evt->cmnd && evt->cmnd->device == sdev && evt->hwq == i) {
+				found_evt = evt;
+				cancel_cnt++;
+				break;
+			}
+		}
+
+		if (!found_evt)
+			continue;
+
+		if (vhost->logged_in) {
+			scrqs[i].cancel_event = ibmvfc_init_tmf(vhost, sdev, type);
+			scrqs[i].cancel_event->hwq = i;
+			scrqs[i].cancel_event->sync_iu = &scrqs[i].cancel_rsp;
+			rsp_rc = ibmvfc_send_event(scrqs[i].cancel_event, vhost, default_timeout);
+			if (rsp_rc)
+				break;
+		} else {
+			rsp_rc = -EBUSY;
 			break;
 		}
 	}
 
-	if (!found_evt) {
+	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+	if (!cancel_cnt) {
 		if (vhost->log_level > IBMVFC_DEFAULT_LOG_LEVEL)
 			sdev_printk(KERN_INFO, sdev, "No events found to cancel\n");
-		spin_unlock_irqrestore(vhost->host->host_lock, flags);
 		return 0;
 	}
 
-	if (vhost->logged_in) {
-		evt = ibmvfc_init_tmf(vhost, sdev, type);
-		evt->sync_iu = &rsp;
-		rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout);
-	}
-
-	spin_unlock_irqrestore(vhost->host->host_lock, flags);
-
 	if (rsp_rc != 0) {
 		sdev_printk(KERN_ERR, sdev, "Failed to send cancel event. rc=%d\n", rsp_rc);
 		/* If failure is received, the host adapter is most likely going
 		 through reset, return success so the caller will wait for the command
 		 being cancelled to get returned */
-		return 0;
+		goto free_events;
 	}
 
 	sdev_printk(KERN_INFO, sdev, "Cancelling outstanding commands.\n");
 
-	wait_for_completion(&evt->comp);
-	status = be16_to_cpu(rsp.mad_common.status);
-	spin_lock_irqsave(vhost->host->host_lock, flags);
-	ibmvfc_free_event(evt);
-	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+	for (i = 0; i < num_hwq; i++) {
+		if (!scrqs[i].cancel_event)
+			continue;
 
-	if (status != IBMVFC_MAD_SUCCESS) {
-		sdev_printk(KERN_WARNING, sdev, "Cancel failed with rc=%x\n", status);
-		switch (status) {
-		case IBMVFC_MAD_DRIVER_FAILED:
-		case IBMVFC_MAD_CRQ_ERROR:
-			/* Host adapter most likely going through reset, return success to
-			 the caller will wait for the command being cancelled to get returned */
-			return 0;
-		default:
-			return -EIO;
-		};
+		wait_for_completion(&scrqs[i].cancel_event->comp);
+		status = be16_to_cpu(scrqs[i].cancel_rsp.mad_common.status);
+
+		if (status != IBMVFC_MAD_SUCCESS) {
+			sdev_printk(KERN_WARNING, sdev, "Cancel failed with rc=%x\n", status);
+			switch (status) {
+			case IBMVFC_MAD_DRIVER_FAILED:
+			case IBMVFC_MAD_CRQ_ERROR:
+				/* Host adapter most likely going through reset, return success to
+				 the caller will wait for the command being cancelled to get returned */
+				goto free_events;
+			default:
+				ret = -EIO;
+				goto free_events;
+			};
+		}
 	}
 
 	sdev_printk(KERN_INFO, sdev, "Successfully cancelled outstanding commands\n");
-	return 0;
+free_events:
+	spin_lock_irqsave(vhost->host->host_lock, flags);
+	for (i = 0; i < num_hwq; i++)
+		if (scrqs[i].cancel_event)
+			ibmvfc_free_event(scrqs[i].cancel_event);
+	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+	return ret;
 }
 
 /**
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index e0ffb0416223..980eb9afe93a 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -584,6 +584,24 @@ struct ibmvfc_connection_info {
 	__be64 reserved[16];
 } __packed __aligned(8);
 
+union ibmvfc_iu {
+	struct ibmvfc_mad_common mad_common;
+	struct ibmvfc_npiv_login_mad npiv_login;
+	struct ibmvfc_npiv_logout_mad npiv_logout;
+	struct ibmvfc_discover_targets discover_targets;
+	struct ibmvfc_port_login plogi;
+	struct ibmvfc_process_login prli;
+	struct ibmvfc_move_login move_login;
+	struct ibmvfc_query_tgt query_tgt;
+	struct ibmvfc_implicit_logout implicit_logout;
+	struct ibmvfc_tmf tmf;
+	struct ibmvfc_cmd cmd;
+	struct ibmvfc_passthru_mad passthru;
+	struct ibmvfc_channel_enquiry channel_enquiry;
+	struct ibmvfc_channel_setup_mad channel_setup;
+	struct ibmvfc_connection_info connection_info;
+} __packed __aligned(8);
+
 struct ibmvfc_trace_start_entry {
 	u32 xfer_len;
 } __packed;
@@ -666,6 +684,8 @@ struct ibmvfc_sub_queue {
 	dma_addr_t msg_token;
 	int size, cur;
 	struct ibmvfc_host *vhost;
+	struct ibmvfc_event *cancel_event;
+	union ibmvfc_iu cancel_rsp;
 	unsigned long cookie;
 	unsigned long vios_cookie;
 	unsigned long hw_irq;
@@ -712,24 +732,6 @@ struct ibmvfc_async_crq_queue {
 	dma_addr_t msg_token;
 };
 
-union ibmvfc_iu {
-	struct ibmvfc_mad_common mad_common;
-	struct ibmvfc_npiv_login_mad npiv_login;
-	struct ibmvfc_npiv_logout_mad npiv_logout;
-	struct ibmvfc_discover_targets discover_targets;
-	struct ibmvfc_port_login plogi;
-	struct ibmvfc_process_login prli;
-	struct ibmvfc_move_login move_login;
-	struct ibmvfc_query_tgt query_tgt;
-	struct ibmvfc_implicit_logout implicit_logout;
-	struct ibmvfc_tmf tmf;
-	struct ibmvfc_cmd cmd;
-	struct ibmvfc_passthru_mad passthru;
-	struct ibmvfc_channel_enquiry channel_enquiry;
-	struct ibmvfc_channel_setup_mad channel_setup;
-	struct ibmvfc_connection_info connection_info;
-} __packed __aligned(8);
-
 enum ibmvfc_target_action {
 	IBMVFC_TGT_ACTION_NONE = 0,
 	IBMVFC_TGT_ACTION_INIT,
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 17/18] ibmvfc: provide modules parameters for MQ settings
From: Tyrel Datwyler @ 2020-12-03  2:08 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
	linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Add the various module parameter toggles for adjusting the MQ
characteristics at boot/load time as well as a device attribute for
changing the client scsi channel request amount.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 76 +++++++++++++++++++++++++++++-----
 1 file changed, 66 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index e353b9e88104..e499599662ec 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -40,6 +40,12 @@ static unsigned int disc_threads = IBMVFC_MAX_DISC_THREADS;
 static unsigned int ibmvfc_debug = IBMVFC_DEBUG;
 static unsigned int log_level = IBMVFC_DEFAULT_LOG_LEVEL;
 static unsigned int cls3_error = IBMVFC_CLS3_ERROR;
+static unsigned int mq_enabled = IBMVFC_MQ;
+static unsigned int nr_scsi_hw_queues = IBMVFC_SCSI_HW_QUEUES;
+static unsigned int nr_scsi_channels = IBMVFC_SCSI_CHANNELS;
+static unsigned int mig_channels_only = IBMVFC_MIG_NO_SUB_TO_CRQ;
+static unsigned int mig_no_less_channels = IBMVFC_MIG_NO_N_TO_M;
+
 static LIST_HEAD(ibmvfc_head);
 static DEFINE_SPINLOCK(ibmvfc_driver_lock);
 static struct scsi_transport_template *ibmvfc_transport_template;
@@ -49,6 +55,22 @@ MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(IBMVFC_DRIVER_VERSION);
 
+module_param_named(mq, mq_enabled, uint, S_IRUGO);
+MODULE_PARM_DESC(mq, "Enable multiqueue support. "
+		 "[Default=" __stringify(IBMVFC_MQ) "]");
+module_param_named(scsi_host_queues, nr_scsi_hw_queues, uint, S_IRUGO);
+MODULE_PARM_DESC(scsi_host_queues, "Number of SCSI Host submission queues. "
+		 "[Default=" __stringify(IBMVFC_SCSI_HW_QUEUES) "]");
+module_param_named(scsi_hw_channels, nr_scsi_channels, uint, S_IRUGO);
+MODULE_PARM_DESC(scsi_hw_channels, "Number of hw scsi channels to request. "
+		 "[Default=" __stringify(IBMVFC_SCSI_CHANNELS) "]");
+module_param_named(mig_channels_only, mig_channels_only, uint, S_IRUGO);
+MODULE_PARM_DESC(mig_channels_only, "Prevent migration to non-channelized system. "
+		 "[Default=" __stringify(IBMVFC_MIG_NO_SUB_TO_CRQ) "]");
+module_param_named(mig_no_less_channels, mig_no_less_channels, uint, S_IRUGO);
+MODULE_PARM_DESC(mig_no_less_channels, "Prevent migration to system with less channels. "
+		 "[Default=" __stringify(IBMVFC_MIG_NO_N_TO_M) "]");
+
 module_param_named(init_timeout, init_timeout, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(init_timeout, "Initialization timeout in seconds. "
 		 "[Default=" __stringify(IBMVFC_INIT_TIMEOUT) "]");
@@ -823,7 +845,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
 	crq->cur = 0;
 
 	if (vhost->scsi_scrqs.scrqs) {
-		for (i = 0; i < IBMVFC_SCSI_HW_QUEUES; i++) {
+		for (i = 0; i < nr_scsi_hw_queues; i++) {
 			scrq = &vhost->scsi_scrqs.scrqs[i];
 			memset(scrq->msgs, 0, PAGE_SIZE);
 			scrq->cur = 0;
@@ -3234,6 +3256,37 @@ static ssize_t ibmvfc_store_log_level(struct device *dev,
 	return strlen(buf);
 }
 
+static ssize_t ibmvfc_show_scsi_channels(struct device *dev,
+					 struct device_attribute *attr, char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvfc_host *vhost = shost_priv(shost);
+	unsigned long flags = 0;
+	int len;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	len = snprintf(buf, PAGE_SIZE, "%d\n", vhost->client_scsi_channels);
+	spin_unlock_irqrestore(shost->host_lock, flags);
+	return len;
+}
+
+static ssize_t ibmvfc_store_scsi_channels(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct ibmvfc_host *vhost = shost_priv(shost);
+	unsigned long flags = 0;
+	unsigned int channels;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	channels = simple_strtoul(buf, NULL, 10);
+	vhost->client_scsi_channels = min(channels, nr_scsi_hw_queues);
+	ibmvfc_hard_reset_host(vhost);
+	spin_unlock_irqrestore(shost->host_lock, flags);
+	return strlen(buf);
+}
+
 static DEVICE_ATTR(partition_name, S_IRUGO, ibmvfc_show_host_partition_name, NULL);
 static DEVICE_ATTR(device_name, S_IRUGO, ibmvfc_show_host_device_name, NULL);
 static DEVICE_ATTR(port_loc_code, S_IRUGO, ibmvfc_show_host_loc_code, NULL);
@@ -3242,6 +3295,8 @@ static DEVICE_ATTR(npiv_version, S_IRUGO, ibmvfc_show_host_npiv_version, NULL);
 static DEVICE_ATTR(capabilities, S_IRUGO, ibmvfc_show_host_capabilities, NULL);
 static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
 		   ibmvfc_show_log_level, ibmvfc_store_log_level);
+static DEVICE_ATTR(nr_scsi_channels, S_IRUGO | S_IWUSR,
+		   ibmvfc_show_scsi_channels, ibmvfc_store_scsi_channels);
 
 #ifdef CONFIG_SCSI_IBMVFC_TRACE
 /**
@@ -3298,6 +3353,7 @@ static struct device_attribute *ibmvfc_attrs[] = {
 	&dev_attr_npiv_version,
 	&dev_attr_capabilities,
 	&dev_attr_log_level,
+	&dev_attr_nr_scsi_channels,
 	NULL
 };
 
@@ -4685,9 +4741,9 @@ static void ibmvfc_channel_enquiry(struct ibmvfc_host *vhost)
 	mad->common.opcode = cpu_to_be32(IBMVFC_CHANNEL_ENQUIRY);
 	mad->common.length = cpu_to_be16(sizeof(*mad));
 
-	if (IBMVFC_MIG_NO_SUB_TO_CRQ)
+	if (mig_channels_only)
 		mad->flags |= cpu_to_be32(IBMVFC_NO_CHANNELS_TO_CRQ_SUPPORT);
-	if (IBMVFC_MIG_NO_N_TO_M)
+	if (mig_no_less_channels)
 		mad->flags |= cpu_to_be32(IBMVFC_NO_N_TO_M_CHANNELS_SUPPORT);
 
 	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT_WAIT);
@@ -5426,13 +5482,13 @@ static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
 
 	ENTER;
 
-	vhost->scsi_scrqs.scrqs = kcalloc(IBMVFC_SCSI_HW_QUEUES,
+	vhost->scsi_scrqs.scrqs = kcalloc(nr_scsi_hw_queues,
 					  sizeof(*vhost->scsi_scrqs.scrqs),
 					  GFP_KERNEL);
 	if (!vhost->scsi_scrqs.scrqs)
 		return -1;
 
-	for (i = 0; i < IBMVFC_SCSI_HW_QUEUES; i++) {
+	for (i = 0; i < nr_scsi_hw_queues; i++) {
 		if (ibmvfc_register_scsi_channel(vhost, i)) {
 			for (j = i; j > 0; j--)
 				ibmvfc_deregister_scsi_channel(vhost, j - 1);
@@ -5456,7 +5512,7 @@ static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
 	if (!vhost->scsi_scrqs.scrqs)
 		return;
 
-	for (i = 0; i < IBMVFC_SCSI_HW_QUEUES; i++)
+	for (i = 0; i < nr_scsi_hw_queues; i++)
 		ibmvfc_deregister_scsi_channel(vhost, i);
 
 	kfree(vhost->scsi_scrqs.scrqs);
@@ -5668,13 +5724,13 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	}
 
 	shost->transportt = ibmvfc_transport_template;
-	shost->can_queue = (max_requests / IBMVFC_SCSI_HW_QUEUES);
+	shost->can_queue = (max_requests / nr_scsi_hw_queues);
 	shost->max_lun = max_lun;
 	shost->max_id = max_targets;
 	shost->max_sectors = IBMVFC_MAX_SECTORS;
 	shost->max_cmd_len = IBMVFC_MAX_CDB_LEN;
 	shost->unique_id = shost->host_no;
-	shost->nr_hw_queues = IBMVFC_SCSI_HW_QUEUES;
+	shost->nr_hw_queues = nr_scsi_hw_queues;
 
 	vhost = shost_priv(shost);
 	INIT_LIST_HEAD(&vhost->sent);
@@ -5687,8 +5743,8 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	vhost->log_level = log_level;
 	vhost->task_set = 1;
 
-	vhost->mq_enabled = IBMVFC_MQ;
-	vhost->client_scsi_channels = IBMVFC_SCSI_CHANNELS;
+	vhost->mq_enabled = mq_enabled;
+	vhost->client_scsi_channels = min(nr_scsi_hw_queues, nr_scsi_channels);
 	vhost->using_channels = 0;
 	vhost->do_enquiry = 1;
 
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 16/18] ibmvfc: enable MQ and set reasonable defaults
From: Tyrel Datwyler @ 2020-12-03  2:08 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
	Brian King, brking, linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

Turn on MQ by default and set sane values for the upper limit on hw
queues for the scsi host, and number of hw scsi channels to request from
the partner VIOS.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 980eb9afe93a..93c234a5512d 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -41,9 +41,9 @@
 #define IBMVFC_DEFAULT_LOG_LEVEL	2
 #define IBMVFC_MAX_CDB_LEN		16
 #define IBMVFC_CLS3_ERROR		0
-#define IBMVFC_MQ			0
-#define IBMVFC_SCSI_CHANNELS		0
-#define IBMVFC_SCSI_HW_QUEUES		1
+#define IBMVFC_MQ			1
+#define IBMVFC_SCSI_CHANNELS		8
+#define IBMVFC_SCSI_HW_QUEUES		16
 #define IBMVFC_MIG_NO_SUB_TO_CRQ	0
 #define IBMVFC_MIG_NO_N_TO_M		0
 
-- 
2.27.0


^ permalink raw reply related

* [PATCH v3 18/18] ibmvfc: drop host lock when completing commands in CRQ
From: Tyrel Datwyler @ 2020-12-03  2:08 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
	linuxppc-dev
In-Reply-To: <20201203020806.14747-1-tyreld@linux.ibm.com>

The legacy CRQ holds the host lock the even while completing commands.
This presents a problem when in legacy single queue mode and
nr_hw_queues is greater than one since calling scsi_done() introduces
the potential for deadlock.

If nr_hw_queues is greater than one drop the hostlock in the legacy CRQ
path when completing a command.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index e499599662ec..e2200bdff2be 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -2969,6 +2969,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
 {
 	long rc;
 	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
+	unsigned long flags;
 
 	switch (crq->valid) {
 	case IBMVFC_CRQ_INIT_RSP:
@@ -3039,7 +3040,12 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
 	del_timer(&evt->timer);
 	list_del(&evt->queue);
 	ibmvfc_trc_end(evt);
-	evt->done(evt);
+	if (nr_scsi_hw_queues > 1) {
+		spin_unlock_irqrestore(vhost->host->host_lock, flags);
+		evt->done(evt);
+		spin_lock_irqsave(vhost->host->host_lock, flags);
+	} else
+		evt->done(evt);
 }
 
 /**
-- 
2.27.0


^ permalink raw reply related

* [powerpc:fixes-test] BUILD SUCCESS a1ee28117077c3bf24e5ab6324c835eaab629c45
From: kernel test robot @ 2020-12-03  2:22 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  fixes-test
branch HEAD: a1ee28117077c3bf24e5ab6324c835eaab629c45  powerpc/64s/powernv: Fix memory corruption when saving SLB entries on MCE

elapsed time: 730m

configs tested: 148
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
sh                           se7206_defconfig
sh                          sdk7786_defconfig
sparc64                          alldefconfig
arm                          exynos_defconfig
mips                           ip32_defconfig
mips                      loongson3_defconfig
arm                        clps711x_defconfig
arm                        shmobile_defconfig
powerpc64                        alldefconfig
mips                 decstation_r4k_defconfig
nios2                         3c120_defconfig
mips                         bigsur_defconfig
mips                malta_qemu_32r6_defconfig
mips                         mpc30x_defconfig
microblaze                      mmu_defconfig
sh                     sh7710voipgw_defconfig
powerpc                     redwood_defconfig
parisc                           alldefconfig
mips                      pic32mzda_defconfig
powerpc                     skiroot_defconfig
sh                          rsk7264_defconfig
powerpc                        fsp2_defconfig
sh                             shx3_defconfig
sh                        sh7785lcr_defconfig
sh                          urquell_defconfig
arm                        realview_defconfig
arm                           sunxi_defconfig
sh                            titan_defconfig
csky                             alldefconfig
m68k                          sun3x_defconfig
arm                          ep93xx_defconfig
sh                           se7724_defconfig
powerpc                     powernv_defconfig
powerpc                      walnut_defconfig
arm                         s3c6400_defconfig
arm                       aspeed_g5_defconfig
sparc                       sparc32_defconfig
mips                         tb0287_defconfig
m68k                          hp300_defconfig
mips                     cu1830-neo_defconfig
sh                           se7343_defconfig
arm                            u300_defconfig
m68k                             alldefconfig
arm                        spear6xx_defconfig
arm                         assabet_defconfig
sh                              ul2_defconfig
xtensa                  nommu_kc705_defconfig
nds32                             allnoconfig
arm                       versatile_defconfig
sh                            hp6xx_defconfig
powerpc                     mpc5200_defconfig
powerpc                 mpc8272_ads_defconfig
openrisc                            defconfig
powerpc                       holly_defconfig
arm                          prima2_defconfig
arc                           tb10x_defconfig
m68k                                defconfig
arm                        keystone_defconfig
mips                        nlm_xlr_defconfig
xtensa                              defconfig
powerpc                     stx_gp3_defconfig
arm                       cns3420vb_defconfig
arm                           omap1_defconfig
mips                       rbtx49xx_defconfig
arm                         s3c2410_defconfig
arm                         nhk8815_defconfig
mips                        maltaup_defconfig
h8300                    h8300h-sim_defconfig
arm                           corgi_defconfig
powerpc                     ep8248e_defconfig
sh                   sh7770_generic_defconfig
powerpc                 mpc85xx_cds_defconfig
sh                          landisk_defconfig
mips                     cu1000-neo_defconfig
arm                        mvebu_v5_defconfig
m68k                       m5275evb_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a004-20201202
x86_64               randconfig-a006-20201202
x86_64               randconfig-a001-20201202
x86_64               randconfig-a002-20201202
x86_64               randconfig-a005-20201202
x86_64               randconfig-a003-20201202
i386                 randconfig-a004-20201202
i386                 randconfig-a005-20201202
i386                 randconfig-a001-20201202
i386                 randconfig-a002-20201202
i386                 randconfig-a006-20201202
i386                 randconfig-a003-20201202
i386                 randconfig-a014-20201202
i386                 randconfig-a013-20201202
i386                 randconfig-a011-20201202
i386                 randconfig-a015-20201202
i386                 randconfig-a012-20201202
i386                 randconfig-a016-20201202
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a016-20201202
x86_64               randconfig-a012-20201202
x86_64               randconfig-a014-20201202
x86_64               randconfig-a013-20201202
x86_64               randconfig-a015-20201202
x86_64               randconfig-a011-20201202

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:next-test] BUILD REGRESSION fb003959777a635dea8910cf71109b612c7f940c
From: kernel test robot @ 2020-12-03  2:28 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next-test
branch HEAD: fb003959777a635dea8910cf71109b612c7f940c  powerpc/44x: Don't support 47x code and non 47x code at the same time

Error/Warning reports:

https://lore.kernel.org/linuxppc-dev/202012030320.soNPcJR8-lkp@intel.com
https://lore.kernel.org/linuxppc-dev/202012030759.zuEULDQ3-lkp@intel.com

Error/Warning in current branch:

drivers/misc/lkdtm/powerpc.c:13:10: error: use of undeclared identifier 'SLB_VSID_KERNEL'
drivers/misc/lkdtm/powerpc.c:13:54: error: no member named 'sllp' in 'struct mmu_psize_def'
drivers/misc/lkdtm/powerpc.c:17:15: error: implicit declaration of function 'mk_vsid_data' [-Werror,-Wimplicit-function-declaration]
drivers/misc/lkdtm/powerpc.c:18:15: error: implicit declaration of function 'mk_esid_data' [-Werror,-Wimplicit-function-declaration]
drivers/misc/lkdtm/powerpc.c:18:38: error: use of undeclared identifier 'SLB_NUM_BOLTED'
drivers/misc/lkdtm/powerpc.c:37:37: error: use of undeclared identifier 'MMU_SEGSIZE_1T'
drivers/misc/lkdtm/powerpc.c:37:53: error: use of undeclared identifier 'mmu_vmalloc_psize'

Error/Warning ids grouped by kconfigs:

gcc_recent_errors
`-- powerpc-randconfig-c003-20201202
    `-- arch-powerpc-kernel-firmware.c:WARNING:return-of-in-function-check_kvm_guest-with-return-type-bool

clang_recent_errors
`-- powerpc64-randconfig-r013-20201202
    |-- drivers-misc-lkdtm-powerpc.c:error:implicit-declaration-of-function-mk_esid_data-Werror-Wimplicit-function-declaration
    |-- drivers-misc-lkdtm-powerpc.c:error:implicit-declaration-of-function-mk_vsid_data-Werror-Wimplicit-function-declaration
    |-- drivers-misc-lkdtm-powerpc.c:error:no-member-named-sllp-in-struct-mmu_psize_def
    |-- drivers-misc-lkdtm-powerpc.c:error:use-of-undeclared-identifier-MMU_SEGSIZE_1T
    |-- drivers-misc-lkdtm-powerpc.c:error:use-of-undeclared-identifier-SLB_NUM_BOLTED
    |-- drivers-misc-lkdtm-powerpc.c:error:use-of-undeclared-identifier-SLB_VSID_KERNEL
    `-- drivers-misc-lkdtm-powerpc.c:error:use-of-undeclared-identifier-mmu_vmalloc_psize

elapsed time: 733m

configs tested: 141
configs skipped: 2

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                        clps711x_defconfig
arm                        shmobile_defconfig
powerpc64                        alldefconfig
mips                 decstation_r4k_defconfig
nios2                         3c120_defconfig
mips                         bigsur_defconfig
mips                malta_qemu_32r6_defconfig
mips                         mpc30x_defconfig
microblaze                      mmu_defconfig
sh                     sh7710voipgw_defconfig
powerpc                     redwood_defconfig
parisc                           alldefconfig
mips                      pic32mzda_defconfig
powerpc                     skiroot_defconfig
sh                          rsk7264_defconfig
powerpc                        fsp2_defconfig
sh                             shx3_defconfig
sh                        sh7785lcr_defconfig
sh                            titan_defconfig
csky                             alldefconfig
m68k                          sun3x_defconfig
arm                          ep93xx_defconfig
sh                           se7724_defconfig
powerpc                     powernv_defconfig
powerpc                      walnut_defconfig
arm                         s3c6400_defconfig
arm                       aspeed_g5_defconfig
sparc                       sparc32_defconfig
mips                         tb0287_defconfig
m68k                          hp300_defconfig
mips                     cu1830-neo_defconfig
sh                           se7343_defconfig
arm                            u300_defconfig
m68k                             alldefconfig
arm                        spear6xx_defconfig
arm                         assabet_defconfig
sh                              ul2_defconfig
xtensa                  nommu_kc705_defconfig
nds32                             allnoconfig
arm                       versatile_defconfig
sh                            hp6xx_defconfig
arm                           sunxi_defconfig
powerpc                     mpc5200_defconfig
powerpc                 mpc8272_ads_defconfig
openrisc                            defconfig
powerpc                       holly_defconfig
arm                          prima2_defconfig
arc                           tb10x_defconfig
m68k                                defconfig
arm                        keystone_defconfig
mips                        nlm_xlr_defconfig
xtensa                              defconfig
powerpc                     stx_gp3_defconfig
arm                       cns3420vb_defconfig
arm                           omap1_defconfig
mips                       rbtx49xx_defconfig
arm                         s3c2410_defconfig
arm                         nhk8815_defconfig
mips                        maltaup_defconfig
mips                           ip32_defconfig
h8300                    h8300h-sim_defconfig
arm                           corgi_defconfig
powerpc                     ep8248e_defconfig
sh                   sh7770_generic_defconfig
powerpc                 mpc85xx_cds_defconfig
sh                          landisk_defconfig
mips                     cu1000-neo_defconfig
arm                        mvebu_v5_defconfig
m68k                       m5275evb_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a004-20201202
x86_64               randconfig-a006-20201202
x86_64               randconfig-a001-20201202
x86_64               randconfig-a002-20201202
x86_64               randconfig-a005-20201202
x86_64               randconfig-a003-20201202
i386                 randconfig-a004-20201202
i386                 randconfig-a005-20201202
i386                 randconfig-a001-20201202
i386                 randconfig-a002-20201202
i386                 randconfig-a006-20201202
i386                 randconfig-a003-20201202
i386                 randconfig-a014-20201202
i386                 randconfig-a013-20201202
i386                 randconfig-a011-20201202
i386                 randconfig-a015-20201202
i386                 randconfig-a012-20201202
i386                 randconfig-a016-20201202
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a016-20201202
x86_64               randconfig-a012-20201202
x86_64               randconfig-a014-20201202
x86_64               randconfig-a013-20201202
x86_64               randconfig-a015-20201202
x86_64               randconfig-a011-20201202

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:merge] BUILD SUCCESS a1aeabd25a36d9e019381278e543e2d538dd44a7
From: kernel test robot @ 2020-12-03  2:28 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  merge
branch HEAD: a1aeabd25a36d9e019381278e543e2d538dd44a7  Automatic merge of 'fixes' into merge (2020-12-02 22:37)

elapsed time: 734m

configs tested: 151
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                        clps711x_defconfig
arm                        shmobile_defconfig
powerpc64                        alldefconfig
mips                 decstation_r4k_defconfig
nios2                         3c120_defconfig
mips                         bigsur_defconfig
mips                malta_qemu_32r6_defconfig
mips                         mpc30x_defconfig
microblaze                      mmu_defconfig
sh                     sh7710voipgw_defconfig
powerpc                     redwood_defconfig
parisc                           alldefconfig
mips                      pic32mzda_defconfig
powerpc                     skiroot_defconfig
sh                          rsk7264_defconfig
powerpc                        fsp2_defconfig
sh                             shx3_defconfig
sh                        sh7785lcr_defconfig
powerpc                      ep88xc_defconfig
powerpc                     sbc8548_defconfig
arm                        mini2440_defconfig
sh                        apsh4ad0a_defconfig
h8300                               defconfig
powerpc                 mpc836x_mds_defconfig
sh                            titan_defconfig
csky                             alldefconfig
m68k                          sun3x_defconfig
arm                          ep93xx_defconfig
sh                           se7724_defconfig
powerpc                     powernv_defconfig
powerpc                      walnut_defconfig
arm                         s3c6400_defconfig
arm                       aspeed_g5_defconfig
sparc                       sparc32_defconfig
mips                         tb0287_defconfig
m68k                          hp300_defconfig
mips                     cu1830-neo_defconfig
sh                           se7343_defconfig
arm                            u300_defconfig
m68k                             alldefconfig
arm                        spear6xx_defconfig
arm                         assabet_defconfig
sh                              ul2_defconfig
xtensa                  nommu_kc705_defconfig
nds32                             allnoconfig
arm                       versatile_defconfig
sh                            hp6xx_defconfig
arm                           sunxi_defconfig
powerpc                     mpc5200_defconfig
powerpc                 mpc8272_ads_defconfig
openrisc                            defconfig
powerpc                       holly_defconfig
arm                          prima2_defconfig
arc                           tb10x_defconfig
m68k                                defconfig
arm                        keystone_defconfig
mips                        nlm_xlr_defconfig
xtensa                              defconfig
powerpc                     stx_gp3_defconfig
arm                       cns3420vb_defconfig
arm                           omap1_defconfig
mips                       rbtx49xx_defconfig
arm                         s3c2410_defconfig
arm                         nhk8815_defconfig
mips                        maltaup_defconfig
mips                           ip32_defconfig
h8300                    h8300h-sim_defconfig
arm                  colibri_pxa300_defconfig
mips                      pistachio_defconfig
arm                       spear13xx_defconfig
mips                          ath79_defconfig
arm                           corgi_defconfig
powerpc                     ep8248e_defconfig
sh                   sh7770_generic_defconfig
powerpc                 mpc85xx_cds_defconfig
sh                          landisk_defconfig
mips                     cu1000-neo_defconfig
arm                        mvebu_v5_defconfig
m68k                       m5275evb_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a004-20201202
x86_64               randconfig-a006-20201202
x86_64               randconfig-a001-20201202
x86_64               randconfig-a002-20201202
x86_64               randconfig-a005-20201202
x86_64               randconfig-a003-20201202
i386                 randconfig-a004-20201202
i386                 randconfig-a005-20201202
i386                 randconfig-a001-20201202
i386                 randconfig-a002-20201202
i386                 randconfig-a006-20201202
i386                 randconfig-a003-20201202
i386                 randconfig-a014-20201202
i386                 randconfig-a013-20201202
i386                 randconfig-a011-20201202
i386                 randconfig-a015-20201202
i386                 randconfig-a012-20201202
i386                 randconfig-a016-20201202
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a016-20201202
x86_64               randconfig-a012-20201202
x86_64               randconfig-a014-20201202
x86_64               randconfig-a013-20201202
x86_64               randconfig-a015-20201202
x86_64               randconfig-a011-20201202

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH 2/2] ASoC: fsl: Add imx-hdmi machine driver
From: Shengjiu Wang @ 2020-12-03  3:51 UTC (permalink / raw)
  To: Nicolin Chen
  Cc: open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS,
	alsa-devel, Timur Tabi, Xiubo Li, Fabio Estevam, Shengjiu Wang,
	Liam Girdwood, Takashi Iwai, Rob Herring, Mark Brown,
	linuxppc-dev, linux-kernel
In-Reply-To: <20201202201955.GB1498@Asurada-Nvidia>

On Thu, Dec 3, 2020 at 4:23 AM Nicolin Chen <nicoleotsuka@gmail.com> wrote:
>
> On Fri, Nov 27, 2020 at 01:30:21PM +0800, Shengjiu Wang wrote:
> > The driver is initially designed for sound card using HDMI
> > interface on i.MX platform. There is internal HDMI IP or
> > external HDMI modules connect with SAI or AUD2HTX interface.
> > It supports both transmitter and receiver devices.
> >
> > Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> > ---
> >  sound/soc/fsl/Kconfig    |  12 ++
> >  sound/soc/fsl/Makefile   |   2 +
> >  sound/soc/fsl/imx-hdmi.c | 235 +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 249 insertions(+)
> >  create mode 100644 sound/soc/fsl/imx-hdmi.c
>
> > diff --git a/sound/soc/fsl/imx-hdmi.c b/sound/soc/fsl/imx-hdmi.c
> > new file mode 100644
> > index 000000000000..ac164514b1b2
> > --- /dev/null
> > +++ b/sound/soc/fsl/imx-hdmi.c
>
> > +static int imx_hdmi_hw_params(struct snd_pcm_substream *substream,
> > +                           struct snd_pcm_hw_params *params)
> > +{
> > +     struct snd_soc_pcm_runtime *rtd = substream->private_data;
> > +     struct imx_hdmi_data *data = snd_soc_card_get_drvdata(rtd->card);
> > +     bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
> > +     struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
> > +     struct snd_soc_card *card = rtd->card;
> > +     struct device *dev = card->dev;
> > +     int ret;
> > +
> > +     /* set cpu DAI configuration */
> > +     ret = snd_soc_dai_set_sysclk(cpu_dai, data->cpu_priv.sysclk_id[tx],
> > +                                  8 * data->cpu_priv.slot_width * params_rate(params),
>
> Looks like fixed 2 slots being used, judging by the set_tdm_slot
> call below. Then...why "8 *"? Probably need a line of comments?

The master clock always 256 * rate, when slot_width=32.  so use
the 8 * slot_width.  will add comments.

>
> > +                                  tx ? SND_SOC_CLOCK_OUT : SND_SOC_CLOCK_IN);
> > +     if (ret && ret != -ENOTSUPP) {
> > +             dev_err(dev, "failed to set cpu sysclk: %d\n", ret);
> > +             return ret;
> > +     }
> > +
> > +     ret = snd_soc_dai_set_tdm_slot(cpu_dai, 0, 0, 2, data->cpu_priv.slot_width);
>
> May have a local variable to cache slot_width.

ok.

>
> > +static int imx_hdmi_probe(struct platform_device *pdev)
>
> > +     data->dai.name = "i.MX HDMI";
> > +     data->dai.stream_name = "i.MX HDMI";
> > +     data->dai.cpus->dai_name = dev_name(&cpu_pdev->dev);
> > +     data->dai.platforms->of_node = cpu_np;
> > +     data->dai.ops = &imx_hdmi_ops;
> > +     data->dai.playback_only = true;
> > +     data->dai.capture_only = false;
> > +     data->dai.init = imx_hdmi_init;
> > +
> > +
> > +     if (of_property_read_bool(np, "hdmi-out")) {
> > +             data->dai.playback_only = true;
> > +             data->dai.capture_only = false;
> > +             data->dai.codecs->dai_name = "i2s-hifi";
> > +             data->dai.codecs->name = "hdmi-audio-codec.1";
> > +             data->dai.dai_fmt = data->dai_fmt |
> > +                                 SND_SOC_DAIFMT_NB_NF |
> > +                                 SND_SOC_DAIFMT_CBS_CFS;
> > +     }
> > +
> > +     if (of_property_read_bool(np, "hdmi-in")) {
> > +             data->dai.playback_only = false;
> > +             data->dai.capture_only = true;
> > +             data->dai.codecs->dai_name = "i2s-hifi";
> > +             data->dai.codecs->name = "hdmi-audio-codec.2";
> > +             data->dai.dai_fmt = data->dai_fmt |
> > +                                 SND_SOC_DAIFMT_NB_NF |
> > +                                 SND_SOC_DAIFMT_CBM_CFM;
> > +     }
> > +
> > +     if ((data->dai.playback_only && data->dai.capture_only) ||
> > +         (!data->dai.playback_only && !data->dai.capture_only)) {
> > +             dev_err(&pdev->dev, "Wrongly enable HDMI DAI link\n");
> > +             goto fail;
> > +     }
>
> Seems that this condition check can never be true, given that:
> 1. By default: playback_only=true && capture_only=false
> 2. Conditionally overwritten: playback_only=true && capture_only=false
> 3. Conditionally overwritten: playback_only=false && capture_only=true
>
> If I understand it correctly, probably should be something like:
>         bool hdmi_out = of_property_read_bool(np, "hdmi-out");
>         bool hdmi_in = of_property_read_bool(np, "hdmi-in");
>
>         if ((hdmi_out && hdmi_in) || (!hdmi_out || !hdmi_in))
>                 // "Invalid HDMI DAI link"; goto fail;
>
>         if (hdmi_out) {
>                 // ...
>         } else if (hdmi_in) {
>                 // ...
>         } else // No need of this line if two properties are exclusive
>

Good catch, will update it.

> > +     data->card.num_links = 1;
> > +     data->card.dai_link = &data->dai;
> > +
> > +     platform_set_drvdata(pdev, &data->card);
>
> Why pass card pointer?

Seems it duplicates with dev_set_drvdata(card->dev, card);
in snd_soc_register_card.  will remove it.

best regards
wang shengjiu

^ permalink raw reply

* [PATCH] powerpc: add security.config, enforcing lockdown=integrity
From: Daniel Axtens @ 2020-12-03  4:28 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Axtens

It's sometimes handy to have a config that boots a bit like a system
under secure boot (forcing lockdown=integrity, without needing any
extra stuff like a command line option).

This config file allows that, and also turns on a few assorted security
and hardening options for good measure.

Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
 arch/powerpc/configs/security.config | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 arch/powerpc/configs/security.config

diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config
new file mode 100644
index 000000000000..1c91a35c6a73
--- /dev/null
+++ b/arch/powerpc/configs/security.config
@@ -0,0 +1,15 @@
+# This is the equivalent of booting with lockdown=integrity
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
+
+# These are some general, reasonably inexpensive hardening options
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+
+# UBSAN bounds checking is very cheap and good for hardening
+CONFIG_UBSAN=y
+# CONFIG_UBSAN_MISC is not set
\ No newline at end of file
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH 6/8] lazy tlb: shoot lazies, a non-refcounting lazy tlb option
From: Andy Lutomirski @ 2020-12-03  5:05 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: linux-arch, X86 ML, Arnd Bergmann, Vasily Gorbik, Dave Hansen,
	Peter Zijlstra, Catalin Marinas, Heiko Carstens, linuxppc-dev,
	LKML, Linux-MM, Christian Borntraeger, Mathieu Desnoyers,
	Andy Lutomirski, Will Deacon
In-Reply-To: <1606879302.tdngvs3yq4.astroid@bobo.none>

> On Dec 1, 2020, at 7:47 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
>
> Excerpts from Andy Lutomirski's message of December 1, 2020 4:31 am:
>> other arch folk: there's some background here:
>>
>> https://lkml.kernel.org/r/CALCETrVXUbe8LfNn-Qs+DzrOQaiw+sFUg1J047yByV31SaTOZw@mail.gmail.com
>>
>>> On Sun, Nov 29, 2020 at 12:16 PM Andy Lutomirski <luto@kernel.org> wrote:
>>>
>>> On Sat, Nov 28, 2020 at 7:54 PM Andy Lutomirski <luto@kernel.org> wrote:
>>>>
>>>> On Sat, Nov 28, 2020 at 8:02 AM Nicholas Piggin <npiggin@gmail.com> wrote:
>>>>>
>>>>> On big systems, the mm refcount can become highly contented when doing
>>>>> a lot of context switching with threaded applications (particularly
>>>>> switching between the idle thread and an application thread).
>>>>>
>>>>> Abandoning lazy tlb slows switching down quite a bit in the important
>>>>> user->idle->user cases, so so instead implement a non-refcounted scheme
>>>>> that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down
>>>>> any remaining lazy ones.
>>>>>
>>>>> Shootdown IPIs are some concern, but they have not been observed to be
>>>>> a big problem with this scheme (the powerpc implementation generated
>>>>> 314 additional interrupts on a 144 CPU system during a kernel compile).
>>>>> There are a number of strategies that could be employed to reduce IPIs
>>>>> if they turn out to be a problem for some workload.
>>>>
>>>> I'm still wondering whether we can do even better.
>>>>
>>>
>>> Hold on a sec.. __mmput() unmaps VMAs, frees pagetables, and flushes
>>> the TLB.  On x86, this will shoot down all lazies as long as even a
>>> single pagetable was freed.  (Or at least it will if we don't have a
>>> serious bug, but the code seems okay.  We'll hit pmd_free_tlb, which
>>> sets tlb->freed_tables, which will trigger the IPI.)  So, on
>>> architectures like x86, the shootdown approach should be free.  The
>>> only way it ought to have any excess IPIs is if we have CPUs in
>>> mm_cpumask() that don't need IPI to free pagetables, which could
>>> happen on paravirt.
>>
>> Indeed, on x86, we do this:
>>
>> [   11.558844]  flush_tlb_mm_range.cold+0x18/0x1d
>> [   11.559905]  tlb_finish_mmu+0x10e/0x1a0
>> [   11.561068]  exit_mmap+0xc8/0x1a0
>> [   11.561932]  mmput+0x29/0xd0
>> [   11.562688]  do_exit+0x316/0xa90
>> [   11.563588]  do_group_exit+0x34/0xb0
>> [   11.564476]  __x64_sys_exit_group+0xf/0x10
>> [   11.565512]  do_syscall_64+0x34/0x50
>>
>> and we have info->freed_tables set.
>>
>> What are the architectures that have large systems like?
>>
>> x86: we already zap lazies, so it should cost basically nothing to do
>
> This is not zapping lazies, this is freeing the user page tables.
>
> "lazy mm" is where a switch to a kernel thread takes on the
> previous mm for its kernel mapping rather than switch to init_mm.

The intent of the code is to flush the TLB after freeing user pages
tables, but, on bare metal, lazies get zapped as a side effect.

Anyway, I'm going to send out a mockup of an alternative approach shortly.

^ permalink raw reply

* [PATCH] powerpc/book3s_hv_uvmem: Check for failed page migration
From: Alistair Popple @ 2020-12-03  5:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alistair Popple, bharata

migrate_vma_pages() may still clear MIGRATE_PFN_MIGRATE on pages which
are not able to be migrated. Drivers may safely copy data prior to
calling migrate_vma_pages() however a remote mapping must not be
established until after migrate_vma_pages() has returned as the
migration could still fail.

UV_PAGE_IN_in both copies and maps the data page, therefore it should
only be called after checking the results of migrate_vma_pages().

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 84e5a2dc8be5..08aa6a90c525 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -762,7 +762,10 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
 		goto out_finalize;
 	}
 
-	if (pagein) {
+	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	migrate_vma_pages(&mig);
+
+	if ((*mig.src & MIGRATE_PFN_MIGRATE) && pagein) {
 		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
 		spage = migrate_pfn_to_page(*mig.src);
 		if (spage) {
@@ -773,8 +776,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
 		}
 	}
 
-	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
-	migrate_vma_pages(&mig);
 out_finalize:
 	migrate_vma_finalize(&mig);
 	return ret;
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH 2/8] x86: use exit_lazy_tlb rather than membarrier_mm_sync_core_before_usermode
From: Andy Lutomirski @ 2020-12-03  5:09 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: linux-arch, Arnd Bergmann, Peter Zijlstra, X86 ML, LKML, Linux-MM,
	Mathieu Desnoyers, Andy Lutomirski, linuxppc-dev
In-Reply-To: <1606876327.dyrhkih2kh.astroid@bobo.none>

On Tue, Dec 1, 2020 at 6:50 PM Nicholas Piggin <npiggin@gmail.com> wrote:
>
> Excerpts from Andy Lutomirski's message of November 29, 2020 3:55 am:
> > On Sat, Nov 28, 2020 at 8:02 AM Nicholas Piggin <npiggin@gmail.com> wrote:
> >>
> >> And get rid of the generic sync_core_before_usermode facility. This is
> >> functionally a no-op in the core scheduler code, but it also catches
> >>
> >> This helper is the wrong way around I think. The idea that membarrier
> >> state requires a core sync before returning to user is the easy one
> >> that does not need hiding behind membarrier calls. The gap in core
> >> synchronization due to x86's sysret/sysexit and lazy tlb mode, is the
> >> tricky detail that is better put in x86 lazy tlb code.
> >>
> >> Consider if an arch did not synchronize core in switch_mm either, then
> >> membarrier_mm_sync_core_before_usermode would be in the wrong place
> >> but arch specific mmu context functions would still be the right place.
> >> There is also a exit_lazy_tlb case that is not covered by this call, which
> >> could be a bugs (kthread use mm the membarrier process's mm then context
> >> switch back to the process without switching mm or lazy mm switch).
> >>
> >> This makes lazy tlb code a bit more modular.
> >
> > I have a couple of membarrier fixes that I want to send out today or
> > tomorrow, and they might eliminate the need for this patch.  Let me
> > think about this a little bit.  I'll cc you.  The existing code is way
> > to subtle and the comments are far too confusing for me to be quickly
> > confident about any of my conclusions :)
> >
>
> Thanks for the head's up. I'll have to have a better look through them
> but I don't know that it eliminates the need for this entirely although
> it might close some gaps and make this not a bug fix. The problem here
> is x86 code wanted something to be called when a lazy mm is unlazied,
> but it missed some spots and also the core scheduler doesn't need to
> know about those x86 details if it has this generic call that annotates
> the lazy handling better.

I'll send v3 tomorrow.  They add more sync_core_before_usermode() callers.

Having looked at your patches a bunch and the membarrier code a bunch,
I don't think I like the approach of pushing this logic out into new
core functions called by arch code.  Right now, even with my
membarrier patches applied, understanding how (for example) the x86
switch_mm_irqs_off() plus the scheduler code provides the barriers
that membarrier needs is quite complicated, and it's not clear to me
that the code is even correct.  At the very least I'm pretty sure that
the x86 comments are misleading.  With your patches, someone trying to
audit the code would have to follow core code calling into arch code
and back out into core code to figure out what's going on.  I think
the result is worse.

I wrote this incomplete patch which takes the opposite approach (sorry
for whitespace damage):

commit 928b5c60e93f475934892d6e0b357ebf0a2bf87d
Author: Andy Lutomirski <luto@kernel.org>
Date:   Wed Dec 2 17:24:02 2020 -0800

    [WIP] x86/mm: Handle unlazying membarrier core sync in the arch code

    The core scheduler isn't a great place for
    membarrier_mm_sync_core_before_usermode() -- the core scheduler
    doesn't actually know whether we are lazy.  With the old code, if a
    CPU is running a membarrier-registered task, goes idle, gets unlazied
    via a TLB shootdown IPI, and switches back to the
    membarrier-registered task, it will do an unnecessary core sync.

    Conveniently, x86 is the only architecture that does anything in this
    hook, so we can just move the code.

    XXX: actually delete the old code.

    Signed-off-by: Andy Lutomirski <luto@kernel.org>

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3338a1feccf9..e27300fc865b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -496,6 +496,8 @@ void switch_mm_irqs_off(struct mm_struct *prev,
struct mm_struct *next,
          * from one thread in a process to another thread in the same
          * process. No TLB flush required.
          */
+
+        // XXX: why is this okay wrt membarrier?
         if (!was_lazy)
             return;

@@ -508,12 +510,24 @@ void switch_mm_irqs_off(struct mm_struct *prev,
struct mm_struct *next,
         smp_mb();
         next_tlb_gen = atomic64_read(&next->context.tlb_gen);
         if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
-                next_tlb_gen)
+            next_tlb_gen) {
+            /*
+             * We're reactivating an mm, and membarrier might
+             * need to serialize.  Tell membarrier.
+             */
+
+            // XXX: I can't understand the logic in
+            // membarrier_mm_sync_core_before_usermode().  What's
+            // the mm check for?
+            membarrier_mm_sync_core_before_usermode(next);
             return;
+        }

         /*
          * TLB contents went out of date while we were in lazy
          * mode. Fall through to the TLB switching code below.
+         * No need for an explicit membarrier invocation -- the CR3
+         * write will serialize.
          */
         new_asid = prev_asid;
         need_flush = true;

^ permalink raw reply related

* [MOCKUP] x86/mm: Lightweight lazy mm refcounting
From: Andy Lutomirski @ 2020-12-03  5:25 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: linux-arch, Arnd Bergmann, Rik van Riel, Will Deacon, X86 ML,
	Dave Hansen, LKML, Linux-MM, Mathieu Desnoyers, Andy Lutomirski,
	Catalin Marinas, linuxppc-dev

For context, this is part of a series here:

https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/mm&id=7c4bcc0a464ca60be1e0aeba805a192be0ee81e5

This code compiles, but I haven't even tried to boot it.  The earlier
part of the series isn't terribly interesting -- it's a handful of
cleanups that remove all reads of ->active_mm from arch/x86.  I've
been meaning to do that for a while, and now I did it.  But, with
that done, I think we can move to a totally different lazy mm refcounting
model.

So this patch is a mockup of what this could look like.  The algorithm
involves no atomics at all in the context switch path except for a
single atomic_long_xchg() of a percpu variable when going from lazy
mode to nonlazy mode.  Even that could be optimized -- I suspect it could
be replaced with non-atomic code if mm_users > 0.  Instead, on mm exit,
there's a single loop over all CPUs on which that mm could be lazily
loaded that atomic_long_cmpxchg_relaxed()'s a remote percpu variable to
tell the CPU to kindly mmdrop() the mm when it reschedules.  All cpus
that don't have the mm lazily loaded are ignored because they can't
have lazy references, and no cpus can gain lazy references after
mm_users hits zero.  (I need to verify that all the relevant barriers
are in place.  I suspect that they are on x86 but that I'm short an
smp_mb() on arches for which _relaxed atomics are genuinely relaxed.)

Here's how I think it fits with various arches:

x86: On bare metal (i.e. paravirt flush unavailable), the loop won't do
much.  The existing TLB shootdown when user tables are freed will
empty mm_cpumask of everything but the calling CPU.  So x86 ends up
pretty close to as good as we can get short of reworking mm_cpumask() itself.

arm64: with s/for_each_cpu/for_each_online_cpu, I think it will give
good performance.  The mmgrab()/mmdrop() overhead goes away, and,
on smallish systems, the cost of the loop should be low.

power: same as ARM, except that the loop may be rather larger since
the systems are bigger.  But I imagine it's still faster than Nick's
approach -- a cmpxchg to a remote cacheline should still be faster than
an IPI shootdown.  (Nick, don't benchmark this patch -- at least the
mm_users optimization mentioned above should be done, but also the
mmgrab() and mmdrop() aren't actually removed.)

Other arches: I don't know.  Further research is required.

What do you all think?

As mentioned, there are several things blatantly wrong with this patch:

The coding stype is not up to kernel standars.  I have prototypes in the
wrong places and other hacks.

mms are likely to be freed with IRQs off.  I think this is safe, but it's
suboptimal.

This whole thing is in arch/x86.  The core algorithm ought to move outside
arch/, but doing so without making a mess might take some thought.  It
doesn't help that different architectures have very different ideas
of what mm_cpumask() does.

Finally, this patch has no benefit by itself.  I didn't remove the
active_mm refounting, so the big benefit of removing mmgrab() and
mmdrop() calls on transitions to and from lazy mode isn't there.
There is no point at all in benchmarking this patch as is.  That
being said, getting rid of the active_mm refcounting shouldn't be
so hard, since x86 (in this tree) no longer uses active_mm at all.

I should contemplate whether the calling CPU is special in
arch_fixup_lazy_mm_refs().  On a very very quick think, it's not, but
it needs more thought.

Signed-off-by: Andy Lutomirski <luto@kernel.org>

 arch/x86/include/asm/tlbflush.h | 20 ++++++++
 arch/x86/mm/tlb.c               | 81 +++++++++++++++++++++++++++++++--
 kernel/fork.c                   |  5 ++
 3 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8c87a2e0b660..efcd4f125f2c 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -124,6 +124,26 @@ struct tlb_state {
 	 */
 	unsigned short user_pcid_flush_mask;

+	/*
+	 * Lazy mm tracking.
+	 *
+	 *  - If this is NULL, it means that any mm_struct referenced by
+	 *    this CPU is kept alive by a real reference count.
+	 *
+	 *  - If this is nonzero but the low bit is clear, it points to
+	 *    an mm_struct that must not be freed out from under this
+	 *    CPU.
+	 *
+	 *  - If the low bit is set, it still points to an mm_struct,
+	 *    but some other CPU has mmgrab()'d it on our behalf, and we
+	 *    must mmdrop() it when we're done with it.
+	 *
+	 * See lazy_mm_grab() and related functions for the precise
+	 * access rules.
+	 */
+	atomic_long_t		lazy_mm;
+
+
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e27300fc865b..00f5bace534b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -8,6 +8,7 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
+#include <linux/sched/mm.h>

 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -420,6 +421,64 @@ void cr4_update_pce(void *ignored)
 static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
 #endif

+static void lazy_mm_grab(struct mm_struct *mm)
+{
+	atomic_long_t *lazy_mm = this_cpu_ptr(&cpu_tlbstate.lazy_mm);
+
+	WARN_ON_ONCE(atomic_long_read(lazy_mm) != 0);
+	atomic_long_set(lazy_mm, (unsigned long)mm);
+}
+
+static void lazy_mm_drop(void)
+{
+	atomic_long_t *lazy_mm = this_cpu_ptr(&cpu_tlbstate.lazy_mm);
+
+	unsigned long prev = atomic_long_xchg(lazy_mm, 0);
+	if (prev & 1)
+		mmdrop((struct mm_struct *)(prev & ~1UL));
+}
+
+void arch_fixup_lazy_mm_refs(struct mm_struct *mm)
+{
+	int cpu;
+
+	/*
+	 * mm_users is zero, so no new lazy refs will be taken.
+	 */
+	WARN_ON_ONCE(atomic_read(&mm->mm_users) != 0);
+
+	for_each_cpu(cpu, mm_cpumask(mm)) {
+		atomic_long_t *lazy_mm = per_cpu_ptr(&cpu_tlbstate.lazy_mm, cpu);
+		unsigned long old;
+
+		// Hmm, is this check actually useful?
+		if (atomic_long_read(lazy_mm) != (unsigned long)mm)
+			continue;
+
+		// XXX: we could optimize this by adding a bunch to
+		// mm_count at the beginning and subtracting the unused refs
+		// back off at the end.
+		mmgrab(mm);
+
+		// XXX: is relaxed okay here?  We need to be sure that the
+		// remote CPU has observed mm_users == 0.  This is just
+		// x86 for now, but we might want to move it into a library
+		// and use it elsewhere.
+		old = atomic_long_cmpxchg_relaxed(lazy_mm, (unsigned long)mm,
+						  (unsigned long)mm | 1);
+		if (old == (unsigned long)mm) {
+			/* The remote CPU now owns the reference we grabbed. */
+		} else {
+			/*
+			 * We raced!  The remote CPU switched mms and no longer
+			 * needs its reference.  We didn't transfer ownership
+			 * of the reference, so drop it.
+			 */
+			mmdrop(mm);
+		}
+	}
+}
+
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
@@ -587,16 +646,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		cr4_update_pce_mm(next);
 		switch_ldt(real_prev, next);
 	}
+
+	// XXX: this can end up in __mmdrop().  Is this okay with IRQs off?
+	// It might be nicer to defer this to a later stage in the scheduler
+	// with IRQs on.
+	if (was_lazy)
+		lazy_mm_drop();
 }

 /*
- * Please ignore the name of this function.  It should be called
- * switch_to_kernel_thread().
+ * Please don't think too hard about the name of this function.  It
+ * should be called something like switch_to_kernel_mm().
  *
  * enter_lazy_tlb() is a hint from the scheduler that we are entering a
- * kernel thread or other context without an mm.  Acceptable implementations
- * include doing nothing whatsoever, switching to init_mm, or various clever
- * lazy tricks to try to minimize TLB flushes.
+ * kernel thread or other context without an mm.  Acceptable
+ * implementations include switching to init_mm, or various clever lazy
+ * tricks to try to minimize TLB flushes.  We are, however, required to
+ * either stop referencing the previous mm or to take some action to
+ * keep it from being freed out from under us.
  *
  * The scheduler reserves the right to call enter_lazy_tlb() several times
  * in a row.  It will notify us that we're going back to a real mm by
@@ -607,7 +674,11 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
 		return;

+	if (this_cpu_read(cpu_tlbstate.is_lazy))
+		return;  /* nothing to do */
+
 	this_cpu_write(cpu_tlbstate.is_lazy, true);
+	lazy_mm_grab(mm);
 }

 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index da8d360fb032..4d68162c1d02 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1066,6 +1066,9 @@ struct mm_struct *mm_alloc(void)
 	return mm_init(mm, current, current_user_ns());
 }

+// XXX: move to a header
+extern void arch_fixup_lazy_mm_refs(struct mm_struct *mm);
+
 static inline void __mmput(struct mm_struct *mm)
 {
 	VM_BUG_ON(atomic_read(&mm->mm_users));
@@ -1084,6 +1087,8 @@ static inline void __mmput(struct mm_struct *mm)
 	}
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
+
+	arch_fixup_lazy_mm_refs(mm);
 	mmdrop(mm);
 }

-- 
2.28.0

^ permalink raw reply related

* Re: [PATCH kernel] powerpc/perf: Stop crashing with generic_compat_pmu
From: Madhavan Srinivasan @ 2020-12-03  5:27 UTC (permalink / raw)
  To: Alexey Kardashevskiy, linuxppc-dev; +Cc: Madhavan Srinivasan
In-Reply-To: <848255d6-5c8f-d4c2-a865-e3a7ffce7fdb@ozlabs.ru>


On 12/2/20 8:31 AM, Alexey Kardashevskiy wrote:
> Hi Maddy,
>
> I just noticed that I still have "powerpc/perf: Add checks for 
> reserved values" in my pile (pushed here 
> https://github.com/aik/linux/commit/61e1bc3f2e19d450e2e2d39174d422160b21957b 
> ), do we still need it? The lockups I saw were fixed by 
> https://github.com/aik/linux/commit/17899eaf88d689 but it is hardly a 
> replacement. Thanks,

sorry missed this. Will look at this again. Since we will need 
generation specific checks for the reserve field.

Maddy

>
>
> On 04/06/2020 02:34, Madhavan Srinivasan wrote:
>>
>>
>> On 6/2/20 8:26 AM, Alexey Kardashevskiy wrote:
>>> The bhrb_filter_map ("The  Branch History  Rolling  Buffer") 
>>> callback is
>>> only defined in raw CPUs' power_pmu structs. The "architected" CPUs use
>>> generic_compat_pmu which does not have this callback and crashed occur.
>>>
>>> This add a NULL pointer check for bhrb_filter_map() which behaves as if
>>> the callback returned an error.
>>>
>>> This does not add the same check for config_bhrb() as the only caller
>>> checks for cpuhw->bhrb_users which remains zero if bhrb_filter_map==0.
>>
>> Changes looks fine.
>> Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
>>
>> The commit be80e758d0c2e ('powerpc/perf: Add generic compat mode pmu 
>> driver')
>> which introduced generic_compat_pmu was merged in v5.2.  So we need to
>> CC stable starting from 5.2 :( .  My bad,  sorry.
>>
>> Maddy
>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>>   arch/powerpc/perf/core-book3s.c | 19 ++++++++++++++-----
>>>   1 file changed, 14 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/perf/core-book3s.c 
>>> b/arch/powerpc/perf/core-book3s.c
>>> index 3dcfecf858f3..36870569bf9c 100644
>>> --- a/arch/powerpc/perf/core-book3s.c
>>> +++ b/arch/powerpc/perf/core-book3s.c
>>> @@ -1515,9 +1515,16 @@ static int power_pmu_add(struct perf_event 
>>> *event, int ef_flags)
>>>       ret = 0;
>>>    out:
>>>       if (has_branch_stack(event)) {
>>> -        power_pmu_bhrb_enable(event);
>>> -        cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
>>> -                    event->attr.branch_sample_type);
>>> +        u64 bhrb_filter = -1;
>>> +
>>> +        if (ppmu->bhrb_filter_map)
>>> +            bhrb_filter = ppmu->bhrb_filter_map(
>>> +                event->attr.branch_sample_type);
>>> +
>>> +        if (bhrb_filter != -1) {
>>> +            cpuhw->bhrb_filter = bhrb_filter;
>>> +            power_pmu_bhrb_enable(event); /* Does bhrb_users++ */
>>> +        }
>>>       }
>>>
>>>       perf_pmu_enable(event->pmu);
>>> @@ -1839,7 +1846,6 @@ static int power_pmu_event_init(struct 
>>> perf_event *event)
>>>       int n;
>>>       int err;
>>>       struct cpu_hw_events *cpuhw;
>>> -    u64 bhrb_filter;
>>>
>>>       if (!ppmu)
>>>           return -ENOENT;
>>> @@ -1945,7 +1951,10 @@ static int power_pmu_event_init(struct 
>>> perf_event *event)
>>>       err = power_check_constraints(cpuhw, events, cflags, n + 1);
>>>
>>>       if (has_branch_stack(event)) {
>>> -        bhrb_filter = ppmu->bhrb_filter_map(
>>> +        u64 bhrb_filter = -1;
>>> +
>>> +        if (ppmu->bhrb_filter_map)
>>> +            bhrb_filter = ppmu->bhrb_filter_map(
>>>                       event->attr.branch_sample_type);
>>>
>>>           if (bhrb_filter == -1) {
>>
>

^ permalink raw reply

* [PATCH kernel v2] powerpc/kuap: Restore AMR after replaying soft interrupts
From: Alexey Kardashevskiy @ 2020-12-03  5:47 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Nicholas Piggin

When interrupted in raw_copy_from_user()/... after user memory access
is enabled, a nested handler may also access user memory (perf is
one example) and when it does so, it calls prevent_read_from_user()
which prevents the upper handler from accessing user memory.

This saves/restores AMR when replaying interrupts.

get_kuap/set_kuap have stubs for disabled KUAP on RADIX but there are
none for hash-only configs (BOOK3E) so this adds stubs and moves
AMR_KUAP_BLOCK_xxx.

Found by syzkaller. More likely to break with enabled
CONFIG_DEBUG_ATOMIC_SLEEP, the call chain is
timer_interrupt -> ktime_get -> read_seqcount_begin -> local_irq_restore.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v2:
* fixed compile on hash
* moved get/set to arch_local_irq_restore
* block KUAP before replaying


---

This is an example:

------------[ cut here ]------------
Bug: Read fault blocked by AMR!
WARNING: CPU: 0 PID: 1603 at /home/aik/p/kernel/arch/powerpc/include/asm/book3s/64/kup-radix.h:145 __do_page_fau

Modules linked in:
CPU: 0 PID: 1603 Comm: amr Not tainted 5.10.0-rc6_v5.10-rc6_a+fstn1 #24
NIP:  c00000000009ece8 LR: c00000000009ece4 CTR: 0000000000000000
REGS: c00000000dc63560 TRAP: 0700   Not tainted  (5.10.0-rc6_v5.10-rc6_a+fstn1)
MSR:  8000000000021033 <SF,ME,IR,DR,RI,LE>  CR: 28002888  XER: 20040000
CFAR: c0000000001fa928 IRQMASK: 1
GPR00: c00000000009ece4 c00000000dc637f0 c000000002397600 000000000000001f
GPR04: c0000000020eb318 0000000000000000 c00000000dc63494 0000000000000027
GPR08: c00000007fe4de68 c00000000dfe9180 0000000000000000 0000000000000001
GPR12: 0000000000002000 c0000000030a0000 0000000000000000 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 bfffffffffffffff
GPR20: 0000000000000000 c0000000134a4020 c0000000019c2218 0000000000000fe0
GPR24: 0000000000000000 0000000000000000 c00000000d106200 0000000040000000
GPR28: 0000000000000000 0000000000000300 c00000000dc63910 c000000001946730
NIP [c00000000009ece8] __do_page_fault+0xb38/0xde0
LR [c00000000009ece4] __do_page_fault+0xb34/0xde0
Call Trace:
[c00000000dc637f0] [c00000000009ece4] __do_page_fault+0xb34/0xde0 (unreliable)
[c00000000dc638a0] [c00000000000c968] handle_page_fault+0x10/0x2c
--- interrupt: 300 at strncpy_from_user+0x290/0x440
    LR = strncpy_from_user+0x284/0x440
[c00000000dc63ba0] [c000000000c3dcb0] strncpy_from_user+0x2f0/0x440 (unreliable)
[c00000000dc63c30] [c00000000068b888] getname_flags+0x88/0x2c0
[c00000000dc63c90] [c000000000662a44] do_sys_openat2+0x2d4/0x5f0
[c00000000dc63d30] [c00000000066560c] do_sys_open+0xcc/0x140
[c00000000dc63dc0] [c000000000045e10] system_call_exception+0x160/0x240
[c00000000dc63e20] [c00000000000da60] system_call_common+0xf0/0x27c
Instruction dump:
409c0048 3fe2ff5b 3bfff128 fac10060 fae10068 482f7a85 60000000 3c62ff5b
7fe4fb78 3863f250 4815bbd9 60000000 <0fe00000> 3c62ff5b 3863f2b8 4815c8b5
irq event stamp: 254
hardirqs last  enabled at (253): [<c000000000019550>] arch_local_irq_restore+0xa0/0x150
hardirqs last disabled at (254): [<c000000000008a10>] data_access_common_virt+0x1b0/0x1d0
softirqs last  enabled at (0): [<c0000000001f6d5c>] copy_process+0x78c/0x2120
softirqs last disabled at (0): [<0000000000000000>] 0x0
---[ end trace ba98aec5151f3aeb ]---
---
 arch/powerpc/include/asm/book3s/64/kup-radix.h |  3 ---
 arch/powerpc/include/asm/kup.h                 | 10 ++++++++++
 arch/powerpc/kernel/irq.c                      |  6 ++++++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index a39e2d193fdc..4ad607461b75 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -5,9 +5,6 @@
 #include <linux/const.h>
 #include <asm/reg.h>
 
-#define AMR_KUAP_BLOCK_READ	UL(0x4000000000000000)
-#define AMR_KUAP_BLOCK_WRITE	UL(0x8000000000000000)
-#define AMR_KUAP_BLOCKED	(AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
 #define AMR_KUAP_SHIFT		62
 
 #ifdef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 0d93331d0fab..e63930767a96 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -14,6 +14,10 @@
 #define KUAP_CURRENT_WRITE	8
 #define KUAP_CURRENT		(KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
 
+#define AMR_KUAP_BLOCK_READ	UL(0x4000000000000000)
+#define AMR_KUAP_BLOCK_WRITE	UL(0x8000000000000000)
+#define AMR_KUAP_BLOCKED	(AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
+
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/kup-radix.h>
 #endif
@@ -64,6 +68,12 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 }
 
 static inline void kuap_check_amr(void) { }
+static inline unsigned long get_kuap(void)
+{
+	return AMR_KUAP_BLOCKED;
+}
+
+static inline void set_kuap(unsigned long value) { }
 
 /*
  * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 7d0f7682d01d..d9fd46da04d6 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -314,6 +314,7 @@ void replay_soft_interrupts(void)
 notrace void arch_local_irq_restore(unsigned long mask)
 {
 	unsigned char irq_happened;
+	unsigned long kuap_state;
 
 	/* Write the new soft-enabled value */
 	irq_soft_mask_set(mask);
@@ -373,9 +374,14 @@ notrace void arch_local_irq_restore(unsigned long mask)
 	irq_soft_mask_set(IRQS_ALL_DISABLED);
 	trace_hardirqs_off();
 
+	kuap_state = get_kuap();
+	set_kuap(AMR_KUAP_BLOCKED);
+
 	replay_soft_interrupts();
 	local_paca->irq_happened = 0;
 
+	set_kuap(kuap_state);
+
 	trace_hardirqs_on();
 	irq_soft_mask_set(IRQS_ENABLED);
 	__hard_irq_enable();
-- 
2.17.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox