* [PATCH] ibmvfc: fix missing cast of ibmvfc_event pointer to u64 handle
From: Tyrel Datwyler @ 2021-01-06 20:37 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
linuxppc-dev, kernel test robot
Commit 2aa0102c6688 ("scsi: ibmvfc: Use correlation token to tag
commands") sets the vfcFrame correlation token to the pointer handle of
the associated ibmvfc_event. However, that commit failed to cast the
pointer to an appropriate type which in this case is a u64. As such
sparse warnings are generated for both correlation token assignments.
ibmvfc.c:2375:36: sparse: incorrect type in argument 1 (different base types)
ibmvfc.c:2375:36: sparse: expected unsigned long long [usertype] val
ibmvfc.c:2375:36: sparse: got struct ibmvfc_event *[assigned] evt
Add the appropriate u64 casts when assigning an ibmvfc_event as a
correlation token.
Fixes: 2aa0102c6688 ("scsi: ibmvfc: Use correlation token to tag commands")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 42e4d35e0d35..7312f31df878 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1744,7 +1744,7 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
iu->pri_task_attr = IBMVFC_SIMPLE_TASK;
}
- vfc_cmd->correlation = cpu_to_be64(evt);
+ vfc_cmd->correlation = cpu_to_be64((u64)evt);
if (likely(!(rc = ibmvfc_map_sg_data(cmnd, evt, vfc_cmd, vhost->dev))))
return ibmvfc_send_event(evt, vhost, 0);
@@ -2418,7 +2418,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev)
tmf->flags = cpu_to_be16((IBMVFC_NO_MEM_DESC | IBMVFC_TMF));
evt->sync_iu = &rsp_iu;
- tmf->correlation = cpu_to_be64(evt);
+ tmf->correlation = cpu_to_be64((u64)evt);
init_completion(&evt->comp);
rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout);
--
2.27.0
^ permalink raw reply related
* [PATCH v2 5/5] ibmvfc: relax locking around ibmvfc_queuecommand
From: Tyrel Datwyler @ 2021-01-06 20:18 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
Brian King, brking, linuxppc-dev
In-Reply-To: <20210106201835.1053593-1-tyreld@linux.ibm.com>
The driver's queuecommand routine is still wrapped to hold the host lock
for the duration of the call. This will become problematic when moving
to multiple queues due to the lock contention preventing asynchronous
submissions to multiple queues. There is no real legitimate reason to
hold the host lock, and previous patches have ensured proper protection
of moving ibmvfc_event objects between free and sent lists.
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index f680f96d5d06..ff86c43b4b33 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1793,10 +1793,9 @@ static struct ibmvfc_cmd *ibmvfc_init_vfc_cmd(struct ibmvfc_event *evt, struct s
* Returns:
* 0 on success / other on failure
**/
-static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
- void (*done) (struct scsi_cmnd *))
+static int ibmvfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
{
- struct ibmvfc_host *vhost = shost_priv(cmnd->device->host);
+ struct ibmvfc_host *vhost = shost_priv(shost);
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
struct ibmvfc_cmd *vfc_cmd;
struct ibmvfc_fcp_cmd_iu *iu;
@@ -1806,7 +1805,7 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
if (unlikely((rc = fc_remote_port_chkready(rport))) ||
unlikely((rc = ibmvfc_host_chkready(vhost)))) {
cmnd->result = rc;
- done(cmnd);
+ cmnd->scsi_done(cmnd);
return 0;
}
@@ -1814,7 +1813,6 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_scsi_done, IBMVFC_CMD_FORMAT);
evt->cmnd = cmnd;
- cmnd->scsi_done = done;
vfc_cmd = ibmvfc_init_vfc_cmd(evt, cmnd->device);
iu = ibmvfc_get_fcp_iu(vhost, vfc_cmd);
@@ -1841,12 +1839,10 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
"Failed to map DMA buffer for command. rc=%d\n", rc);
cmnd->result = DID_ERROR << 16;
- done(cmnd);
+ cmnd->scsi_done(cmnd);
return 0;
}
-static DEF_SCSI_QCMD(ibmvfc_queuecommand)
-
/**
* ibmvfc_sync_completion - Signal that a synchronous command has completed
* @evt: ibmvfc event struct
--
2.27.0
^ permalink raw reply related
* [PATCH v2 4/5] ibmvfc: complete commands outside the host/queue lock
From: Tyrel Datwyler @ 2021-01-06 20:18 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
Brian King, brking, linuxppc-dev
In-Reply-To: <20210106201835.1053593-1-tyreld@linux.ibm.com>
Drain the command queue and place all commands on a completion list.
Perform command completion on that list outside the host/queue locks.
Further, move purged command completions outside the host_lock as well.
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 58 ++++++++++++++++++++++++++--------
drivers/scsi/ibmvscsi/ibmvfc.h | 3 +-
2 files changed, 47 insertions(+), 14 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 69a6401ca504..f680f96d5d06 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -894,7 +894,7 @@ static void ibmvfc_scsi_eh_done(struct ibmvfc_event *evt)
* @purge_list: list head of failed commands
*
* This function runs completions on commands to fail as a result of a
- * host reset or platform migration. Caller must hold host_lock.
+ * host reset or platform migration.
**/
static void ibmvfc_complete_purge(struct list_head *purge_list)
{
@@ -1407,6 +1407,23 @@ static struct ibmvfc_event *ibmvfc_get_event(struct ibmvfc_queue *queue)
return evt;
}
+/**
+ * ibmvfc_locked_done - Calls evt completion with host_lock held
+ * @evt: ibmvfc evt to complete
+ *
+ * All non-scsi command completion callbacks have the expectation that the
+ * host_lock is held. This callback is used by ibmvfc_init_event to wrap a
+ * MAD evt with the host_lock.
+ **/
+static void ibmvfc_locked_done(struct ibmvfc_event *evt)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(evt->vhost->host->host_lock, flags);
+ evt->_done(evt);
+ spin_unlock_irqrestore(evt->vhost->host->host_lock, flags);
+}
+
/**
* ibmvfc_init_event - Initialize fields in an event struct that are always
* required.
@@ -1419,9 +1436,14 @@ static void ibmvfc_init_event(struct ibmvfc_event *evt,
{
evt->cmnd = NULL;
evt->sync_iu = NULL;
- evt->crq.format = format;
- evt->done = done;
evt->eh_comp = NULL;
+ evt->crq.format = format;
+ if (format == IBMVFC_CMD_FORMAT)
+ evt->done = done;
+ else {
+ evt->_done = done;
+ evt->done = ibmvfc_locked_done;
+ }
}
/**
@@ -1640,7 +1662,9 @@ static void ibmvfc_relogin(struct scsi_device *sdev)
struct ibmvfc_host *vhost = shost_priv(sdev->host);
struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
struct ibmvfc_target *tgt;
+ unsigned long flags;
+ spin_lock_irqsave(vhost->host->host_lock, flags);
list_for_each_entry(tgt, &vhost->targets, queue) {
if (rport == tgt->rport) {
ibmvfc_del_tgt(tgt);
@@ -1649,6 +1673,7 @@ static void ibmvfc_relogin(struct scsi_device *sdev)
}
ibmvfc_reinit_host(vhost);
+ spin_unlock_irqrestore(vhost->host->host_lock, flags);
}
/**
@@ -2901,7 +2926,8 @@ static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
* @vhost: ibmvfc host struct
*
**/
-static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
+static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
+ struct list_head *evt_doneq)
{
long rc;
struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
@@ -2972,12 +2998,9 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
return;
}
- del_timer(&evt->timer);
spin_lock(&evt->queue->l_lock);
- list_del(&evt->queue_list);
+ list_move_tail(&evt->queue_list, evt_doneq);
spin_unlock(&evt->queue->l_lock);
- ibmvfc_trc_end(evt);
- evt->done(evt);
}
/**
@@ -3364,8 +3387,10 @@ static void ibmvfc_tasklet(void *data)
struct vio_dev *vdev = to_vio_dev(vhost->dev);
struct ibmvfc_crq *crq;
struct ibmvfc_async_crq *async;
+ struct ibmvfc_event *evt, *temp;
unsigned long flags;
int done = 0;
+ LIST_HEAD(evt_doneq);
spin_lock_irqsave(vhost->host->host_lock, flags);
spin_lock(vhost->crq.q_lock);
@@ -3379,7 +3404,7 @@ static void ibmvfc_tasklet(void *data)
/* Pull all the valid messages off the CRQ */
while ((crq = ibmvfc_next_crq(vhost)) != NULL) {
- ibmvfc_handle_crq(crq, vhost);
+ ibmvfc_handle_crq(crq, vhost, &evt_doneq);
crq->valid = 0;
wmb();
}
@@ -3392,7 +3417,7 @@ static void ibmvfc_tasklet(void *data)
wmb();
} else if ((crq = ibmvfc_next_crq(vhost)) != NULL) {
vio_disable_interrupts(vdev);
- ibmvfc_handle_crq(crq, vhost);
+ ibmvfc_handle_crq(crq, vhost, &evt_doneq);
crq->valid = 0;
wmb();
} else
@@ -3401,6 +3426,13 @@ static void ibmvfc_tasklet(void *data)
spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+ list_for_each_entry_safe(evt, temp, &evt_doneq, queue_list) {
+ del_timer(&evt->timer);
+ list_del(&evt->queue_list);
+ ibmvfc_trc_end(evt);
+ evt->done(evt);
+ }
}
/**
@@ -4790,8 +4822,8 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
case IBMVFC_HOST_ACTION_RESET:
vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
list_splice_init(&vhost->purge, &purge);
- ibmvfc_complete_purge(&purge);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
+ ibmvfc_complete_purge(&purge);
rc = ibmvfc_reset_crq(vhost);
spin_lock_irqsave(vhost->host->host_lock, flags);
if (rc == H_CLOSED)
@@ -4805,8 +4837,8 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
case IBMVFC_HOST_ACTION_REENABLE:
vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
list_splice_init(&vhost->purge, &purge);
- ibmvfc_complete_purge(&purge);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
+ ibmvfc_complete_purge(&purge);
rc = ibmvfc_reenable_crq_queue(vhost);
spin_lock_irqsave(vhost->host->host_lock, flags);
if (rc || (rc = ibmvfc_send_crq_init(vhost))) {
@@ -5369,8 +5401,8 @@ static int ibmvfc_remove(struct vio_dev *vdev)
spin_lock_irqsave(vhost->host->host_lock, flags);
ibmvfc_purge_requests(vhost, DID_ERROR);
list_splice_init(&vhost->purge, &purge);
- ibmvfc_complete_purge(&purge);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
+ ibmvfc_complete_purge(&purge);
ibmvfc_free_event_pool(vhost, &vhost->crq);
ibmvfc_free_mem(vhost);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index faf5b50d65b9..632e977449c5 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -733,7 +733,8 @@ struct ibmvfc_event {
struct scsi_cmnd *cmnd;
atomic_t free;
union ibmvfc_iu *xfer_iu;
- void (*done) (struct ibmvfc_event *);
+ void (*done)(struct ibmvfc_event *evt);
+ void (*_done)(struct ibmvfc_event *evt);
struct ibmvfc_crq crq;
union ibmvfc_iu iu;
union ibmvfc_iu *sync_iu;
--
2.27.0
^ permalink raw reply related
* [PATCH v2 0/5] ibmvfc: MQ preparatory locking work
From: Tyrel Datwyler @ 2021-01-06 20:18 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
linuxppc-dev
The ibmvfc driver in its current form relies heavily on the host_lock. This
patchset introduces a generic queue with its own queue lock and sent/free event
list locks. This generic queue allows the driver to decouple the primary queue
and future subordinate queues from the host lock reducing lock contention while
also relaxing locking for submissions and completions to simply the list lock of
the queue in question.
changes in v2:
* Patch 4: Made ibmvfc_locked_done() static fixing a no-prototype warning
Tyrel Datwyler (5):
ibmvfc: define generic queue structure for CRQs
ibmvfc: make command event pool queue specific
ibmvfc: define per-queue state/list locks
ibmvfc: complete commands outside the host/queue lock
ibmvfc: relax locking around ibmvfc_queuecommand
drivers/scsi/ibmvscsi/ibmvfc.c | 379 ++++++++++++++++++++++-----------
drivers/scsi/ibmvscsi/ibmvfc.h | 54 +++--
2 files changed, 286 insertions(+), 147 deletions(-)
--
2.27.0
^ permalink raw reply
* [PATCH v2 3/5] ibmvfc: define per-queue state/list locks
From: Tyrel Datwyler @ 2021-01-06 20:18 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
Brian King, brking, linuxppc-dev
In-Reply-To: <20210106201835.1053593-1-tyreld@linux.ibm.com>
Define per-queue locks for protecting queue state and event pool
sent/free lists. The evt list lock is initially redundant but it allows
the driver to be modified in the follow-up patches to relax the queue
locking around submissions and completions.
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 93 +++++++++++++++++++++++++++-------
drivers/scsi/ibmvscsi/ibmvfc.h | 7 ++-
2 files changed, 80 insertions(+), 20 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 8de2a25b05ee..69a6401ca504 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -176,8 +176,9 @@ static void ibmvfc_trc_start(struct ibmvfc_event *evt)
struct ibmvfc_mad_common *mad = &evt->iu.mad_common;
struct ibmvfc_fcp_cmd_iu *iu = ibmvfc_get_fcp_iu(vhost, vfc_cmd);
struct ibmvfc_trace_entry *entry;
+ int index = atomic_inc_return(&vhost->trace_index) & IBMVFC_TRACE_INDEX_MASK;
- entry = &vhost->trace[vhost->trace_index++];
+ entry = &vhost->trace[index];
entry->evt = evt;
entry->time = jiffies;
entry->fmt = evt->crq.format;
@@ -211,8 +212,10 @@ static void ibmvfc_trc_end(struct ibmvfc_event *evt)
struct ibmvfc_mad_common *mad = &evt->xfer_iu->mad_common;
struct ibmvfc_fcp_cmd_iu *iu = ibmvfc_get_fcp_iu(vhost, vfc_cmd);
struct ibmvfc_fcp_rsp *rsp = ibmvfc_get_fcp_rsp(vhost, vfc_cmd);
- struct ibmvfc_trace_entry *entry = &vhost->trace[vhost->trace_index++];
+ struct ibmvfc_trace_entry *entry;
+ int index = atomic_inc_return(&vhost->trace_index) & IBMVFC_TRACE_INDEX_MASK;
+ entry = &vhost->trace[index];
entry->evt = evt;
entry->time = jiffies;
entry->fmt = evt->crq.format;
@@ -805,6 +808,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
spin_lock_irqsave(vhost->host->host_lock, flags);
+ spin_lock(vhost->crq.q_lock);
vhost->state = IBMVFC_NO_CRQ;
vhost->logged_in = 0;
@@ -821,6 +825,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
dev_warn(vhost->dev, "Partner adapter not ready\n");
else if (rc != 0)
dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc);
+ spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
return rc;
@@ -853,10 +858,16 @@ static int ibmvfc_valid_event(struct ibmvfc_event_pool *pool,
static void ibmvfc_free_event(struct ibmvfc_event *evt)
{
struct ibmvfc_event_pool *pool = &evt->queue->evt_pool;
+ unsigned long flags;
BUG_ON(!ibmvfc_valid_event(pool, evt));
BUG_ON(atomic_inc_return(&evt->free) != 1);
+
+ spin_lock_irqsave(&evt->queue->l_lock, flags);
list_add_tail(&evt->queue_list, &evt->queue->free);
+ if (evt->eh_comp)
+ complete(evt->eh_comp);
+ spin_unlock_irqrestore(&evt->queue->l_lock, flags);
}
/**
@@ -875,12 +886,27 @@ static void ibmvfc_scsi_eh_done(struct ibmvfc_event *evt)
cmnd->scsi_done(cmnd);
}
- if (evt->eh_comp)
- complete(evt->eh_comp);
-
ibmvfc_free_event(evt);
}
+/**
+ * ibmvfc_complete_purge - Complete failed command list
+ * @purge_list: list head of failed commands
+ *
+ * This function runs completions on commands to fail as a result of a
+ * host reset or platform migration. Caller must hold host_lock.
+ **/
+static void ibmvfc_complete_purge(struct list_head *purge_list)
+{
+ struct ibmvfc_event *evt, *pos;
+
+ list_for_each_entry_safe(evt, pos, purge_list, queue_list) {
+ list_del(&evt->queue_list);
+ ibmvfc_trc_end(evt);
+ evt->done(evt);
+ }
+}
+
/**
* ibmvfc_fail_request - Fail request with specified error code
* @evt: ibmvfc event struct
@@ -897,10 +923,7 @@ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
} else
evt->xfer_iu->mad_common.status = cpu_to_be16(IBMVFC_MAD_DRIVER_FAILED);
- list_del(&evt->queue_list);
del_timer(&evt->timer);
- ibmvfc_trc_end(evt);
- evt->done(evt);
}
/**
@@ -914,10 +937,14 @@ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
static void ibmvfc_purge_requests(struct ibmvfc_host *vhost, int error_code)
{
struct ibmvfc_event *evt, *pos;
+ unsigned long flags;
ibmvfc_dbg(vhost, "Purging all requests\n");
+ spin_lock_irqsave(&vhost->crq.l_lock, flags);
list_for_each_entry_safe(evt, pos, &vhost->crq.sent, queue_list)
ibmvfc_fail_request(evt, error_code);
+ list_splice_init(&vhost->crq.sent, &vhost->purge);
+ spin_unlock_irqrestore(&vhost->crq.l_lock, flags);
}
/**
@@ -1314,6 +1341,7 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost,
INIT_LIST_HEAD(&queue->sent);
INIT_LIST_HEAD(&queue->free);
+ spin_lock_init(&queue->l_lock);
for (i = 0; i < pool->size; ++i) {
struct ibmvfc_event *evt = &pool->events[i];
@@ -1368,11 +1396,14 @@ static void ibmvfc_free_event_pool(struct ibmvfc_host *vhost,
static struct ibmvfc_event *ibmvfc_get_event(struct ibmvfc_queue *queue)
{
struct ibmvfc_event *evt;
+ unsigned long flags;
+ spin_lock_irqsave(&queue->l_lock, flags);
BUG_ON(list_empty(&queue->free));
evt = list_entry(queue->free.next, struct ibmvfc_event, queue_list);
atomic_set(&evt->free, 0);
list_del(&evt->queue_list);
+ spin_unlock_irqrestore(&queue->l_lock, flags);
return evt;
}
@@ -1506,6 +1537,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
struct ibmvfc_host *vhost, unsigned long timeout)
{
__be64 *crq_as_u64 = (__be64 *) &evt->crq;
+ unsigned long flags;
int rc;
/* Copy the IU into the transfer area */
@@ -1517,7 +1549,6 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
else
BUG();
- list_add_tail(&evt->queue_list, &evt->queue->sent);
timer_setup(&evt->timer, ibmvfc_timeout, 0);
if (timeout) {
@@ -1525,11 +1556,15 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
add_timer(&evt->timer);
}
+ spin_lock_irqsave(&evt->queue->l_lock, flags);
+ list_add_tail(&evt->queue_list, &evt->queue->sent);
+
mb();
if ((rc = ibmvfc_send_crq(vhost, be64_to_cpu(crq_as_u64[0]),
be64_to_cpu(crq_as_u64[1])))) {
list_del(&evt->queue_list);
+ spin_unlock_irqrestore(&evt->queue->l_lock, flags);
del_timer(&evt->timer);
/* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY.
@@ -1554,8 +1589,10 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
evt->xfer_iu->mad_common.status = cpu_to_be16(IBMVFC_MAD_CRQ_ERROR);
evt->done(evt);
- } else
+ } else {
+ spin_unlock_irqrestore(&evt->queue->l_lock, flags);
ibmvfc_trc_start(evt);
+ }
return 0;
}
@@ -1663,9 +1700,6 @@ static void ibmvfc_scsi_done(struct ibmvfc_event *evt)
cmnd->scsi_done(cmnd);
}
- if (evt->eh_comp)
- complete(evt->eh_comp);
-
ibmvfc_free_event(evt);
}
@@ -2219,28 +2253,28 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
ENTER;
do {
wait = 0;
- spin_lock_irqsave(vhost->host->host_lock, flags);
+ spin_lock_irqsave(&vhost->crq.l_lock, flags);
list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (match(evt, device)) {
evt->eh_comp = &comp;
wait++;
}
}
- spin_unlock_irqrestore(vhost->host->host_lock, flags);
+ spin_unlock_irqrestore(&vhost->crq.l_lock, flags);
if (wait) {
timeout = wait_for_completion_timeout(&comp, timeout);
if (!timeout) {
wait = 0;
- spin_lock_irqsave(vhost->host->host_lock, flags);
+ spin_lock_irqsave(&vhost->crq.l_lock, flags);
list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (match(evt, device)) {
evt->eh_comp = NULL;
wait++;
}
}
- spin_unlock_irqrestore(vhost->host->host_lock, flags);
+ spin_unlock_irqrestore(&vhost->crq.l_lock, flags);
if (wait)
dev_err(vhost->dev, "Timed out waiting for aborted commands\n");
LEAVE;
@@ -2277,14 +2311,16 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
u16 status;
ENTER;
- spin_lock_irqsave(vhost->host->host_lock, flags);
found_evt = NULL;
+ spin_lock_irqsave(vhost->host->host_lock, flags);
+ spin_lock(&vhost->crq.l_lock);
list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (evt->cmnd && evt->cmnd->device == sdev) {
found_evt = evt;
break;
}
}
+ spin_unlock(&vhost->crq.l_lock);
if (!found_evt) {
if (vhost->log_level > IBMVFC_DEFAULT_LOG_LEVEL)
@@ -2414,14 +2450,16 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev)
unsigned long flags, timeout = IBMVFC_ABORT_TIMEOUT;
int rsp_code = 0;
- spin_lock_irqsave(vhost->host->host_lock, flags);
found_evt = NULL;
+ spin_lock_irqsave(vhost->host->host_lock, flags);
+ spin_lock(&vhost->crq.l_lock);
list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (evt->cmnd && evt->cmnd->device == sdev) {
found_evt = evt;
break;
}
}
+ spin_unlock(&vhost->crq.l_lock);
if (!found_evt) {
if (vhost->log_level > IBMVFC_DEFAULT_LOG_LEVEL)
@@ -2935,7 +2973,9 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
}
del_timer(&evt->timer);
+ spin_lock(&evt->queue->l_lock);
list_del(&evt->queue_list);
+ spin_unlock(&evt->queue->l_lock);
ibmvfc_trc_end(evt);
evt->done(evt);
}
@@ -3328,6 +3368,7 @@ static void ibmvfc_tasklet(void *data)
int done = 0;
spin_lock_irqsave(vhost->host->host_lock, flags);
+ spin_lock(vhost->crq.q_lock);
while (!done) {
/* Pull all the valid messages off the async CRQ */
while ((async = ibmvfc_next_async_crq(vhost)) != NULL) {
@@ -3358,6 +3399,7 @@ static void ibmvfc_tasklet(void *data)
done = 1;
}
+ spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
}
@@ -4734,6 +4776,7 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
struct ibmvfc_target *tgt;
unsigned long flags;
struct fc_rport *rport;
+ LIST_HEAD(purge);
int rc;
ibmvfc_log_ae(vhost, vhost->events_to_log);
@@ -4746,6 +4789,8 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
break;
case IBMVFC_HOST_ACTION_RESET:
vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
+ list_splice_init(&vhost->purge, &purge);
+ ibmvfc_complete_purge(&purge);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
rc = ibmvfc_reset_crq(vhost);
spin_lock_irqsave(vhost->host->host_lock, flags);
@@ -4759,6 +4804,8 @@ static void ibmvfc_do_work(struct ibmvfc_host *vhost)
break;
case IBMVFC_HOST_ACTION_REENABLE:
vhost->action = IBMVFC_HOST_ACTION_TGT_DEL;
+ list_splice_init(&vhost->purge, &purge);
+ ibmvfc_complete_purge(&purge);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
rc = ibmvfc_reenable_crq_queue(vhost);
spin_lock_irqsave(vhost->host->host_lock, flags);
@@ -4936,6 +4983,9 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost,
size_t fmt_size;
ENTER;
+ spin_lock_init(&queue->_lock);
+ queue->q_lock = &queue->_lock;
+
switch (fmt) {
case IBMVFC_CRQ_FMT:
fmt_size = sizeof(*queue->msgs.crq);
@@ -5098,6 +5148,7 @@ static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost)
vhost->trace = kcalloc(IBMVFC_NUM_TRACE_ENTRIES,
sizeof(struct ibmvfc_trace_entry), GFP_KERNEL);
+ atomic_set(&vhost->trace_index, -1);
if (!vhost->trace)
goto free_disc_buffer;
@@ -5214,6 +5265,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
vhost = shost_priv(shost);
INIT_LIST_HEAD(&vhost->targets);
+ INIT_LIST_HEAD(&vhost->purge);
sprintf(vhost->name, IBMVFC_NAME);
vhost->host = shost;
vhost->dev = dev;
@@ -5298,6 +5350,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
static int ibmvfc_remove(struct vio_dev *vdev)
{
struct ibmvfc_host *vhost = dev_get_drvdata(&vdev->dev);
+ LIST_HEAD(purge);
unsigned long flags;
ENTER;
@@ -5315,6 +5368,8 @@ static int ibmvfc_remove(struct vio_dev *vdev)
spin_lock_irqsave(vhost->host->host_lock, flags);
ibmvfc_purge_requests(vhost, DID_ERROR);
+ list_splice_init(&vhost->purge, &purge);
+ ibmvfc_complete_purge(&purge);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
ibmvfc_free_event_pool(vhost, &vhost->crq);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 61c73b6f7a77..faf5b50d65b9 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -768,10 +768,13 @@ struct ibmvfc_queue {
dma_addr_t msg_token;
enum ibmvfc_msg_fmt fmt;
int size, cur;
+ spinlock_t _lock;
+ spinlock_t *q_lock;
struct ibmvfc_event_pool evt_pool;
struct list_head sent;
struct list_head free;
+ spinlock_t l_lock;
};
enum ibmvfc_host_action {
@@ -808,11 +811,13 @@ struct ibmvfc_host {
enum ibmvfc_host_action action;
#define IBMVFC_NUM_TRACE_INDEX_BITS 8
#define IBMVFC_NUM_TRACE_ENTRIES (1 << IBMVFC_NUM_TRACE_INDEX_BITS)
+#define IBMVFC_TRACE_INDEX_MASK (IBMVFC_NUM_TRACE_ENTRIES - 1)
#define IBMVFC_TRACE_SIZE (sizeof(struct ibmvfc_trace_entry) * IBMVFC_NUM_TRACE_ENTRIES)
struct ibmvfc_trace_entry *trace;
- u32 trace_index:IBMVFC_NUM_TRACE_INDEX_BITS;
+ atomic_t trace_index;
int num_targets;
struct list_head targets;
+ struct list_head purge;
struct device *dev;
struct dma_pool *sg_pool;
mempool_t *tgt_pool;
--
2.27.0
^ permalink raw reply related
* [PATCH v2 2/5] ibmvfc: make command event pool queue specific
From: Tyrel Datwyler @ 2021-01-06 20:18 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
Brian King, brking, linuxppc-dev
In-Reply-To: <20210106201835.1053593-1-tyreld@linux.ibm.com>
There is currently a single command event pool per host. In anticipation
of providing multiple queues add a per-queue event pool definition and
reimplement the existing CRQ to use its queue defined event pool for
command submission and completion.
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 95 ++++++++++++++++++----------------
drivers/scsi/ibmvscsi/ibmvfc.h | 10 ++--
2 files changed, 55 insertions(+), 50 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index c8e7c4701ac4..8de2a25b05ee 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -852,12 +852,11 @@ static int ibmvfc_valid_event(struct ibmvfc_event_pool *pool,
**/
static void ibmvfc_free_event(struct ibmvfc_event *evt)
{
- struct ibmvfc_host *vhost = evt->vhost;
- struct ibmvfc_event_pool *pool = &vhost->pool;
+ struct ibmvfc_event_pool *pool = &evt->queue->evt_pool;
BUG_ON(!ibmvfc_valid_event(pool, evt));
BUG_ON(atomic_inc_return(&evt->free) != 1);
- list_add_tail(&evt->queue, &vhost->free);
+ list_add_tail(&evt->queue_list, &evt->queue->free);
}
/**
@@ -898,7 +897,7 @@ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
} else
evt->xfer_iu->mad_common.status = cpu_to_be16(IBMVFC_MAD_DRIVER_FAILED);
- list_del(&evt->queue);
+ list_del(&evt->queue_list);
del_timer(&evt->timer);
ibmvfc_trc_end(evt);
evt->done(evt);
@@ -917,7 +916,7 @@ static void ibmvfc_purge_requests(struct ibmvfc_host *vhost, int error_code)
struct ibmvfc_event *evt, *pos;
ibmvfc_dbg(vhost, "Purging all requests\n");
- list_for_each_entry_safe(evt, pos, &vhost->sent, queue)
+ list_for_each_entry_safe(evt, pos, &vhost->crq.sent, queue_list)
ibmvfc_fail_request(evt, error_code);
}
@@ -1292,10 +1291,11 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
*
* Returns zero on success.
**/
-static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost)
+static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost,
+ struct ibmvfc_queue *queue)
{
int i;
- struct ibmvfc_event_pool *pool = &vhost->pool;
+ struct ibmvfc_event_pool *pool = &queue->evt_pool;
ENTER;
pool->size = max_requests + IBMVFC_NUM_INTERNAL_REQ;
@@ -1312,6 +1312,9 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost)
return -ENOMEM;
}
+ INIT_LIST_HEAD(&queue->sent);
+ INIT_LIST_HEAD(&queue->free);
+
for (i = 0; i < pool->size; ++i) {
struct ibmvfc_event *evt = &pool->events[i];
atomic_set(&evt->free, 1);
@@ -1319,8 +1322,9 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost)
evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i));
evt->xfer_iu = pool->iu_storage + i;
evt->vhost = vhost;
+ evt->queue = queue;
evt->ext_list = NULL;
- list_add_tail(&evt->queue, &vhost->free);
+ list_add_tail(&evt->queue_list, &queue->free);
}
LEAVE;
@@ -1332,14 +1336,15 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost)
* @vhost: ibmvfc host who owns the event pool
*
**/
-static void ibmvfc_free_event_pool(struct ibmvfc_host *vhost)
+static void ibmvfc_free_event_pool(struct ibmvfc_host *vhost,
+ struct ibmvfc_queue *queue)
{
int i;
- struct ibmvfc_event_pool *pool = &vhost->pool;
+ struct ibmvfc_event_pool *pool = &queue->evt_pool;
ENTER;
for (i = 0; i < pool->size; ++i) {
- list_del(&pool->events[i].queue);
+ list_del(&pool->events[i].queue_list);
BUG_ON(atomic_read(&pool->events[i].free) != 1);
if (pool->events[i].ext_list)
dma_pool_free(vhost->sg_pool,
@@ -1360,14 +1365,14 @@ static void ibmvfc_free_event_pool(struct ibmvfc_host *vhost)
*
* Returns a free event from the pool.
**/
-static struct ibmvfc_event *ibmvfc_get_event(struct ibmvfc_host *vhost)
+static struct ibmvfc_event *ibmvfc_get_event(struct ibmvfc_queue *queue)
{
struct ibmvfc_event *evt;
- BUG_ON(list_empty(&vhost->free));
- evt = list_entry(vhost->free.next, struct ibmvfc_event, queue);
+ BUG_ON(list_empty(&queue->free));
+ evt = list_entry(queue->free.next, struct ibmvfc_event, queue_list);
atomic_set(&evt->free, 0);
- list_del(&evt->queue);
+ list_del(&evt->queue_list);
return evt;
}
@@ -1512,7 +1517,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
else
BUG();
- list_add_tail(&evt->queue, &vhost->sent);
+ list_add_tail(&evt->queue_list, &evt->queue->sent);
timer_setup(&evt->timer, ibmvfc_timeout, 0);
if (timeout) {
@@ -1524,7 +1529,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
if ((rc = ibmvfc_send_crq(vhost, be64_to_cpu(crq_as_u64[0]),
be64_to_cpu(crq_as_u64[1])))) {
- list_del(&evt->queue);
+ list_del(&evt->queue_list);
del_timer(&evt->timer);
/* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY.
@@ -1747,7 +1752,7 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
}
cmnd->result = (DID_OK << 16);
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_scsi_done, IBMVFC_CMD_FORMAT);
evt->cmnd = cmnd;
cmnd->scsi_done = done;
@@ -1836,7 +1841,7 @@ static int ibmvfc_bsg_timeout(struct bsg_job *job)
}
vhost->aborting_passthru = 1;
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_bsg_timeout_done, IBMVFC_MAD_FORMAT);
tmf = &evt->iu.tmf;
@@ -1894,7 +1899,7 @@ static int ibmvfc_bsg_plogi(struct ibmvfc_host *vhost, unsigned int port_id)
if (unlikely((rc = ibmvfc_host_chkready(vhost))))
goto unlock_out;
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT);
plogi = &evt->iu.plogi;
memset(plogi, 0, sizeof(*plogi));
@@ -2012,7 +2017,7 @@ static int ibmvfc_bsg_request(struct bsg_job *job)
goto out;
}
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT);
mad = &evt->iu.passthru;
@@ -2096,7 +2101,7 @@ static int ibmvfc_reset_device(struct scsi_device *sdev, int type, char *desc)
spin_lock_irqsave(vhost->host->host_lock, flags);
if (vhost->state == IBMVFC_ACTIVE) {
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_CMD_FORMAT);
tmf = ibmvfc_init_vfc_cmd(evt, sdev);
iu = ibmvfc_get_fcp_iu(vhost, tmf);
@@ -2215,7 +2220,7 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
do {
wait = 0;
spin_lock_irqsave(vhost->host->host_lock, flags);
- list_for_each_entry(evt, &vhost->sent, queue) {
+ list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (match(evt, device)) {
evt->eh_comp = &comp;
wait++;
@@ -2229,7 +2234,7 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
if (!timeout) {
wait = 0;
spin_lock_irqsave(vhost->host->host_lock, flags);
- list_for_each_entry(evt, &vhost->sent, queue) {
+ list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (match(evt, device)) {
evt->eh_comp = NULL;
wait++;
@@ -2274,7 +2279,7 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
ENTER;
spin_lock_irqsave(vhost->host->host_lock, flags);
found_evt = NULL;
- list_for_each_entry(evt, &vhost->sent, queue) {
+ list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (evt->cmnd && evt->cmnd->device == sdev) {
found_evt = evt;
break;
@@ -2289,7 +2294,7 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
}
if (vhost->logged_in) {
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_MAD_FORMAT);
tmf = &evt->iu.tmf;
@@ -2411,7 +2416,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev)
spin_lock_irqsave(vhost->host->host_lock, flags);
found_evt = NULL;
- list_for_each_entry(evt, &vhost->sent, queue) {
+ list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
if (evt->cmnd && evt->cmnd->device == sdev) {
found_evt = evt;
break;
@@ -2426,7 +2431,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev)
}
if (vhost->state == IBMVFC_ACTIVE) {
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_sync_completion, IBMVFC_CMD_FORMAT);
tmf = ibmvfc_init_vfc_cmd(evt, sdev);
iu = ibmvfc_get_fcp_iu(vhost, tmf);
@@ -2917,7 +2922,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
* things we send. Make sure this response is to something we
* actually sent
*/
- if (unlikely(!ibmvfc_valid_event(&vhost->pool, evt))) {
+ if (unlikely(!ibmvfc_valid_event(&vhost->crq.evt_pool, evt))) {
dev_err(vhost->dev, "Returned correlation_token 0x%08llx is invalid!\n",
crq->ioba);
return;
@@ -2930,7 +2935,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
}
del_timer(&evt->timer);
- list_del(&evt->queue);
+ list_del(&evt->queue_list);
ibmvfc_trc_end(evt);
evt->done(evt);
}
@@ -3508,7 +3513,7 @@ static void ibmvfc_tgt_send_prli(struct ibmvfc_target *tgt)
return;
kref_get(&tgt->kref);
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
vhost->discovery_threads++;
ibmvfc_init_event(evt, ibmvfc_tgt_prli_done, IBMVFC_MAD_FORMAT);
evt->tgt = tgt;
@@ -3615,7 +3620,7 @@ static void ibmvfc_tgt_send_plogi(struct ibmvfc_target *tgt)
kref_get(&tgt->kref);
tgt->logo_rcvd = 0;
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
vhost->discovery_threads++;
ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_INIT_WAIT);
ibmvfc_init_event(evt, ibmvfc_tgt_plogi_done, IBMVFC_MAD_FORMAT);
@@ -3690,7 +3695,7 @@ static struct ibmvfc_event *__ibmvfc_tgt_get_implicit_logout_evt(struct ibmvfc_t
struct ibmvfc_event *evt;
kref_get(&tgt->kref);
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, done, IBMVFC_MAD_FORMAT);
evt->tgt = tgt;
mad = &evt->iu.implicit_logout;
@@ -3855,7 +3860,7 @@ static void ibmvfc_tgt_move_login(struct ibmvfc_target *tgt)
return;
kref_get(&tgt->kref);
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
vhost->discovery_threads++;
ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_INIT_WAIT);
ibmvfc_init_event(evt, ibmvfc_tgt_move_login_done, IBMVFC_MAD_FORMAT);
@@ -4021,7 +4026,7 @@ static void ibmvfc_adisc_timeout(struct timer_list *t)
vhost->abort_threads++;
kref_get(&tgt->kref);
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_tgt_adisc_cancel_done, IBMVFC_MAD_FORMAT);
evt->tgt = tgt;
@@ -4071,7 +4076,7 @@ static void ibmvfc_tgt_adisc(struct ibmvfc_target *tgt)
return;
kref_get(&tgt->kref);
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
vhost->discovery_threads++;
ibmvfc_init_event(evt, ibmvfc_tgt_adisc_done, IBMVFC_MAD_FORMAT);
evt->tgt = tgt;
@@ -4174,7 +4179,7 @@ static void ibmvfc_tgt_query_target(struct ibmvfc_target *tgt)
return;
kref_get(&tgt->kref);
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
vhost->discovery_threads++;
evt->tgt = tgt;
ibmvfc_init_event(evt, ibmvfc_tgt_query_target_done, IBMVFC_MAD_FORMAT);
@@ -4341,7 +4346,7 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt)
static void ibmvfc_discover_targets(struct ibmvfc_host *vhost)
{
struct ibmvfc_discover_targets *mad;
- struct ibmvfc_event *evt = ibmvfc_get_event(vhost);
+ struct ibmvfc_event *evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_discover_targets_done, IBMVFC_MAD_FORMAT);
mad = &evt->iu.discover_targets;
@@ -4454,7 +4459,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
static void ibmvfc_npiv_login(struct ibmvfc_host *vhost)
{
struct ibmvfc_npiv_login_mad *mad;
- struct ibmvfc_event *evt = ibmvfc_get_event(vhost);
+ struct ibmvfc_event *evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_gather_partition_info(vhost);
ibmvfc_set_login_info(vhost);
@@ -4491,7 +4496,7 @@ static void ibmvfc_npiv_logout_done(struct ibmvfc_event *evt)
switch (mad_status) {
case IBMVFC_MAD_SUCCESS:
- if (list_empty(&vhost->sent) &&
+ if (list_empty(&vhost->crq.sent) &&
vhost->action == IBMVFC_HOST_ACTION_LOGO_WAIT) {
ibmvfc_init_host(vhost);
return;
@@ -4519,7 +4524,7 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *vhost)
struct ibmvfc_npiv_logout_mad *mad;
struct ibmvfc_event *evt;
- evt = ibmvfc_get_event(vhost);
+ evt = ibmvfc_get_event(&vhost->crq);
ibmvfc_init_event(evt, ibmvfc_npiv_logout_done, IBMVFC_MAD_FORMAT);
mad = &evt->iu.npiv_logout;
@@ -5208,8 +5213,6 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
shost->unique_id = shost->host_no;
vhost = shost_priv(shost);
- INIT_LIST_HEAD(&vhost->sent);
- INIT_LIST_HEAD(&vhost->free);
INIT_LIST_HEAD(&vhost->targets);
sprintf(vhost->name, IBMVFC_NAME);
vhost->host = shost;
@@ -5241,7 +5244,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
goto kill_kthread;
}
- if ((rc = ibmvfc_init_event_pool(vhost))) {
+ if ((rc = ibmvfc_init_event_pool(vhost, &vhost->crq))) {
dev_err(dev, "Couldn't initialize event pool. rc=%d\n", rc);
goto release_crq;
}
@@ -5271,7 +5274,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
remove_shost:
scsi_remove_host(shost);
release_event_pool:
- ibmvfc_free_event_pool(vhost);
+ ibmvfc_free_event_pool(vhost, &vhost->crq);
release_crq:
ibmvfc_release_crq_queue(vhost);
kill_kthread:
@@ -5313,7 +5316,7 @@ static int ibmvfc_remove(struct vio_dev *vdev)
spin_lock_irqsave(vhost->host->host_lock, flags);
ibmvfc_purge_requests(vhost, DID_ERROR);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
- ibmvfc_free_event_pool(vhost);
+ ibmvfc_free_event_pool(vhost, &vhost->crq);
ibmvfc_free_mem(vhost);
spin_lock(&ibmvfc_driver_lock);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 5bf1621223d6..61c73b6f7a77 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -726,8 +726,9 @@ struct ibmvfc_target {
/* a unit of work for the hosting partition */
struct ibmvfc_event {
- struct list_head queue;
+ struct list_head queue_list;
struct ibmvfc_host *vhost;
+ struct ibmvfc_queue *queue;
struct ibmvfc_target *tgt;
struct scsi_cmnd *cmnd;
atomic_t free;
@@ -767,6 +768,10 @@ struct ibmvfc_queue {
dma_addr_t msg_token;
enum ibmvfc_msg_fmt fmt;
int size, cur;
+
+ struct ibmvfc_event_pool evt_pool;
+ struct list_head sent;
+ struct list_head free;
};
enum ibmvfc_host_action {
@@ -808,10 +813,7 @@ struct ibmvfc_host {
u32 trace_index:IBMVFC_NUM_TRACE_INDEX_BITS;
int num_targets;
struct list_head targets;
- struct list_head sent;
- struct list_head free;
struct device *dev;
- struct ibmvfc_event_pool pool;
struct dma_pool *sg_pool;
mempool_t *tgt_pool;
struct ibmvfc_queue crq;
--
2.27.0
^ permalink raw reply related
* [PATCH v2 1/5] ibmvfc: define generic queue structure for CRQs
From: Tyrel Datwyler @ 2021-01-06 20:18 UTC (permalink / raw)
To: james.bottomley
Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel,
Brian King, brking, linuxppc-dev
In-Reply-To: <20210106201835.1053593-1-tyreld@linux.ibm.com>
The primary and async CRQs are nearly identical outside of the format
and length of each message entry in the dma mapped page that represents
the queue data. These queues can be represented with a generic queue
structure that uses a union to differentiate between message format of
the mapped page.
This structure will further be leveraged in a followup patchset that
introduces Sub-CRQs.
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 135 +++++++++++++++++++++------------
drivers/scsi/ibmvscsi/ibmvfc.h | 34 +++++----
2 files changed, 107 insertions(+), 62 deletions(-)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 42e4d35e0d35..c8e7c4701ac4 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -660,7 +660,7 @@ static void ibmvfc_init_host(struct ibmvfc_host *vhost)
}
if (!ibmvfc_set_host_state(vhost, IBMVFC_INITIALIZING)) {
- memset(vhost->async_crq.msgs, 0, PAGE_SIZE);
+ memset(vhost->async_crq.msgs.async, 0, PAGE_SIZE);
vhost->async_crq.cur = 0;
list_for_each_entry(tgt, &vhost->targets, queue)
@@ -713,6 +713,23 @@ static int ibmvfc_send_crq_init_complete(struct ibmvfc_host *vhost)
return ibmvfc_send_crq(vhost, 0xC002000000000000LL, 0);
}
+/**
+ * ibmvfc_free_queue - Deallocate queue
+ * @vhost: ibmvfc host struct
+ * @queue: ibmvfc queue struct
+ *
+ * Unmaps dma and deallocates page for messages
+ **/
+static void ibmvfc_free_queue(struct ibmvfc_host *vhost,
+ struct ibmvfc_queue *queue)
+{
+ struct device *dev = vhost->dev;
+
+ dma_unmap_single(dev, queue->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ free_page((unsigned long)queue->msgs.handle);
+ queue->msgs.handle = NULL;
+}
+
/**
* ibmvfc_release_crq_queue - Deallocates data and unregisters CRQ
* @vhost: ibmvfc host struct
@@ -724,7 +741,7 @@ static void ibmvfc_release_crq_queue(struct ibmvfc_host *vhost)
{
long rc = 0;
struct vio_dev *vdev = to_vio_dev(vhost->dev);
- struct ibmvfc_crq_queue *crq = &vhost->crq;
+ struct ibmvfc_queue *crq = &vhost->crq;
ibmvfc_dbg(vhost, "Releasing CRQ\n");
free_irq(vdev->irq, vhost);
@@ -737,8 +754,8 @@ static void ibmvfc_release_crq_queue(struct ibmvfc_host *vhost)
vhost->state = IBMVFC_NO_CRQ;
vhost->logged_in = 0;
- dma_unmap_single(vhost->dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
- free_page((unsigned long)crq->msgs);
+
+ ibmvfc_free_queue(vhost, crq);
}
/**
@@ -778,7 +795,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
int rc = 0;
unsigned long flags;
struct vio_dev *vdev = to_vio_dev(vhost->dev);
- struct ibmvfc_crq_queue *crq = &vhost->crq;
+ struct ibmvfc_queue *crq = &vhost->crq;
/* Close the CRQ */
do {
@@ -792,7 +809,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
vhost->logged_in = 0;
/* Clean out the queue */
- memset(crq->msgs, 0, PAGE_SIZE);
+ memset(crq->msgs.crq, 0, PAGE_SIZE);
crq->cur = 0;
/* And re-open it again */
@@ -1238,6 +1255,7 @@ static void ibmvfc_gather_partition_info(struct ibmvfc_host *vhost)
static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
{
struct ibmvfc_npiv_login *login_info = &vhost->login_info;
+ struct ibmvfc_queue *async_crq = &vhost->async_crq;
struct device_node *of_node = vhost->dev->of_node;
const char *location;
@@ -1257,7 +1275,8 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
login_info->max_cmds = cpu_to_be32(max_requests + IBMVFC_NUM_INTERNAL_REQ);
login_info->capabilities = cpu_to_be64(IBMVFC_CAN_MIGRATE | IBMVFC_CAN_SEND_VF_WWPN);
login_info->async.va = cpu_to_be64(vhost->async_crq.msg_token);
- login_info->async.len = cpu_to_be32(vhost->async_crq.size * sizeof(*vhost->async_crq.msgs));
+ login_info->async.len = cpu_to_be32(async_crq->size *
+ sizeof(*async_crq->msgs.async));
strncpy(login_info->partition_name, vhost->partition_name, IBMVFC_MAX_NAME);
strncpy(login_info->device_name,
dev_name(&vhost->host->shost_gendev), IBMVFC_MAX_NAME);
@@ -3230,10 +3249,10 @@ static struct scsi_host_template driver_template = {
**/
static struct ibmvfc_async_crq *ibmvfc_next_async_crq(struct ibmvfc_host *vhost)
{
- struct ibmvfc_async_crq_queue *async_crq = &vhost->async_crq;
+ struct ibmvfc_queue *async_crq = &vhost->async_crq;
struct ibmvfc_async_crq *crq;
- crq = &async_crq->msgs[async_crq->cur];
+ crq = &async_crq->msgs.async[async_crq->cur];
if (crq->valid & 0x80) {
if (++async_crq->cur == async_crq->size)
async_crq->cur = 0;
@@ -3253,10 +3272,10 @@ static struct ibmvfc_async_crq *ibmvfc_next_async_crq(struct ibmvfc_host *vhost)
**/
static struct ibmvfc_crq *ibmvfc_next_crq(struct ibmvfc_host *vhost)
{
- struct ibmvfc_crq_queue *queue = &vhost->crq;
+ struct ibmvfc_queue *queue = &vhost->crq;
struct ibmvfc_crq *crq;
- crq = &queue->msgs[queue->cur];
+ crq = &queue->msgs.crq[queue->cur];
if (crq->valid & 0x80) {
if (++queue->cur == queue->size)
queue->cur = 0;
@@ -4895,6 +4914,54 @@ static int ibmvfc_work(void *data)
return 0;
}
+/**
+ * ibmvfc_alloc_queue - Allocate queue
+ * @vhost: ibmvfc host struct
+ * @queue: ibmvfc queue to allocate
+ * @fmt: queue format to allocate
+ *
+ * Returns:
+ * 0 on success / non-zero on failure
+ **/
+static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost,
+ struct ibmvfc_queue *queue,
+ enum ibmvfc_msg_fmt fmt)
+{
+ struct device *dev = vhost->dev;
+ size_t fmt_size;
+
+ ENTER;
+ switch (fmt) {
+ case IBMVFC_CRQ_FMT:
+ fmt_size = sizeof(*queue->msgs.crq);
+ break;
+ case IBMVFC_ASYNC_FMT:
+ fmt_size = sizeof(*queue->msgs.async);
+ break;
+ default:
+ dev_warn(dev, "Unknown command/response queue message format: %d\n", fmt);
+ return -EINVAL;
+ }
+
+ queue->msgs.handle = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!queue->msgs.handle)
+ return -ENOMEM;
+
+ queue->msg_token = dma_map_single(dev, queue->msgs.handle, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ if (dma_mapping_error(dev, queue->msg_token)) {
+ free_page((unsigned long)queue->msgs.handle);
+ queue->msgs.handle = NULL;
+ return -ENOMEM;
+ }
+
+ queue->cur = 0;
+ queue->fmt = fmt;
+ queue->size = PAGE_SIZE / fmt_size;
+ return 0;
+}
+
/**
* ibmvfc_init_crq - Initializes and registers CRQ with hypervisor
* @vhost: ibmvfc host struct
@@ -4910,21 +4977,12 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost)
int rc, retrc = -ENOMEM;
struct device *dev = vhost->dev;
struct vio_dev *vdev = to_vio_dev(dev);
- struct ibmvfc_crq_queue *crq = &vhost->crq;
+ struct ibmvfc_queue *crq = &vhost->crq;
ENTER;
- crq->msgs = (struct ibmvfc_crq *)get_zeroed_page(GFP_KERNEL);
-
- if (!crq->msgs)
+ if (ibmvfc_alloc_queue(vhost, crq, IBMVFC_CRQ_FMT))
return -ENOMEM;
- crq->size = PAGE_SIZE / sizeof(*crq->msgs);
- crq->msg_token = dma_map_single(dev, crq->msgs,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
-
- if (dma_mapping_error(dev, crq->msg_token))
- goto map_failed;
-
retrc = rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
crq->msg_token, PAGE_SIZE);
@@ -4953,7 +5011,6 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost)
goto req_irq_failed;
}
- crq->cur = 0;
LEAVE;
return retrc;
@@ -4963,9 +5020,7 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost)
rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
reg_crq_failed:
- dma_unmap_single(dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
-map_failed:
- free_page((unsigned long)crq->msgs);
+ ibmvfc_free_queue(vhost, crq);
return retrc;
}
@@ -4978,7 +5033,7 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost)
**/
static void ibmvfc_free_mem(struct ibmvfc_host *vhost)
{
- struct ibmvfc_async_crq_queue *async_q = &vhost->async_crq;
+ struct ibmvfc_queue *async_q = &vhost->async_crq;
ENTER;
mempool_destroy(vhost->tgt_pool);
@@ -4988,9 +5043,7 @@ static void ibmvfc_free_mem(struct ibmvfc_host *vhost)
dma_free_coherent(vhost->dev, sizeof(*vhost->login_buf),
vhost->login_buf, vhost->login_buf_dma);
dma_pool_destroy(vhost->sg_pool);
- dma_unmap_single(vhost->dev, async_q->msg_token,
- async_q->size * sizeof(*async_q->msgs), DMA_BIDIRECTIONAL);
- free_page((unsigned long)async_q->msgs);
+ ibmvfc_free_queue(vhost, async_q);
LEAVE;
}
@@ -5003,26 +5056,15 @@ static void ibmvfc_free_mem(struct ibmvfc_host *vhost)
**/
static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost)
{
- struct ibmvfc_async_crq_queue *async_q = &vhost->async_crq;
+ struct ibmvfc_queue *async_q = &vhost->async_crq;
struct device *dev = vhost->dev;
ENTER;
- async_q->msgs = (struct ibmvfc_async_crq *)get_zeroed_page(GFP_KERNEL);
- if (!async_q->msgs) {
- dev_err(dev, "Couldn't allocate async queue.\n");
+ if (ibmvfc_alloc_queue(vhost, async_q, IBMVFC_ASYNC_FMT)) {
+ dev_err(dev, "Couldn't allocate/map async queue.\n");
goto nomem;
}
- async_q->size = PAGE_SIZE / sizeof(struct ibmvfc_async_crq);
- async_q->msg_token = dma_map_single(dev, async_q->msgs,
- async_q->size * sizeof(*async_q->msgs),
- DMA_BIDIRECTIONAL);
-
- if (dma_mapping_error(dev, async_q->msg_token)) {
- dev_err(dev, "Failed to map async queue\n");
- goto free_async_crq;
- }
-
vhost->sg_pool = dma_pool_create(IBMVFC_NAME, dev,
SG_ALL * sizeof(struct srp_direct_buf),
sizeof(struct srp_direct_buf), 0);
@@ -5077,10 +5119,7 @@ static int ibmvfc_alloc_mem(struct ibmvfc_host *vhost)
free_sg_pool:
dma_pool_destroy(vhost->sg_pool);
unmap_async_crq:
- dma_unmap_single(dev, async_q->msg_token,
- async_q->size * sizeof(*async_q->msgs), DMA_BIDIRECTIONAL);
-free_async_crq:
- free_page((unsigned long)async_q->msgs);
+ ibmvfc_free_queue(vhost, async_q);
nomem:
LEAVE;
return -ENOMEM;
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
index 9d58cfd774d3..5bf1621223d6 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.h
+++ b/drivers/scsi/ibmvscsi/ibmvfc.h
@@ -645,12 +645,6 @@ struct ibmvfc_crq {
volatile __be64 ioba;
} __packed __aligned(8);
-struct ibmvfc_crq_queue {
- struct ibmvfc_crq *msgs;
- int size, cur;
- dma_addr_t msg_token;
-};
-
enum ibmvfc_ae_link_state {
IBMVFC_AE_LS_LINK_UP = 0x01,
IBMVFC_AE_LS_LINK_BOUNCED = 0x02,
@@ -678,12 +672,6 @@ struct ibmvfc_async_crq {
__be64 reserved;
} __packed __aligned(8);
-struct ibmvfc_async_crq_queue {
- struct ibmvfc_async_crq *msgs;
- int size, cur;
- dma_addr_t msg_token;
-};
-
union ibmvfc_iu {
struct ibmvfc_mad_common mad_common;
struct ibmvfc_npiv_login_mad npiv_login;
@@ -763,6 +751,24 @@ struct ibmvfc_event_pool {
dma_addr_t iu_token;
};
+enum ibmvfc_msg_fmt {
+ IBMVFC_CRQ_FMT = 0,
+ IBMVFC_ASYNC_FMT,
+};
+
+union ibmvfc_msgs {
+ void *handle;
+ struct ibmvfc_crq *crq;
+ struct ibmvfc_async_crq *async;
+};
+
+struct ibmvfc_queue {
+ union ibmvfc_msgs msgs;
+ dma_addr_t msg_token;
+ enum ibmvfc_msg_fmt fmt;
+ int size, cur;
+};
+
enum ibmvfc_host_action {
IBMVFC_HOST_ACTION_NONE = 0,
IBMVFC_HOST_ACTION_RESET,
@@ -808,8 +814,8 @@ struct ibmvfc_host {
struct ibmvfc_event_pool pool;
struct dma_pool *sg_pool;
mempool_t *tgt_pool;
- struct ibmvfc_crq_queue crq;
- struct ibmvfc_async_crq_queue async_crq;
+ struct ibmvfc_queue crq;
+ struct ibmvfc_queue async_crq;
struct ibmvfc_npiv_login login_info;
union ibmvfc_npiv_login_data *login_buf;
dma_addr_t login_buf_dma;
--
2.27.0
^ permalink raw reply related
* Re: [PATCH -next] pci/controller/dwc: convert comma to semicolon
From: Bjorn Helgaas @ 2021-01-06 19:07 UTC (permalink / raw)
To: Zheng Yongjun
Cc: robh, roy.zang, linux-pci, linux-kernel, minghuan.Lian,
linux-arm-kernel, linuxppc-dev, mingkai.hu
In-Reply-To: <20201216131944.14990-1-zhengyongjun3@huawei.com>
On Wed, Dec 16, 2020 at 09:19:44PM +0800, Zheng Yongjun wrote:
> Replace a comma between expression statements by a semicolon.
Looks like a good fix, but read this about the changelog title:
https://lore.kernel.org/r/20171026223701.GA25649@bhelgaas-glaptop.roam.corp.google.com
> Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
> ---
> drivers/pci/controller/dwc/pci-layerscape-ep.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> index 84206f265e54..917ba8d254fc 100644
> --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> @@ -178,7 +178,7 @@ static int __init ls_pcie_ep_probe(struct platform_device *pdev)
> pci->dev = dev;
> pci->ops = pcie->drvdata->dw_pcie_ops;
>
> - ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
> + ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4);
>
> pcie->pci = pci;
> pcie->ls_epc = ls_epc;
> --
> 2.22.0
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply
* Re: [RFC PATCH v3 5/6] dt-bindings: of: Add restricted DMA pool
From: Konrad Rzeszutek Wilk @ 2021-01-06 18:57 UTC (permalink / raw)
To: Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, frowand.list,
mingo, m.szyprowski, sstabellini, saravanak, joro,
rafael.j.wysocki, hch, bgolaszewski, xen-devel, treding,
devicetree, will, dan.j.williams, linuxppc-dev, robh+dt,
boris.ostrovsky, andriy.shevchenko, jgross, drinkcat, gregkh,
rdunlap, linux-kernel, tfiga, iommu, xypron.glpk, robin.murphy,
bauerman
In-Reply-To: <20210106034124.30560-6-tientzu@chromium.org>
On Wed, Jan 06, 2021 at 11:41:23AM +0800, Claire Chang wrote:
> Introduce the new compatible string, restricted-dma-pool, for restricted
> DMA. One can specify the address and length of the restricted DMA memory
> region by restricted-dma-pool in the device tree.
>
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> ---
> .../reserved-memory/reserved-memory.txt | 24 +++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> index e8d3096d922c..44975e2a1fd2 100644
> --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
> @@ -51,6 +51,20 @@ compatible (optional) - standard definition
> used as a shared pool of DMA buffers for a set of devices. It can
> be used by an operating system to instantiate the necessary pool
> management subsystem if necessary.
> + - restricted-dma-pool: This indicates a region of memory meant to be
> + used as a pool of restricted DMA buffers for a set of devices. The
> + memory region would be the only region accessible to those devices.
> + When using this, the no-map and reusable properties must not be set,
> + so the operating system can create a virtual mapping that will be used
> + for synchronization. The main purpose for restricted DMA is to
> + mitigate the lack of DMA access control on systems without an IOMMU,
> + which could result in the DMA accessing the system memory at
> + unexpected times and/or unexpected addresses, possibly leading to data
> + leakage or corruption. The feature on its own provides a basic level
> + of protection against the DMA overwriting buffer contents at
> + unexpected times. However, to protect against general data leakage and
> + system memory corruption, the system needs to provide a way to restrict
> + the DMA to a predefined memory region.
Heya!
I think I am missing something obvious here so please bear with my
questions:
- This code adds the means of having the SWIOTLB pool tied to a specific
memory region, correct?
- Nothing stops the physical device from bypassing the SWIOTLB buffer.
That is if an errant device screwed up the length or DMA address, the
SWIOTLB would gladly do what the device told it to do?
- This has to be combined with SWIOTLB-force-ish to always use the
bounce buffer, otherwise you could still do DMA without using
SWIOTLB (by not hitting the criteria for needing to use SWIOTLB)?
^ permalink raw reply
* Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Konrad Rzeszutek Wilk @ 2021-01-06 18:52 UTC (permalink / raw)
To: Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, frowand.list,
mingo, m.szyprowski, sstabellini, saravanak, joro,
rafael.j.wysocki, hch, bgolaszewski, xen-devel, treding,
devicetree, will, dan.j.williams, linuxppc-dev, robh+dt,
boris.ostrovsky, andriy.shevchenko, jgross, drinkcat, gregkh,
rdunlap, linux-kernel, tfiga, iommu, xypron.glpk, robin.murphy,
bauerman
In-Reply-To: <20210106034124.30560-3-tientzu@chromium.org>
Hello!
In this file:
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index e4368159f88a..7fb2ac087d23 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
..
> +static const struct reserved_mem_ops rmem_swiotlb_ops = {
> + .device_init = rmem_swiotlb_device_init,
> + .device_release = rmem_swiotlb_device_release,
> +};
> +
> +static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
> +{
> + unsigned long node = rmem->fdt_node;
> +
> + if (of_get_flat_dt_prop(node, "reusable", NULL) ||
> + of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
> + of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
> + of_get_flat_dt_prop(node, "no-map", NULL))
> + return -EINVAL;
> +
> + rmem->ops = &rmem_swiotlb_ops;
> + pr_info("Reserved memory: created device swiotlb memory pool at %pa, size %ld MiB\n",
> + &rmem->base, (unsigned long)rmem->size / SZ_1M);
> + return 0;
> +}
> +
> +RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
The code should be as much as possible arch-agnostic. That is why there
are multiple -swiotlb files scattered in arch directories that own the
architecture specific code.
Would it be possible to move the code there and perhaps have a ARM
specific front-end for this DMA restricted pool there? See for example
the xen-swiotlb code.
Cheers!
Konrad
^ permalink raw reply
* Re: [RFC PATCH v3 0/6] Restricted DMA
From: Florian Fainelli @ 2021-01-06 18:48 UTC (permalink / raw)
To: Claire Chang, robh+dt, mpe, benh, paulus, joro, will,
frowand.list, konrad.wilk, boris.ostrovsky, jgross, sstabellini,
hch, m.szyprowski, robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, dan.j.williams, andriy.shevchenko, gregkh, rdunlap,
linux-kernel, tfiga, iommu, Jim Quinlan, linuxppc-dev, bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>
Hi,
First of all let me say that I am glad that someone is working on an
upstream solution for this issue; I would appreciate it if you could CC me
and Jim Quinlan on subsequent submissions.
On 1/5/21 7:41 PM, Claire Chang wrote:
> This series implements mitigations for lack of DMA access control on
> systems without an IOMMU, which could result in the DMA accessing the
> system memory at unexpected times and/or unexpected addresses, possibly
> leading to data leakage or corruption.
>
> For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
> not behind an IOMMU. As PCI-e, by design, gives the device full access to
> system memory, a vulnerability in the Wi-Fi firmware could easily escalate
> to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
> full chain of exploits; [2], [3]).
>
> To mitigate the security concerns, we introduce restricted DMA. Restricted
> DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
> specially allocated region and does memory allocation from the same region.
> The feature on its own provides a basic level of protection against the DMA
> overwriting buffer contents at unexpected times. However, to protect
> against general data leakage and system memory corruption, the system needs
> to provide a way to restrict the DMA to a predefined memory region (this is
> usually done at firmware level, e.g. in ATF on some ARM platforms).
Can you explain how ATF gets involved and to what extent it does help,
besides enforcing a secure region from the ARM CPU's perspective? Does
the PCIe root complex not have an IOMMU but can somehow be denied access
to a region that is marked NS=0 in the ARM CPU's MMU? If so, that is
still some sort of basic protection that the HW enforces, right?
On Broadcom STB SoCs we have had something similar for a while, however,
and while we don't have an IOMMU for the PCIe bridge, we do have a
basic protection mechanism whereby we can configure a region in DRAM to
be PCIe read/write and CPU read/write which then gets used as the PCIe
inbound region for the PCIe EP. By default the PCIe bridge is not
allowed access to DRAM so we must call into a security agent to allow
the PCIe bridge to access the designated DRAM region.
We have done this using a private CMA area region assigned via Device
Tree and requiring the PCIe EP driver to use
dma_alloc_from_contiguous() in order to allocate from this device
private CMA area. The only drawback with that approach is that it
requires knowing how much memory you need up front for buffers and DMA
descriptors that the PCIe EP will need to process. The problem is that
it requires driver modifications and that does not scale over the number
of PCIe EP drivers, some we absolutely do not control, but there is no
need to bounce buffer. Your approach scales better across PCIe EP
drivers however it does require bounce buffering which could be a
performance hit.
Thanks!
--
Florian
^ permalink raw reply
* Re: [PATCH v2 4/5] ibmvfc: complete commands outside the host/queue lock
From: Tyrel Datwyler @ 2021-01-06 17:18 UTC (permalink / raw)
To: Martin K. Petersen
Cc: linux-scsi, linux-kernel, james.bottomley, Brian King, brking,
linuxppc-dev
In-Reply-To: <yq1v9caekxl.fsf@ca-mkp.ca.oracle.com>
On 1/5/21 8:42 PM, Martin K. Petersen wrote:
>
> Tyrel,
>
>> Drain the command queue and place all commands on a completion list.
>> Perform command completion on that list outside the host/queue locks.
>> Further, move purged command completions outside the host_lock as well.
>
> Please resubmit entire series instead of amending individual patches.
>
> thanks!
>
No problem. I wasn't sure since it was simply adding a "static" keyword. I'll
send a v2 out today.
-Tyrel
^ permalink raw reply
* Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Greg KH @ 2021-01-06 7:50 UTC (permalink / raw)
To: Claire Chang
Cc: heikki.krogerus, peterz, grant.likely, paulus, frowand.list,
mingo, m.szyprowski, sstabellini, saravanak, joro,
rafael.j.wysocki, hch, bgolaszewski, xen-devel, treding,
devicetree, will, konrad.wilk, dan.j.williams, robh+dt,
boris.ostrovsky, andriy.shevchenko, jgross, drinkcat,
linuxppc-dev, rdunlap, linux-kernel, tfiga, iommu, xypron.glpk,
robin.murphy, bauerman
In-Reply-To: <20210106034124.30560-3-tientzu@chromium.org>
On Wed, Jan 06, 2021 at 11:41:20AM +0800, Claire Chang wrote:
> Add the initialization function to create restricted DMA pools from
> matching reserved-memory nodes in the device tree.
>
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> ---
> include/linux/device.h | 4 ++
> include/linux/swiotlb.h | 7 +-
> kernel/dma/Kconfig | 1 +
> kernel/dma/swiotlb.c | 144 ++++++++++++++++++++++++++++++++++------
> 4 files changed, 131 insertions(+), 25 deletions(-)
>
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 89bb8b84173e..ca6f71ec8871 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -413,6 +413,7 @@ struct dev_links_info {
> * @dma_pools: Dma pools (if dma'ble device).
> * @dma_mem: Internal for coherent mem override.
> * @cma_area: Contiguous memory area for dma allocations
> + * @dma_io_tlb_mem: Internal for swiotlb io_tlb_mem override.
Why does this have to be added here? Shouldn't the platform-specific
code handle it instead?
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCH v2 4/5] ibmvfc: complete commands outside the host/queue lock
From: Martin K. Petersen @ 2021-01-06 4:42 UTC (permalink / raw)
To: Tyrel Datwyler
Cc: martin.petersen, linux-scsi, linux-kernel, james.bottomley,
Brian King, brking, linuxppc-dev
In-Reply-To: <20210104222422.981457-1-tyreld@linux.ibm.com>
Tyrel,
> Drain the command queue and place all commands on a completion list.
> Perform command completion on that list outside the host/queue locks.
> Further, move purged command completions outside the host_lock as well.
Please resubmit entire series instead of amending individual patches.
thanks!
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply
* [RFC PATCH v3 6/6] of: Add plumbing for restricted DMA pool
From: Claire Chang @ 2021-01-06 3:41 UTC (permalink / raw)
To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>
If a device is not behind an IOMMU, we look up the device node and set
up the restricted DMA when the restricted-dma-pool is present.
Signed-off-by: Claire Chang <tientzu@chromium.org>
---
drivers/of/address.c | 21 +++++++++++++++++++++
drivers/of/device.c | 4 ++++
drivers/of/of_private.h | 5 +++++
3 files changed, 30 insertions(+)
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 73ddf2540f3f..94eca8249854 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -8,6 +8,7 @@
#include <linux/logic_pio.h>
#include <linux/module.h>
#include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
#include <linux/pci.h>
#include <linux/pci_regs.h>
#include <linux/sizes.h>
@@ -1094,3 +1095,23 @@ bool of_dma_is_coherent(struct device_node *np)
return false;
}
EXPORT_SYMBOL_GPL(of_dma_is_coherent);
+
+/*
+ * Attach @dev to the first "memory-region" entry of its device node that is
+ * compatible with "restricted-dma-pool", if there is one.
+ *
+ * Returns 0 when @dev has no of_node or no matching region; otherwise
+ * returns the result of of_reserved_mem_device_init_by_idx() for the
+ * matching index.
+ */
+int of_dma_set_restricted_buffer(struct device *dev)
+{
+	struct device_node *node;
+	int count, i;
+
+	if (!dev->of_node)
+		return 0;
+
+	/* A negative (error) count simply skips the loop below. */
+	count = of_property_count_elems_of_size(dev->of_node, "memory-region",
+						sizeof(phandle));
+	for (i = 0; i < count; i++) {
+		int match;
+
+		node = of_parse_phandle(dev->of_node, "memory-region", i);
+		match = of_device_is_compatible(node, "restricted-dma-pool");
+		/*
+		 * of_parse_phandle() returns the node with its refcount
+		 * raised; drop it once the compatible check is done so the
+		 * reference is not leaked on every iteration.
+		 */
+		of_node_put(node);
+		if (match)
+			return of_reserved_mem_device_init_by_idx(
+				dev, dev->of_node, i);
+	}
+
+	return 0;
+}
diff --git a/drivers/of/device.c b/drivers/of/device.c
index aedfaaafd3e7..e2c7409956ab 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -182,6 +182,10 @@ int of_dma_configure_id(struct device *dev, struct device_node *np,
arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
dev->dma_range_map = map;
+
+ if (!iommu)
+ return of_dma_set_restricted_buffer(dev);
+
return 0;
}
EXPORT_SYMBOL_GPL(of_dma_configure_id);
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index d9e6a324de0a..28a2dfa197ba 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -161,12 +161,17 @@ struct bus_dma_region;
#if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map);
+int of_dma_set_restricted_buffer(struct device *dev);
#else
static inline int of_dma_get_range(struct device_node *np,
const struct bus_dma_region **map)
{
return -ENODEV;
}
+/*
+ * Stub used when CONFIG_OF_ADDRESS or CONFIG_HAS_DMA is disabled.  The name
+ * must match the of_dma_set_restricted_buffer() declaration in the #if
+ * branch, since that is what of_dma_configure_id() calls.  Return 0 ("no
+ * restricted pool"), as the real function does, because the caller returns
+ * this value directly and an error would fail DMA configuration for every
+ * device without an IOMMU.
+ */
+static inline int of_dma_set_restricted_buffer(struct device *dev)
+{
+	return 0;
+}
#endif
#endif /* _LINUX_OF_PRIVATE_H */
--
2.29.2.729.g45daf8777d-goog
^ permalink raw reply related
* [RFC PATCH v3 5/6] dt-bindings: of: Add restricted DMA pool
From: Claire Chang @ 2021-01-06 3:41 UTC (permalink / raw)
To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>
Introduce the new compatible string, restricted-dma-pool, for restricted
DMA. One can specify the address and length of the restricted DMA memory
region by restricted-dma-pool in the device tree.
Signed-off-by: Claire Chang <tientzu@chromium.org>
---
.../reserved-memory/reserved-memory.txt | 24 +++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
index e8d3096d922c..44975e2a1fd2 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -51,6 +51,20 @@ compatible (optional) - standard definition
used as a shared pool of DMA buffers for a set of devices. It can
be used by an operating system to instantiate the necessary pool
management subsystem if necessary.
+ - restricted-dma-pool: This indicates a region of memory meant to be
+ used as a pool of restricted DMA buffers for a set of devices. The
+ memory region would be the only region accessible to those devices.
+ When using this, the no-map and reusable properties must not be set,
+ so the operating system can create a virtual mapping that will be used
+ for synchronization. The main purpose for restricted DMA is to
+ mitigate the lack of DMA access control on systems without an IOMMU,
+ which could result in the DMA accessing the system memory at
+ unexpected times and/or unexpected addresses, possibly leading to data
+ leakage or corruption. The feature on its own provides a basic level
+ of protection against the DMA overwriting buffer contents at
+ unexpected times. However, to protect against general data leakage and
+ system memory corruption, the system needs to provide a way to restrict
+ the DMA to a predefined memory region.
- vendor specific string in the form <vendor>,[<device>-]<usage>
no-map (optional) - empty property
- Indicates the operating system must not create a virtual mapping
@@ -120,6 +134,11 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
compatible = "acme,multimedia-memory";
reg = <0x77000000 0x4000000>;
};
+
+ restricted_dma_mem_reserved: restricted_dma_mem_reserved {
+ compatible = "restricted-dma-pool";
+ reg = <0x50000000 0x400000>;
+ };
};
/* ... */
@@ -138,4 +157,9 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
memory-region = <&multimedia_reserved>;
/* ... */
};
+
+ pcie_device: pcie_device@0,0 {
+ memory-region = <&restricted_dma_mem_reserved>;
+ /* ... */
+ };
};
--
2.29.2.729.g45daf8777d-goog
^ permalink raw reply related
* [RFC PATCH v3 4/6] swiotlb: Add restricted DMA alloc/free support.
From: Claire Chang @ 2021-01-06 3:41 UTC (permalink / raw)
To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>
Add the functions, swiotlb_alloc and swiotlb_free to support the
memory allocation from restricted DMA pool.
Signed-off-by: Claire Chang <tientzu@chromium.org>
---
include/linux/swiotlb.h | 6 ++
kernel/dma/direct.c | 12 +++
kernel/dma/swiotlb.c | 171 +++++++++++++++++++++++++++++-----------
3 files changed, 144 insertions(+), 45 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5135e5636042..84fe96e40685 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -68,6 +68,12 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir, unsigned long attrs);
+void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ unsigned long attrs);
+
+void swiotlb_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_addr, unsigned long attrs);
+
#ifdef CONFIG_SWIOTLB
extern enum swiotlb_force swiotlb_force;
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 30ccbc08e229..126e9b3354d6 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -137,6 +137,11 @@ void *dma_direct_alloc(struct device *dev, size_t size,
void *ret;
int err;
+#ifdef CONFIG_SWIOTLB
+ if (unlikely(dev->dma_io_tlb_mem))
+ return swiotlb_alloc(dev, size, dma_handle, attrs);
+#endif
+
size = PAGE_ALIGN(size);
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
@@ -246,6 +251,13 @@ void dma_direct_free(struct device *dev, size_t size,
{
unsigned int page_order = get_order(size);
+#ifdef CONFIG_SWIOTLB
+ if (unlikely(dev->dma_io_tlb_mem)) {
+ swiotlb_free(dev, size, cpu_addr, dma_addr, attrs);
+ return;
+ }
+#endif
+
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
!force_dma_unencrypted(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 1f05af09e61a..ca88ef59435d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -459,14 +459,13 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
}
}
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
- size_t mapping_size, size_t alloc_size,
- enum dma_data_direction dir, unsigned long attrs)
+static int swiotlb_tbl_find_free_region(struct device *hwdev,
+ dma_addr_t tbl_dma_addr,
+ size_t alloc_size,
+ unsigned long attrs)
{
struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
- dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
unsigned long flags;
- phys_addr_t tlb_addr;
unsigned int nslots, stride, index, wrap;
int i;
unsigned long mask;
@@ -477,15 +476,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
if (no_iotlb_memory && !hwdev->dma_io_tlb_mem)
panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
- if (mem_encrypt_active())
- pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
-
- if (mapping_size > alloc_size) {
- dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
- mapping_size, alloc_size);
- return (phys_addr_t)DMA_MAPPING_ERROR;
- }
-
mask = dma_get_seg_boundary(hwdev);
tbl_dma_addr &= mask;
@@ -547,7 +537,6 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
mem->list[i] = 0;
for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && mem->list[i]; i--)
mem->list[i] = ++count;
- tlb_addr = mem->start + (index << IO_TLB_SHIFT);
/*
* Update the indices to avoid searching in the next
@@ -570,45 +559,21 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
alloc_size, mem->nslabs, tmp_io_tlb_used);
- return (phys_addr_t)DMA_MAPPING_ERROR;
+ return -ENOMEM;
+
found:
mem->used += nslots;
spin_unlock_irqrestore(&mem->lock, flags);
- /*
- * Save away the mapping from the original address to the DMA address.
- * This is needed when we sync the memory. Then we sync the buffer if
- * needed.
- */
- for (i = 0; i < nslots; i++)
- mem->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
- return tlb_addr;
+ return index;
}
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
- size_t mapping_size, size_t alloc_size,
- enum dma_data_direction dir, unsigned long attrs)
+static void swiotlb_tbl_release_region(struct device *hwdev, int index,
+ size_t size)
{
struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
unsigned long flags;
- int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
- phys_addr_t orig_addr = mem->orig_addr[index];
-
- /*
- * First, sync the memory before unmapping the entry
- */
- if (orig_addr != INVALID_PHYS_ADDR &&
- !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+ int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
/*
* Return the buffer to the free list by setting the corresponding
@@ -640,6 +605,69 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
spin_unlock_irqrestore(&mem->lock, flags);
}
+/*
+ * Reserve bounce-buffer slots for a streaming mapping of @orig_addr.
+ *
+ * @mapping_size is the number of bytes to bounce and @alloc_size the number
+ * of bytes to reserve; @mapping_size must not exceed @alloc_size.  The
+ * original address is recorded for every reserved slot and, unless
+ * DMA_ATTR_SKIP_CPU_SYNC is set, the data is copied into the bounce buffer
+ * for DMA_TO_DEVICE and DMA_BIDIRECTIONAL mappings.
+ *
+ * Returns the physical address of the bounce buffer, or
+ * (phys_addr_t)DMA_MAPPING_ERROR on failure.
+ */
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
+		size_t mapping_size, size_t alloc_size,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
+	dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
+	phys_addr_t tlb_addr;
+	unsigned int nslots;
+	int i;
+	/*
+	 * Must be signed: swiotlb_tbl_find_free_region() reports failure as a
+	 * negative errno, which an unsigned index could never detect.
+	 */
+	int index;
+
+	if (mem_encrypt_active())
+		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+	if (mapping_size > alloc_size) {
+		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+			      mapping_size, alloc_size);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
+
+	index = swiotlb_tbl_find_free_region(hwdev, tbl_dma_addr, alloc_size,
+					     attrs);
+	if (index < 0)
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+
+	/* Widen before shifting so a large pool cannot overflow int. */
+	tlb_addr = mem->start + ((phys_addr_t)index << IO_TLB_SHIFT);
+
+	/*
+	 * Save away the mapping from the original address to the DMA address.
+	 * This is needed when we sync the memory. Then we sync the buffer if
+	 * needed.
+	 */
+	nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	for (i = 0; i < nslots; i++)
+		mem->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+
+	return tlb_addr;
+}
+
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+ size_t mapping_size, size_t alloc_size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
+ int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = mem->orig_addr[index];
+
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if (orig_addr != INVALID_PHYS_ADDR &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+
+ swiotlb_tbl_release_region(hwdev, index, alloc_size);
+}
+
void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
enum dma_sync_target target)
@@ -706,6 +734,59 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
return dma_addr;
}
+/*
+ * Allocate a zeroed, page-aligned buffer of at least @size bytes from the
+ * device's restricted pool (dev->dma_io_tlb_mem).
+ *
+ * On success the DMA address of the buffer is stored in @dma_handle and a
+ * kernel virtual address is returned: a remapped coherent mapping when the
+ * device is not DMA coherent, the linear-map address otherwise.  Returns
+ * NULL when no free region is found or the remap fails.
+ */
+void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		    unsigned long attrs)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	int index;
+	void *vaddr;
+	phys_addr_t tlb_addr;
+
+	size = PAGE_ALIGN(size);
+	/*
+	 * The region finder takes the pool start as a DMA address (see
+	 * swiotlb_tbl_map_single()), so translate the physical start first.
+	 */
+	index = swiotlb_tbl_find_free_region(dev,
+			phys_to_dma_unencrypted(dev, mem->start), size, attrs);
+	if (index < 0)
+		return NULL;
+
+	/* Widen before shifting so a large pool cannot overflow int. */
+	tlb_addr = mem->start + ((phys_addr_t)index << IO_TLB_SHIFT);
+	*dma_handle = phys_to_dma_unencrypted(dev, tlb_addr);
+
+	if (!dev_is_dma_coherent(dev)) {
+		unsigned long pfn = PFN_DOWN(tlb_addr);
+
+		/* remove any dirty cache lines on the kernel alias */
+		arch_dma_prep_coherent(pfn_to_page(pfn), size);
+
+		/* create a coherent mapping */
+		vaddr = dma_common_contiguous_remap(
+			pfn_to_page(pfn), size,
+			dma_pgprot(dev, PAGE_KERNEL, attrs),
+			__builtin_return_address(0));
+		if (!vaddr) {
+			/* Give the slots back; nothing else was set up. */
+			swiotlb_tbl_release_region(dev, index, size);
+			return NULL;
+		}
+	} else {
+		vaddr = phys_to_virt(tlb_addr);
+	}
+
+	memset(vaddr, 0, size);
+
+	return vaddr;
+}
+
+/*
+ * Release a buffer obtained from swiotlb_alloc().
+ *
+ * @vaddr and @dma_addr are the values swiotlb_alloc() returned for this
+ * buffer and @size the size that was requested.  For non-coherent devices
+ * the remapped kernel mapping is torn down before the slots are returned
+ * to the device's pool.
+ */
+void swiotlb_free(struct device *dev, size_t size, void *vaddr,
+		  dma_addr_t dma_addr, unsigned long attrs)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	unsigned int index;
+
+	if (!dev_is_dma_coherent(dev))
+		vunmap(vaddr);
+
+	/*
+	 * mem->start is a physical address while dma_addr is a DMA address
+	 * produced by phys_to_dma_unencrypted(); translate it back before
+	 * computing the slot index so devices with a DMA offset free the
+	 * right slots.
+	 */
+	index = (dma_to_phys(dev, dma_addr) - mem->start) >> IO_TLB_SHIFT;
+	swiotlb_tbl_release_region(dev, index, PAGE_ALIGN(size));
+}
+
size_t swiotlb_max_mapping_size(struct device *dev)
{
return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
--
2.29.2.729.g45daf8777d-goog
^ permalink raw reply related
* [RFC PATCH v3 3/6] swiotlb: Use restricted DMA pool if available
From: Claire Chang @ 2021-01-06 3:41 UTC (permalink / raw)
To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>
Regardless of swiotlb setting, the restricted DMA pool is preferred if
available.
The restricted DMA pools provide a basic level of protection against
the DMA overwriting buffer contents at unexpected times. However, to
protect against general data leakage and system memory corruption, the
system needs to provide a way to restrict the DMA to a predefined memory
region.
Signed-off-by: Claire Chang <tientzu@chromium.org>
---
drivers/iommu/dma-iommu.c | 12 ++++++------
include/linux/swiotlb.h | 17 +++++++++++------
kernel/dma/direct.c | 8 ++++----
kernel/dma/direct.h | 10 ++++++----
kernel/dma/swiotlb.c | 13 ++++++-------
5 files changed, 33 insertions(+), 27 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f0305e6aac1b..1343cc2ef27a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -516,7 +516,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
__iommu_dma_unmap(dev, dma_addr, size);
- if (unlikely(is_swiotlb_buffer(phys)))
+ if (unlikely(is_swiotlb_buffer(dev, phys)))
swiotlb_tbl_unmap_single(dev, phys, size,
iova_align(iovad, size), dir, attrs);
}
@@ -592,7 +592,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
}
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
- if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
+ if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(dev, phys))
swiotlb_tbl_unmap_single(dev, phys, org_size,
aligned_size, dir, attrs);
@@ -764,7 +764,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(phys, size, dir);
- if (is_swiotlb_buffer(phys))
+ if (is_swiotlb_buffer(dev, phys))
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
}
@@ -777,7 +777,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
- if (is_swiotlb_buffer(phys))
+ if (is_swiotlb_buffer(dev, phys))
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
if (!dev_is_dma_coherent(dev))
@@ -798,7 +798,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
- if (is_swiotlb_buffer(sg_phys(sg)))
+ if (is_swiotlb_buffer(dev, sg_phys(sg)))
swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
dir, SYNC_FOR_CPU);
}
@@ -815,7 +815,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
return;
for_each_sg(sgl, sg, nelems, i) {
- if (is_swiotlb_buffer(sg_phys(sg)))
+ if (is_swiotlb_buffer(dev, sg_phys(sg)))
swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
dir, SYNC_FOR_DEVICE);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index a1bbd7788885..5135e5636042 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -2,12 +2,12 @@
#ifndef __LINUX_SWIOTLB_H
#define __LINUX_SWIOTLB_H
+#include <linux/device.h>
#include <linux/dma-direction.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/limits.h>
-struct device;
struct page;
struct scatterlist;
@@ -106,9 +106,14 @@ struct io_tlb_mem {
};
extern struct io_tlb_mem io_tlb_default_mem;
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline struct io_tlb_mem *get_io_tlb_mem(struct device *dev)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ return dev->dma_io_tlb_mem ? dev->dma_io_tlb_mem : &io_tlb_default_mem;
+}
+
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+{
+ struct io_tlb_mem *mem = get_io_tlb_mem(dev);
return paddr >= mem->start && paddr < mem->end;
}
@@ -116,11 +121,11 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
void __init swiotlb_exit(void);
unsigned int swiotlb_max_segment(void);
size_t swiotlb_max_mapping_size(struct device *dev);
-bool is_swiotlb_active(void);
+bool is_swiotlb_active(struct device *dev);
void __init swiotlb_adjust_size(unsigned long new_size);
#else
#define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
{
return false;
}
@@ -136,7 +141,7 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev)
return SIZE_MAX;
}
-static inline bool is_swiotlb_active(void)
+static inline bool is_swiotlb_active(struct device *dev)
{
return false;
}
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 002268262c9a..30ccbc08e229 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -343,7 +343,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
for_each_sg(sgl, sg, nents, i) {
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
- if (unlikely(is_swiotlb_buffer(paddr)))
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_tbl_sync_single(dev, paddr, sg->length,
dir, SYNC_FOR_DEVICE);
@@ -369,7 +369,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(paddr, sg->length, dir);
- if (unlikely(is_swiotlb_buffer(paddr)))
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
SYNC_FOR_CPU);
@@ -495,7 +495,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
size_t dma_direct_max_mapping_size(struct device *dev)
{
/* If SWIOTLB is active, use its maximum mapping size */
- if (is_swiotlb_active() &&
+ if (is_swiotlb_active(dev) &&
(dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
return swiotlb_max_mapping_size(dev);
return SIZE_MAX;
@@ -504,7 +504,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return !dev_is_dma_coherent(dev) ||
- is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
+ is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
}
/**
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index b98615578737..7188834cc4c7 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -56,7 +56,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
{
phys_addr_t paddr = dma_to_phys(dev, addr);
- if (unlikely(is_swiotlb_buffer(paddr)))
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
if (!dev_is_dma_coherent(dev))
@@ -73,7 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
arch_sync_dma_for_cpu_all();
}
- if (unlikely(is_swiotlb_buffer(paddr)))
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
if (dir == DMA_FROM_DEVICE)
@@ -87,8 +87,10 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);
- if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+#ifdef CONFIG_SWIOTLB
+ if (unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dma_io_tlb_mem)
return swiotlb_map(dev, phys, size, dir, attrs);
+#endif
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
if (swiotlb_force != SWIOTLB_NO_FORCE)
@@ -113,7 +115,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
- if (unlikely(is_swiotlb_buffer(phys)))
+ if (unlikely(is_swiotlb_buffer(dev, phys)))
swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
}
#endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7fb2ac087d23..1f05af09e61a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -222,7 +222,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
mem->orig_addr[i] = INVALID_PHYS_ADDR;
}
mem->index = 0;
- no_iotlb_memory = false;
if (verbose)
swiotlb_print_info();
@@ -464,7 +463,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
unsigned long flags;
phys_addr_t tlb_addr;
@@ -475,7 +474,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
unsigned long max_slots;
unsigned long tmp_io_tlb_used;
- if (no_iotlb_memory)
+ if (no_iotlb_memory && !hwdev->dma_io_tlb_mem)
panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
if (mem_encrypt_active())
@@ -597,7 +596,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
unsigned long flags;
int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
@@ -645,7 +644,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
enum dma_sync_target target)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ struct io_tlb_mem *mem = get_io_tlb_mem(hwdev);
int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = mem->orig_addr[index];
@@ -712,13 +711,13 @@ size_t swiotlb_max_mapping_size(struct device *dev)
return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
}
-bool is_swiotlb_active(void)
+bool is_swiotlb_active(struct device *dev)
{
/*
* When SWIOTLB is initialized, even if mem->start points to physical
* address zero, mem->end surely doesn't.
*/
- return io_tlb_default_mem.end != 0;
+ return io_tlb_default_mem.end != 0 || dev->dma_io_tlb_mem;
}
#ifdef CONFIG_DEBUG_FS
--
2.29.2.729.g45daf8777d-goog
^ permalink raw reply related
* [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool
From: Claire Chang @ 2021-01-06 3:41 UTC (permalink / raw)
To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>
Add the initialization function to create restricted DMA pools from
matching reserved-memory nodes in the device tree.
Signed-off-by: Claire Chang <tientzu@chromium.org>
---
include/linux/device.h | 4 ++
include/linux/swiotlb.h | 7 +-
kernel/dma/Kconfig | 1 +
kernel/dma/swiotlb.c | 144 ++++++++++++++++++++++++++++++++++------
4 files changed, 131 insertions(+), 25 deletions(-)
diff --git a/include/linux/device.h b/include/linux/device.h
index 89bb8b84173e..ca6f71ec8871 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -413,6 +413,7 @@ struct dev_links_info {
* @dma_pools: Dma pools (if dma'ble device).
* @dma_mem: Internal for coherent mem override.
* @cma_area: Contiguous memory area for dma allocations
+ * @dma_io_tlb_mem: Internal for swiotlb io_tlb_mem override.
* @archdata: For arch-specific additions.
* @of_node: Associated device tree node.
* @fwnode: Associated device node supplied by platform firmware.
@@ -515,6 +516,9 @@ struct device {
#ifdef CONFIG_DMA_CMA
struct cma *cma_area; /* contiguous memory area for dma
allocations */
+#endif
+#ifdef CONFIG_SWIOTLB
+ struct io_tlb_mem *dma_io_tlb_mem;
#endif
/* arch specific additions */
struct dev_archdata archdata;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index dd8eb57cbb8f..a1bbd7788885 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -76,12 +76,13 @@ extern enum swiotlb_force swiotlb_force;
*
* @start: The start address of the swiotlb memory pool. Used to do a quick
* range check to see if the memory was in fact allocated by this
- * API.
+ * API. For restricted DMA pool, this is device tree adjustable.
* @end: The end address of the swiotlb memory pool. Used to do a quick
* range check to see if the memory was in fact allocated by this
- * API.
+ * API. For restricted DMA pool, this is device tree adjustable.
* @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
- * @end. This is command line adjustable via setup_io_tlb_npages.
+ * @end. For default swiotlb, this is command line adjustable via
+ * setup_io_tlb_npages.
* @used: The number of used IO TLB block.
* @list: The free list describing the number of free entries available
* from each index.
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 479fc145acfc..131a0a66781b 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -82,6 +82,7 @@ config ARCH_HAS_FORCE_DMA_UNENCRYPTED
config SWIOTLB
bool
select NEED_DMA_MAP_STATE
+ select OF_EARLY_FLATTREE
#
# Should be selected if we can mmap non-coherent mappings to userspace.
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e4368159f88a..7fb2ac087d23 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -36,6 +36,11 @@
#include <linux/scatterlist.h>
#include <linux/mem_encrypt.h>
#include <linux/set_memory.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/slab.h>
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#endif
@@ -319,20 +324,21 @@ static void swiotlb_cleanup(void)
max_segment = 0;
}
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+static int swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
+ size_t size)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
- unsigned long i, bytes;
+ unsigned long i;
+ void *vaddr = phys_to_virt(start);
- bytes = nslabs << IO_TLB_SHIFT;
+ size = ALIGN(size, 1 << IO_TLB_SHIFT);
+ mem->nslabs = size >> IO_TLB_SHIFT;
+ mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
- mem->nslabs = nslabs;
- mem->start = virt_to_phys(tlb);
- mem->end = mem->start + bytes;
+ mem->start = start;
+ mem->end = mem->start + size;
- set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
- memset(tlb, 0, bytes);
+ set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
+ memset(vaddr, 0, size);
/*
* Allocate and initialize the free list array. This array is used
@@ -356,13 +362,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
mem->orig_addr[i] = INVALID_PHYS_ADDR;
}
mem->index = 0;
- no_iotlb_memory = false;
-
- swiotlb_print_info();
-
- late_alloc = 1;
-
- swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
@@ -375,6 +374,27 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
return -ENOMEM;
}
+int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ unsigned long bytes = nslabs << IO_TLB_SHIFT;
+ int ret;
+
+ ret = swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), bytes);
+ if (ret)
+ return ret;
+
+ no_iotlb_memory = false;
+
+ swiotlb_print_info();
+
+ late_alloc = 1;
+
+ swiotlb_set_max_segment(bytes);
+
+ return 0;
+}
+
void __init swiotlb_exit(void)
{
struct io_tlb_mem *mem = &io_tlb_default_mem;
@@ -703,16 +723,96 @@ bool is_swiotlb_active(void)
#ifdef CONFIG_DEBUG_FS
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs(struct io_tlb_mem *mem, const char *name,
+ struct dentry *node)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
-
- mem->debugfs = debugfs_create_dir("swiotlb", NULL);
+ mem->debugfs = debugfs_create_dir(name, node);
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+}
+
+static int __init swiotlb_create_default_debugfs(void)
+{
+ swiotlb_create_debugfs(&io_tlb_default_mem, "swiotlb", NULL);
+
return 0;
}
-late_initcall(swiotlb_create_debugfs);
+late_initcall(swiotlb_create_default_debugfs);
#endif
+
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+ struct device *dev)
+{
+ struct io_tlb_mem *mem = rmem->priv;
+ int ret;
+
+ if (dev->dma_io_tlb_mem)
+ return -EBUSY;
+
+ if (!mem) {
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return -ENOMEM;
+
+ if (!memremap(rmem->base, rmem->size, MEMREMAP_WB)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ ret = swiotlb_init_io_tlb_mem(mem, rmem->base, rmem->size);
+ if (ret)
+ goto cleanup;
+
+ rmem->priv = mem;
+ }
+
+#ifdef CONFIG_DEBUG_FS
+ swiotlb_create_debugfs(mem, dev_name(dev), io_tlb_default_mem.debugfs);
+#endif
+
+ dev->dma_io_tlb_mem = mem;
+
+ return 0;
+
+cleanup:
+ kfree(mem);
+
+ return ret;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+ struct device *dev)
+{
+ if (!dev)
+ return;
+
+#ifdef CONFIG_DEBUG_FS
+ debugfs_remove_recursive(dev->dma_io_tlb_mem->debugfs);
+#endif
+ dev->dma_io_tlb_mem = NULL;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+ .device_init = rmem_swiotlb_device_init,
+ .device_release = rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+ unsigned long node = rmem->fdt_node;
+
+ if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+ of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+ of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+ of_get_flat_dt_prop(node, "no-map", NULL))
+ return -EINVAL;
+
+ rmem->ops = &rmem_swiotlb_ops;
+ pr_info("Reserved memory: created device swiotlb memory pool at %pa, size %ld MiB\n",
+ &rmem->base, (unsigned long)rmem->size / SZ_1M);
+ return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
--
2.29.2.729.g45daf8777d-goog
^ permalink raw reply related
* [RFC PATCH v3 1/6] swiotlb: Add io_tlb_mem struct
From: Claire Chang @ 2021-01-06 3:41 UTC (permalink / raw)
To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
bauerman
In-Reply-To: <20210106034124.30560-1-tientzu@chromium.org>
Added a new struct, io_tlb_mem, as the IO TLB memory pool descriptor and
moved relevant global variables into that struct.
This will be useful later to allow for a restricted DMA pool.
Signed-off-by: Claire Chang <tientzu@chromium.org>
---
arch/powerpc/platforms/pseries/svm.c | 4 +-
drivers/xen/swiotlb-xen.c | 4 +-
include/linux/swiotlb.h | 39 +++-
kernel/dma/swiotlb.c | 292 +++++++++++++--------------
4 files changed, 178 insertions(+), 161 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 7b739cc7a8a9..2b767f1ca5fd 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -55,8 +55,8 @@ void __init svm_swiotlb_init(void)
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
return;
- if (io_tlb_start)
- memblock_free_early(io_tlb_start,
+ if (io_tlb_default_mem.start)
+ memblock_free_early(io_tlb_default_mem.start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
panic("SVM: Cannot allocate SWIOTLB buffer");
}
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 2b385c1b4a99..4d17dff7ffd2 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -192,8 +192,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
/*
* IO TLB memory already allocated. Just use it.
*/
- if (io_tlb_start != 0) {
- xen_io_tlb_start = phys_to_virt(io_tlb_start);
+ if (io_tlb_default_mem.start != 0) {
+ xen_io_tlb_start = phys_to_virt(io_tlb_default_mem.start);
goto end;
}
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d9c9fc9ca5d2..dd8eb57cbb8f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -70,11 +70,46 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
#ifdef CONFIG_SWIOTLB
extern enum swiotlb_force swiotlb_force;
-extern phys_addr_t io_tlb_start, io_tlb_end;
+
+/**
+ * struct io_tlb_mem - IO TLB Memory Pool Descriptor
+ *
+ * @start: The start address of the swiotlb memory pool. Used to do a quick
+ * range check to see if the memory was in fact allocated by this
+ * API.
+ * @end: The end address of the swiotlb memory pool. Used to do a quick
+ * range check to see if the memory was in fact allocated by this
+ * API.
+ * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
+ * @end. This is command line adjustable via setup_io_tlb_npages.
+ * @used: The number of used IO TLB block.
+ * @list: The free list describing the number of free entries available
+ * from each index.
+ * @index: The index to start searching in the next round.
+ * @orig_addr: The original address corresponding to a mapped entry for the
+ * sync operations.
+ * @lock: The lock to protect the above data structures in the map and
+ * unmap calls.
+ * @debugfs: The dentry to debugfs.
+ */
+struct io_tlb_mem {
+ phys_addr_t start;
+ phys_addr_t end;
+ unsigned long nslabs;
+ unsigned long used;
+ unsigned int *list;
+ unsigned int index;
+ phys_addr_t *orig_addr;
+ spinlock_t lock;
+ struct dentry *debugfs;
+};
+extern struct io_tlb_mem io_tlb_default_mem;
static inline bool is_swiotlb_buffer(phys_addr_t paddr)
{
- return paddr >= io_tlb_start && paddr < io_tlb_end;
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+
+ return paddr >= mem->start && paddr < mem->end;
}
void __init swiotlb_exit(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..e4368159f88a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -61,33 +61,11 @@
* allocate a contiguous 1MB, we're probably in trouble anyway.
*/
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
enum swiotlb_force swiotlb_force;
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * The number of used IO TLB block
- */
-static unsigned long io_tlb_used;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+struct io_tlb_mem io_tlb_default_mem;
/*
* Max segment that we can provide which (if pages are contingous) will
@@ -95,27 +73,17 @@ static unsigned int io_tlb_index;
*/
static unsigned int max_segment;
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
static int late_alloc;
static int __init
setup_io_tlb_npages(char *str)
{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+
if (isdigit(*str)) {
- io_tlb_nslabs = simple_strtoul(str, &str, 0);
+ mem->nslabs = simple_strtoul(str, &str, 0);
/* avoid tail segment of size < IO_TLB_SEGSIZE */
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
}
if (*str == ',')
++str;
@@ -123,7 +91,7 @@ setup_io_tlb_npages(char *str)
swiotlb_force = SWIOTLB_FORCE;
} else if (!strcmp(str, "noforce")) {
swiotlb_force = SWIOTLB_NO_FORCE;
- io_tlb_nslabs = 1;
+ mem->nslabs = 1;
}
return 0;
@@ -134,7 +102,7 @@ static bool no_iotlb_memory;
unsigned long swiotlb_nr_tbl(void)
{
- return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
+ return unlikely(no_iotlb_memory) ? 0 : io_tlb_default_mem.nslabs;
}
EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
@@ -156,13 +124,14 @@ unsigned long swiotlb_size_or_default(void)
{
unsigned long size;
- size = io_tlb_nslabs << IO_TLB_SHIFT;
+ size = io_tlb_default_mem.nslabs << IO_TLB_SHIFT;
return size ? size : (IO_TLB_DEFAULT_SIZE);
}
void __init swiotlb_adjust_size(unsigned long new_size)
{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long size;
/*
@@ -170,10 +139,10 @@ void __init swiotlb_adjust_size(unsigned long new_size)
* architectures such as those supporting memory encryption to
* adjust/expand SWIOTLB size for their use.
*/
- if (!io_tlb_nslabs) {
+ if (!mem->nslabs) {
size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
- io_tlb_nslabs = size >> IO_TLB_SHIFT;
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ mem->nslabs = size >> IO_TLB_SHIFT;
+ mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
}
@@ -181,14 +150,15 @@ void __init swiotlb_adjust_size(unsigned long new_size)
void swiotlb_print_info(void)
{
- unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ unsigned long bytes = mem->nslabs << IO_TLB_SHIFT;
if (no_iotlb_memory) {
pr_warn("No low mem\n");
return;
}
- pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end,
+ pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end,
bytes >> 20);
}
@@ -200,57 +170,59 @@ void swiotlb_print_info(void)
*/
void __init swiotlb_update_mem_attributes(void)
{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
void *vaddr;
unsigned long bytes;
if (no_iotlb_memory || late_alloc)
return;
- vaddr = phys_to_virt(io_tlb_start);
- bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
+ vaddr = phys_to_virt(mem->start);
+ bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
}
int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long i, bytes;
size_t alloc_size;
bytes = nslabs << IO_TLB_SHIFT;
- io_tlb_nslabs = nslabs;
- io_tlb_start = __pa(tlb);
- io_tlb_end = io_tlb_start + bytes;
+ mem->nslabs = nslabs;
+ mem->start = __pa(tlb);
+ mem->end = mem->start + bytes;
/*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
+ * between mem->start and mem->end.
*/
- alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
- io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!io_tlb_list)
+ alloc_size = PAGE_ALIGN(mem->nslabs * sizeof(int));
+ mem->list = memblock_alloc(alloc_size, PAGE_SIZE);
+ if (!mem->list)
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
- alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
- io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!io_tlb_orig_addr)
+ alloc_size = PAGE_ALIGN(mem->nslabs * sizeof(phys_addr_t));
+ mem->orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
+ if (!mem->orig_addr)
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
- for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ for (i = 0; i < mem->nslabs; i++) {
+ mem->list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ mem->orig_addr[i] = INVALID_PHYS_ADDR;
}
- io_tlb_index = 0;
+ mem->index = 0;
no_iotlb_memory = false;
if (verbose)
swiotlb_print_info();
- swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+ swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
}
@@ -261,26 +233,27 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
void __init
swiotlb_init(int verbose)
{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
size_t default_size = IO_TLB_DEFAULT_SIZE;
unsigned char *vstart;
unsigned long bytes;
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ if (!mem->nslabs) {
+ mem->nslabs = (default_size >> IO_TLB_SHIFT);
+ mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
}
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ bytes = mem->nslabs << IO_TLB_SHIFT;
/* Get IO TLB memory from the low pages */
vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
- if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+ if (vstart && !swiotlb_init_with_tbl(vstart, mem->nslabs, verbose))
return;
- if (io_tlb_start) {
- memblock_free_early(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
- io_tlb_start = 0;
+ if (mem->start) {
+ memblock_free_early(mem->start,
+ PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT));
+ mem->start = 0;
}
pr_warn("Cannot allocate buffer");
no_iotlb_memory = true;
@@ -294,22 +267,23 @@ swiotlb_init(int verbose)
int
swiotlb_late_init_with_default_size(size_t default_size)
{
- unsigned long bytes, req_nslabs = io_tlb_nslabs;
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ unsigned long bytes, req_nslabs = mem->nslabs;
unsigned char *vstart = NULL;
unsigned int order;
int rc = 0;
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ if (!mem->nslabs) {
+ mem->nslabs = (default_size >> IO_TLB_SHIFT);
+ mem->nslabs = ALIGN(mem->nslabs, IO_TLB_SEGSIZE);
}
/*
* Get IO TLB memory from the low pages
*/
- order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
- io_tlb_nslabs = SLABS_PER_PAGE << order;
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ order = get_order(mem->nslabs << IO_TLB_SHIFT);
+ mem->nslabs = SLABS_PER_PAGE << order;
+ bytes = mem->nslabs << IO_TLB_SHIFT;
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
@@ -320,15 +294,15 @@ swiotlb_late_init_with_default_size(size_t default_size)
}
if (!vstart) {
- io_tlb_nslabs = req_nslabs;
+ mem->nslabs = req_nslabs;
return -ENOMEM;
}
if (order != get_order(bytes)) {
pr_warn("only able to allocate %ld MB\n",
(PAGE_SIZE << order) >> 20);
- io_tlb_nslabs = SLABS_PER_PAGE << order;
+ mem->nslabs = SLABS_PER_PAGE << order;
}
- rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
+ rc = swiotlb_late_init_with_tbl(vstart, mem->nslabs);
if (rc)
free_pages((unsigned long)vstart, order);
@@ -337,22 +311,25 @@ swiotlb_late_init_with_default_size(size_t default_size)
static void swiotlb_cleanup(void)
{
- io_tlb_end = 0;
- io_tlb_start = 0;
- io_tlb_nslabs = 0;
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+
+ mem->end = 0;
+ mem->start = 0;
+ mem->nslabs = 0;
max_segment = 0;
}
int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long i, bytes;
bytes = nslabs << IO_TLB_SHIFT;
- io_tlb_nslabs = nslabs;
- io_tlb_start = virt_to_phys(tlb);
- io_tlb_end = io_tlb_start + bytes;
+ mem->nslabs = nslabs;
+ mem->start = virt_to_phys(tlb);
+ mem->end = mem->start + bytes;
set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
memset(tlb, 0, bytes);
@@ -360,39 +337,39 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
/*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
+ * between mem->start and mem->end.
*/
- io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(int)));
- if (!io_tlb_list)
+ mem->list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+ get_order(mem->nslabs * sizeof(int)));
+ if (!mem->list)
goto cleanup3;
- io_tlb_orig_addr = (phys_addr_t *)
+ mem->orig_addr = (phys_addr_t *)
__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs *
+ get_order(mem->nslabs *
sizeof(phys_addr_t)));
- if (!io_tlb_orig_addr)
+ if (!mem->orig_addr)
goto cleanup4;
- for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ for (i = 0; i < mem->nslabs; i++) {
+ mem->list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ mem->orig_addr[i] = INVALID_PHYS_ADDR;
}
- io_tlb_index = 0;
+ mem->index = 0;
no_iotlb_memory = false;
swiotlb_print_info();
late_alloc = 1;
- swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+ swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
cleanup4:
- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
- sizeof(int)));
- io_tlb_list = NULL;
+ free_pages((unsigned long)mem->list,
+ get_order(mem->nslabs * sizeof(int)));
+ mem->list = NULL;
cleanup3:
swiotlb_cleanup();
return -ENOMEM;
@@ -400,23 +377,25 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
void __init swiotlb_exit(void)
{
- if (!io_tlb_orig_addr)
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+
+ if (!mem->orig_addr)
return;
if (late_alloc) {
- free_pages((unsigned long)io_tlb_orig_addr,
- get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
- sizeof(int)));
- free_pages((unsigned long)phys_to_virt(io_tlb_start),
- get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+ free_pages((unsigned long)mem->orig_addr,
+ get_order(mem->nslabs * sizeof(phys_addr_t)));
+ free_pages((unsigned long)mem->list,
+ get_order(mem->nslabs * sizeof(int)));
+ free_pages((unsigned long)phys_to_virt(mem->start),
+ get_order(mem->nslabs << IO_TLB_SHIFT));
} else {
- memblock_free_late(__pa(io_tlb_orig_addr),
- PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
- memblock_free_late(__pa(io_tlb_list),
- PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
- memblock_free_late(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ memblock_free_late(__pa(mem->orig_addr),
+ PAGE_ALIGN(mem->nslabs * sizeof(phys_addr_t)));
+ memblock_free_late(__pa(mem->list),
+ PAGE_ALIGN(mem->nslabs * sizeof(int)));
+ memblock_free_late(mem->start,
+ PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT));
}
swiotlb_cleanup();
}
@@ -465,7 +444,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
{
- dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, mem->start);
unsigned long flags;
phys_addr_t tlb_addr;
unsigned int nslots, stride, index, wrap;
@@ -516,13 +496,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
* Find suitable number of IO TLB entries size that will fit this
* request and allocate a buffer from that IO TLB pool.
*/
- spin_lock_irqsave(&io_tlb_lock, flags);
+ spin_lock_irqsave(&mem->lock, flags);
- if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+ if (unlikely(nslots > mem->nslabs - mem->used))
goto not_found;
- index = ALIGN(io_tlb_index, stride);
- if (index >= io_tlb_nslabs)
+ index = ALIGN(mem->index, stride);
+ if (index >= mem->nslabs)
index = 0;
wrap = index;
@@ -530,7 +510,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
while (iommu_is_span_boundary(index, nslots, offset_slots,
max_slots)) {
index += stride;
- if (index >= io_tlb_nslabs)
+ if (index >= mem->nslabs)
index = 0;
if (index == wrap)
goto not_found;
@@ -541,40 +521,40 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
* contiguous buffers, we allocate the buffers from that slot
* and mark the entries as '0' indicating unavailable.
*/
- if (io_tlb_list[index] >= nslots) {
+ if (mem->list[index] >= nslots) {
int count = 0;
for (i = index; i < (int) (index + nslots); i++)
- io_tlb_list[i] = 0;
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
- tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+ mem->list[i] = 0;
+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && mem->list[i]; i--)
+ mem->list[i] = ++count;
+ tlb_addr = mem->start + (index << IO_TLB_SHIFT);
/*
* Update the indices to avoid searching in the next
* round.
*/
- io_tlb_index = ((index + nslots) < io_tlb_nslabs
- ? (index + nslots) : 0);
+ mem->index = ((index + nslots) < mem->nslabs
+ ? (index + nslots) : 0);
goto found;
}
index += stride;
- if (index >= io_tlb_nslabs)
+ if (index >= mem->nslabs)
index = 0;
} while (index != wrap);
not_found:
- tmp_io_tlb_used = io_tlb_used;
+ tmp_io_tlb_used = mem->used;
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ spin_unlock_irqrestore(&mem->lock, flags);
if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
+ alloc_size, mem->nslabs, tmp_io_tlb_used);
return (phys_addr_t)DMA_MAPPING_ERROR;
found:
- io_tlb_used += nslots;
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ mem->used += nslots;
+ spin_unlock_irqrestore(&mem->lock, flags);
/*
* Save away the mapping from the original address to the DMA address.
@@ -582,7 +562,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
* needed.
*/
for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+ mem->orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -597,10 +577,11 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
{
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long flags;
int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
- phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = mem->orig_addr[index];
/*
* First, sync the memory before unmapping the entry
@@ -616,36 +597,37 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
* While returning the entries to the free list, we merge the entries
* with slots below and above the pool being returned.
*/
- spin_lock_irqsave(&io_tlb_lock, flags);
+ spin_lock_irqsave(&mem->lock, flags);
{
count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
- io_tlb_list[index + nslots] : 0);
+ mem->list[index + nslots] : 0);
/*
* Step 1: return the slots to the free list, merging the
* slots with superceeding slots
*/
for (i = index + nslots - 1; i >= index; i--) {
- io_tlb_list[i] = ++count;
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ mem->list[i] = ++count;
+ mem->orig_addr[i] = INVALID_PHYS_ADDR;
}
/*
* Step 2: merge the returned slots with the preceding slots,
* if available (non zero)
*/
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && mem->list[i]; i--)
+ mem->list[i] = ++count;
- io_tlb_used -= nslots;
+ mem->used -= nslots;
}
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ spin_unlock_irqrestore(&mem->lock, flags);
}
void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
size_t size, enum dma_data_direction dir,
enum dma_sync_target target)
{
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
- phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
+ int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = mem->orig_addr[index];
if (orig_addr == INVALID_PHYS_ADDR)
return;
@@ -713,21 +695,21 @@ size_t swiotlb_max_mapping_size(struct device *dev)
bool is_swiotlb_active(void)
{
/*
- * When SWIOTLB is initialized, even if io_tlb_start points to physical
- * address zero, io_tlb_end surely doesn't.
+ * When SWIOTLB is initialized, even if mem->start points to physical
+ * address zero, mem->end surely doesn't.
*/
- return io_tlb_end != 0;
+ return io_tlb_default_mem.end != 0;
}
#ifdef CONFIG_DEBUG_FS
static int __init swiotlb_create_debugfs(void)
{
- struct dentry *root;
+ struct io_tlb_mem *mem = &io_tlb_default_mem;
- root = debugfs_create_dir("swiotlb", NULL);
- debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs);
- debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used);
+ mem->debugfs = debugfs_create_dir("swiotlb", NULL);
+ debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
+ debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
return 0;
}
--
2.29.2.729.g45daf8777d-goog
To make this change as mechanical as possible, I didn't fix any
checkpatch.pl ERROR/WARNING.
^ permalink raw reply related
* [RFC PATCH v3 0/6] Restricted DMA
From: Claire Chang @ 2021-01-06 3:41 UTC (permalink / raw)
To: robh+dt, mpe, benh, paulus, joro, will, frowand.list, konrad.wilk,
boris.ostrovsky, jgross, sstabellini, hch, m.szyprowski,
robin.murphy
Cc: heikki.krogerus, peterz, grant.likely, mingo, drinkcat, saravanak,
xypron.glpk, rafael.j.wysocki, bgolaszewski, xen-devel, treding,
devicetree, Claire Chang, dan.j.williams, andriy.shevchenko,
gregkh, rdunlap, linux-kernel, tfiga, iommu, linuxppc-dev,
bauerman
This series implements mitigations for lack of DMA access control on
systems without an IOMMU, which could result in the DMA accessing the
system memory at unexpected times and/or unexpected addresses, possibly
leading to data leakage or corruption.
For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
not behind an IOMMU. As PCI-e, by design, gives the device full access to
system memory, a vulnerability in the Wi-Fi firmware could easily escalate
to a full system exploit (remote wifi exploits: [1a], [1b] that show a
full chain of exploits; [2], [3]).
To mitigate the security concerns, we introduce restricted DMA. Restricted
DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
specially allocated region and does memory allocation from the same region.
The feature on its own provides a basic level of protection against the DMA
overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system needs
to provide a way to restrict the DMA to a predefined memory region (this is
usually done at firmware level, e.g. in ATF on some ARM platforms).
[1a] https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
[1b] https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
[2] https://blade.tencent.com/en/advisories/qualpwn/
[3] https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/
Claire Chang (6):
swiotlb: Add io_tlb_mem struct
swiotlb: Add restricted DMA pool
swiotlb: Use restricted DMA pool if available
swiotlb: Add restricted DMA alloc/free support.
dt-bindings: of: Add restricted DMA pool
of: Add plumbing for restricted DMA pool
.../reserved-memory/reserved-memory.txt | 24 +
arch/powerpc/platforms/pseries/svm.c | 4 +-
drivers/iommu/dma-iommu.c | 12 +-
drivers/of/address.c | 21 +
drivers/of/device.c | 4 +
drivers/of/of_private.h | 5 +
drivers/xen/swiotlb-xen.c | 4 +-
include/linux/device.h | 4 +
include/linux/swiotlb.h | 61 +-
kernel/dma/Kconfig | 1 +
kernel/dma/direct.c | 20 +-
kernel/dma/direct.h | 10 +-
kernel/dma/swiotlb.c | 576 +++++++++++-------
13 files changed, 514 insertions(+), 232 deletions(-)
--
2.29.2.729.g45daf8777d-goog
v3:
Using only one reserved memory region for both streaming DMA and memory
allocation.
v2:
Building on top of swiotlb.
https://lore.kernel.org/patchwork/cover/1280705/
v1:
Using dma_map_ops.
https://lore.kernel.org/patchwork/cover/1271660/
^ permalink raw reply
* Re: [PATCH v2] net: ethernet: fs_enet: Add missing MODULE_LICENSE
From: David Miller @ 2021-01-06 0:57 UTC (permalink / raw)
To: mpe; +Cc: andrew, netdev, linux-kernel, linuxppc-dev, kuba
In-Reply-To: <20210105091515.87509-1-mpe@ellerman.id.au>
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 5 Jan 2021 20:15:15 +1100
> Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE()
> into error") the ppc32_allmodconfig build fails with:
>
> ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o
> ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o
>
> Add the missing MODULE_LICENSEs to fix the build. Both files include a
> copyright header indicating they are GPL v2.
>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Applied.
^ permalink raw reply
* Re: [RFC please help] membarrier: Rewrite sync_core_before_usermode()
From: Will Deacon @ 2021-01-05 22:41 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Arnd Bergmann, X86 ML, LKML, Nicholas Piggin, Mathieu Desnoyers,
Andy Lutomirski, Catalin Marinas, Paul Mackerras, stable,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <7BFAB97C-1949-46A3-A1E2-DFE108DC7D5E@amacapital.net>
On Tue, Jan 05, 2021 at 08:20:51AM -0800, Andy Lutomirski wrote:
> > On Jan 5, 2021, at 5:26 AM, Will Deacon <will@kernel.org> wrote:
> > Sorry for the slow reply, I was socially distanced from my keyboard.
> >
> >> On Mon, Dec 28, 2020 at 04:36:11PM -0800, Andy Lutomirski wrote:
> >> On Mon, Dec 28, 2020 at 4:11 PM Nicholas Piggin <npiggin@gmail.com> wrote:
> >>>> +static inline void membarrier_sync_core_before_usermode(void)
> >>>> +{
> >>>> + /*
> >>>> + * XXX: I know basically nothing about powerpc cache management.
> >>>> + * Is this correct?
> >>>> + */
> >>>> + isync();
> >>>
> >>> This is not about memory ordering or cache management, it's about
> >>> pipeline management. Powerpc's return to user mode serializes the
> >>> CPU (aka the hardware thread, _not_ the core; another wrongness of
> >>> the name, but AFAIKS the HW thread is what is required for
> >>> membarrier). So this is wrong, powerpc needs nothing here.
> >>
> >> Fair enough. I'm happy to defer to you on the powerpc details. In
> >> any case, this just illustrates that we need feedback from a person
> >> who knows more about ARM64 than I do.
> >
> > I think we're in a very similar boat to PowerPC, fwiw. Roughly speaking:
> >
> > 1. SYNC_CORE does _not_ perform any cache management; that is the
> > responsibility of userspace, either by executing the relevant
> > maintenance instructions (arm64) or a system call (arm32). Crucially,
> > the hardware will ensure that this cache maintenance is broadcast
> > to all other CPUs.
>
> Is this guaranteed regardless of any aliases? That is, if I flush from
> one CPU at one VA and then execute the same physical address from another
> CPU at a different VA, does this still work?
The data side will be fine, but the instruction side can have virtual
aliases. We handle this in flush_ptrace_access() by blowing away the whole
I-cache if we're not physically-indexed, but userspace would be in trouble
if it wanted to handle this situation alone.
> > 2. Even with all the cache maintenance in the world, a CPU could have
> > speculatively fetched stale instructions into its "pipeline" ahead of
> > time, and these are _not_ flushed by the broadcast maintenance instructions
> > in (1). SYNC_CORE provides a means for userspace to discard these stale
> > instructions.
> >
> > 3. The context synchronization event on exception entry/exit is
> > sufficient here. The Arm ARM isn't very good at describing what it
> > does, because it's in denial about the existence of a pipeline, but
> > it does have snippets such as:
> >
> > (s/PE/CPU/)
> > | For all types of memory:
> > | The PE might have fetched the instructions from memory at any time
> > | since the last Context synchronization event on that PE.
> >
> > Interestingly, the architecture recently added a control bit to remove
> > this synchronisation from exception return, so if we set that then we'd
> > have a problem with SYNC_CORE and adding an ISB would be necessary (and
> > we could probably then make kernel->kernel returns cheaper, but I
> > suspect we're relying on this implicit synchronisation in other places
> > too).
> >
>
> Is ISB just a context synchronization event or does it do more?
That's a good question. Barrier instructions on ARM do tend to get
overloaded with extra behaviours over time, so it could certainly end up
doing the context synchronization event + extra stuff in future. Right now,
the only thing that springs to mind is the spectre-v1 heavy mitigation
barrier of 'DSB; ISB' which, for example, probably doesn't work for 'DSB;
ERET' because the ERET can be treated like a conditional (!) branch.
> On x86, it’s very hard to tell that MFENCE does any more than LOCK, but
> it’s much slower. And we have LFENCE, which, as documented, doesn’t
> appear to have any semantics at all. (Or at least it didn’t before
> Spectre.)
I tend to think of ISB as a front-end barrier relating to instruction fetch
whereas DMB, acquire/release and DSB are all back-end barriers relating to
memory accesses. You _can_ use ISB in conjunction with control dependencies
to order a pair of loads (like you can with ISYNC on Power), but it's a
really expensive way to do it.
> > Are you seeing a problem in practice, or did this come up while trying to
> > decipher the semantics of SYNC_CORE?
>
> It came up while trying to understand the code and work through various
> bugs in it. The code was written using something approximating x86
> terminology, but it was definitely wrong on x86 (at least if you believe
> the SDM, and I haven’t convinced any architects to say otherwise).
Ok, thanks.
Will
^ permalink raw reply
* Power9 NV linux-next random process hang
From: Qian Cai @ 2021-01-05 22:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-kernel
.config: https://cailca.coding.net/public/linux/mm/git/files/master/powerpc.config
Today's linux-next starts to generate random process hangs quite easily.
Yesterday's build seems to work fine. Sometimes the process stack seems corrupt
while the process is running at 100% CPU, and gdb shows it has just entered a
subroutine where I really can't see why it hangs.
[ 6732.309621][T11627] task:ranbug state:R running task stack:24176 pid: 2893 ppid: 2867 flags:0x00040000
[ 6732.309779][T11627] Call Trace:
[ 6732.309826][T11627] [c00000006166fa30] [c00000006166fb60] 0xc00000006166fb60 (unreliable)
Also, running LTP syscalls ended up hanging with lots of zombie processes. Any idea?
root 2023 0.0 0.0 0 0 ? Zs 14:10 0:00 [login] <defunct>
root 52052 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [recv01] <defunct>
root 52054 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [recvfrom01] <defunct>
root 52056 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [recvmsg01] <defunct>
root 52155 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [rt_sigtimedwait] <defunct>
root 52305 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [semctl01] <defunct>
root 52362 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [send01] <defunct>
root 52386 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04] <defunct>
root 52387 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04] <defunct>
root 52388 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04] <defunct>
root 52389 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04] <defunct>
root 52390 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04] <defunct>
root 52392 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04_64] <defunct>
root 52393 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04_64] <defunct>
root 52394 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04_64] <defunct>
root 52395 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04_64] <defunct>
root 52396 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile04_64] <defunct>
root 52398 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile05] <defunct>
root 52400 0.0 0.0 0 0 pts/0 Z 15:03 0:00 [sendfile05_64] <defunct>
root 52415 0.0 0.0 0 0 pts/0 Z 15:04 0:00 [sendmsg01] <defunct>
root 53470 0.0 0.0 0 0 pts/0 Z 15:04 0:00 [sendto01] <defunct>
root 53763 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53764 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53765 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53766 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53767 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53768 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53769 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53770 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53771 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53772 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53773 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53774 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53775 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53776 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53777 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53778 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53779 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53780 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
root 53782 0.0 0.0 0 0 pts/0 Z 15:06 0:00 [setrlimit01] <defunct>
nobody 54290 0.0 0.0 0 0 pts/0 Z 15:07 0:00 [sysctl03] <defunct>
root 56813 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56814 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56815 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56816 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56817 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56818 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56819 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56820 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56821 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56822 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56823 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56825 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56826 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56827 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56828 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56829 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56830 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56831 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56832 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56833 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56834 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56835 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56836 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid03] <defunct>
root 56838 0.0 0.0 0 0 pts/0 Z 16:09 0:00 [waitpid04] <defunct>
sshd 58675 0.0 0.0 0 0 ? Z 17:21 0:00 [sshd] <defunct>
^ permalink raw reply
* Re: [PATCH v2 -next] misc: ocxl: use DEFINE_MUTEX() for mutex lock
From: Andrew Donnellan @ 2021-01-05 21:53 UTC (permalink / raw)
To: Zheng Yongjun, linuxppc-dev, linux-kernel; +Cc: fbarrat, gregkh, arnd
In-Reply-To: <20201224132446.31286-1-zhengyongjun3@huawei.com>
On 25/12/20 12:24 am, Zheng Yongjun wrote:
> mutex lock can be initialized automatically with DEFINE_MUTEX()
> rather than explicitly calling mutex_init().
>
> Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
--
Andrew Donnellan OzLabs, ADL Canberra
ajd@linux.ibm.com IBM Australia Limited
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox