* [PATCH v3 1/2] habanalabs: Replace dma-fence mechanism with completions
@ 2020-08-03 12:07 Oded Gabbay
2020-08-03 12:07 ` [PATCH v3 2/2] habanalabs: add information about PCIe controller Oded Gabbay
0 siblings, 1 reply; 2+ messages in thread
From: Oded Gabbay @ 2020-08-03 12:07 UTC (permalink / raw)
To: linux-kernel, SW_Drivers; +Cc: Ofir Bitton, Greg Kroah-Hartman, Daniel Vetter
From: Ofir Bitton <obitton@habana.ai>
The habanalabs driver uses the dma-fence mechanism for synchronization.
The dma-fence mechanism was designed solely for GPUs, hence we propose
a simpler mechanism based on completions to replace the current
dma-fence objects.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
Changes in v3:
- mark two internal functions as static
.../habanalabs/common/command_submission.c | 95 +++++++++----------
drivers/misc/habanalabs/common/context.c | 13 +--
drivers/misc/habanalabs/common/habanalabs.h | 30 ++++--
drivers/misc/habanalabs/common/hw_queue.c | 2 +-
4 files changed, 77 insertions(+), 63 deletions(-)
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b9840e368eb5..2b40aa85bec9 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref)
hw_sob->q_idx, hw_sob->sob_id);
}
-static const char *hl_fence_get_driver_name(struct dma_fence *fence)
-{
- return "HabanaLabs";
-}
-
-static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
-{
- struct hl_cs_compl *hl_cs_compl =
- container_of(fence, struct hl_cs_compl, base_fence);
-
- return dev_name(hl_cs_compl->hdev->dev);
-}
-
-static bool hl_fence_enable_signaling(struct dma_fence *fence)
-{
- return true;
-}
-
-static void hl_fence_release(struct dma_fence *fence)
+static void hl_fence_release(struct kref *kref)
{
+ struct hl_fence *fence =
+ container_of(kref, struct hl_fence, refcount);
struct hl_cs_compl *hl_cs_cmpl =
container_of(fence, struct hl_cs_compl, base_fence);
struct hl_device *hdev = hl_cs_cmpl->hdev;
@@ -99,15 +83,27 @@ static void hl_fence_release(struct dma_fence *fence)
}
free:
- kfree_rcu(hl_cs_cmpl, base_fence.rcu);
+ kfree(hl_cs_cmpl);
}
-static const struct dma_fence_ops hl_fence_ops = {
- .get_driver_name = hl_fence_get_driver_name,
- .get_timeline_name = hl_fence_get_timeline_name,
- .enable_signaling = hl_fence_enable_signaling,
- .release = hl_fence_release
-};
+void hl_fence_put(struct hl_fence *fence)
+{
+ if (fence)
+ kref_put(&fence->refcount, hl_fence_release);
+}
+
+void hl_fence_get(struct hl_fence *fence)
+{
+ if (fence)
+ kref_get(&fence->refcount);
+}
+
+static void hl_fence_init(struct hl_fence *fence)
+{
+ kref_init(&fence->refcount);
+ fence->error = 0;
+ init_completion(&fence->completion);
+}
static void cs_get(struct hl_cs *cs)
{
@@ -336,7 +332,7 @@ static void cs_do_release(struct kref *ref)
* In case the wait for signal CS was submitted, the put occurs
* in init_signal_wait_cs() right before hanging on the PQ.
*/
- dma_fence_put(cs->signal_fence);
+ hl_fence_put(cs->signal_fence);
}
/*
@@ -348,19 +344,18 @@ static void cs_do_release(struct kref *ref)
hl_ctx_put(cs->ctx);
/* We need to mark an error for not submitted because in that case
- * the dma fence release flow is different. Mainly, we don't need
+ * the hl fence release flow is different. Mainly, we don't need
* to handle hw_sob for signal/wait
*/
if (cs->timedout)
- dma_fence_set_error(cs->fence, -ETIMEDOUT);
+ cs->fence->error = -ETIMEDOUT;
else if (cs->aborted)
- dma_fence_set_error(cs->fence, -EIO);
+ cs->fence->error = -EIO;
else if (!cs->submitted)
- dma_fence_set_error(cs->fence, -EBUSY);
-
- dma_fence_signal(cs->fence);
- dma_fence_put(cs->fence);
+ cs->fence->error = -EBUSY;
+ complete_all(&cs->fence->completion);
+ hl_fence_put(cs->fence);
cs_counters_aggregate(hdev, cs->ctx);
kfree(cs->jobs_in_queue_cnt);
@@ -401,7 +396,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
struct hl_cs_compl *cs_cmpl;
- struct dma_fence *other = NULL;
+ struct hl_fence *other = NULL;
struct hl_cs *cs;
int rc;
@@ -434,7 +429,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs_cmpl->cs_seq = ctx->cs_sequence;
other = ctx->cs_pending[cs_cmpl->cs_seq &
(hdev->asic_prop.max_pending_cs - 1)];
- if ((other) && (!dma_fence_is_signaled(other))) {
+
+ if (other && !completion_done(&other->completion)) {
dev_dbg(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
rc = -EAGAIN;
@@ -448,8 +444,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
goto free_fence;
}
- dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
- ctx->asid, ctx->cs_sequence);
+ /* init hl_fence */
+ hl_fence_init(&cs_cmpl->base_fence);
cs->sequence = cs_cmpl->cs_seq;
@@ -458,9 +454,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
&cs_cmpl->base_fence;
ctx->cs_sequence++;
- dma_fence_get(&cs_cmpl->base_fence);
+ hl_fence_get(&cs_cmpl->base_fence);
- dma_fence_put(other);
+ hl_fence_put(other);
spin_unlock(&ctx->cs_lock);
@@ -773,7 +769,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_ctx *ctx = hpriv->ctx;
struct hl_cs_chunk *cs_chunk_array, *chunk;
struct hw_queue_properties *hw_queue_prop;
- struct dma_fence *sig_fence = NULL;
+ struct hl_fence *sig_fence = NULL;
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
@@ -875,14 +871,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
dev_err(hdev->dev,
"CS seq 0x%llx is not of a signal CS\n",
signal_seq);
- dma_fence_put(sig_fence);
+ hl_fence_put(sig_fence);
rc = -EINVAL;
goto free_signal_seq_array;
}
- if (dma_fence_is_signaled(sig_fence)) {
+ if (completion_done(&sig_fence->completion)) {
/* signal CS already finished */
- dma_fence_put(sig_fence);
+ hl_fence_put(sig_fence);
rc = 0;
goto free_signal_seq_array;
}
@@ -894,7 +890,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
rc = allocate_cs(hdev, ctx, cs_type, &cs);
if (rc) {
if (cs_type == CS_TYPE_WAIT)
- dma_fence_put(sig_fence);
+ hl_fence_put(sig_fence);
hl_ctx_put(ctx);
goto free_signal_seq_array;
}
@@ -1154,7 +1150,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
- struct dma_fence *fence;
+ struct hl_fence *fence;
unsigned long timeout;
long rc;
@@ -1173,12 +1169,15 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
"Can't wait on CS %llu because current CS is at seq %llu\n",
seq, ctx->cs_sequence);
} else if (fence) {
- rc = dma_fence_wait_timeout(fence, true, timeout);
+ rc = wait_for_completion_interruptible_timeout(
+ &fence->completion, timeout);
+
if (fence->error == -ETIMEDOUT)
rc = -ETIMEDOUT;
else if (fence->error == -EIO)
rc = -EIO;
- dma_fence_put(fence);
+
+ hl_fence_put(fence);
} else {
dev_dbg(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 3e375958e73b..b168a9fce817 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -23,7 +23,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
*/
for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
- dma_fence_put(ctx->cs_pending[i]);
+ hl_fence_put(ctx->cs_pending[i]);
kfree(ctx->cs_pending);
@@ -128,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
atomic_set(&ctx->thread_ctx_switch_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
- sizeof(struct dma_fence *),
+ sizeof(struct hl_fence *),
GFP_KERNEL);
if (!ctx->cs_pending)
return -ENOMEM;
@@ -184,10 +184,10 @@ int hl_ctx_put(struct hl_ctx *ctx)
return kref_put(&ctx->refcount, hl_ctx_do_release);
}
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
- struct dma_fence *fence;
+ struct hl_fence *fence;
spin_lock(&ctx->cs_lock);
@@ -201,8 +201,9 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
return NULL;
}
- fence = dma_fence_get(
- ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
+ fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
+ hl_fence_get(fence);
+
spin_unlock(&ctx->cs_lock);
return fence;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 9722706a2d6c..474fb968532e 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -15,7 +15,6 @@
#include <linux/cdev.h>
#include <linux/iopoll.h>
#include <linux/irqreturn.h>
-#include <linux/dma-fence.h>
#include <linux/dma-direction.h>
#include <linux/scatterlist.h>
#include <linux/hashtable.h>
@@ -342,9 +341,22 @@ struct asic_fixed_properties {
u8 completion_queues_count;
};
+/**
+ * struct hl_fence - software synchronization primitive
+ * @completion: fence is implemented using completion
+ * @refcount: refcount for this fence
+ * @error: mark this fence with error
+ *
+ */
+struct hl_fence {
+ struct completion completion;
+ struct kref refcount;
+ int error;
+};
+
/**
* struct hl_cs_compl - command submission completion object.
- * @base_fence: kernel fence object.
+ * @base_fence: hl fence object.
* @lock: spinlock to protect fence.
* @hdev: habanalabs device structure.
* @hw_sob: the H/W SOB used in this signal/wait CS.
@@ -353,7 +365,7 @@ struct asic_fixed_properties {
* @sob_val: the SOB value that is used in this signal/wait CS.
*/
struct hl_cs_compl {
- struct dma_fence base_fence;
+ struct hl_fence base_fence;
spinlock_t lock;
struct hl_device *hdev;
struct hl_hw_sob *hw_sob;
@@ -800,7 +812,7 @@ struct hl_va_range {
* @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT.
- * @cs_pending: array of DMA fence objects representing pending CS.
+ * @cs_pending: array of hl fence objects representing pending CS.
* @host_va_range: holds available virtual addresses for host mappings.
* @host_huge_va_range: holds available virtual addresses for host mappings
* with huge pages.
@@ -832,7 +844,7 @@ struct hl_ctx {
struct hl_fpriv *hpriv;
struct hl_device *hdev;
struct kref refcount;
- struct dma_fence **cs_pending;
+ struct hl_fence **cs_pending;
struct hl_va_range *host_va_range;
struct hl_va_range *host_huge_va_range;
struct hl_va_range *dram_va_range;
@@ -919,8 +931,8 @@ struct hl_cs {
struct list_head job_list;
spinlock_t job_lock;
struct kref refcount;
- struct dma_fence *fence;
- struct dma_fence *signal_fence;
+ struct hl_fence *fence;
+ struct hl_fence *signal_fence;
struct work_struct finish_work;
struct delayed_work work_tdr;
struct list_head mirror_node;
@@ -1736,7 +1748,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
void hl_ctx_do_release(struct kref *ref);
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_put(struct hl_ctx *ctx);
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
@@ -1778,6 +1790,8 @@ void hl_cs_rollback_all(struct hl_device *hdev);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref);
+void hl_fence_put(struct hl_fence *fence);
+void hl_fence_get(struct hl_fence *fence);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 287681646071..65b9aa69a83e 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -474,7 +474,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
* wait CS was submitted.
*/
mb();
- dma_fence_put(cs->signal_fence);
+ hl_fence_put(cs->signal_fence);
cs->signal_fence = NULL;
}
}
--
2.17.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH v3 2/2] habanalabs: add information about PCIe controller
2020-08-03 12:07 [PATCH v3 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
@ 2020-08-03 12:07 ` Oded Gabbay
0 siblings, 0 replies; 2+ messages in thread
From: Oded Gabbay @ 2020-08-03 12:07 UTC (permalink / raw)
To: linux-kernel, SW_Drivers; +Cc: Ofir Bitton
From: Ofir Bitton <obitton@habana.ai>
Update the firmware header with a new API for getting PCIe info,
such as tx/rx throughput and the replay counter.
These counters are needed by customers for monitoring and maintenance
of multiple devices.
Add new opcodes to the INFO ioctl to retrieve these counters and the
current clock throttling reason.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
drivers/misc/habanalabs/common/firmware_if.c | 48 +++++++++++++++++++
drivers/misc/habanalabs/common/habanalabs.h | 4 ++
.../misc/habanalabs/common/habanalabs_ioctl.c | 41 ++++++++++++++++
drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++
drivers/misc/habanalabs/goya/goya.c | 4 ++
.../misc/habanalabs/include/common/armcp_if.h | 10 ++++
include/uapi/misc/habanalabs.h | 27 +++++++++++
7 files changed, 138 insertions(+)
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index f70302cdab1b..0842c2211475 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -354,6 +354,54 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
return rc;
}
+int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+ struct hl_info_pci_counters *counters)
+{
+ struct armcp_packet pkt = {};
+ long result;
+ int rc;
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ /* Fetch PCI rx counter */
+ pkt.index = cpu_to_le32(armcp_pcie_throughput_rx);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+ return rc;
+ }
+ counters->rx_throughput = result;
+
+ /* Fetch PCI tx counter */
+ pkt.index = cpu_to_le32(armcp_pcie_throughput_tx);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+ return rc;
+ }
+ counters->tx_throughput = result;
+
+ /* Fetch PCI replay counter */
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+ return rc;
+ }
+ counters->replay_cnt = (u32) result;
+
+ return rc;
+}
+
static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
{
u32 err_val;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 474fb968532e..8b5b4afe42c7 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1481,6 +1481,7 @@ struct hl_device_idle_busy_ts {
* @soft_reset_cnt: number of soft reset since the driver was loaded.
* @hard_reset_cnt: number of hard reset since the driver was loaded.
* @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
+ * @clk_throttling_reason: bitmask represents the current clk throttling reasons
* @id: device minor.
* @id_control: minor of the control device
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
@@ -1584,6 +1585,7 @@ struct hl_device {
u32 soft_reset_cnt;
u32 hard_reset_cnt;
u32 idle_busy_ts_idx;
+ u32 clk_throttling_reason;
u16 id;
u16 id_control;
u16 cpu_pci_msb_addr;
@@ -1838,6 +1840,8 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
int hl_fw_send_heartbeat(struct hl_device *hdev);
int hl_fw_armcp_info_get(struct hl_device *hdev);
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+ struct hl_info_pci_counters *counters);
int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 boot_err0_reg, bool skip_bmc,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 5af1c03da473..4d838b1a3bbe 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -276,6 +276,41 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
}
+static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_info_pci_counters pci_counters = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ rc = hl_fw_armcp_pci_counters_get(hdev, &pci_counters);
+ if (rc)
+ return rc;
+
+ return copy_to_user(out, &pci_counters,
+ min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0;
+}
+
+static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_info_clk_throttle clk_throttle = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason;
+
+ return copy_to_user(out, &clk_throttle,
+ min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0;
+}
+
static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
@@ -360,6 +395,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_CS_COUNTERS:
return cs_counters_info(hpriv, args);
+ case HL_INFO_PCI_COUNTERS:
+ return pci_counters_info(hpriv, args);
+
+ case HL_INFO_CLK_THROTTLE_REASON:
+ return clk_throttle_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 00a0a7238d81..41d55a5f7f83 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5620,21 +5620,25 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev,
{
switch (event_type) {
case GAUDI_EVENT_FIX_POWER_ENV_S:
+ hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to power consumption\n");
break;
case GAUDI_EVENT_FIX_POWER_ENV_E:
+ hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
dev_info_ratelimited(hdev->dev,
"Power envelop is safe, back to optimal clock\n");
break;
case GAUDI_EVENT_FIX_THERMAL_ENV_S:
+ hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
+ hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 85030759b2af..c497ae25c331 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4549,18 +4549,22 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+ hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to power consumption\n");
break;
case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+ hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
dev_info_ratelimited(hdev->dev,
"Power envelop is safe, back to optimal clock\n");
break;
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+ hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+ hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h
index 07f9972db28d..1403c937253c 100644
--- a/drivers/misc/habanalabs/include/common/armcp_if.h
+++ b/drivers/misc/habanalabs/include/common/armcp_if.h
@@ -243,6 +243,8 @@ enum armcp_packet_id {
ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */
ARMCP_PACKET_VOLTAGE_SET, /* sysfs */
ARMCP_PACKET_CURRENT_SET, /* sysfs */
+ ARMCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */
+ ARMCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */
};
#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -277,6 +279,9 @@ struct armcp_packet {
__u8 pad; /* unused */
};
+ /* For any general request */
+ __le32 index;
+
/* For frequency get/set */
__le32 pll_index;
@@ -344,6 +349,11 @@ enum armcp_pwm_attributes {
armcp_pwm_enable
};
+enum armcp_pcie_throughput_attributes {
+ armcp_pcie_throughput_tx,
+ armcp_pcie_throughput_rx
+};
+
/* Event Queue Packets */
struct eq_generic_event {
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d5c4f983b7a8..ee13b919db35 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -264,6 +264,8 @@ enum hl_device_status {
* HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time
* for synchronization.
* HL_INFO_CS_COUNTERS - Retrieve command submission counters
+ * HL_INFO_PCI_COUNTERS - Retrieve PCI counters
+ * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -276,6 +278,8 @@ enum hl_device_status {
#define HL_INFO_RESET_COUNT 9
#define HL_INFO_TIME_SYNC 10
#define HL_INFO_CS_COUNTERS 11
+#define HL_INFO_PCI_COUNTERS 12
+#define HL_INFO_CLK_THROTTLE_REASON 13
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@@ -340,6 +344,29 @@ struct hl_info_time_sync {
__u64 host_time;
};
+/**
+ * struct hl_info_pci_counters - pci counters
+ * @rx_throughput: PCI rx throughput KBps
+ * @tx_throughput: PCI tx throughput KBps
+ * @replay_cnt: PCI replay counter
+ */
+struct hl_info_pci_counters {
+ __u64 rx_throughput;
+ __u64 tx_throughput;
+ __u64 replay_cnt;
+};
+
+#define HL_CLK_THROTTLE_POWER 0x1
+#define HL_CLK_THROTTLE_THERMAL 0x2
+
+/**
+ * struct hl_info_clk_throttle - clock throttling reason
+ * @clk_throttling_reason: each bit represents a clk throttling reason
+ */
+struct hl_info_clk_throttle {
+ __u32 clk_throttling_reason;
+};
+
/**
* struct hl_info_cs_counters - command submission counters
* @out_of_mem_drop_cnt: dropped due to memory allocation issue
--
2.17.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-08-03 12:08 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-03 12:07 [PATCH v3 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
2020-08-03 12:07 ` [PATCH v3 2/2] habanalabs: add information about PCIe controller Oded Gabbay
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox