* [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error
@ 2022-09-18 11:37 Oded Gabbay
2022-09-18 11:37 ` [PATCH 2/7] habanalabs/gaudi2: increase hard-reset sleep time to 2 sec Oded Gabbay
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-09-18 11:37 UTC (permalink / raw)
To: linux-kernel; +Cc: Tomer Tayar
From: Tomer Tayar <ttayar@habana.ai>
Add the dump of the RAZWI information when a PCIe access is blocked by
RR.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi2/gaudi2.c | 52 +++++++++++++++++--
drivers/misc/habanalabs/gaudi2/gaudi2_masks.h | 13 +++++
2 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 5761ca5d50ae..c040e01adafe 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -7963,14 +7963,58 @@ static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_
gaudi2_dma_core_interrupts_cause[i]);
}
+static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev)
+{
+ u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
+
+ razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
+ if (RREG32(razwi_happened_addr)) {
+ gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
+ NULL);
+ WREG32(razwi_happened_addr, 0x1);
+ }
+
+ razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
+ if (RREG32(razwi_happened_addr)) {
+ gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
+ NULL);
+ WREG32(razwi_happened_addr, 0x1);
+ }
+
+ razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
+ if (RREG32(razwi_happened_addr)) {
+ gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
+ NULL);
+ WREG32(razwi_happened_addr, 0x1);
+ }
+
+ razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
+ if (RREG32(razwi_happened_addr)) {
+ gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
+ NULL);
+ WREG32(razwi_happened_addr, 0x1);
+ }
+}
+
static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data)
{
int i;
- for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE; i++)
- if (intr_cause_data & BIT_ULL(i))
- dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
- gaudi2_pcie_addr_dec_error_cause[i]);
+ for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
+ if (!(intr_cause_data & BIT_ULL(i)))
+ continue;
+
+ dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n",
+ gaudi2_pcie_addr_dec_error_cause[i]);
+
+ switch (intr_cause_data & BIT_ULL(i)) {
+ case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
+ break;
+ case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
+ gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev);
+ break;
+ }
+ }
}
static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
index 0239d118abc5..e9ac87828221 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
@@ -144,4 +144,17 @@
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT 0
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK 0x1
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT 1
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK 0x2
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT 2
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK 0x4
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT 3
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK 0x8
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT 4
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK 0x10
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT 5
+#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK 0x20
+
#endif /* GAUDI2_MASKS_H_ */
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/7] habanalabs/gaudi2: increase hard-reset sleep time to 2 sec
2022-09-18 11:37 [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error Oded Gabbay
@ 2022-09-18 11:37 ` Oded Gabbay
2022-09-18 11:37 ` [PATCH 3/7] habanalabs/gaudi2: get f/w reset status register dynamically Oded Gabbay
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-09-18 11:37 UTC (permalink / raw)
To: linux-kernel; +Cc: Tomer Tayar
From: Tomer Tayar <ttayar@habana.ai>
The access to the device registers is blocked during hard reset, until
preboot runs and allows the access to specific registers, including the
PSOC BTM_FSM register which is used to know when the reset is done.
Between the reset request and until this register is polled there is a
small delay of 500 msec which is not enough for F/W to process the reset
and for preboot to run, so the register might be accessed while it is
blocked.
To avoid it, increase the delay to 2 sec.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index c040e01adafe..6ed9b3ce16dd 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -21,7 +21,7 @@
#define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */
-#define GAUDI2_RESET_TIMEOUT_MSEC 500 /* 500ms */
+#define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
#define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/7] habanalabs/gaudi2: get f/w reset status register dynamically
2022-09-18 11:37 [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error Oded Gabbay
2022-09-18 11:37 ` [PATCH 2/7] habanalabs/gaudi2: increase hard-reset sleep time to 2 sec Oded Gabbay
@ 2022-09-18 11:37 ` Oded Gabbay
2022-09-18 11:37 ` [PATCH 4/7] habanalabs: rename error info structure Oded Gabbay
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-09-18 11:37 UTC (permalink / raw)
To: linux-kernel; +Cc: farah kassabri
From: farah kassabri <fkassabri@habana.ai>
Get the firmware reset status address from the dynamic registers
we read from the firmware instead of using a define.
Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi2/gaudi2.c | 5 ++++-
drivers/misc/habanalabs/include/common/hl_boot_if.h | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 6ed9b3ce16dd..b95eab4c237c 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -5439,7 +5439,10 @@ static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms
if (!driver_performs_reset) {
/* set SP to indicate reset request sent to FW */
- WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
+ if (dyn_regs->cpu_rst_status)
+ WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
+ else
+ WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 2e45be5de4fe..e0ea51cc7475 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -431,7 +431,9 @@ struct cpu_dyn_regs {
__le32 gic_host_ints_irq;
__le32 gic_host_soft_rst_irq;
__le32 gic_rot_qm_irq_ctrl;
- __le32 reserved1[22]; /* reserve for future use */
+ __le32 cpu_rst_status;
+ __le32 eng_arc_irq_ctrl;
+ __le32 reserved1[20]; /* reserve for future use */
};
/* TODO: remove the desc magic after the code is updated to use message */
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/7] habanalabs: rename error info structure
2022-09-18 11:37 [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error Oded Gabbay
2022-09-18 11:37 ` [PATCH 2/7] habanalabs/gaudi2: increase hard-reset sleep time to 2 sec Oded Gabbay
2022-09-18 11:37 ` [PATCH 3/7] habanalabs/gaudi2: get f/w reset status register dynamically Oded Gabbay
@ 2022-09-18 11:37 ` Oded Gabbay
2022-09-18 11:37 ` [PATCH 5/7] habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err Oded Gabbay
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-09-18 11:37 UTC (permalink / raw)
To: linux-kernel; +Cc: Dani Liberman
From: Dani Liberman <dliberman@habana.ai>
As a preparation for adding more errors to it,
change to more suitable name.
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
.../habanalabs/common/command_submission.c | 6 ++--
drivers/misc/habanalabs/common/habanalabs.h | 12 +++----
.../misc/habanalabs/common/habanalabs_drv.c | 6 ++--
.../misc/habanalabs/common/habanalabs_ioctl.c | 30 +++++++++---------
drivers/misc/habanalabs/gaudi/gaudi.c | 31 ++++++++++---------
5 files changed, 43 insertions(+), 42 deletions(-)
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 746b688d34cf..fbe5003191bf 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -826,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
}
/* Save only the first CS timeout parameters */
- rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
+ rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
if (rc) {
- hdev->last_error.cs_timeout.timestamp = ktime_get();
- hdev->last_error.cs_timeout.seq = cs->sequence;
+ hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
+ hdev->captured_err_info.cs_timeout.seq = cs->sequence;
event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index f2910ac7aa22..259eebdc2f1b 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2981,12 +2981,12 @@ struct undefined_opcode_info {
};
/**
- * struct last_error_session_info - info about last session errors occurred.
- * @cs_timeout: CS timeout error last information.
- * @razwi: razwi last information.
+ * struct hl_error_info - holds information collected during an error.
+ * @cs_timeout: CS timeout error information.
+ * @razwi: razwi information.
* @undef_opcode: undefined opcode information
*/
-struct last_error_session_info {
+struct hl_error_info {
struct cs_timeout_info cs_timeout;
struct razwi_info razwi;
struct undefined_opcode_info undef_opcode;
@@ -3111,7 +3111,7 @@ struct hl_reset_info {
* @state_dump_specs: constants and dictionaries needed to dump system state.
* @multi_cs_completion: array of multi-CS completion.
* @clk_throttling: holds information about current/previous clock throttling events
- * @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_major_version: major version of current loaded preboot.
@@ -3286,7 +3286,7 @@ struct hl_device {
struct multi_cs_completion multi_cs_completion[
MULTI_CS_MAX_USER_CTX];
struct hl_clk_throttle clk_throttling;
- struct last_error_session_info last_error;
+ struct hl_error_info captured_err_info;
struct hl_reset_info reset_info;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index c60d6dab7aa7..73ae6f64d3ba 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -211,9 +211,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv);
- atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
- atomic_set(&hdev->last_error.razwi.write_enable, 1);
- hdev->last_error.undef_opcode.write_enable = true;
+ atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
+ atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
+ hdev->captured_err_info.undef_opcode.write_enable = true;
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index c7bd000750c8..ab0be082f3a6 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -593,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- info.seq = hdev->last_error.cs_timeout.seq;
- info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
+ info.seq = hdev->captured_err_info.cs_timeout.seq;
+ info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -609,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
- info.addr = hdev->last_error.razwi.addr;
- info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
- info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
- info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
- info.error_type = hdev->last_error.razwi.type;
+ info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
+ info.addr = hdev->captured_err_info.razwi.addr;
+ info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
+ info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
+ info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
+ info.error_type = hdev->captured_err_info.razwi.type;
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -629,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar
if ((!max_size) || (!out))
return -EINVAL;
- info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp);
- info.engine_id = hdev->last_error.undef_opcode.engine_id;
- info.cq_addr = hdev->last_error.undef_opcode.cq_addr;
- info.cq_size = hdev->last_error.undef_opcode.cq_size;
- info.stream_id = hdev->last_error.undef_opcode.stream_id;
- info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len;
- memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams,
+ info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
+ info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
+ info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
+ info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
+ info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
+ info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
+ memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
sizeof(info.cb_addr_streams));
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 48ff3b103b9f..f81a141b4741 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6894,9 +6894,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea
stream, cq_ptr, size);
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
- hdev->last_error.undef_opcode.cq_addr = cq_ptr;
- hdev->last_error.undef_opcode.cq_size = size;
- hdev->last_error.undef_opcode.stream_id = stream;
+ hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
+ hdev->captured_err_info.undef_opcode.cq_size = size;
+ hdev->captured_err_info.undef_opcode.stream_id = stream;
}
}
@@ -6962,7 +6962,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
}
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
- struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode;
+ struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
u32 arr_idx = undef_opcode->cb_addr_streams_len;
if (arr_idx == 0) {
@@ -7046,11 +7046,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
}
/* check for undefined opcode */
if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
- hdev->last_error.undef_opcode.write_enable) {
- memset(&hdev->last_error.undef_opcode, 0,
- sizeof(hdev->last_error.undef_opcode));
+ hdev->captured_err_info.undef_opcode.write_enable) {
+ memset(&hdev->captured_err_info.undef_opcode, 0,
+ sizeof(hdev->captured_err_info.undef_opcode));
- hdev->last_error.undef_opcode.write_enable = false;
+ hdev->captured_err_info.undef_opcode.write_enable = false;
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
}
@@ -7332,18 +7332,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
/* In case it's the first razwi, save its parameters*/
- rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0);
+ rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
if (rc) {
- hdev->last_error.razwi.timestamp = ktime_get();
- hdev->last_error.razwi.addr = razwi_addr;
- hdev->last_error.razwi.engine_id_1 = engine_id_1;
- hdev->last_error.razwi.engine_id_2 = engine_id_2;
+ hdev->captured_err_info.razwi.timestamp = ktime_get();
+ hdev->captured_err_info.razwi.addr = razwi_addr;
+ hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
+ hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
/*
* If first engine id holds non valid value the razwi initiator
* does not have engine id
*/
- hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
- hdev->last_error.razwi.type = razwi_type;
+ hdev->captured_err_info.razwi.non_engine_initiator =
+ (engine_id_1 == U16_MAX);
+ hdev->captured_err_info.razwi.type = razwi_type;
}
}
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/7] habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err
2022-09-18 11:37 [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error Oded Gabbay
` (2 preceding siblings ...)
2022-09-18 11:37 ` [PATCH 4/7] habanalabs: rename error info structure Oded Gabbay
@ 2022-09-18 11:37 ` Oded Gabbay
2022-09-18 11:37 ` [PATCH 6/7] habanalabs/gaudi2: add handling to pmmu events in eqe handler Oded Gabbay
2022-09-18 11:37 ` [PATCH 7/7] habanalabs/gaudi2: add secured attestation info uapi Oded Gabbay
5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-09-18 11:37 UTC (permalink / raw)
To: linux-kernel; +Cc: Tal Cohen
From: Tal Cohen <talcohen@habana.ai>
This change is done while there is a problem to use QMAN error for
TPC assert async. The problem involves security limitation that exists
to generate the assert via QMAN error.
Signed-off-by: Tal Cohen <talcohen@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi/gaudi.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f81a141b4741..e80ebace49c8 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7216,12 +7216,6 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
switch (event_type) {
case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
- /* In TPC QM event, notify on TPC assertion. While there isn't
- * a specific event for assertion yet, the FW generates QM event.
- * The SW upper layer will inspect an internal mapped area to indicate
- * if the event is a tpc assertion or tpc QM.
- */
- *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
index = event_type - GAUDI_EVENT_TPC0_QM;
qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
@@ -7731,6 +7725,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
case GAUDI_EVENT_TPC5_DEC:
case GAUDI_EVENT_TPC6_DEC:
case GAUDI_EVENT_TPC7_DEC:
+ /* In TPC DEC event, notify on TPC assertion. While there isn't
+ * a specific event for assertion yet, the FW generates TPC DEC event.
+ * The SW upper layer will inspect an internal mapped area to indicate
+ * if the event is a TPC Assertion or a "real" TPC DEC.
+ */
+ event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
gaudi_print_irq_info(hdev, event_type, true);
reset_required = gaudi_tpc_read_interrupts(hdev,
tpc_dec_event_to_tpc_id(event_type),
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 6/7] habanalabs/gaudi2: add handling to pmmu events in eqe handler
2022-09-18 11:37 [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error Oded Gabbay
` (3 preceding siblings ...)
2022-09-18 11:37 ` [PATCH 5/7] habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err Oded Gabbay
@ 2022-09-18 11:37 ` Oded Gabbay
2022-09-18 11:37 ` [PATCH 7/7] habanalabs/gaudi2: add secured attestation info uapi Oded Gabbay
5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-09-18 11:37 UTC (permalink / raw)
To: linux-kernel; +Cc: Dani Liberman
From: Dani Liberman <dliberman@habana.ai>
In order to get the error cause and the captured address in case of
page fault, added pmmu events to eqe handler.
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index b95eab4c237c..b8b8b2dc2095 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8756,6 +8756,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
+ case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
gaudi2_handle_mmu_spi_sei_err(hdev, event_type);
reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 7/7] habanalabs/gaudi2: add secured attestation info uapi
2022-09-18 11:37 [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error Oded Gabbay
` (4 preceding siblings ...)
2022-09-18 11:37 ` [PATCH 6/7] habanalabs/gaudi2: add handling to pmmu events in eqe handler Oded Gabbay
@ 2022-09-18 11:37 ` Oded Gabbay
5 siblings, 0 replies; 7+ messages in thread
From: Oded Gabbay @ 2022-09-18 11:37 UTC (permalink / raw)
To: linux-kernel; +Cc: Dani Liberman
From: Dani Liberman <dliberman@habana.ai>
User will provide a nonce via the ioctl, and will retrieve
secured attestation data of the boot, generated using given
nonce.
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/firmware_if.c | 46 +++++++++++
drivers/misc/habanalabs/common/habanalabs.h | 3 +
.../misc/habanalabs/common/habanalabs_ioctl.c | 52 +++++++++++++
.../misc/habanalabs/include/common/cpucp_if.h | 77 ++++++++++++++++++-
include/uapi/misc/habanalabs.h | 43 +++++++++++
5 files changed, 219 insertions(+), 2 deletions(-)
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index c2375917fc02..26a7529083e1 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -2988,3 +2988,49 @@ void hl_fw_set_max_power(struct hl_device *hdev)
if (rc)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}
+
+static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size,
+ u32 nonce, u32 timeout)
+{
+ struct cpucp_packet pkt = {};
+ dma_addr_t req_dma_addr;
+ void *req_cpu_addr;
+ int rc;
+
+ req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr);
+ if (!data) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id);
+ return -ENOMEM;
+ }
+
+ memset(data, 0, size);
+
+ pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(req_dma_addr);
+ pkt.data_max_size = cpu_to_le32(size);
+ pkt.nonce = cpu_to_le32(nonce);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ timeout, NULL);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);
+ goto out;
+ }
+
+ memcpy(data, req_cpu_addr, size);
+
+out:
+ hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr);
+
+ return rc;
+}
+
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+ u32 nonce)
+{
+ return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info,
+ sizeof(struct cpucp_sec_attest_info), nonce,
+ HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 259eebdc2f1b..2ffb8378f565 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -66,6 +66,7 @@ struct hl_fpriv;
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC 10000000 /* 10s */
#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */
#define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */
@@ -3750,6 +3751,8 @@ int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *va
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
long hl_fw_get_max_power(struct hl_device *hdev);
void hl_fw_set_max_power(struct hl_device *hdev);
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+ u32 nonce);
int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index ab0be082f3a6..43afe40966e5 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -662,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
+static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ struct cpucp_sec_attest_info *sec_attest_info;
+ struct hl_info_sec_attest *info;
+ u32 max_size = args->return_size;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL);
+ if (!sec_attest_info)
+ return -ENOMEM;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ rc = -ENOMEM;
+ goto free_sec_attest_info;
+ }
+
+ rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce);
+ if (rc)
+ goto free_info;
+
+ info->nonce = le32_to_cpu(sec_attest_info->nonce);
+ info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len);
+ info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len);
+ info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len);
+ info->pcr_num_reg = sec_attest_info->pcr_num_reg;
+ info->pcr_reg_len = sec_attest_info->pcr_reg_len;
+ info->quote_sig_len = sec_attest_info->quote_sig_len;
+ memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data));
+ memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote));
+ memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data));
+ memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate));
+ memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig));
+
+ rc = copy_to_user(out, info,
+ min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+ kfree(info);
+free_sec_attest_info:
+ kfree(sec_attest_info);
+
+ return rc;
+}
+
static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
int rc;
@@ -844,6 +893,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_DRAM_PENDING_ROWS:
return dram_pending_rows_info(hpriv, args);
+ case HL_INFO_SECURED_ATTESTATION:
+ return sec_attest_info(hpriv, args);
+
case HL_INFO_REGISTER_EVENTFD:
return eventfd_register(hpriv, args);
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 9593d1a26945..baa5aa43b6f4 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -629,6 +629,12 @@ enum pq_init_status {
* CPUCP_PACKET_ENGINE_CORE_ASID_SET -
* Packet to perform engine core ASID configuration
*
+ * CPUCP_PACKET_SEC_ATTEST_GET -
+ * Get the attestaion data that is collected during various stages of the
+ * boot sequence. the attestation data is also hashed with some unique
+ * number (nonce) provided by the host to prevent replay attacks.
+ * public key and certificate also provided as part of the FW response.
+ *
* CPUCP_PACKET_MONITOR_DUMP_GET -
* Get monitors registers dump from the CpuCP kernel.
* The CPU will put the registers dump in the a buffer allocated by the driver
@@ -691,15 +697,15 @@ enum cpucp_packet_id {
CPUCP_PACKET_RESERVED, /* not used */
CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */
CPUCP_PACKET_RESERVED2, /* not used */
+ CPUCP_PACKET_SEC_ATTEST_GET, /* internal */
CPUCP_PACKET_RESERVED3, /* not used */
CPUCP_PACKET_RESERVED4, /* not used */
- CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
+ CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_RESERVED6, /* not used */
CPUCP_PACKET_RESERVED7, /* not used */
CPUCP_PACKET_RESERVED8, /* not used */
CPUCP_PACKET_RESERVED9, /* not used */
- CPUCP_PACKET_RESERVED10, /* not used */
CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */
CPUCP_PACKET_ID_MAX /* must be last */
};
@@ -794,6 +800,9 @@ struct cpucp_packet {
* result cannot be used to hold general purpose data.
*/
__le32 status_mask;
+
+ /* random, used once number, for security packets */
+ __le32 nonce;
};
/* For NIC requests */
@@ -1219,6 +1228,70 @@ enum cpu_reset_status {
CPU_RST_STATUS_SOFT_RST_DONE = 1,
};
+#define SEC_PCR_DATA_BUF_SZ 256
+#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
+#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
+#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
+#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */
+
+/*
+ * struct cpucp_sec_attest_info - attestation report of the boot
+ * @pcr_data: raw values of the PCR registers
+ * @pcr_num_reg: number of PCR registers in the pcr_data array
+ * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
+ * @nonce: number only used once. random number provided by host. this also
+ * passed to the quote command as a qualifying data.
+ * @pcr_quote_len: length of the attestation quote data (bytes)
+ * @pcr_quote: attestation report data structure
+ * @quote_sig_len: length of the attestation report signature (bytes)
+ * @quote_sig: signature structure of the attestation report
+ * @pub_data_len: length of the public data (bytes)
+ * @public_data: public key for the signed attestation
+ * (outPublic + name + qualifiedName)
+ * @certificate_len: length of the certificate (bytes)
+ * @certificate: certificate for the attestation signing key
+ */
+struct cpucp_sec_attest_info {
+ __u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
+ __u8 pcr_num_reg;
+ __u8 pcr_reg_len;
+ __le16 pad0;
+ __le32 nonce;
+ __le16 pcr_quote_len;
+ __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
+ __u8 quote_sig_len;
+ __u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
+ __le16 pub_data_len;
+ __u8 public_data[SEC_PUB_DATA_BUF_SZ];
+ __le16 certificate_len;
+ __u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+};
+
+/*
+ * struct cpucp_dev_info_signed - device information signed by a secured device
+ * @info: device information structure as defined above
+ * @nonce: number only used once. random number provided by host. this number is
+ * hashed and signed along with the device information.
+ * @info_sig_len: length of the attestation signature (bytes)
+ * @info_sig: signature of the info + nonce data.
+ * @pub_data_len: length of the public data (bytes)
+ * @public_data: public key info signed info data
+ * (outPublic + name + qualifiedName)
+ * @certificate_len: length of the certificate (bytes)
+ * @certificate: certificate for the signing key
+ */
+struct cpucp_dev_info_signed {
+ struct cpucp_info info; /* assumed to be 64bit aligned */
+ __le32 nonce;
+ __le32 pad0;
+ __u8 info_sig_len;
+ __u8 info_sig[SEC_SIGNATURE_BUF_SZ];
+ __le16 pub_data_len;
+ __u8 public_data[SEC_PUB_DATA_BUF_SZ];
+ __le16 certificate_len;
+ __u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+};
+
/*
* struct dcore_monitor_regs_data - DCORE monitor regs data.
* the structure follows sync manager block layout. relevant only to Gaudi.
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a4bab0fd8223..e00ebe05097d 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -773,6 +773,7 @@ enum hl_server_type {
* Razwi initiator.
* Razwi cause, was it a page fault or MMU access error.
* HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation
+ * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot.
* HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications.
* HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
* HL_INFO_GET_EVENTS - Retrieve the last occurred events
@@ -802,6 +803,7 @@ enum hl_server_type {
#define HL_INFO_CS_TIMEOUT_EVENT 24
#define HL_INFO_RAZWI_EVENT 25
#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26
+#define HL_INFO_SECURED_ATTESTATION 27
#define HL_INFO_REGISTER_EVENTFD 28
#define HL_INFO_UNREGISTER_EVENTFD 29
#define HL_INFO_GET_EVENTS 30
@@ -1133,6 +1135,45 @@ struct hl_info_dev_memalloc_page_sizes {
__u64 page_order_bitmask;
};
+#define SEC_PCR_DATA_BUF_SZ 256
+#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
+#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
+#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
+#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */
+
+/*
+ * struct hl_info_sec_attest - attestation report of the boot
+ * @nonce: number only used once. random number provided by host. this also passed to the quote
+ * command as a qualifying data.
+ * @pcr_quote_len: length of the attestation quote data (bytes)
+ * @pub_data_len: length of the public data (bytes)
+ * @certificate_len: length of the certificate (bytes)
+ * @pcr_num_reg: number of PCR registers in the pcr_data array
+ * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
+ * @quote_sig_len: length of the attestation report signature (bytes)
+ * @pcr_data: raw values of the PCR registers
+ * @pcr_quote: attestation report data structure
+ * @quote_sig: signature structure of the attestation report
+ * @public_data: public key for the signed attestation
+ * (outPublic + name + qualifiedName)
+ * @certificate: certificate for the attestation signing key
+ */
+struct hl_info_sec_attest {
+ __u32 nonce;
+ __u16 pcr_quote_len;
+ __u16 pub_data_len;
+ __u16 certificate_len;
+ __u8 pcr_num_reg;
+ __u8 pcr_reg_len;
+ __u8 quote_sig_len;
+ __u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
+ __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
+ __u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
+ __u8 public_data[SEC_PUB_DATA_BUF_SZ];
+ __u8 certificate[SEC_CERTIFICATE_BUF_SZ];
+ __u8 pad0[2];
+};
+
enum gaudi_dcores {
HL_GAUDI_WS_DCORE,
HL_GAUDI_WN_DCORE,
@@ -1158,6 +1199,7 @@ enum gaudi_dcores {
* driver. It is possible for the user to allocate buffer larger than
* needed, hence updating this variable so user will know the exact amount
* of bytes copied by the kernel to the buffer.
+ * @sec_attest_nonce: Nonce number used for attestation report.
* @pad: Padding to 64 bit.
*/
struct hl_info_args {
@@ -1172,6 +1214,7 @@ struct hl_info_args {
__u32 pll_index;
__u32 eventfd;
__u32 user_buffer_actual_size;
+ __u32 sec_attest_nonce;
};
__u32 pad;
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2022-09-18 11:38 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-09-18 11:37 [PATCH 1/7] habanalabs/gaudi2: print RAZWI info upon PCIe access error Oded Gabbay
2022-09-18 11:37 ` [PATCH 2/7] habanalabs/gaudi2: increase hard-reset sleep time to 2 sec Oded Gabbay
2022-09-18 11:37 ` [PATCH 3/7] habanalabs/gaudi2: get f/w reset status register dynamically Oded Gabbay
2022-09-18 11:37 ` [PATCH 4/7] habanalabs: rename error info structure Oded Gabbay
2022-09-18 11:37 ` [PATCH 5/7] habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err Oded Gabbay
2022-09-18 11:37 ` [PATCH 6/7] habanalabs/gaudi2: add handling to pmmu events in eqe handler Oded Gabbay
2022-09-18 11:37 ` [PATCH 7/7] habanalabs/gaudi2: add secured attestation info uapi Oded Gabbay
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox