* [PATCH 2/3] habanalabs: track security status using positive logic
2021-05-24 8:46 [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU Oded Gabbay
@ 2021-05-24 8:46 ` Oded Gabbay
2021-05-24 8:46 ` [PATCH 3/3] habanalabs/gaudi: refactor reset code Oded Gabbay
1 sibling, 0 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-05-24 8:46 UTC (permalink / raw)
To: linux-kernel; +Cc: Ohad Sharabi
From: Ohad Sharabi <osharabi@habana.ai>
Using negative logic (i.e. fw_security_disabled) is confusing.
Modify the flag to use positive logic (fw_security_enabled).
Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/firmware_if.c | 2 +-
drivers/misc/habanalabs/common/habanalabs.h | 6 +--
.../misc/habanalabs/common/habanalabs_drv.c | 6 +--
drivers/misc/habanalabs/gaudi/gaudi.c | 48 +++++++++----------
.../misc/habanalabs/gaudi/gaudi_coresight.c | 2 +-
.../misc/habanalabs/gaudi/gaudi_security.c | 15 +++---
drivers/misc/habanalabs/goya/goya.c | 20 ++++----
drivers/misc/habanalabs/goya/goya_coresight.c | 2 +-
8 files changed, 51 insertions(+), 50 deletions(-)
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index c19acefdb7e4..4cc6690a3e26 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1142,7 +1142,7 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
prop->hard_reset_done_by_fw ? "enabled" : "disabled");
dev_dbg(hdev->dev, "firmware-level security is %s\n",
- prop->fw_security_disabled ? "disabled" : "enabled");
+ prop->fw_security_enabled ? "enabled" : "disabled");
dev_dbg(hdev->dev, "GIC controller is %s\n",
prop->gic_interrupts_enable ? "enabled" : "disabled");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index f1ff4d503cf2..e751868b3ed3 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -456,8 +456,8 @@ struct hl_mmu_properties {
* @user_interrupt_count: number of user interrupts.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
- * @fw_security_disabled: true if security measures are disabled in firmware,
- * false otherwise
+ * @fw_security_enabled: true if security measures are enabled in firmware,
+ * false otherwise
* @fw_cpu_boot_dev_sts0_valid: status bits are valid and can be fetched from
* BOOT_DEV_STS0
* @fw_cpu_boot_dev_sts1_valid: status bits are valid and can be fetched from
@@ -531,7 +531,7 @@ struct asic_fixed_properties {
u16 user_interrupt_count;
u8 tpc_enabled_mask;
u8 completion_queues_count;
- u8 fw_security_disabled;
+ u8 fw_security_enabled;
u8 fw_cpu_boot_dev_sts0_valid;
u8 fw_cpu_boot_dev_sts1_valid;
u8 dram_supports_virtual_memory;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 339a1860c1e7..bd67d4ceab56 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -308,10 +308,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
}
if (pdev)
- hdev->asic_prop.fw_security_disabled =
- !is_asic_secured(hdev->asic_type);
+ hdev->asic_prop.fw_security_enabled =
+ is_asic_secured(hdev->asic_type);
else
- hdev->asic_prop.fw_security_disabled = true;
+ hdev->asic_prop.fw_security_enabled = false;
/* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index e155fae5edcb..836465dccc61 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -687,7 +687,7 @@ static int gaudi_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
- if (!hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_enabled) {
hdev->asic_prop.iatu_done_by_fw = true;
/*
@@ -763,7 +763,14 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
int rc;
- if (hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_enabled) {
+ rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
+
+ if (rc)
+ return rc;
+
+ freq = pll_freq_arr[2];
+ } else {
/* Backward compatibility */
div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
@@ -791,13 +798,6 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
div_sel);
freq = 0;
}
- } else {
- rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
-
- if (rc)
- return rc;
-
- freq = pll_freq_arr[2];
}
prop->psoc_timestamp_frequency = freq;
@@ -1525,7 +1525,7 @@ static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
hdev->cpu_pci_msb_addr =
GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
- if (hdev->asic_prop.fw_security_disabled)
+ if (!hdev->asic_prop.fw_security_enabled)
GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
free_dma_mem_arr:
@@ -1725,7 +1725,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
free_cpu_accessible_dma_pool:
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
- if (hdev->asic_prop.fw_security_disabled)
+ if (!hdev->asic_prop.fw_security_enabled)
GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
hdev->cpu_pci_msb_addr);
hdev->asic_funcs->asic_dma_free_coherent(hdev,
@@ -1747,7 +1747,7 @@ static int gaudi_sw_fini(struct hl_device *hdev)
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
- if (hdev->asic_prop.fw_security_disabled)
+ if (!hdev->asic_prop.fw_security_enabled)
GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
hdev->cpu_pci_msb_addr);
@@ -1967,7 +1967,7 @@ static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
- if (!hdev->asic_prop.fw_security_disabled)
+ if (hdev->asic_prop.fw_security_enabled)
return;
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -2039,7 +2039,7 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
struct gaudi_device *gaudi = hdev->asic_specific;
- if (!hdev->asic_prop.fw_security_disabled)
+ if (hdev->asic_prop.fw_security_enabled)
return;
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -2109,7 +2109,7 @@ static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
static void gaudi_init_e2e(struct hl_device *hdev)
{
- if (!hdev->asic_prop.fw_security_disabled)
+ if (hdev->asic_prop.fw_security_enabled)
return;
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -2484,7 +2484,7 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
- if (!hdev->asic_prop.fw_security_disabled)
+ if (hdev->asic_prop.fw_security_enabled)
return;
if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid &&
@@ -3602,7 +3602,7 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
if (hdev->in_debug)
return;
- if (!hdev->asic_prop.fw_security_disabled)
+ if (hdev->asic_prop.fw_security_enabled)
return;
for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
@@ -3662,7 +3662,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev)
u32 qman_offset;
int i;
- if (!hdev->asic_prop.fw_security_disabled)
+ if (hdev->asic_prop.fw_security_enabled)
return;
for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
@@ -3897,7 +3897,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
* The device CPU works with 40 bits addresses.
* This register sets the extension to 50 bits.
*/
- if (hdev->asic_prop.fw_security_disabled)
+ if (!hdev->asic_prop.fw_security_enabled)
WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
rc = hl_fw_init_cpu(hdev);
@@ -3991,7 +3991,7 @@ static void gaudi_pre_hw_init(struct hl_device *hdev)
/* Perform read from the device to make sure device is up */
RREG32(mmHW_STATE);
- if (hdev->asic_prop.fw_security_disabled) {
+ if (!hdev->asic_prop.fw_security_enabled) {
/* Set the access through PCI bars (Linux driver only) as
* secured
*/
@@ -4129,7 +4129,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
/* Set device to handle FLR by H/W as we will put the device CPU to
* halt mode
*/
- if (hdev->asic_prop.fw_security_disabled &&
+ if (!hdev->asic_prop.fw_security_enabled &&
!hdev->asic_prop.hard_reset_done_by_fw)
WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
@@ -4150,7 +4150,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
}
- if (hdev->asic_prop.fw_security_disabled &&
+ if (!hdev->asic_prop.fw_security_enabled &&
!hdev->asic_prop.hard_reset_done_by_fw) {
/* Configure the reset registers. Must be done as early as
@@ -4185,7 +4185,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
/* Restart BTL/BLR upon hard-reset */
- if (hdev->asic_prop.fw_security_disabled)
+ if (!hdev->asic_prop.fw_security_enabled)
WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
@@ -7573,7 +7573,7 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
return 0;
}
- if (!hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_enabled) {
dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
return 0;
}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index 6e56fa1c6c69..9e271fd9f0d2 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -634,7 +634,7 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
- if (hdev->asic_prop.fw_security_disabled) {
+ if (!hdev->asic_prop.fw_security_enabled) {
/* make ETR not privileged */
val = FIELD_PREP(
PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index 9a706c5980ef..0d3240f1f7d7 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -1448,7 +1448,7 @@ static void gaudi_init_dma_protection_bits(struct hl_device *hdev)
u32 pb_addr, mask;
u8 word_offset;
- if (hdev->asic_prop.fw_security_disabled) {
+ if (!hdev->asic_prop.fw_security_enabled) {
gaudi_pb_set_block(hdev, mmDMA_IF_E_S_BASE);
gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH0_BASE);
gaudi_pb_set_block(hdev, mmDMA_IF_E_S_DOWN_CH1_BASE);
@@ -9135,7 +9135,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
u32 pb_addr, mask;
u8 word_offset;
- if (hdev->asic_prop.fw_security_disabled) {
+ if (!hdev->asic_prop.fw_security_enabled) {
gaudi_pb_set_block(hdev, mmTPC0_E2E_CRED_BASE);
gaudi_pb_set_block(hdev, mmTPC1_E2E_CRED_BASE);
gaudi_pb_set_block(hdev, mmTPC2_E2E_CRED_BASE);
@@ -12818,7 +12818,7 @@ static void gaudi_init_protection_bits(struct hl_device *hdev)
* secured
*/
- if (hdev->asic_prop.fw_security_disabled) {
+ if (!hdev->asic_prop.fw_security_enabled) {
gaudi_pb_set_block(hdev, mmIF_E_PLL_BASE);
gaudi_pb_set_block(hdev, mmMESH_W_PLL_BASE);
gaudi_pb_set_block(hdev, mmSRAM_W_PLL_BASE);
@@ -13023,7 +13023,7 @@ void gaudi_init_security(struct hl_device *hdev)
* property configuration of MME SBAB and ACC to be non-privileged and
* non-secured
*/
- if (hdev->asic_prop.fw_security_disabled) {
+ if (!hdev->asic_prop.fw_security_enabled) {
WREG32(mmMME0_SBAB_PROT, 0x2);
WREG32(mmMME0_ACC_PROT, 0x2);
WREG32(mmMME1_SBAB_PROT, 0x2);
@@ -13032,11 +13032,12 @@ void gaudi_init_security(struct hl_device *hdev)
WREG32(mmMME2_ACC_PROT, 0x2);
WREG32(mmMME3_SBAB_PROT, 0x2);
WREG32(mmMME3_ACC_PROT, 0x2);
- }
- /* On RAZWI, 0 will be returned from RR and 0xBABA0BAD from PB */
- if (hdev->asic_prop.fw_security_disabled)
+ /*
+ * On RAZWI, 0 will be returned from RR and 0xBABA0BAD from PB
+ */
WREG32(0xC01B28, 0x1);
+ }
gaudi_init_range_registers_lbw(hdev);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 3d7a760cf2ba..bcefc372a689 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -619,7 +619,7 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
- if (!hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_enabled) {
hdev->asic_prop.iatu_done_by_fw = true;
goto pci_init;
}
@@ -726,7 +726,15 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
int rc;
- if (hdev->asic_prop.fw_security_disabled) {
+ if (hdev->asic_prop.fw_security_enabled) {
+ rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
+ pll_freq_arr);
+
+ if (rc)
+ return;
+
+ freq = pll_freq_arr[1];
+ } else {
div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
nr = RREG32(mmPSOC_PCI_PLL_NR);
@@ -753,14 +761,6 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
div_sel);
freq = 0;
}
- } else {
- rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
- pll_freq_arr);
-
- if (rc)
- return;
-
- freq = pll_freq_arr[1];
}
prop->psoc_timestamp_frequency = freq;
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index 6b7445cca580..c55c100fdd24 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -434,7 +434,7 @@ static int goya_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
- if (hdev->asic_prop.fw_security_disabled) {
+ if (!hdev->asic_prop.fw_security_enabled) {
/* make ETR not privileged */
val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
/* make ETR non-secured (inverted logic) */
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 3/3] habanalabs/gaudi: refactor reset code
2021-05-24 8:46 [PATCH 1/3] habanalabs/gaudi: use COMMS to reset device / halt CPU Oded Gabbay
2021-05-24 8:46 ` [PATCH 2/3] habanalabs: track security status using positive logic Oded Gabbay
@ 2021-05-24 8:46 ` Oded Gabbay
1 sibling, 0 replies; 3+ messages in thread
From: Oded Gabbay @ 2021-05-24 8:46 UTC (permalink / raw)
To: linux-kernel
After all the latest changes to the reset code, there were some
redundancies and errors in the flows.
If the Linux FIT is loaded to the ASIC CPU, we need to communicate
with it only via GIC. If it is not loaded, we need to either use
COMMS protocol (for newer f/w) or MSG_TO_CPU register (for older f/w).
In addition, if we halted the device CPU then we need to mark that
the driver will do the reset, regardless of the capabilities.
Also, to prevent false errors, we need to keep track of whether the
device CPU was already halted. If so, we shouldn't try to halt it
again.
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/gaudi/gaudi.c | 47 ++++++++++++++++----------
drivers/misc/habanalabs/gaudi/gaudiP.h | 5 +++
2 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 836465dccc61..5ca4c8f86801 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1931,11 +1931,11 @@ static void gaudi_disable_msi(struct hl_device *hdev)
gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}
-static void gaudi_fw_hard_reset(struct hl_device *hdev)
+static void gaudi_ask_hard_reset_without_linux(struct hl_device *hdev)
{
int rc;
- if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+ if (hdev->asic_prop.dynamic_fw_load) {
rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
COMMS_RST_DEV, 0, false,
hdev->fw_loader.cpu_timeout);
@@ -1946,12 +1946,16 @@ static void gaudi_fw_hard_reset(struct hl_device *hdev)
}
}
-static void gaudi_fw_halt_cpu(struct hl_device *hdev)
+static void gaudi_ask_halt_machine_without_linux(struct hl_device *hdev)
{
+ struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
+ if (gaudi && gaudi->device_cpu_is_halted)
+ return;
+
/* Stop device CPU to make sure nothing bad happens */
- if (hdev->asic_prop.dynamic_fw_load && !hdev->fw_loader.linux_loaded) {
+ if (hdev->asic_prop.dynamic_fw_load) {
rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader,
COMMS_GOTO_WFE, 0, true,
hdev->fw_loader.cpu_timeout);
@@ -1961,6 +1965,9 @@ static void gaudi_fw_halt_cpu(struct hl_device *hdev)
WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
msleep(GAUDI_CPU_RESET_WAIT_MSEC);
}
+
+ if (gaudi)
+ gaudi->device_cpu_is_halted = true;
}
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
@@ -4110,8 +4117,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
{
struct cpu_dyn_regs *dyn_regs =
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
- struct gaudi_device *gaudi = hdev->asic_specific;
u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
+ struct gaudi_device *gaudi = hdev->asic_specific;
+ bool driver_performs_reset;
if (!hard_reset) {
dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
@@ -4126,32 +4134,34 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
}
+ driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
+ !hdev->asic_prop.hard_reset_done_by_fw);
+
/* Set device to handle FLR by H/W as we will put the device CPU to
* halt mode
*/
- if (!hdev->asic_prop.fw_security_enabled &&
- !hdev->asic_prop.hard_reset_done_by_fw)
+ if (driver_performs_reset)
WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
- /* I don't know what is the state of the CPU so make sure it is
- * stopped in any means necessary
+ /* If linux is loaded in the device CPU we need to communicate with it
+ * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
+ * registers in case of old F/Ws
*/
- if (hdev->asic_prop.hard_reset_done_by_fw)
- gaudi_fw_hard_reset(hdev);
- else
- gaudi_fw_halt_cpu(hdev);
-
if (hdev->fw_loader.linux_loaded) {
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
le32_to_cpu(dyn_regs->gic_host_irq_ctrl);
WREG32(irq_handler_offset, GAUDI_EVENT_HALT_MACHINE);
+ } else {
+ if (hdev->asic_prop.hard_reset_done_by_fw)
+ gaudi_ask_hard_reset_without_linux(hdev);
+ else
+ gaudi_ask_halt_machine_without_linux(hdev);
}
- if (!hdev->asic_prop.fw_security_enabled &&
- !hdev->asic_prop.hard_reset_done_by_fw) {
+ if (driver_performs_reset) {
/* Configure the reset registers. Must be done as early as
* possible in case we fail during H/W initialization
@@ -4185,8 +4195,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
/* Restart BTL/BLR upon hard-reset */
- if (!hdev->asic_prop.fw_security_enabled)
- WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
+ WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
@@ -4223,6 +4232,8 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
HW_CAP_CLK_GATE);
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
+
+ gaudi->device_cpu_is_halted = false;
}
}
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 5929be81ec23..48637a6343bb 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -314,6 +314,10 @@ struct gaudi_internal_qman_info {
* Multi MSI is possible only with IOMMU enabled.
* @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
* 8-bit value so use u8.
+ * @device_cpu_is_halted: Flag to indicate whether the device CPU was already
+ * halted. We can't halt it again because the COMMS
+ * protocol will throw an error. Relevant only for
+ * cases where Linux was not loaded to device CPU
*/
struct gaudi_device {
int (*cpucp_info_get)(struct hl_device *hdev);
@@ -335,6 +339,7 @@ struct gaudi_device {
u32 hw_cap_initialized;
u8 multi_msi_mode;
u8 mmu_cache_inv_pi;
+ u8 device_cpu_is_halted;
};
void gaudi_init_security(struct hl_device *hdev);
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread