* [PATCH V3 0/2] Support for FW activation without reset
[not found] <CGME20170610070549epcas1p4fae7273b007148b52d31bf62cbb07b74@epcas1p4.samsung.com>
@ 2017-06-10 7:05 ` Arnav Dawn
2017-06-10 7:06 ` [PATCH V3 1/2] nvme: Rename and move get_log_page to nvme/scsi Arnav Dawn
2017-06-10 7:08 ` [PATCH V3 2/2] nvme: Add support for FW activation without reset Arnav Dawn
0 siblings, 2 replies; 12+ messages in thread
From: Arnav Dawn @ 2017-06-10 7:05 UTC (permalink / raw)
This series adds support for handling Firmware activation without
reset.
Changes since V2
1) Move get_log_page to scsi.c as get_smart_log_page.
2) Add helper function for checking ctrl PP status.
3) Minor cleanups.
Arnav Dawn (2):
nvme: Rename and move get_log_page to nvme/scsi
nvme: Add support for Fw activation without reset
drivers/nvme/host/core.c | 90 +++++++++++++++++++++++++++++++++++-------------
drivers/nvme/host/nvme.h | 23 ++++++++++++-
drivers/nvme/host/scsi.c | 27 +++++++++++++--
include/linux/nvme.h | 10 ++++++
4 files changed, 124 insertions(+), 26 deletions(-)
--
1.9.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 1/2] nvme: Rename and move get_log_page to nvme/scsi
2017-06-10 7:05 ` [PATCH V3 0/2] Support for FW activation without reset Arnav Dawn
@ 2017-06-10 7:06 ` Arnav Dawn
2017-06-13 6:43 ` Christoph Hellwig
2017-06-10 7:08 ` [PATCH V3 2/2] nvme: Add support for FW activation without reset Arnav Dawn
1 sibling, 1 reply; 12+ messages in thread
From: Arnav Dawn @ 2017-06-10 7:06 UTC (permalink / raw)
This patch renames "nvme_get_log_page" to more appropriate
"nvme_get_smart_log_page" and moves it to nvme/scsi as
it is only being used there.
Signed-off-by: Arnav Dawn <a.dawn at samsung.com>
---
drivers/nvme/host/core.c | 22 ----------------------
drivers/nvme/host/nvme.h | 1 -
drivers/nvme/host/scsi.c | 27 +++++++++++++++++++++++++--
3 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4e193b9..9591c47 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -706,28 +706,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
return ret;
}
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
- struct nvme_command c = { };
- int error;
-
- c.common.opcode = nvme_admin_get_log_page,
- c.common.nsid = cpu_to_le32(0xFFFFFFFF),
- c.common.cdw10[0] = cpu_to_le32(
- (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
- NVME_LOG_SMART),
-
- *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
- if (!*log)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
- sizeof(struct nvme_smart_log));
- if (error)
- kfree(*log);
- return error;
-}
-
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
u32 q_count = (*count - 1) | ((*count - 1) << 16);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 22ee60b..ccea410 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -313,7 +313,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
void *buffer, size_t buflen, u32 *result);
int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index 1f7671e..50873bb8 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -832,6 +832,29 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
return res;
}
+static int nvme_get_smart_log_page(struct nvme_ctrl *dev,
+ struct nvme_smart_log **log)
+{
+ struct nvme_command c = { };
+ int error;
+
+ c.common.opcode = nvme_admin_get_log_page,
+ c.common.nsid = cpu_to_le32(0xFFFFFFFF),
+ c.common.cdw10[0] = cpu_to_le32(
+ (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+ NVME_LOG_SMART),
+
+ *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+ if (!*log)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+ sizeof(struct nvme_smart_log));
+ if (error)
+ kfree(*log);
+ return error;
+}
+
static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
struct sg_io_hdr *hdr, int alloc_len)
{
@@ -846,7 +869,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
if (log_response == NULL)
return -ENOMEM;
- res = nvme_get_log_page(ns->ctrl, &smart_log);
+ res = nvme_get_smart_log_page(ns->ctrl, &smart_log);
if (res < 0)
goto out_free_response;
@@ -893,7 +916,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
if (log_response == NULL)
return -ENOMEM;
- res = nvme_get_log_page(ns->ctrl, &smart_log);
+ res = nvme_get_smart_log_page(ns->ctrl, &smart_log);
if (res < 0)
goto out_free_response;
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-10 7:05 ` [PATCH V3 0/2] Support for FW activation without reset Arnav Dawn
2017-06-10 7:06 ` [PATCH V3 1/2] nvme: Rename and move get_log_page to nvme/scsi Arnav Dawn
@ 2017-06-10 7:08 ` Arnav Dawn
2017-06-19 17:05 ` Keith Busch
1 sibling, 1 reply; 12+ messages in thread
From: Arnav Dawn @ 2017-06-10 7:08 UTC (permalink / raw)
This patch adds support for handling Fw activation without reset
On completion of FW-activation-starting AER, all queues are
paused till CSTS.PP is cleared or timed out (exceeds max time for
fw activtion MTFA). If device fails to clear CSTS.PP within MTFA,
driver issues reset controller
Signed-off-by: Arnav Dawn <a.dawn at samsung.com>
---
drivers/nvme/host/core.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++-
drivers/nvme/host/nvme.h | 22 ++++++++++++++++
include/linux/nvme.h | 10 +++++++
3 files changed, 99 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9591c47..d73205c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1573,7 +1573,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
-
+ ctrl->mtfa = le16_to_cpu(id->mtfa);
ctrl->npss = id->npss;
prev_apsta = ctrl->apsta;
if (ctrl->quirks & NVME_QUIRK_NO_APST) {
@@ -2242,6 +2242,52 @@ static void nvme_async_event_work(struct work_struct *work)
spin_unlock_irq(&ctrl->lock);
}
+static int nvme_get_fw_slot_info(struct nvme_ctrl *dev,
+ struct nvme_fw_slot_info_log *log)
+{
+ struct nvme_command c = { };
+
+ c.common.opcode = nvme_admin_get_log_page;
+ c.common.nsid = cpu_to_le32(0xFFFFFFFF);
+ c.common.cdw10[0] = cpu_to_le32(
+ (((sizeof(struct nvme_fw_slot_info_log) / 4) - 1) << 16)
+ | NVME_LOG_FW_SLOT);
+
+ if (!log)
+ return -ENOMEM;
+
+ return nvme_submit_sync_cmd(dev->admin_q, &c, log,
+ sizeof(struct nvme_fw_slot_info_log));
+}
+
+static void nvme_fw_act_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+ struct nvme_ctrl, fw_act_work);
+ struct nvme_fw_slot_info_log *log;
+
+ nvme_stop_queues(ctrl);
+ while (nvme_ctrl_pp_status(ctrl)) {
+ if (time_after(jiffies, ctrl->fw_act_timeout)) {
+ dev_warn(ctrl->device,
+ "Fw activation timeout, reset controller\n");
+ ctrl->ops->reset_ctrl(ctrl);
+ break;
+ }
+ msleep(100);
+ }
+ nvme_start_queues(ctrl);
+
+ /* read FW slot informationi to clear the AER*/
+ log = kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
+ if (!log)
+ return;
+ if (nvme_get_fw_slot_info(ctrl, log))
+ dev_warn(ctrl->device,
+ "Get FW SLOT INFO log error\n");
+ kfree(log);
+}
+
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
union nvme_result *res)
{
@@ -2268,6 +2314,24 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
dev_info(ctrl->device, "rescanning\n");
nvme_queue_scan(ctrl);
break;
+ case NVME_AER_NOTICE_FW_ACT_STARTING:
+ {
+ if (nvme_ctrl_pp_status(ctrl)) {
+ if (ctrl->mtfa)
+ ctrl->fw_act_timeout = jiffies +
+ msecs_to_jiffies(ctrl->mtfa * 100);
+ else
+ ctrl->fw_act_timeout = jiffies +
+ msecs_to_jiffies(admin_timeout * 1000);
+
+ schedule_delayed_work(&ctrl->fw_act_work, 0);
+ }
+ break;
+ }
+ case NVME_AER_ERR_FW_IMG_LOAD:
+ dev_warn(ctrl->device, "FW image load error\n");
+ cancel_delayed_work(&ctrl->fw_act_work);
+ break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
@@ -2314,6 +2378,7 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
+ cancel_delayed_work_sync(&ctrl->fw_act_work);
nvme_remove_namespaces(ctrl);
device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
@@ -2361,6 +2426,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->quirks = quirks;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
+ INIT_DELAYED_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
ret = nvme_set_instance(ctrl);
if (ret)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ccea410..0fc179b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -141,6 +141,8 @@ struct nvme_ctrl {
u16 cntlid;
u32 ctrl_config;
+ u16 mtfa;
+ unsigned long fw_act_timeout;
u32 page_size;
u32 max_hw_sectors;
@@ -162,6 +164,7 @@ struct nvme_ctrl {
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
+ struct delayed_work fw_act_work;
/* Power saving configuration */
u64 ps_max_latency_us;
@@ -231,6 +234,25 @@ static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
return val & NVME_CSTS_RDY;
}
+static inline bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
+{
+
+ u32 csts;
+
+ if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
+ return false;
+
+ if (ctrl->ops->reg_read32(ctrl, NVME_REG_CC,
+ &ctrl->ctrl_config))
+ return false;
+
+ if (csts == ~0)
+ return false;
+
+ return ((ctrl->ctrl_config & NVME_CC_ENABLE)
+ && (csts & NVME_CSTS_PP));
+}
+
static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
if (!ctrl->subsystem)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b625bac..53b7e73 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -144,6 +144,7 @@ enum {
NVME_CSTS_RDY = 1 << 0,
NVME_CSTS_CFS = 1 << 1,
NVME_CSTS_NSSRO = 1 << 4,
+ NVME_CSTS_PP = 1 << 5,
NVME_CSTS_SHST_NORMAL = 0 << 2,
NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2,
@@ -337,6 +338,13 @@ struct nvme_smart_log {
__u8 rsvd216[296];
};
+struct nvme_fw_slot_info_log {
+ __u8 afi;
+ __u8 rsvd1[7];
+ __le64 frs[7];
+ __u8 rsvd64[448];
+};
+
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
@@ -347,6 +355,8 @@ enum {
enum {
NVME_AER_NOTICE_NS_CHANGED = 0x0002,
+ NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
+ NVME_AER_ERR_FW_IMG_LOAD = 0x0500,
};
struct nvme_lba_range_type {
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH V3 1/2] nvme: Rename and move get_log_page to nvme/scsi
2017-06-10 7:06 ` [PATCH V3 1/2] nvme: Rename and move get_log_page to nvme/scsi Arnav Dawn
@ 2017-06-13 6:43 ` Christoph Hellwig
0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2017-06-13 6:43 UTC (permalink / raw)
Looks good,
Reviewed-by: Christoph Hellwig <hch at lst.de>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-10 7:08 ` [PATCH V3 2/2] nvme: Add support for FW activation without reset Arnav Dawn
@ 2017-06-19 17:05 ` Keith Busch
2017-06-21 7:36 ` Arnav Dawn
0 siblings, 1 reply; 12+ messages in thread
From: Keith Busch @ 2017-06-19 17:05 UTC (permalink / raw)
On Sat, Jun 10, 2017@12:38:42PM +0530, Arnav Dawn wrote:
> This patch adds support for handling Fw activation without reset
> On completion of FW-activation-starting AER, all queues are
> paused till CSTS.PP is cleared or timed out (exceeds max time for
> fw activtion MTFA). If device fails to clear CSTS.PP within MTFA,
> driver issues reset controller
> @@ -2268,6 +2314,24 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
> dev_info(ctrl->device, "rescanning\n");
> nvme_queue_scan(ctrl);
> break;
> + case NVME_AER_NOTICE_FW_ACT_STARTING:
> + {
> + if (nvme_ctrl_pp_status(ctrl)) {
> + if (ctrl->mtfa)
> + ctrl->fw_act_timeout = jiffies +
> + msecs_to_jiffies(ctrl->mtfa * 100);
Instead of adding another field to the nvme_ctrl structure, just
calculate the timeout in your nvme_fw_act_work function.
> + else
> + ctrl->fw_act_timeout = jiffies +
> + msecs_to_jiffies(admin_timeout * 1000);
> +
> + schedule_delayed_work(&ctrl->fw_act_work, 0);
If scheduling with 0 delay, why is this delayed work?
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-19 17:05 ` Keith Busch
@ 2017-06-21 7:36 ` Arnav Dawn
2017-06-21 13:37 ` Keith Busch
0 siblings, 1 reply; 12+ messages in thread
From: Arnav Dawn @ 2017-06-21 7:36 UTC (permalink / raw)
Hi Keith,
Thanks for the review. Please review my comments.
On Monday 19 June 2017 10:35 PM, Keith Busch wrote:
> On Sat, Jun 10, 2017@12:38:42PM +0530, Arnav Dawn wrote:
>
>> + ctrl->fw_act_timeout = jiffies +
>> + msecs_to_jiffies(ctrl->mtfa * 100);
> Instead of adding another field to the nvme_ctrl structure, just
> calculate the timeout in your nvme_fw_act_work function.
intention was to set fw_act_timeout as soon as the AER is received.
Since work could be scheduled after some time, setting timeout in
work function would add that delay to it.
>
>> + else
>> + ctrl->fw_act_timeout = jiffies +
>> + msecs_to_jiffies(admin_timeout * 1000);
>> +
>> + schedule_delayed_work(&ctrl->fw_act_work, 0);
> If scheduling with 0 delay, why is this delayed work?
I used delayed work so i could use cancel_delayed_work, as cancel_work
was not available.
+ case NVME_AER_ERR_FW_IMG_LOAD:
+ dev_warn(ctrl->device, "FW image load error\n");
+ cancel_delayed_work(&ctrl->fw_act_work);
+ break;
default:
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-21 7:36 ` Arnav Dawn
@ 2017-06-21 13:37 ` Keith Busch
2017-06-23 12:11 ` Arnav Dawn
0 siblings, 1 reply; 12+ messages in thread
From: Keith Busch @ 2017-06-21 13:37 UTC (permalink / raw)
On Wed, Jun 21, 2017@01:06:31PM +0530, Arnav Dawn wrote:
> On Monday 19 June 2017 10:35 PM, Keith Busch wrote:
> > On Sat, Jun 10, 2017@12:38:42PM +0530, Arnav Dawn wrote:
> >
> >> + ctrl->fw_act_timeout = jiffies +
> >> + msecs_to_jiffies(ctrl->mtfa * 100);
> > Instead of adding another field to the nvme_ctrl structure, just
> > calculate the timeout in your nvme_fw_act_work function.
>
> intention was to set fw_act_timeout as soon as the AER is received.
> Since work could be scheduled after some time, setting timeout in
> work function would add that delay to it.
This feature doesn't require such tight constraints. The 100ms sleep
granularity in your CSTS.PP polling already exceeds the amount of time
it takes for work to schedule.
> >> + else
> >> + ctrl->fw_act_timeout = jiffies +
> >> + msecs_to_jiffies(admin_timeout * 1000);
> >> +
> >> + schedule_delayed_work(&ctrl->fw_act_work, 0);
> > If scheduling with 0 delay, why is this delayed work?
>
> I used delayed work so i could use cancel_delayed_work, as cancel_work
> was not available.
If you really have a use for cancel_work, you could send a patch to
export the symbol.
In any case, that's probably not going to do what you want. The work can
only be cancelled if it hasn't started, and since you start it without
delay, the work will likely be running. Maybe you want to add some other
criteria for the nvme_fw_act_work to end early, though I expect CSTS.PP
to clear if f/w load failed.
One last thing I noticed, it looks like the nvme_fw_act_work will break
if an IO timeout occurs while it's running since that may reset the
controller and restart IO queues.
> + case NVME_AER_ERR_FW_IMG_LOAD:
> + dev_warn(ctrl->device, "FW image load error\n");
> + cancel_delayed_work(&ctrl->fw_act_work);
> + break;
> default:
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-21 13:37 ` Keith Busch
@ 2017-06-23 12:11 ` Arnav Dawn
2017-06-23 15:25 ` Keith Busch
2017-06-24 7:11 ` Christoph Hellwig
0 siblings, 2 replies; 12+ messages in thread
From: Arnav Dawn @ 2017-06-23 12:11 UTC (permalink / raw)
On Wednesday 21 June 2017 07:07 PM, Keith Busch wrote:
> On Wed, Jun 21, 2017@01:06:31PM +0530, Arnav Dawn wrote:
>> On Monday 19 June 2017 10:35 PM, Keith Busch wrote:
>>> On Sat, Jun 10, 2017@12:38:42PM +0530, Arnav Dawn wrote:
>>>
>>>> + ctrl->fw_act_timeout = jiffies +
>>>> + msecs_to_jiffies(ctrl->mtfa * 100);
>>> Instead of adding another field to the nvme_ctrl structure, just
>>> calculate the timeout in your nvme_fw_act_work function.
>> intention was to set fw_act_timeout as soon as the AER is received.
>> Since work could be scheduled after some time, setting timeout in
>> work function would add that delay to it.
> This feature doesn't require such tight constraints. The 100ms sleep
> granularity in your CSTS.PP polling already exceeds the amount of time
> it takes for work to schedule.
i agree, i will update it in next version.
>>>> + else
>>>> + ctrl->fw_act_timeout = jiffies +
>>>> + msecs_to_jiffies(admin_timeout * 1000);
>>>> +
>>>> + schedule_delayed_work(&ctrl->fw_act_work, 0);
>>> If scheduling with 0 delay, why is this delayed work?
>> I used delayed work so i could use cancel_delayed_work, as cancel_work
>> was not available.
> If you really have a use for cancel_work, you could send a patch to
> export the symbol.
>
> In any case, that's probably not going to do what you want. The work can
> only be cancelled if it hasn't started, and since you start it without
> delay, the work will likely be running. Maybe you want to add some other
> criteria for the nvme_fw_act_work to end early, though I expect CSTS.PP
> to clear if f/w load failed.
you are right, if the device sends Firmware image load error AER,
CSTS.PP is probably cleared, and the nvme_fw_act_work will end.
so i think nothing needs to be done on FW Image load error AER.
>
> One last thing I noticed, it looks like the nvme_fw_act_work will break
> if an IO timeout occurs while it's running since that may reset the
> controller and restart IO queues.
could you please clarify more?
I think ,that, if an IO Timeout resets controller and restarts queue,
the nvme_ctrl_pp_status will return false, ending the nvme_fw_act_work.
Regards
Arnav Dawn
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-23 12:11 ` Arnav Dawn
@ 2017-06-23 15:25 ` Keith Busch
2017-06-25 9:19 ` Arnav Dawn
2017-06-24 7:11 ` Christoph Hellwig
1 sibling, 1 reply; 12+ messages in thread
From: Keith Busch @ 2017-06-23 15:25 UTC (permalink / raw)
On Fri, Jun 23, 2017@05:41:21PM +0530, Arnav Dawn wrote:
>
> On Wednesday 21 June 2017 07:07 PM, Keith Busch wrote:
> >
> > One last thing I noticed, it looks like the nvme_fw_act_work will break
> > if an IO timeout occurs while it's running since that may reset the
> > controller and restart IO queues.
>
> could you please clarify more?
> I think ,that, if an IO Timeout resets controller and restarts queue,
> the nvme_ctrl_pp_status will return false, ending the nvme_fw_act_work.
Sure, the nvme_fw_act_work will observe CSTS.PP cleared, then it restarts
the IO queues, but the nvme reset requires those queues to be quiesced.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-23 12:11 ` Arnav Dawn
2017-06-23 15:25 ` Keith Busch
@ 2017-06-24 7:11 ` Christoph Hellwig
1 sibling, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2017-06-24 7:11 UTC (permalink / raw)
On Fri, Jun 23, 2017@05:41:21PM +0530, Arnav Dawn wrote:
>
> On Wednesday 21 June 2017 07:07 PM, Keith Busch wrote:
> > On Wed, Jun 21, 2017@01:06:31PM +0530, Arnav Dawn wrote:
> >> On Monday 19 June 2017 10:35 PM, Keith Busch wrote:
> >>> On Sat, Jun 10, 2017@12:38:42PM +0530, Arnav Dawn wrote:
> >>>
> >>>> + ctrl->fw_act_timeout = jiffies +
> >>>> + msecs_to_jiffies(ctrl->mtfa * 100);
> >>> Instead of adding another field to the nvme_ctrl structure, just
> >>> calculate the timeout in your nvme_fw_act_work function.
> >> intention was to set fw_act_timeout as soon as the AER is received.
> >> Since work could be scheduled after some time, setting timeout in
> >> work function would add that delay to it.
> > This feature doesn't require such tight constraints. The 100ms sleep
> > granularity in your CSTS.PP polling already exceeds the amount of time
> > it takes for work to schedule.
> i agree, i will update it in next version.
> >>>> + else
> >>>> + ctrl->fw_act_timeout = jiffies +
> >>>> + msecs_to_jiffies(admin_timeout * 1000);
> >>>> +
> >>>> + schedule_delayed_work(&ctrl->fw_act_work, 0);
> >>> If scheduling with 0 delay, why is this delayed work?
> >> I used delayed work so i could use cancel_delayed_work, as cancel_work
> >> was not available.
> > If you really have a use for cancel_work, you could send a patch to
> > export the symbol.
> >
> > In any case, that's probably not going to do what you want. The work can
> > only be cancelled if it hasn't started, and since you start it without
> > delay, the work will likely be running. Maybe you want to add some other
> > criteria for the nvme_fw_act_work to end early, though I expect CSTS.PP
> > to clear if f/w load failed.
> you are right, if the device sends Firmware image load error AER,
> CSTS.PP is probably cleared, and the nvme_fw_act_work will end.
> so i think nothing needs to be done on FW Image load error AER.
please make sure you do a cancel_work_sync there and in the controller
removal path to make sure we don't have a work item lingering around.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-23 15:25 ` Keith Busch
@ 2017-06-25 9:19 ` Arnav Dawn
2017-06-29 13:09 ` Arnav Dawn
0 siblings, 1 reply; 12+ messages in thread
From: Arnav Dawn @ 2017-06-25 9:19 UTC (permalink / raw)
On Friday 23 June 2017 08:55 PM, Keith Busch wrote:
> On Fri, Jun 23, 2017@05:41:21PM +0530, Arnav Dawn wrote:
>> On Wednesday 21 June 2017 07:07 PM, Keith Busch wrote:
>>> One last thing I noticed, it looks like the nvme_fw_act_work will break
>>> if an IO timeout occurs while it's running since that may reset the
>>> controller and restart IO queues.
>> could you please clarify more?
>> I think ,that, if an IO Timeout resets controller and restarts queue,
>> the nvme_ctrl_pp_status will return false, ending the nvme_fw_act_work.
> Sure, the nvme_fw_act_work will observe CSTS.PP cleared, then it restarts
> the IO queues, but the nvme reset requires those queues to be quiesced.
Thanks for clarifying,
i have made following changes based on comments, please review.
1. Added a delay of 100ms for scheduling fw_act_work,
as work without delay could schedule almost immediately which
may not give time to the controller to set CSTS.PP.
Also, Moved fw_act_timeout calculation to the fw_act_work.
+ case NVME_AER_NOTICE_FW_ACT_STARTING:
+ schedule_delayed_work(&ctrl->fw_act_work,
msecs_to_jiffies(100));
+ break;
+ case NVME_AER_ERR_FW_IMG_LOAD:
+ dev_warn(ctrl->device, "FW image load error\n");
+ cancel_delayed_work(&ctrl->fw_act_work);
+ break;
2. Added a check, that the controller is in NVME_CTRL_LIVE state
before starting queues once PP is cleared or fw activation times out.
This prevents starting of queues unless the controller is in Live state.
+static void nvme_fw_act_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+ struct nvme_ctrl, fw_act_work);
+ struct nvme_fw_slot_info_log *log;
+ unsigned long fw_act_timeout;
+
+ if (ctrl->mtfa)
+ fw_act_timeout = jiffies +
+ msecs_to_jiffies(ctrl->mtfa * 100);
+ else
+ fw_act_timeout = jiffies +
+ msecs_to_jiffies(admin_timeout * 1000);
+
+ nvme_stop_queues(ctrl);
+ while (nvme_ctrl_pp_status(ctrl)) {
+ if (time_after(jiffies, fw_act_timeout)) {
+ dev_warn(ctrl->device,
+ "Fw activation timeout, reset
controller\n");
+ ctrl->ops->reset_ctrl(ctrl);
+ break;
+ }
+ msleep(100);
+ }
+
+ if(ctrl->state != NVME_CTRL_LIVE)
+ return;
+
+ nvme_start_queues(ctrl);
+ /* read FW slot informationi to clear the AER*/
+ log = kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
+ if (!log)
+ return;
+
+ if (nvme_get_fw_slot_info(ctrl, log))
+ dev_warn(ctrl->device,
+ "Get FW SLOT INFO log error\n");
+ kfree(log);
+}
if this is fine i will send out the next version of patch.
Regards
Arnav Dawn
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH V3 2/2] nvme: Add support for FW activation without reset
2017-06-25 9:19 ` Arnav Dawn
@ 2017-06-29 13:09 ` Arnav Dawn
0 siblings, 0 replies; 12+ messages in thread
From: Arnav Dawn @ 2017-06-29 13:09 UTC (permalink / raw)
On Sunday 25 June 2017 02:49 PM, Arnav Dawn wrote:
>
> On Friday 23 June 2017 08:55 PM, Keith Busch wrote:
>> On Fri, Jun 23, 2017@05:41:21PM +0530, Arnav Dawn wrote:
>>> On Wednesday 21 June 2017 07:07 PM, Keith Busch wrote:
>>>> One last thing I noticed, it looks like the nvme_fw_act_work will break
>>>> if an IO timeout occurs while it's running since that may reset the
>>>> controller and restart IO queues.
>>> could you please clarify more?
>>> I think ,that, if an IO Timeout resets controller and restarts queue,
>>> the nvme_ctrl_pp_status will return false, ending the nvme_fw_act_work.
>> Sure, the nvme_fw_act_work will observe CSTS.PP cleared, then it restarts
>> the IO queues, but the nvme reset requires those queues to be quiesced.
> Thanks for clarifying,
> i have made following changes based on comments, please review.
>
> 1. Added a delay of 100ms for scheduling fw_act_work,
> as work without delay could schedule almost immediately which
> may not give time to the controller to set CSTS.PP.
> Also, Moved fw_act_timeout calculation to the fw_act_work.
>
> + case NVME_AER_NOTICE_FW_ACT_STARTING:
> + schedule_delayed_work(&ctrl->fw_act_work,
> msecs_to_jiffies(100));
> + break;
> + case NVME_AER_ERR_FW_IMG_LOAD:
> + dev_warn(ctrl->device, "FW image load error\n");
> + cancel_delayed_work(&ctrl->fw_act_work);
> + break;
>
>
> 2. Added a check, that the controller is in NVME_CTRL_LIVE state
> before starting queues once PP is cleared or fw activation times out.
> This prevents starting of queues unless the controller is in Live state.
>
> +static void nvme_fw_act_work(struct work_struct *work)
> +{
> + struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
> + struct nvme_ctrl, fw_act_work);
> + struct nvme_fw_slot_info_log *log;
> + unsigned long fw_act_timeout;
> +
> + if (ctrl->mtfa)
> + fw_act_timeout = jiffies +
> + msecs_to_jiffies(ctrl->mtfa * 100);
> + else
> + fw_act_timeout = jiffies +
> + msecs_to_jiffies(admin_timeout * 1000);
> +
> + nvme_stop_queues(ctrl);
> + while (nvme_ctrl_pp_status(ctrl)) {
> + if (time_after(jiffies, fw_act_timeout)) {
> + dev_warn(ctrl->device,
> + "Fw activation timeout, reset
> controller\n");
> + ctrl->ops->reset_ctrl(ctrl);
> + break;
> + }
> + msleep(100);
> + }
> +
> + if(ctrl->state != NVME_CTRL_LIVE)
> + return;
> +
> + nvme_start_queues(ctrl);
> + /* read FW slot informationi to clear the AER*/
> + log = kmalloc(sizeof(struct nvme_fw_slot_info_log), GFP_KERNEL);
> + if (!log)
> + return;
> +
> + if (nvme_get_fw_slot_info(ctrl, log))
> + dev_warn(ctrl->device,
> + "Get FW SLOT INFO log error\n");
> + kfree(log);
> +}
>
> if this is fine i will send out the next version of patch.
>
>
could you please review this, i will send out the patch if it looks ok.
Thanks
Arnav
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2017-06-29 13:09 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <CGME20170610070549epcas1p4fae7273b007148b52d31bf62cbb07b74@epcas1p4.samsung.com>
2017-06-10 7:05 ` [PATCH V3 0/2] Support for FW activation without reset Arnav Dawn
2017-06-10 7:06 ` [PATCH V3 1/2] nvme: Rename and move get_log_page to nvme/scsi Arnav Dawn
2017-06-13 6:43 ` Christoph Hellwig
2017-06-10 7:08 ` [PATCH V3 2/2] nvme: Add support for FW activation without reset Arnav Dawn
2017-06-19 17:05 ` Keith Busch
2017-06-21 7:36 ` Arnav Dawn
2017-06-21 13:37 ` Keith Busch
2017-06-23 12:11 ` Arnav Dawn
2017-06-23 15:25 ` Keith Busch
2017-06-25 9:19 ` Arnav Dawn
2017-06-29 13:09 ` Arnav Dawn
2017-06-24 7:11 ` Christoph Hellwig
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.