All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
To: Davidlohr Bueso <dave@stgolabs.net>
Cc: <fan.ni@samsung.com>, <dan.j.williams@intel.com>,
	<a.manzanares@samsung.com>, <linux-cxl@vger.kernel.org>
Subject: Re: [PATCH 3/3] cxl/type3: Add 'dirty-shutdown' parameter
Date: Mon, 23 Dec 2024 20:08:05 +0000	[thread overview]
Message-ID: <20241223200746.00001923@huawei.com> (raw)
In-Reply-To: <20241220160026.204055-4-dave@stgolabs.net>

On Fri, 20 Dec 2024 08:00:26 -0800
Davidlohr Bueso <dave@stgolabs.net> wrote:

> Add a new parameter for type3 memory devices to set the
> dirty shutdown count to a specified value. This allows
> emulating failure paths and informing the admin of such
> event via the Get Health Info command.
> 
> For example, upon a failed GPF, users can boot with
> dirty-shutdown=1 and with the cleared shutdown state,
> to emulate the hardware behavior.
> 
Just noticed, this isn't +CC to qemu-devel.  Please do that
even for patches posted for testing. Makes them easier to
upstream later if we want to as the discussion is all there.

A few comments inline.

Jonathan

> root@cxl:~# cxl list -m mem1 -H
>   {
>     "memdev":"mem1",
>     "pmem_size":2147483648,
>     "health":{
>       "maintenance_needed":false,
>       "performance_degraded":false,
>       "hw_replacement_needed":false,
>       "media_normal":true,
>       "media_not_ready":false,
>       "media_persistence_lost":false,
>       "media_data_lost":false,
>       "media_powerloss_persistence_loss":false,
>       "media_shutdown_persistence_loss":false,
>       "media_persistence_loss_imminent":false,
>       "media_powerloss_data_loss":false,
>       "media_shutdown_data_loss":false,
>       "media_data_loss_imminent":false,
>       "ext_life_used":"normal",
>       "ext_temperature":"normal",
>       "ext_corrected_volatile":"normal",
>       "ext_corrected_persistent":"normal",
>       "life_used_percent":20,
>       "temperature":30,
>       "dirty_shutdowns":1,
>       "volatile_errors":0,
>       "pmem_errors":0
>     },
>     "serial":0,
>     "host":"0000:0e:00.0"
>   }
> 
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  hw/cxl/cxl-mailbox-utils.c  | 32 ++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c          |  1 +
>  include/hw/cxl/cxl_device.h |  3 +++
>  3 files changed, 36 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index ff1d3f50610c..85a58ab96bef 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -87,6 +87,7 @@ enum {
>          #define GET_LSA       0x2
>          #define SET_LSA       0x3
>      HEALTH_INFO_ALERTS = 0x42,
> +        #define GET_HEALTH_INFO        0x0
>          #define GET_SHUTDOWN_STATE     0x3
>          #define SET_SHUTDOWN_STATE     0x4
>      MEDIA_AND_POISON = 0x43,
> @@ -1724,6 +1725,35 @@ static CXLRetCode cmd_sanitize_overwrite(const struct cxl_cmd *cmd,
>      return CXL_MBOX_BG_STARTED;
>  }
>  
> +/* CXL r3.2 Section 8.2.10.9.3.1: Get Shutdown State (Opcode 4200h) */
> +static CXLRetCode cmd_health_get_health_info(const struct cxl_cmd *cmd,
> +                                             uint8_t *payload_in,
> +                                             size_t len_in,
> +                                             uint8_t *payload_out,
> +                                             size_t *len_out,
> +                                             CXLCCI *cci)
> +{
> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +    struct get_health_info_pl {
> +        uint8_t health_status;
> +        uint8_t media_status;
> +        uint8_t additional_status;
> +        uint8_t life_used;
> +        uint16_t device_temperature;
> +        uint32_t dirty_shutdown_count;
> +        uint32_t corrected_volatile_error_count;
> +        uint32_t corrected_persistent_error_count;

This duplicates most of CXLEventMemoryModule (which is defined in the spec
in terms of this payload.

We should factor it out of there an into a header to reuse in two places.
Also make sure the data matches for the stuff like device_temperature.


> +    } QEMU_PACKED *out = (void *)payload_out;
> +
> +    /* anything not set explicitly is considered under normal health */
> +    out->life_used = 20;
> +    out->device_temperature = 30;
> +    out->dirty_shutdown_count = ct3d->dirty_shutdown;
> +    *len_out = sizeof(out);
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>  /* CXL r3.2 Section 8.2.10.9.3.4: Get Shutdown State (Opcode 4203h) */
>  static CXLRetCode cmd_health_get_shutdown_state(const struct cxl_cmd *cmd,
>                                                  uint8_t *payload_in,
> @@ -2911,6 +2941,8 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
>           CXL_MBOX_BACKGROUND_OPERATION_ABORT)},
>      [PERSISTENT_MEM][GET_SECURITY_STATE] = { "GET_SECURITY_STATE",
>          cmd_get_security_state, 0, 0 },
> +    [HEALTH_INFO_ALERTS][GET_HEALTH_INFO] = { "HEALTH_INFO_ALERTS_GET_HEALTH_INFO",
> +        cmd_health_get_health_info, 0, 0 },
>      [HEALTH_INFO_ALERTS][GET_SHUTDOWN_STATE] = { "HEALTH_INFO_ALERTS_GET_SHUTDOWN_STATE",
>          cmd_health_get_shutdown_state, 0, 0 },
>      [HEALTH_INFO_ALERTS][SET_SHUTDOWN_STATE] = { "HEALTH_INFO_ALERTS_SET_SHUTDOWN_STATE",
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 5f365afb4dd1..e622eb9101ce 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1380,6 +1380,7 @@ static Property ct3_props[] = {
>                       TYPE_MEMORY_BACKEND, HostMemoryBackend *),
>      DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
>                       HostMemoryBackend *),
> +    DEFINE_PROP_UINT32("dirty-shutdown", CXLType3Dev, dirty_shutdown, 0),
>      DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
>      DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
>      DEFINE_PROP_UINT8("num-dc-regions", CXLType3Dev, dc.num_regions, 0),
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index 69e6330fe66d..f756e1a99f33 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -653,6 +653,9 @@ struct CXLType3Dev {
>          uint8_t num_regions; /* 0-8 regions */
>          CXLDCRegion regions[DCD_MAX_NUM_REGION];
>      } dc;
> +
> +    /* Dirty shutdown count */
> +    uint32_t dirty_shutdown;
>  };
>  
>  #define TYPE_CXL_TYPE3 "cxl-type3"


      reply	other threads:[~2024-12-23 20:08 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-20 16:00 [PATCH -qemu 0/3] hw/cxl: Support dirty shutdown Davidlohr Bueso
2024-12-20 16:00 ` [PATCH 1/3] cxl: Fix mbox cmd enum order Davidlohr Bueso
2024-12-21 19:05   ` Fan Ni
2024-12-20 16:00 ` [PATCH 2/3] cxl: Support Get/Set Shutdown State commands Davidlohr Bueso
2024-12-21 19:18   ` Fan Ni
2024-12-23 19:57     ` Jonathan Cameron
2024-12-23 20:01   ` Jonathan Cameron
2024-12-20 16:00 ` [PATCH 3/3] cxl/type3: Add 'dirty-shutdown' parameter Davidlohr Bueso
2024-12-23 20:08   ` Jonathan Cameron [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241223200746.00001923@huawei.com \
    --to=jonathan.cameron@huawei.com \
    --cc=a.manzanares@samsung.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave@stgolabs.net \
    --cc=fan.ni@samsung.com \
    --cc=linux-cxl@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.