From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
To: <ira.weiny@intel.com>
Cc: Navneet Singh <navneet.singh@intel.com>,
Fan Ni <fan.ni@samsung.com>,
Dan Williams <dan.j.williams@intel.com>,
<linux-cxl@vger.kernel.org>
Subject: Re: [PATCH 4/5] cxl/mem: Add support to handle DCD add and release capacity events.
Date: Thu, 22 Jun 2023 18:01:30 +0100 [thread overview]
Message-ID: <20230622180130.00006273@Huawei.com> (raw)
In-Reply-To: <20230604-dcd-type2-upstream-v1-4-71b6341bae54@intel.com>
On Wed, 14 Jun 2023 12:16:31 -0700
ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
>
> A dynamic capacity device utilizes events to signal the host about the
> changes to the allocation of DC blocks. The device communicates the
> state of these blocks of dynamic capacity through an extent list that
> describes the starting DPA and length of all blocks the host can access.
>
> Based on the dynamic capacity add or release event type,
> dynamic memory represented by the extents are either added
> or removed as devdax device.
>
> Process the dynamic capacity add and release events.
>
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
>
Hi,
I ran out of time today and will be traveling the next few weeks (may have
review time, may not) so sending what I have on the basis it might be useful.
Jonathan
> +static int cxl_send_dc_cap_response(struct cxl_memdev_state *mds,
> + struct cxl_mbox_dc_response *res,
> + int extent_cnt, int opcode)
> +{
> + struct cxl_mbox_cmd mbox_cmd;
> + int rc, size;
> +
> + size = struct_size(res, extent_list, extent_cnt);
> + res->extent_list_size = cpu_to_le32(extent_cnt);
> +
> + mbox_cmd = (struct cxl_mbox_cmd) {
> + .opcode = opcode,
> + .size_in = size,
> + .payload_in = res,
> + };
> +
> + rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> +
> + return rc;
return cxl_..
> +
> +}
> +
> +static int cxl_prepare_ext_list(struct cxl_mbox_dc_response **res,
> + int *n, struct range *extent)
> +{
> + struct cxl_mbox_dc_response *dc_res;
> + unsigned int size;
> +
> + if (!extent)
> + size = struct_size(dc_res, extent_list, 0);
> + else
> + size = struct_size(dc_res, extent_list, *n + 1);
> +
> + dc_res = krealloc(*res, size, GFP_KERNEL);
> + if (!dc_res)
> + return -ENOMEM;
> +
> + if (extent) {
> + dc_res->extent_list[*n].dpa_start = cpu_to_le64(extent->start);
> + memset(dc_res->extent_list[*n].reserved, 0, 8);
> + dc_res->extent_list[*n].length =
> + cpu_to_le64(range_len(extent));
> + (*n)++;
> + }
> +
> + *res = dc_res;
> + return 0;
> +}
blank line.
> +/**
> + * cxl_handle_dcd_event_records() - Read DCD event records.
> + * @mds: The memory device state
>
> +int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> + unsigned int *extent_gen_num)
> +{
> + struct device *dev = mds->cxlds.dev;
> + struct cxl_mbox_dc_extents *dc_extents;
> + struct cxl_mbox_get_dc_extent get_dc_extent;
> + unsigned int total_extent_cnt;
> + struct cxl_mbox_cmd mbox_cmd;
> + int rc;
> +
> + /* Check GET_DC_EXTENT_LIST is supported by device */
> + if (!test_bit(CXL_DCD_ENABLED_GET_EXTENT_LIST, mds->dcd_cmds)) {
> + dev_dbg(dev, "unsupported cmd : get dyn cap extent list\n");
> + return 0;
> + }
> +
> + dc_extents = kvmalloc(mds->payload_size, GFP_KERNEL);
Put it on the stack - length is fixed and small if requesting 0
extents
> + if (!dc_extents)
> + return -ENOMEM;
> +
> + get_dc_extent = (struct cxl_mbox_get_dc_extent) {
> + .extent_cnt = 0,
> + .start_extent_index = 0,
> + };
> +
> + mbox_cmd = (struct cxl_mbox_cmd) {
> + .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> + .payload_in = &get_dc_extent,
> + .size_in = sizeof(get_dc_extent),
> + .size_out = mds->payload_size,
> + .payload_out = dc_extents,
> + .min_out = 1,
> + };
> + rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> + if (rc < 0)
> + goto out;
> +
> + total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> + *extent_gen_num = le32_to_cpu(dc_extents->extent_list_num);
> + dev_dbg(dev, "Total extent count :%d Extent list Generation Num: %d\n",
> + total_extent_cnt, *extent_gen_num);
> +out:
> +
> + kvfree(dc_extents);
> + if (rc < 0)
> + return rc;
> +
> + return total_extent_cnt;
> +
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_dev_get_dc_extent_cnt, CXL);
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 144232c8305e..ba45c1c3b0a9 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1,6 +1,7 @@
> // SPDX-License-Identifier: GPL-2.0-only
> /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> #include <linux/memregion.h>
> +#include <linux/interrupt.h>
> #include <linux/genalloc.h>
> #include <linux/device.h>
> #include <linux/module.h>
> @@ -11,6 +12,8 @@
> #include <cxlmem.h>
> #include <cxl.h>
> #include "core.h"
> +#include "../../dax/bus.h"
> +#include "../../dax/dax-private.h"
>
> /**
> * DOC: cxl core region
> @@ -166,6 +169,38 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
> return 0;
> }
>
> +static int cxl_region_manage_dc(struct cxl_region *cxlr)
> +{
> + struct cxl_region_params *p = &cxlr->params;
> + unsigned int extent_gen_num;
> + int i, rc;
> +
> + /* Designed for Non Interleaving flow with the assumption one
> + * cxl_region will map the complete device DC region's DPA range
> + */
> + for (i = 0; i < p->nr_targets; i++) {
> + struct cxl_endpoint_decoder *cxled = p->targets[i];
> + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
> +
> + rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> + if (rc < 0)
> + goto err;
> + else if (rc > 1) {
> + rc = cxl_dev_get_dc_extents(mds, rc, 0);
> + if (rc < 0)
> + goto err;
> + mds->num_dc_extents = rc;
> + mds->dc_extents_index = rc - 1;
> + }
> + mds->dc_list_gen_num = extent_gen_num;
> + dev_dbg(mds->cxlds.dev, "No of preallocated extents :%d\n", rc);
> + }
> + return 0;
> +err:
> + return rc;
Direct returns easier to review.
> +}
> +
> static int commit_decoder(struct cxl_decoder *cxld)
> {
> struct cxl_switch_decoder *cxlsd = NULL;
> @@ -2865,11 +2900,14 @@ static int devm_cxl_add_dc_region(struct cxl_region *cxlr)
> return PTR_ERR(cxlr_dax);
>
> cxlr_dc = kzalloc(sizeof(*cxlr_dc), GFP_KERNEL);
> - if (!cxlr_dc) {
> - rc = -ENOMEM;
> - goto err;
> - }
> + if (!cxlr_dc)
> + return -ENOMEM;
Curious. Looks like a bug from earlier.
>
> + rc = request_module("dax_cxl");
> + if (rc) {
> + dev_err(dev, "failed to load dax-ctl module\n");
> + goto load_err;
> + }
> dev = &cxlr_dax->dev;
> rc = dev_set_name(dev, "dax_region%d", cxlr->id);
> if (rc)
> @@ -2891,10 +2929,24 @@ static int devm_cxl_add_dc_region(struct cxl_region *cxlr)
> xa_init(&cxlr_dc->dax_dev_list);
> cxlr->cxlr_dc = cxlr_dc;
> rc = devm_add_action_or_reset(&cxlr->dev, cxl_dc_region_release, cxlr);
> - if (!rc)
> - return 0;
> + if (rc)
> + goto err;
> +
> + if (!dev->driver) {
> + dev_err(dev, "%s Driver not attached\n", dev_name(dev));
> + rc = -ENXIO;
> + goto err;
> + }
> +
> + rc = cxl_region_manage_dc(cxlr);
> + if (rc)
> + goto err;
> +
> + return 0;
> +
> err:
> put_device(dev);
> +load_err:
> kfree(cxlr_dc);
I've lost track, but it seems unlikely we now need to free this in all paths and didn't before. Doesn't
cxl_dc_region_release deal with it?
> return rc;
> }
> @@ -3076,6 +3128,156 @@ struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> }
> EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
> +
> +int cxl_release_dc_extent(struct cxl_memdev_state *mds,
> + struct range *rel_range)
> +{
> + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_dc_region *cxlr_dc;
> + struct dax_region *dax_region;
> + resource_size_t dpa_offset;
> + struct cxl_region *cxlr;
> + struct range hpa_range;
> + struct dev_dax *dev_dax;
> + resource_size_t hpa;
> + struct device *dev;
> + int ranges, rc = 0;
> +int cxl_add_dc_extent(struct cxl_memdev_state *mds, struct range *alloc_range)
> +{
...
> + /*
> + * Find the cxl endpoind decoder with which has the extent dpa range and
> + * get the cxl_region, dax_region refrences.
> + */
> + dev = device_find_child(&cxlmd->endpoint->dev, alloc_range,
> + match_ep_decoder_by_range);
> + if (!dev) {
> + dev_err(mds->cxlds.dev, "%pr not mapped\n", alloc_range);
Odd spacing. (Tab?)
> + return PTR_ERR(dev);
> + }
> +
...
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 9c0b2fa72bdd..0440b5c04ef6 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> /**
> @@ -296,6 +298,13 @@ enum cxl_devtype {
> #define CXL_MAX_DC_REGION 8
> #define CXL_DC_REGION_SRTLEN 8
>
> +struct cxl_dc_extent_data {
> + u64 dpa_start;
> + u64 length;
> + u8 tag[16];
Define for this length probably makes sense. It's non-obvious.
> + u16 shared_extent_seq;
> +};
> +
> +struct cxl_mbox_dc_response {
> + __le32 extent_list_size;
> + u8 reserved[4];
> + struct updated_extent_list {
> + __le64 dpa_start;
> + __le64 length;
> + u8 reserved[8];
> + } __packed extent_list[];
Going to need this in multiple places (e.g. release) so factor out.
> +} __packed;
> +
> @@ -826,6 +894,14 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
> int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
> int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
>
> +/* FIXME why not have these be static in mbox.c? */
:)
> +int cxl_add_dc_extent(struct cxl_memdev_state *mds, struct range *alloc_range);
> +int cxl_release_dc_extent(struct cxl_memdev_state *mds, struct range *rel_range);
> +int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> + unsigned int *extent_gen_num);
> +int cxl_dev_get_dc_extents(struct cxl_memdev_state *mds, unsigned int cnt,
> + unsigned int index);
> +
> #ifdef CONFIG_CXL_SUSPEND
> void cxl_mem_active_inc(void);
> void cxl_mem_active_dec(void);
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index ac1a41bc083d..558ffbcb9b34 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -522,8 +522,8 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
> return irq;
>
> return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread,
> - IRQF_SHARED | IRQF_ONESHOT, NULL,
> - dev_id);
> + IRQF_SHARED | IRQF_ONESHOT, NULL,
> + dev_id);
No comment. :)
> }
>
> static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
> @@ -555,6 +555,7 @@ static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
> .warn_settings = CXL_INT_MSI_MSIX,
> .failure_settings = CXL_INT_MSI_MSIX,
> .fatal_settings = CXL_INT_MSI_MSIX,
> + .dyncap_settings = CXL_INT_MSI_MSIX,
> };
>
> mbox_cmd = (struct cxl_mbox_cmd) {
> @@ -608,6 +609,11 @@ static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
> return rc;
> }
>
> + rc = cxl_event_req_irq(cxlds, policy.dyncap_settings);
> + if (rc) {
> + dev_err(cxlds->dev, "Failed to get interrupt for event dc log\n");
> + return rc;
> + }
Blank line to maintain existing style.
> return 0;
> }
>
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 227800053309..b2b27033f589 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -434,7 +434,7 @@ static void free_dev_dax_ranges(struct dev_dax *dev_dax)
...
> +EXPORT_SYMBOL_GPL(alloc_dev_dax_range);
> +
Single blank line seems to be the style in this file.
>
> static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
> {
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index 8cd79ab34292..aa8418c7aead 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -47,8 +47,11 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
> __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
> void dax_driver_unregister(struct dax_device_driver *dax_drv);
> void kill_dev_dax(struct dev_dax *dev_dax);
> +void unregister_dev_dax(void *dev);
> +void unregister_dax_mapping(void *data);
> bool static_dev_dax(struct dev_dax *dev_dax);
> -
> +int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
> + resource_size_t size);
Keep a blank line here.
> /*
> * While run_dax() is potentially a generic operation that could be
> * defined in include/linux/dax.h we don't want to grow any users
>
next prev parent reply other threads:[~2023-06-22 17:01 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-14 19:16 [PATCH 0/5] cxl/dcd: Add support for Dynamic Capacity Devices (DCD) ira.weiny
2023-06-14 19:16 ` [PATCH 1/5] cxl/mem : Read Dynamic capacity configuration from the device ira.weiny
2023-06-14 22:53 ` Dave Jiang
2023-06-15 15:04 ` Ira Weiny
2023-06-14 23:49 ` Alison Schofield
2023-06-15 22:46 ` Ira Weiny
2023-06-15 18:30 ` Fan Ni
2023-06-15 19:17 ` Navneet Singh
2023-06-15 21:41 ` Fan Ni
2023-06-22 15:58 ` Jonathan Cameron
2023-06-24 13:08 ` Ira Weiny
2023-07-03 2:29 ` Jonathan Cameron
2023-06-14 19:16 ` [PATCH 2/5] cxl/region: Add dynamic capacity cxl region support ira.weiny
2023-06-14 23:37 ` Dave Jiang
2023-06-15 18:12 ` Ira Weiny
2023-06-15 18:28 ` Dave Jiang
2023-06-16 3:52 ` Navneet Singh
2023-06-15 18:56 ` Navneet Singh
2023-06-15 0:21 ` Alison Schofield
2023-06-16 2:06 ` Ira Weiny
2023-06-16 15:56 ` Alison Schofield
2023-06-16 16:51 ` Alison Schofield
2023-06-21 2:44 ` Ira Weiny
2023-06-20 17:55 ` Fan Ni
2023-06-20 20:33 ` Ira Weiny
2023-06-21 3:13 ` Navneet Singh
2023-06-21 17:20 ` Fan Ni
2023-06-23 18:02 ` Ira Weiny
2023-06-22 16:34 ` Jonathan Cameron
2023-07-05 14:49 ` Davidlohr Bueso
2023-06-14 19:16 ` [PATCH 3/5] cxl/mem : Expose dynamic capacity configuration to userspace ira.weiny
2023-06-15 0:40 ` Alison Schofield
2023-06-16 2:47 ` Ira Weiny
2023-06-16 15:58 ` Dave Jiang
2023-06-20 16:23 ` Ira Weiny
2023-06-20 16:48 ` Dave Jiang
2023-06-15 15:41 ` Dave Jiang
2023-06-14 19:16 ` [PATCH 4/5] cxl/mem: Add support to handle DCD add and release capacity events ira.weiny
2023-06-15 2:19 ` Alison Schofield
2023-06-16 4:11 ` Ira Weiny
2023-06-27 18:20 ` Fan Ni
2023-06-15 16:58 ` Dave Jiang
2023-06-22 17:01 ` Jonathan Cameron [this message]
2023-06-29 15:19 ` Ira Weiny
2023-06-27 18:17 ` Fan Ni
2023-07-13 12:55 ` Jørgen Hansen
2023-06-14 19:16 ` [PATCH 5/5] cxl/mem: Trace Dynamic capacity Event Record ira.weiny
2023-06-15 17:08 ` Dave Jiang
2023-06-15 0:56 ` [PATCH 0/5] cxl/dcd: Add support for Dynamic Capacity Devices (DCD) Alison Schofield
2023-06-16 2:57 ` Ira Weiny
2023-06-15 14:51 ` Ira Weiny
2023-06-22 15:07 ` Jonathan Cameron
2023-06-22 16:37 ` Jonathan Cameron
2023-06-27 14:59 ` Ira Weiny
2023-06-29 15:30 ` Ira Weiny
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230622180130.00006273@Huawei.com \
--to=jonathan.cameron@huawei.com \
--cc=dan.j.williams@intel.com \
--cc=fan.ni@samsung.com \
--cc=ira.weiny@intel.com \
--cc=linux-cxl@vger.kernel.org \
--cc=navneet.singh@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox