From: Ira Weiny <ira.weiny@intel.com>
To: Jonathan Cameron <Jonathan.Cameron@huawei.com>, <ira.weiny@intel.com>
Cc: Navneet Singh <navneet.singh@intel.com>,
Fan Ni <fan.ni@samsung.com>,
"Dan Williams" <dan.j.williams@intel.com>,
<linux-cxl@vger.kernel.org>
Subject: Re: [PATCH 4/5] cxl/mem: Add support to handle DCD add and release capacity events.
Date: Thu, 29 Jun 2023 08:19:44 -0700 [thread overview]
Message-ID: <649da1107ab01_7645b294fc@iweiny-mobl.notmuch> (raw)
In-Reply-To: <20230622180130.00006273@Huawei.com>
Jonathan Cameron wrote:
> On Wed, 14 Jun 2023 12:16:31 -0700
> ira.weiny@intel.com wrote:
>
> > From: Navneet Singh <navneet.singh@intel.com>
> >
> > A dynamic capacity device utilizes events to signal the host about the
> > changes to the allocation of DC blocks. The device communicates the
> > state of these blocks of dynamic capacity through an extent list that
> > describes the starting DPA and length of all blocks the host can access.
> >
> > Based on the dynamic capacity add or release event type,
> > the dynamic memory represented by the extents is either added
> > or removed as a devdax device.
> >
> > Process the dynamic capacity add and release events.
> >
> > Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> >
> Hi,
>
> I ran out of time today and will be traveling for the next few weeks (I may
> have review time, I may not), so I'm sending what I have on the basis it
> might be useful.
>
> Jonathan
>
> > +static int cxl_send_dc_cap_response(struct cxl_memdev_state *mds,
> > + struct cxl_mbox_dc_response *res,
> > + int extent_cnt, int opcode)
> > +{
> > + struct cxl_mbox_cmd mbox_cmd;
> > + int rc, size;
> > +
> > + size = struct_size(res, extent_list, extent_cnt);
> > + res->extent_list_size = cpu_to_le32(extent_cnt);
> > +
> > + mbox_cmd = (struct cxl_mbox_cmd) {
> > + .opcode = opcode,
> > + .size_in = size,
> > + .payload_in = res,
> > + };
> > +
> > + rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> > +
> > + return rc;
> return cxl_..
Fixed.
>
> > +
> > +}
> > +
> > +static int cxl_prepare_ext_list(struct cxl_mbox_dc_response **res,
> > + int *n, struct range *extent)
> > +{
> > + struct cxl_mbox_dc_response *dc_res;
> > + unsigned int size;
> > +
> > + if (!extent)
> > + size = struct_size(dc_res, extent_list, 0);
> > + else
> > + size = struct_size(dc_res, extent_list, *n + 1);
> > +
> > + dc_res = krealloc(*res, size, GFP_KERNEL);
> > + if (!dc_res)
> > + return -ENOMEM;
> > +
> > + if (extent) {
> > + dc_res->extent_list[*n].dpa_start = cpu_to_le64(extent->start);
> > + memset(dc_res->extent_list[*n].reserved, 0, 8);
> > + dc_res->extent_list[*n].length =
> > + cpu_to_le64(range_len(extent));
> > + (*n)++;
> > + }
> > +
> > + *res = dc_res;
> > + return 0;
> > +}
> blank line.
Already done.
>
> > +/**
> > + * cxl_handle_dcd_event_records() - Read DCD event records.
> > + * @mds: The memory device state
>
> >
> > +int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> > + unsigned int *extent_gen_num)
> > +{
> > + struct device *dev = mds->cxlds.dev;
> > + struct cxl_mbox_dc_extents *dc_extents;
> > + struct cxl_mbox_get_dc_extent get_dc_extent;
> > + unsigned int total_extent_cnt;
> > + struct cxl_mbox_cmd mbox_cmd;
> > + int rc;
> > +
> > + /* Check GET_DC_EXTENT_LIST is supported by device */
> > + if (!test_bit(CXL_DCD_ENABLED_GET_EXTENT_LIST, mds->dcd_cmds)) {
> > + dev_dbg(dev, "unsupported cmd : get dyn cap extent list\n");
> > + return 0;
> > + }
> > +
> > + dc_extents = kvmalloc(mds->payload_size, GFP_KERNEL);
>
> Put it on the stack - length is fixed and small if requesting 0
> extents
>
Ah yeah, good idea.
>
> > + if (!dc_extents)
> > + return -ENOMEM;
> > +
> > + get_dc_extent = (struct cxl_mbox_get_dc_extent) {
> > + .extent_cnt = 0,
> > + .start_extent_index = 0,
> > + };
> > +
> > + mbox_cmd = (struct cxl_mbox_cmd) {
> > + .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
> > + .payload_in = &get_dc_extent,
> > + .size_in = sizeof(get_dc_extent),
> > + .size_out = mds->payload_size,
> > + .payload_out = dc_extents,
> > + .min_out = 1,
> > + };
> > + rc = cxl_internal_send_cmd(mds, &mbox_cmd);
> > + if (rc < 0)
> > + goto out;
> > +
> > + total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
> > + *extent_gen_num = le32_to_cpu(dc_extents->extent_list_num);
> > + dev_dbg(dev, "Total extent count :%d Extent list Generation Num: %d\n",
> > + total_extent_cnt, *extent_gen_num);
> > +out:
> > +
> > + kvfree(dc_extents);
> > + if (rc < 0)
> > + return rc;
> > +
> > + return total_extent_cnt;
> > +
> > +}
> > +EXPORT_SYMBOL_NS_GPL(cxl_dev_get_dc_extent_cnt, CXL);
>
>
>
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index 144232c8305e..ba45c1c3b0a9 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -1,6 +1,7 @@
> > // SPDX-License-Identifier: GPL-2.0-only
> > /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> > #include <linux/memregion.h>
> > +#include <linux/interrupt.h>
> > #include <linux/genalloc.h>
> > #include <linux/device.h>
> > #include <linux/module.h>
> > @@ -11,6 +12,8 @@
> > #include <cxlmem.h>
> > #include <cxl.h>
> > #include "core.h"
> > +#include "../../dax/bus.h"
> > +#include "../../dax/dax-private.h"
> >
> > /**
> > * DOC: cxl core region
> > @@ -166,6 +169,38 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
> > return 0;
> > }
> >
> > +static int cxl_region_manage_dc(struct cxl_region *cxlr)
> > +{
> > + struct cxl_region_params *p = &cxlr->params;
> > + unsigned int extent_gen_num;
> > + int i, rc;
> > +
> > + /* Designed for Non Interleaving flow with the assumption one
> > + * cxl_region will map the complete device DC region's DPA range
> > + */
> > + for (i = 0; i < p->nr_targets; i++) {
> > + struct cxl_endpoint_decoder *cxled = p->targets[i];
> > + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> > + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
> > +
> > + rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
> > + if (rc < 0)
> > + goto err;
> > + else if (rc > 1) {
> > + rc = cxl_dev_get_dc_extents(mds, rc, 0);
> > + if (rc < 0)
> > + goto err;
> > + mds->num_dc_extents = rc;
> > + mds->dc_extents_index = rc - 1;
> > + }
> > + mds->dc_list_gen_num = extent_gen_num;
> > + dev_dbg(mds->cxlds.dev, "No of preallocated extents :%d\n", rc);
> > + }
> > + return 0;
> > +err:
> > + return rc;
>
> Direct returns easier to review.
Done.
>
> > +}
> > +
> > static int commit_decoder(struct cxl_decoder *cxld)
> > {
> > struct cxl_switch_decoder *cxlsd = NULL;
> > @@ -2865,11 +2900,14 @@ static int devm_cxl_add_dc_region(struct cxl_region *cxlr)
> > return PTR_ERR(cxlr_dax);
> >
> > cxlr_dc = kzalloc(sizeof(*cxlr_dc), GFP_KERNEL);
> > - if (!cxlr_dc) {
> > - rc = -ENOMEM;
> > - goto err;
> > - }
> > + if (!cxlr_dc)
> > + return -ENOMEM;
>
> Curious. Looks like a bug from earlier.
Actually no. This is just a bug in this patch. The put_device() in the
'err' path is still required.
Digging through devm_cxl_add_dc_region() in the previous patch, there is
quite a bit of cleanup that can be done, which I think will make this
next section of code much cleaner. I'll update this patch after cleaning
up the previous one.
>
>
> >
> > + rc = request_module("dax_cxl");
> > + if (rc) {
> > + dev_err(dev, "failed to load dax_cxl module\n");
> > + goto load_err;
> > + }
> > dev = &cxlr_dax->dev;
> > rc = dev_set_name(dev, "dax_region%d", cxlr->id);
> > if (rc)
> > @@ -2891,10 +2929,24 @@ static int devm_cxl_add_dc_region(struct cxl_region *cxlr)
> > xa_init(&cxlr_dc->dax_dev_list);
> > cxlr->cxlr_dc = cxlr_dc;
> > rc = devm_add_action_or_reset(&cxlr->dev, cxl_dc_region_release, cxlr);
> > - if (!rc)
> > - return 0;
> > + if (rc)
> > + goto err;
> > +
> > + if (!dev->driver) {
> > + dev_err(dev, "%s Driver not attached\n", dev_name(dev));
> > + rc = -ENXIO;
> > + goto err;
> > + }
> > +
> > + rc = cxl_region_manage_dc(cxlr);
> > + if (rc)
> > + goto err;
> > +
> > + return 0;
> > +
> > err:
> > put_device(dev);
> > +load_err:
> > kfree(cxlr_dc);
>
> I've lost track, but seems unlikely we now need to free this in all paths and didn't before. Doesn't
> the cxl_dc_region_Release deal with it?
Yes it does. I've realized that a large section, ~70%, of
devm_cxl_add_dc_region() is doing _exactly_ the same thing as
devm_cxl_add_dax_region(). The only thing that devm_cxl_add_dc_region()
needs back is the cxl_dax_region pointer to track it. So I've created a
lead-in patch to factor out devm_cxl_add_dax_region() so that it can be
reused in devm_cxl_add_dc_region(). After that this code is much more
straightforward.
>
> > return rc;
> > }
> > @@ -3076,6 +3128,156 @@ struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> > }
> > EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>
>
>
> > +
> > +int cxl_release_dc_extent(struct cxl_memdev_state *mds,
> > + struct range *rel_range)
> > +{
> > + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
> > + struct cxl_endpoint_decoder *cxled;
> > + struct cxl_dc_region *cxlr_dc;
> > + struct dax_region *dax_region;
> > + resource_size_t dpa_offset;
> > + struct cxl_region *cxlr;
> > + struct range hpa_range;
> > + struct dev_dax *dev_dax;
> > + resource_size_t hpa;
> > + struct device *dev;
> > + int ranges, rc = 0;
>
>
>
>
>
>
> > +int cxl_add_dc_extent(struct cxl_memdev_state *mds, struct range *alloc_range)
> > +{
> ...
>
> > + /*
> > + * Find the cxl endpoind decoder with which has the extent dpa range and
> > + * get the cxl_region, dax_region refrences.
> > + */
> > + dev = device_find_child(&cxlmd->endpoint->dev, alloc_range,
> > + match_ep_decoder_by_range);
> > + if (!dev) {
> > + dev_err(mds->cxlds.dev, "%pr not mapped\n", alloc_range);
>
> Odd spacing. (Tab?)
Changed already.
>
> > + return PTR_ERR(dev);
> > + }
> > +
>
> ...
>
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > index 9c0b2fa72bdd..0440b5c04ef6 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
>
>
> > /**
> > @@ -296,6 +298,13 @@ enum cxl_devtype {
> > #define CXL_MAX_DC_REGION 8
> > #define CXL_DC_REGION_SRTLEN 8
> >
> > +struct cxl_dc_extent_data {
> > + u64 dpa_start;
> > + u64 length;
> > + u8 tag[16];
>
> Define for this length probably makes sense. It's non obvious.
Done.
>
> > + u16 shared_extent_seq;
> > +};
>
> > +
> > +struct cxl_mbox_dc_response {
> > + __le32 extent_list_size;
> > + u8 reserved[4];
> > + struct updated_extent_list {
> > + __le64 dpa_start;
> > + __le64 length;
> > + u8 reserved[8];
> > + } __packed extent_list[];
>
> Going to need this in multiple places (e.g. release) so factor out.
I'm having trouble identifying how this is getting used in multiple
places. But I'm also unsure how this works against the spec.
>
>
> > +} __packed;
> > +
>
>
>
> > @@ -826,6 +894,14 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
> > int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
> > int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
> >
> > +/* FIXME why not have these be static in mbox.c? */
>
> :)
>
> > +int cxl_add_dc_extent(struct cxl_memdev_state *mds, struct range *alloc_range);
> > +int cxl_release_dc_extent(struct cxl_memdev_state *mds, struct range *rel_range);
> > +int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
> > + unsigned int *extent_gen_num);
> > +int cxl_dev_get_dc_extents(struct cxl_memdev_state *mds, unsigned int cnt,
> > + unsigned int index);
> > +
> > #ifdef CONFIG_CXL_SUSPEND
> > void cxl_mem_active_inc(void);
> > void cxl_mem_active_dec(void);
> > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > index ac1a41bc083d..558ffbcb9b34 100644
> > --- a/drivers/cxl/pci.c
> > +++ b/drivers/cxl/pci.c
> > @@ -522,8 +522,8 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
> > return irq;
> >
> > return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread,
> > - IRQF_SHARED | IRQF_ONESHOT, NULL,
> > - dev_id);
> > + IRQF_SHARED | IRQF_ONESHOT, NULL,
> > + dev_id);
>
> No comment. :)
Fixed.
>
> > }
> >
> > static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
> > @@ -555,6 +555,7 @@ static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
> > .warn_settings = CXL_INT_MSI_MSIX,
> > .failure_settings = CXL_INT_MSI_MSIX,
> > .fatal_settings = CXL_INT_MSI_MSIX,
> > + .dyncap_settings = CXL_INT_MSI_MSIX,
> > };
> >
> > mbox_cmd = (struct cxl_mbox_cmd) {
> > @@ -608,6 +609,11 @@ static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
> > return rc;
> > }
> >
> > + rc = cxl_event_req_irq(cxlds, policy.dyncap_settings);
> > + if (rc) {
> > + dev_err(cxlds->dev, "Failed to get interrupt for event dc log\n");
> > + return rc;
> > + }
>
> Blank line to maintain existing style.
Done.
>
> > return 0;
> > }
> >
> > diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> > index 227800053309..b2b27033f589 100644
> > --- a/drivers/dax/bus.c
> > +++ b/drivers/dax/bus.c
> > @@ -434,7 +434,7 @@ static void free_dev_dax_ranges(struct dev_dax *dev_dax)
>
> ...
>
> > +EXPORT_SYMBOL_GPL(alloc_dev_dax_range);
> > +
> Single blank line seems to be the style in this file.
Done.
> >
> > static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
> > {
> > diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> > index 8cd79ab34292..aa8418c7aead 100644
> > --- a/drivers/dax/bus.h
> > +++ b/drivers/dax/bus.h
> > @@ -47,8 +47,11 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
> > __dax_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
> > void dax_driver_unregister(struct dax_device_driver *dax_drv);
> > void kill_dev_dax(struct dev_dax *dev_dax);
> > +void unregister_dev_dax(void *dev);
> > +void unregister_dax_mapping(void *data);
> > bool static_dev_dax(struct dev_dax *dev_dax);
> > -
> > +int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
> > + resource_size_t size);
>
> Keep a blank line here..
Done.
Thanks for the review. Based on this review, the previous patch review,
and more issues I've found along the way, I'm thinking you should hold off
on this series.
I'm working toward a very new V2, with my SoB line and all the rearch I
think is needed.
Thanks for looking!
Ira