From: "Maurizio Lombardi" <mlombard@bsdbackstore.eu>
To: "Mike Christie" <michael.christie@oracle.com>,
<bvanassche@acm.org>, <martin.petersen@oracle.com>,
<linux-scsi@vger.kernel.org>, <target-devel@vger.kernel.org>
Subject: Re: [PATCH v2 3/3] scsi: target: Move LUN stats to per CPU
Date: Tue, 09 Sep 2025 17:36:24 +0200 [thread overview]
Message-ID: <DCODPRWQNASG.2WN8KKSW05R13@bsdbackstore.eu> (raw)
In-Reply-To: <20250908231000.10777-4-michael.christie@oracle.com>
On Tue Sep 9, 2025 at 1:05 AM CEST, Mike Christie wrote:
> The atomic use in the main I/O path is causing perf issues when using
> higher performance backend devices and multiple queues (more than
> 10 when using vhost-scsi) like with this fio workload:
>
> [global]
> bs=4K
> iodepth=128
> direct=1
> ioengine=libaio
> group_reporting
> time_based
> runtime=120
> name=standard-iops
> rw=randread
> numjobs=16
> cpus_allowed=0-15
>
> To fix this issue, this moves the LUN stats to per CPU.
>
> Note: I forgot to include this patch with the delayed/ordered per CPU
> tracking and per device/device entry per CPU stats. With this patch you
> get the full 33% improvements when using fast backends, multiple queues
> and multiple IO submiters.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/target/target_core_device.c | 1 +
> drivers/target/target_core_fabric_configfs.c | 2 +-
> drivers/target/target_core_internal.h | 1 +
> drivers/target/target_core_stat.c | 67 +++++++-------------
> drivers/target/target_core_tpg.c | 25 +++++++-
> drivers/target/target_core_transport.c | 22 +++++--
> include/target/target_core_base.h | 8 +--
> 7 files changed, 67 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
> index 7bb711b24c0d..2d4a7c0c69ce 100644
> --- a/drivers/target/target_core_device.c
> +++ b/drivers/target/target_core_device.c
> @@ -814,6 +814,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
> dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN;
> dev->dev_attrib.submit_type = TARGET_FABRIC_DEFAULT_SUBMIT;
>
> + /* Skip allocating lun_stats since we can't export them. */
> xcopy_lun = &dev->xcopy_lun;
> rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
> init_completion(&xcopy_lun->lun_shutdown_comp);
> diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
> index 7156a4dc1ca7..13159928e365 100644
> --- a/drivers/target/target_core_fabric_configfs.c
> +++ b/drivers/target/target_core_fabric_configfs.c
> @@ -697,7 +697,7 @@ static void target_fabric_port_release(struct config_item *item)
> struct se_lun *lun = container_of(to_config_group(item),
> struct se_lun, lun_group);
>
> - kfree_rcu(lun, rcu_head);
> + call_rcu(&lun->rcu_head, target_tpg_free_lun);
> }
>
> static struct configfs_item_operations target_fabric_port_item_ops = {
> diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
> index 20aab1f50565..763e6d26e187 100644
> --- a/drivers/target/target_core_internal.h
> +++ b/drivers/target/target_core_internal.h
> @@ -125,6 +125,7 @@ void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *,
> struct se_lun *);
> void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *);
> struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u64);
> +void target_tpg_free_lun(struct rcu_head *head);
> int core_tpg_add_lun(struct se_portal_group *, struct se_lun *,
> bool, struct se_device *);
> void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *);
> diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
> index e29d43dacaf7..083205052be2 100644
> --- a/drivers/target/target_core_stat.c
> +++ b/drivers/target/target_core_stat.c
> @@ -606,53 +606,30 @@ static ssize_t target_stat_tgt_port_port_index_show(struct config_item *item,
> return ret;
> }
>
> -static ssize_t target_stat_tgt_port_in_cmds_show(struct config_item *item,
> - char *page)
> -{
> - struct se_lun *lun = to_stat_tgt_port(item);
> - struct se_device *dev;
> - ssize_t ret = -ENODEV;
> -
> - rcu_read_lock();
> - dev = rcu_dereference(lun->lun_se_dev);
> - if (dev)
> - ret = snprintf(page, PAGE_SIZE, "%lu\n",
> - atomic_long_read(&lun->lun_stats.cmd_pdus));
> - rcu_read_unlock();
> - return ret;
> -}
> -
> -static ssize_t target_stat_tgt_port_write_mbytes_show(struct config_item *item,
> - char *page)
> -{
> - struct se_lun *lun = to_stat_tgt_port(item);
> - struct se_device *dev;
> - ssize_t ret = -ENODEV;
> -
> - rcu_read_lock();
> - dev = rcu_dereference(lun->lun_se_dev);
> - if (dev)
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(atomic_long_read(&lun->lun_stats.rx_data_octets) >> 20));
> - rcu_read_unlock();
> - return ret;
> +#define tgt_port_show_per_cpu_stat(prefix, field, shift) \
> +per_cpu_stat_snprintf(scsi_port_stats, prefix, field, shift); \
> +static ssize_t \
> +target_stat_##prefix##_show(struct config_item *item, char *page) \
> +{ \
> + struct se_lun *lun = to_stat_tgt_port(item); \
> + struct se_device *dev; \
> + int ret; \
> + \
> + rcu_read_lock(); \
> + dev = rcu_dereference(lun->lun_se_dev); \
> + if (!dev) { \
> + rcu_read_unlock(); \
> + return -ENODEV; \
> + } \
> + \
> + ret = per_cpu_stat_##prefix##_snprintf(lun->lun_stats, page); \
> + rcu_read_unlock(); \
> + return ret; \
> }
>
> -static ssize_t target_stat_tgt_port_read_mbytes_show(struct config_item *item,
> - char *page)
> -{
> - struct se_lun *lun = to_stat_tgt_port(item);
> - struct se_device *dev;
> - ssize_t ret = -ENODEV;
> -
> - rcu_read_lock();
> - dev = rcu_dereference(lun->lun_se_dev);
> - if (dev)
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(atomic_long_read(&lun->lun_stats.tx_data_octets) >> 20));
> - rcu_read_unlock();
> - return ret;
> -}
> +tgt_port_show_per_cpu_stat(tgt_port_in_cmds, cmd_pdus, 0);
> +tgt_port_show_per_cpu_stat(tgt_port_write_mbytes, rx_data_octets, 20);
> +tgt_port_show_per_cpu_stat(tgt_port_read_mbytes, tx_data_octets, 20);
>
> static ssize_t target_stat_tgt_port_hs_in_cmds_show(struct config_item *item,
> char *page)
> diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
> index c0e429e5ef31..540ff15882c9 100644
> --- a/drivers/target/target_core_tpg.c
> +++ b/drivers/target/target_core_tpg.c
> @@ -548,7 +548,7 @@ int core_tpg_register(
> ret = core_tpg_add_lun(se_tpg, se_tpg->tpg_virt_lun0,
> true, g_lun0_dev);
> if (ret < 0) {
> - kfree(se_tpg->tpg_virt_lun0);
> + target_tpg_free_lun(&se_tpg->tpg_virt_lun0->rcu_head);
> return ret;
> }
> }
> @@ -595,7 +595,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
>
> if (se_tpg->proto_id >= 0) {
> core_tpg_remove_lun(se_tpg, se_tpg->tpg_virt_lun0);
> - kfree_rcu(se_tpg->tpg_virt_lun0, rcu_head);
> + call_rcu(&se_tpg->tpg_virt_lun0->rcu_head, target_tpg_free_lun);
> }
>
> target_tpg_deregister_rtpi(se_tpg);
> @@ -609,12 +609,21 @@ struct se_lun *core_tpg_alloc_lun(
> u64 unpacked_lun)
> {
> struct se_lun *lun;
> + int ret;
>
> lun = kzalloc(sizeof(*lun), GFP_KERNEL);
> if (!lun) {
> pr_err("Unable to allocate se_lun memory\n");
> return ERR_PTR(-ENOMEM);
> }
> +
> + lun->lun_stats = alloc_percpu(struct scsi_port_stats);
> + if (!lun->lun_stats) {
> + pr_err("Unable to allocate se_lun stats memory\n");
> + ret = -ENOMEM;
> + goto free_lun;
> + }
> +
> lun->unpacked_lun = unpacked_lun;
> atomic_set(&lun->lun_acl_count, 0);
> init_completion(&lun->lun_shutdown_comp);
> @@ -628,6 +637,18 @@ struct se_lun *core_tpg_alloc_lun(
> lun->lun_tpg = tpg;
>
> return lun;
> +
> +free_lun:
> + kfree(lun);
> + return ERR_PTR(-ENOMEM);
> +}
nit: "ret" is set to -ENOMEM but never used, it can be removed.
Maurizio
next prev parent reply other threads:[~2025-09-09 15:36 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-08 23:05 [PATCH v2 0/3] target: RW/num_cmds stats improvements Mike Christie
2025-09-08 23:05 ` [PATCH v2 1/3] scsi: target: Fix lun/device R/W and total command stats Mike Christie
2025-09-17 13:47 ` Dmitry Bogdanov
2025-09-08 23:05 ` [PATCH v2 2/3] scsi: target: Create and use macro helpers for per CPU stats Mike Christie
2025-09-17 13:33 ` Dmitry Bogdanov
2025-09-08 23:05 ` [PATCH v2 3/3] scsi: target: Move LUN stats to per CPU Mike Christie
2025-09-09 15:36 ` Maurizio Lombardi [this message]
2025-09-17 13:38 ` Dmitry Bogdanov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DCODPRWQNASG.2WN8KKSW05R13@bsdbackstore.eu \
--to=mlombard@bsdbackstore.eu \
--cc=bvanassche@acm.org \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=michael.christie@oracle.com \
--cc=target-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox