* [PATCH 1/1] scsi: target: Move LUN stats to per CPU
@ 2025-07-24 0:45 Mike Christie
2025-07-24 14:26 ` kernel test robot
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Mike Christie @ 2025-07-24 0:45 UTC (permalink / raw)
To: martin.petersen, linux-scsi, target-devel; +Cc: Mike Christie
The atomic use in the main I/O path is causing perf issues when using
higher performance backend devices and multiple queues. This moves the
LUN stats to per CPU.
I forgot to include this patch with the delayed/ordered per CPU
tracking and per device/device entry per CPU stats. With this patch
you get the full 33% improvements when using fast backends, multiple
queues and multiple IO submiters.
Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
drivers/target/target_core_fabric_configfs.c | 2 +-
drivers/target/target_core_internal.h | 1 +
drivers/target/target_core_stat.c | 48 ++++++++++++++++----
drivers/target/target_core_tpg.c | 21 +++++++++
drivers/target/target_core_transport.c | 14 +++---
include/target/target_core_base.h | 8 ++--
6 files changed, 73 insertions(+), 21 deletions(-)
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 7156a4dc1ca7..13159928e365 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -697,7 +697,7 @@ static void target_fabric_port_release(struct config_item *item)
struct se_lun *lun = container_of(to_config_group(item),
struct se_lun, lun_group);
- kfree_rcu(lun, rcu_head);
+ call_rcu(&lun->rcu_head, target_tpg_free_lun);
}
static struct configfs_item_operations target_fabric_port_item_ops = {
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 408be26d2e9b..dfe529e59a29 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -125,6 +125,7 @@ void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *,
struct se_lun *);
void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *);
struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u64);
+void target_tpg_free_lun(struct rcu_head *head);
int core_tpg_add_lun(struct se_portal_group *, struct se_lun *,
bool, struct se_device *);
void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *);
diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
index 6bdf2d8bd694..88f8be197a68 100644
--- a/drivers/target/target_core_stat.c
+++ b/drivers/target/target_core_stat.c
@@ -627,14 +627,24 @@ static ssize_t target_stat_tgt_port_in_cmds_show(struct config_item *item,
char *page)
{
struct se_lun *lun = to_stat_tgt_port(item);
+ struct scsi_port_stats *stats;
struct se_device *dev;
ssize_t ret = -ENODEV;
+ unsigned int cpu;
+ u32 pdus = 0;
rcu_read_lock();
dev = rcu_dereference(lun->lun_se_dev);
- if (dev)
- ret = snprintf(page, PAGE_SIZE, "%lu\n",
- atomic_long_read(&lun->lun_stats.cmd_pdus));
+ if (!dev)
+ goto unlock;
+
+ for_each_possible_cpu(cpu) {
+ stats = per_cpu_ptr(lun->lun_stats, cpu);
+ pdus += stats->cmd_pdus;
+ }
+
+ ret = snprintf(page, PAGE_SIZE, "%u\n", pdus);
+unlock:
rcu_read_unlock();
return ret;
}
@@ -643,14 +653,24 @@ static ssize_t target_stat_tgt_port_write_mbytes_show(struct config_item *item,
char *page)
{
struct se_lun *lun = to_stat_tgt_port(item);
+ struct scsi_port_stats *stats;
struct se_device *dev;
ssize_t ret = -ENODEV;
+ unsigned int cpu;
+ u32 octets = 0;
rcu_read_lock();
dev = rcu_dereference(lun->lun_se_dev);
- if (dev)
- ret = snprintf(page, PAGE_SIZE, "%u\n",
- (u32)(atomic_long_read(&lun->lun_stats.rx_data_octets) >> 20));
+ if (!dev)
+ goto unlock;
+
+ for_each_possible_cpu(cpu) {
+ stats = per_cpu_ptr(lun->lun_stats, cpu);
+ octets += stats->rx_data_octets;
+ }
+
+ ret = snprintf(page, PAGE_SIZE, "%u\n", octets);
+unlock:
rcu_read_unlock();
return ret;
}
@@ -659,14 +679,24 @@ static ssize_t target_stat_tgt_port_read_mbytes_show(struct config_item *item,
char *page)
{
struct se_lun *lun = to_stat_tgt_port(item);
+ struct scsi_port_stats *stats;
struct se_device *dev;
ssize_t ret = -ENODEV;
+ unsigned int cpu;
+ u32 octets = 0;
rcu_read_lock();
dev = rcu_dereference(lun->lun_se_dev);
- if (dev)
- ret = snprintf(page, PAGE_SIZE, "%u\n",
- (u32)(atomic_long_read(&lun->lun_stats.tx_data_octets) >> 20));
+ if (!dev)
+ goto unlock;
+
+ for_each_possible_cpu(cpu) {
+ stats = per_cpu_ptr(lun->lun_stats, cpu);
+ octets += stats->tx_data_octets;
+ }
+
+ ret = snprintf(page, PAGE_SIZE, "%u\n", octets);
+unlock:
rcu_read_unlock();
return ret;
}
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index c0e429e5ef31..caa95aa6f502 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -609,12 +609,21 @@ struct se_lun *core_tpg_alloc_lun(
u64 unpacked_lun)
{
struct se_lun *lun;
+ int ret;
lun = kzalloc(sizeof(*lun), GFP_KERNEL);
if (!lun) {
pr_err("Unable to allocate se_lun memory\n");
return ERR_PTR(-ENOMEM);
}
+
+ lun->lun_stats = alloc_percpu(struct scsi_port_stats);
+ if (!lun->lun_stats) {
+ pr_err("Unable to allocate se_lun stats memory\n");
+ ret = -ENOMEM;
+ goto free_lun;
+ }
+
lun->unpacked_lun = unpacked_lun;
atomic_set(&lun->lun_acl_count, 0);
init_completion(&lun->lun_shutdown_comp);
@@ -628,6 +637,18 @@ struct se_lun *core_tpg_alloc_lun(
lun->lun_tpg = tpg;
return lun;
+
+free_lun:
+ kfree(lun);
+ return ERR_PTR(-ENOMEM);
+}
+
+void target_tpg_free_lun(struct rcu_head *head)
+{
+ struct se_lun *lun = container_of(head, struct se_lun, rcu_head);
+
+ free_percpu(lun->lun_stats);
+ kfree(lun);
}
int core_tpg_add_lun(
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 0a76bdfe5528..4ec66ca6c0ca 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1571,7 +1571,7 @@ target_cmd_parse_cdb(struct se_cmd *cmd)
return ret;
cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE;
- atomic_long_inc(&cmd->se_lun->lun_stats.cmd_pdus);
+ this_cpu_inc(cmd->se_lun->lun_stats->cmd_pdus);
return 0;
}
EXPORT_SYMBOL(target_cmd_parse_cdb);
@@ -2597,8 +2597,8 @@ static void target_complete_ok_work(struct work_struct *work)
!(cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL))
goto queue_status;
- atomic_long_add(cmd->data_length,
- &cmd->se_lun->lun_stats.tx_data_octets);
+ this_cpu_add(cmd->se_lun->lun_stats->tx_data_octets,
+ cmd->data_length);
/*
* Perform READ_STRIP of PI using software emulation when
* backend had PI enabled, if the transport will not be
@@ -2621,14 +2621,14 @@ static void target_complete_ok_work(struct work_struct *work)
goto queue_full;
break;
case DMA_TO_DEVICE:
- atomic_long_add(cmd->data_length,
- &cmd->se_lun->lun_stats.rx_data_octets);
+ this_cpu_add(cmd->se_lun->lun_stats->rx_data_octets,
+ cmd->data_length);
/*
* Check if we need to send READ payload for BIDI-COMMAND
*/
if (cmd->se_cmd_flags & SCF_BIDI) {
- atomic_long_add(cmd->data_length,
- &cmd->se_lun->lun_stats.tx_data_octets);
+ this_cpu_add(cmd->se_lun->lun_stats->tx_data_octets,
+ cmd->data_length);
ret = cmd->se_tfo->queue_data_in(cmd);
if (ret)
goto queue_full;
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index c4d9116904aa..e73fb224625d 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -744,9 +744,9 @@ struct se_port_stat_grps {
};
struct scsi_port_stats {
- atomic_long_t cmd_pdus;
- atomic_long_t tx_data_octets;
- atomic_long_t rx_data_octets;
+ u32 cmd_pdus;
+ u32 tx_data_octets;
+ u32 rx_data_octets;
};
struct se_lun {
@@ -773,7 +773,7 @@ struct se_lun {
spinlock_t lun_tg_pt_gp_lock;
struct se_portal_group *lun_tpg;
- struct scsi_port_stats lun_stats;
+ struct scsi_port_stats __percpu *lun_stats;
struct config_group lun_group;
struct se_port_stat_grps port_stat_grps;
struct completion lun_shutdown_comp;
--
2.47.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/1] scsi: target: Move LUN stats to per CPU
2025-07-24 0:45 [PATCH 1/1] scsi: target: Move LUN stats to per CPU Mike Christie
@ 2025-07-24 14:26 ` kernel test robot
2025-07-24 15:06 ` Bart Van Assche
2025-07-28 15:08 ` Dmitry Bogdanov
2 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2025-07-24 14:26 UTC (permalink / raw)
To: Mike Christie, martin.petersen, linux-scsi, target-devel
Cc: oe-kbuild-all, Mike Christie
Hi Mike,
kernel test robot noticed the following build warnings:
[auto build test WARNING on mkp-scsi/for-next]
[also build test WARNING on jejb-scsi/for-next linus/master v6.16-rc7 next-20250724]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Mike-Christie/scsi-target-Move-LUN-stats-to-per-CPU/20250724-084852
base: https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next
patch link: https://lore.kernel.org/r/20250724004558.40993-1-michael.christie%40oracle.com
patch subject: [PATCH 1/1] scsi: target: Move LUN stats to per CPU
config: sparc64-randconfig-001-20250724 (https://download.01.org/0day-ci/archive/20250724/202507242247.rt3StU9U-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 8.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250724/202507242247.rt3StU9U-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507242247.rt3StU9U-lkp@intel.com/
All warnings (new ones prefixed by >>):
drivers/target/target_core_tpg.c: In function 'core_tpg_alloc_lun':
>> drivers/target/target_core_tpg.c:612:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
int ret;
^~~
vim +/ret +612 drivers/target/target_core_tpg.c
606
607 struct se_lun *core_tpg_alloc_lun(
608 struct se_portal_group *tpg,
609 u64 unpacked_lun)
610 {
611 struct se_lun *lun;
> 612 int ret;
613
614 lun = kzalloc(sizeof(*lun), GFP_KERNEL);
615 if (!lun) {
616 pr_err("Unable to allocate se_lun memory\n");
617 return ERR_PTR(-ENOMEM);
618 }
619
620 lun->lun_stats = alloc_percpu(struct scsi_port_stats);
621 if (!lun->lun_stats) {
622 pr_err("Unable to allocate se_lun stats memory\n");
623 ret = -ENOMEM;
624 goto free_lun;
625 }
626
627 lun->unpacked_lun = unpacked_lun;
628 atomic_set(&lun->lun_acl_count, 0);
629 init_completion(&lun->lun_shutdown_comp);
630 INIT_LIST_HEAD(&lun->lun_deve_list);
631 INIT_LIST_HEAD(&lun->lun_dev_link);
632 atomic_set(&lun->lun_tg_pt_secondary_offline, 0);
633 spin_lock_init(&lun->lun_deve_lock);
634 mutex_init(&lun->lun_tg_pt_md_mutex);
635 INIT_LIST_HEAD(&lun->lun_tg_pt_gp_link);
636 spin_lock_init(&lun->lun_tg_pt_gp_lock);
637 lun->lun_tpg = tpg;
638
639 return lun;
640
641 free_lun:
642 kfree(lun);
643 return ERR_PTR(-ENOMEM);
644 }
645
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/1] scsi: target: Move LUN stats to per CPU
2025-07-24 0:45 [PATCH 1/1] scsi: target: Move LUN stats to per CPU Mike Christie
2025-07-24 14:26 ` kernel test robot
@ 2025-07-24 15:06 ` Bart Van Assche
2025-07-28 15:08 ` Dmitry Bogdanov
2 siblings, 0 replies; 4+ messages in thread
From: Bart Van Assche @ 2025-07-24 15:06 UTC (permalink / raw)
To: Mike Christie, martin.petersen, linux-scsi, target-devel
On 7/23/25 5:45 PM, Mike Christie wrote:
> diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
> index c4d9116904aa..e73fb224625d 100644
> --- a/include/target/target_core_base.h
> +++ b/include/target/target_core_base.h
> @@ -744,9 +744,9 @@ struct se_port_stat_grps {
> };
>
> struct scsi_port_stats {
> - atomic_long_t cmd_pdus;
> - atomic_long_t tx_data_octets;
> - atomic_long_t rx_data_octets;
> + u32 cmd_pdus;
> + u32 tx_data_octets;
> + u32 rx_data_octets;
> };
>
> struct se_lun {
> @@ -773,7 +773,7 @@ struct se_lun {
> spinlock_t lun_tg_pt_gp_lock;
>
> struct se_portal_group *lun_tpg;
> - struct scsi_port_stats lun_stats;
> + struct scsi_port_stats __percpu *lun_stats;
> struct config_group lun_group;
> struct se_port_stat_grps port_stat_grps;
> struct completion lun_shutdown_comp;
Is this perhaps an open-coded implementation of struct percpu_counter?
Why hasn't struct percpu_counter been used? I think this should be
explained in the patch description.
Thanks,
Bart.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/1] scsi: target: Move LUN stats to per CPU
2025-07-24 0:45 [PATCH 1/1] scsi: target: Move LUN stats to per CPU Mike Christie
2025-07-24 14:26 ` kernel test robot
2025-07-24 15:06 ` Bart Van Assche
@ 2025-07-28 15:08 ` Dmitry Bogdanov
2 siblings, 0 replies; 4+ messages in thread
From: Dmitry Bogdanov @ 2025-07-28 15:08 UTC (permalink / raw)
To: Mike Christie; +Cc: martin.petersen, linux-scsi, target-devel
On Wed, Jul 23, 2025 at 07:45:57PM -0500, Mike Christie wrote:
> The atomic use in the main I/O path is causing perf issues when using
> higher performance backend devices and multiple queues. This moves the
> LUN stats to per CPU.
>
> I forgot to include this patch with the delayed/ordered per CPU
> tracking and per device/device entry per CPU stats. With this patch
> you get the full 33% improvements when using fast backends, multiple
> queues and multiple IO submiters.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/target/target_core_fabric_configfs.c | 2 +-
> drivers/target/target_core_internal.h | 1 +
> drivers/target/target_core_stat.c | 48 ++++++++++++++++----
> drivers/target/target_core_tpg.c | 21 +++++++++
> drivers/target/target_core_transport.c | 14 +++---
> include/target/target_core_base.h | 8 ++--
> 6 files changed, 73 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
> index 7156a4dc1ca7..13159928e365 100644
> --- a/drivers/target/target_core_fabric_configfs.c
> +++ b/drivers/target/target_core_fabric_configfs.c
> @@ -697,7 +697,7 @@ static void target_fabric_port_release(struct config_item *item)
> struct se_lun *lun = container_of(to_config_group(item),
> struct se_lun, lun_group);
>
> - kfree_rcu(lun, rcu_head);
> + call_rcu(&lun->rcu_head, target_tpg_free_lun);
There is se_tpg->tpg_virt_lun0 that is also the lun object that will
have allocated lun_stats, but tpg_virt_lun0 is deallocated in other
places. You have to take care of it to not leak its lun_stats.
> }
>
> static struct configfs_item_operations target_fabric_port_item_ops = {
> diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
> index 408be26d2e9b..dfe529e59a29 100644
> --- a/drivers/target/target_core_internal.h
> +++ b/drivers/target/target_core_internal.h
> @@ -125,6 +125,7 @@ void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *,
> struct se_lun *);
> void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *);
> struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u64);
> +void target_tpg_free_lun(struct rcu_head *head);
> int core_tpg_add_lun(struct se_portal_group *, struct se_lun *,
> bool, struct se_device *);
> void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *);
> diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
> index 6bdf2d8bd694..88f8be197a68 100644
> --- a/drivers/target/target_core_stat.c
> +++ b/drivers/target/target_core_stat.c
> @@ -627,14 +627,24 @@ static ssize_t target_stat_tgt_port_in_cmds_show(struct config_item *item,
> char *page)
> {
> struct se_lun *lun = to_stat_tgt_port(item);
> + struct scsi_port_stats *stats;
> struct se_device *dev;
> ssize_t ret = -ENODEV;
> + unsigned int cpu;
> + u32 pdus = 0;
>
> rcu_read_lock();
> dev = rcu_dereference(lun->lun_se_dev);
> - if (dev)
> - ret = snprintf(page, PAGE_SIZE, "%lu\n",
> - atomic_long_read(&lun->lun_stats.cmd_pdus));
> + if (!dev)
> + goto unlock;
> +
> + for_each_possible_cpu(cpu) {
> + stats = per_cpu_ptr(lun->lun_stats, cpu);
> + pdus += stats->cmd_pdus;
> + }
> +
> + ret = snprintf(page, PAGE_SIZE, "%u\n", pdus);
> +unlock:
> rcu_read_unlock();
> return ret;
> }
> @@ -643,14 +653,24 @@ static ssize_t target_stat_tgt_port_write_mbytes_show(struct config_item *item,
> char *page)
> {
> struct se_lun *lun = to_stat_tgt_port(item);
> + struct scsi_port_stats *stats;
> struct se_device *dev;
> ssize_t ret = -ENODEV;
> + unsigned int cpu;
> + u32 octets = 0;
>
> rcu_read_lock();
> dev = rcu_dereference(lun->lun_se_dev);
> - if (dev)
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(atomic_long_read(&lun->lun_stats.rx_data_octets) >> 20));
> + if (!dev)
> + goto unlock;
> +
> + for_each_possible_cpu(cpu) {
> + stats = per_cpu_ptr(lun->lun_stats, cpu);
> + octets += stats->rx_data_octets;
> + }
> +
> + ret = snprintf(page, PAGE_SIZE, "%u\n", octets);
> +unlock:
> rcu_read_unlock();
> return ret;
> }
> @@ -659,14 +679,24 @@ static ssize_t target_stat_tgt_port_read_mbytes_show(struct config_item *item,
> char *page)
> {
> struct se_lun *lun = to_stat_tgt_port(item);
> + struct scsi_port_stats *stats;
> struct se_device *dev;
> ssize_t ret = -ENODEV;
> + unsigned int cpu;
> + u32 octets = 0;
>
> rcu_read_lock();
> dev = rcu_dereference(lun->lun_se_dev);
> - if (dev)
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(atomic_long_read(&lun->lun_stats.tx_data_octets) >> 20));
> + if (!dev)
> + goto unlock;
> +
> + for_each_possible_cpu(cpu) {
> + stats = per_cpu_ptr(lun->lun_stats, cpu);
> + octets += stats->tx_data_octets;
> + }
> +
> + ret = snprintf(page, PAGE_SIZE, "%u\n", octets);
> +unlock:
> rcu_read_unlock();
> return ret;
> }
May be that is a time to refactor this file using macro magic?
Something like:
static u64 _target_stat_get_u64_luns_stats(struct se_lun *lun, u64 offset)
{
int cpu;
u64 res = 0;
u8 *stats;
for_each_possible_cpu(cpu) {
const struct scsi_port_stats *pcpu_stats;
pcpu_stats = per_cpu_ptr(lun->lun_stats, cpu);
stats = (u8 *)pcpu_stats;
res += *(u64 *)(stats + offset);
}
return res;
}
#define target_stat_get_u64_luns_stats(LUN, FIELD) \
_target_stat_get_u64_luns_stats(LUN, \
offsetof(struct scsi_port_stats, FIELD))
#define _SYSFS_TGT_PORT_STATS_U64_SHOW(STAT, VAR, LAMBDA) \
static ssize_t target_stat_tgt_port_##STAT##_show( \
struct config_item *item, \
char *page) \
{ \
struct se_lun *lun = to_stat_tgt_port(item); \
struct se_device *dev; \
ssize_t ret = -ENODEV; \
u64 VAR; \
\
rcu_read_lock(); \
dev = rcu_dereference(lun->lun_se_dev); \
if (dev) { \
VAR = target_stat_get_u64_luns_stats(lun, VAR); \
VAR = LAMBDA(VAR); \
ret = snprintf(page, PAGE_SIZE, "%llu\n", VAR); \
} \
rcu_read_unlock(); \
return ret; \
}
#define LAMBDA_NOOP(A) (A)
#define LAMBDA_MBYTES(A) (A >> 20)
#define SYSFS_TGT_PORT_STATS_U64_SHOW(STAT, VAR) \
_SYSFS_TGT_PORT_STATS_U64_SHOW(STAT, VAR, LAMBDA_NOOP)
#define SYSFS_TGT_PORT_STATS_U64_SHOW_MBYTES(STAT, VAR) \
_SYSFS_TGT_PORT_STATS_U64_SHOW(STAT, VAR, LAMBDA_MBYTES)
SYSFS_TGT_PORT_STATS_U64_SHOW(in_cmds, cmd_pdus)
SYSFS_TGT_PORT_STATS_U64_SHOW(in_cmds, hs_cmd_pdus)
SYSFS_TGT_PORT_STATS_U64_SHOW_MBYTES(write_mbytes, rx_data_octets)
SYSFS_TGT_PORT_STATS_U64_SHOW_MBYTES(read_mbytes, tx_data_octets)
> diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
> index c0e429e5ef31..caa95aa6f502 100644
> --- a/drivers/target/target_core_tpg.c
> +++ b/drivers/target/target_core_tpg.c
> @@ -609,12 +609,21 @@ struct se_lun *core_tpg_alloc_lun(
> u64 unpacked_lun)
> {
> struct se_lun *lun;
> + int ret;
>
> lun = kzalloc(sizeof(*lun), GFP_KERNEL);
> if (!lun) {
> pr_err("Unable to allocate se_lun memory\n");
> return ERR_PTR(-ENOMEM);
> }
> +
> + lun->lun_stats = alloc_percpu(struct scsi_port_stats);
> + if (!lun->lun_stats) {
> + pr_err("Unable to allocate se_lun stats memory\n");
> + ret = -ENOMEM;
> + goto free_lun;
> + }
> +
There is dev->xcopy_lun that is used for cmd->se_lun, you have to
allocate and deallocate lun_stats for it to avoid NPE on XCOPY command.
> lun->unpacked_lun = unpacked_lun;
> atomic_set(&lun->lun_acl_count, 0);
> init_completion(&lun->lun_shutdown_comp);
> @@ -628,6 +637,18 @@ struct se_lun *core_tpg_alloc_lun(
> lun->lun_tpg = tpg;
>
> return lun;
> +
> +free_lun:
> + kfree(lun);
> + return ERR_PTR(-ENOMEM);
> +}
> +
> +void target_tpg_free_lun(struct rcu_head *head)
> +{
> + struct se_lun *lun = container_of(head, struct se_lun, rcu_head);
> +
> + free_percpu(lun->lun_stats);
> + kfree(lun);
> }
>
> int core_tpg_add_lun(
> diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
> index 0a76bdfe5528..4ec66ca6c0ca 100644
> --- a/drivers/target/target_core_transport.c
> +++ b/drivers/target/target_core_transport.c
> @@ -1571,7 +1571,7 @@ target_cmd_parse_cdb(struct se_cmd *cmd)
> return ret;
>
> cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE;
> - atomic_long_inc(&cmd->se_lun->lun_stats.cmd_pdus);
> + this_cpu_inc(cmd->se_lun->lun_stats->cmd_pdus);
> return 0;
> }
> EXPORT_SYMBOL(target_cmd_parse_cdb);
> @@ -2597,8 +2597,8 @@ static void target_complete_ok_work(struct work_struct *work)
> !(cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL))
> goto queue_status;
>
> - atomic_long_add(cmd->data_length,
> - &cmd->se_lun->lun_stats.tx_data_octets);
> + this_cpu_add(cmd->se_lun->lun_stats->tx_data_octets,
> + cmd->data_length);
> /*
> * Perform READ_STRIP of PI using software emulation when
> * backend had PI enabled, if the transport will not be
> @@ -2621,14 +2621,14 @@ static void target_complete_ok_work(struct work_struct *work)
> goto queue_full;
> break;
> case DMA_TO_DEVICE:
> - atomic_long_add(cmd->data_length,
> - &cmd->se_lun->lun_stats.rx_data_octets);
> + this_cpu_add(cmd->se_lun->lun_stats->rx_data_octets,
> + cmd->data_length);
> /*
> * Check if we need to send READ payload for BIDI-COMMAND
> */
> if (cmd->se_cmd_flags & SCF_BIDI) {
> - atomic_long_add(cmd->data_length,
> - &cmd->se_lun->lun_stats.tx_data_octets);
> + this_cpu_add(cmd->se_lun->lun_stats->tx_data_octets,
> + cmd->data_length);
> ret = cmd->se_tfo->queue_data_in(cmd);
> if (ret)
> goto queue_full;
> diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
> index c4d9116904aa..e73fb224625d 100644
> --- a/include/target/target_core_base.h
> +++ b/include/target/target_core_base.h
> @@ -744,9 +744,9 @@ struct se_port_stat_grps {
> };
>
> struct scsi_port_stats {
> - atomic_long_t cmd_pdus;
> - atomic_long_t tx_data_octets;
> - atomic_long_t rx_data_octets;
> + u32 cmd_pdus;
> + u32 tx_data_octets;
> + u32 rx_data_octets;
I belive that there is no reason to have 32-bits counters in our
century.
[SPC-5] 7.3.9.2 General Access Statistics and Performance log parameter
has 64-bits counters.
RFC 4455 (MIB SCSI) states that 64bit counters are mandatory for systems
with speed >= 4Gbs.
Especially for t(r)x_data_octets that in 32-bit counter presentation has
just 12 meaning bits actually.
> };
>
> struct se_lun {
> @@ -773,7 +773,7 @@ struct se_lun {
> spinlock_t lun_tg_pt_gp_lock;
>
> struct se_portal_group *lun_tpg;
> - struct scsi_port_stats lun_stats;
> + struct scsi_port_stats __percpu *lun_stats;
> struct config_group lun_group;
> struct se_port_stat_grps port_stat_grps;
> struct completion lun_shutdown_comp;
> --
> 2.47.1
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-07-28 15:14 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-24 0:45 [PATCH 1/1] scsi: target: Move LUN stats to per CPU Mike Christie
2025-07-24 14:26 ` kernel test robot
2025-07-24 15:06 ` Bart Van Assche
2025-07-28 15:08 ` Dmitry Bogdanov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox