public inbox for kdevops@lists.linux.dev
 help / color / mirror / Atom feed
From: Luis Chamberlain <mcgrof@kernel.org>
To: Chuck Lever <cel@kernel.org>, Daniel Gomez <da.gomez@kruces.com>,
	Viacheslav Dubeyko <slava@dubeyko.com>,
	kdevops@lists.linux.dev
Cc: Luis Chamberlain <mcgrof@kernel.org>
Subject: [PATCH 4/4] steady-state: quite a bit of fixes
Date: Thu, 17 Jul 2025 21:05:56 -0700	[thread overview]
Message-ID: <20250718040557.2774976-5-mcgrof@kernel.org> (raw)
In-Reply-To: <20250718040557.2774976-1-mcgrof@kernel.org>

The original implementation still shelled out to a helper script
for the pre-fill instead of open coding it as ansible tasks.
Claude AI did the conversion for me, and with a bit more love I
dealt with the ansible oddities found along the way. Now that we
have an open coded ansible version of the optimal prefill step
with kdevops integration, just remove the script.

Clarify the heuristics of the optimal prefill step in the
SSD_STEADY_STATE_PREFILL_BLOCKSIZE documentation.

Let's also ensure we provide steady state output files, which is
useful to visualize the steady state process for bw and iops.

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 .../roles/steady_state/defaults/main.yml      |  37 +-
 playbooks/roles/steady_state/tasks/main.yaml  | 314 +++++++++++++-
 .../roles/steady_state/templates/ss_bw.ini.j2 |  22 +-
 .../steady_state/templates/ss_iops.ini.j2     |  22 +-
 .../precondition/prefill-fio-jobs.sh          | 390 ------------------
 workflows/steady_state/Kconfig                |  77 +++-
 workflows/steady_state/Makefile               |  24 +-
 7 files changed, 448 insertions(+), 438 deletions(-)
 delete mode 100755 scripts/workflows/precondition/prefill-fio-jobs.sh

diff --git a/playbooks/roles/steady_state/defaults/main.yml b/playbooks/roles/steady_state/defaults/main.yml
index ac5635af..d38ba2b5 100644
--- a/playbooks/roles/steady_state/defaults/main.yml
+++ b/playbooks/roles/steady_state/defaults/main.yml
@@ -1,17 +1,26 @@
 ---
 steady_state_data: "{{ data_path }}/steady_state"
-steady_state_device: "/dev/nvme0n1"
-precondition_blocksize: "128k"
-precondition_iodepth: "32"
-precondition_numjobs: "4"
-precondition_prefill_loop: 2
-steady_state_runtime: "6h"
-steady_state_iops_mean_limit: "20%"
-steady_state_iops_mean_dur: "4h"
-steady_state_iops_slope: "10%"
-steady_state_iops_slope_dur: "4h"
-steady_state_bw_mean_limit: "20%"
-steady_state_bw_mean_dur: "2h"
-steady_state_bw_slope: "10%"
-steady_state_bw_slope_dur: "2h"
+ssd_steady_state_device: "/dev/null"
+ssd_steady_state_prefill_blocksize: "128k"
+ssd_steady_state_iodepth: "32"
+ssd_steady_state_numjobs: "4"
+ssd_steady_state_prefill_loop: 2
+ssd_steady_state_runtime: "6h"
+ssd_steady_state_iops_mean_limit: "20%"
+ssd_steady_state_iops_mean_dur: "4h"
+ssd_steady_state_iops_slope: "10%"
+ssd_steady_state_iops_slope_dur: "4h"
+ssd_steady_state_bw_mean_limit: "20%"
+ssd_steady_state_bw_mean_dur: "2h"
+ssd_steady_state_bw_slope: "10%"
+ssd_steady_state_bw_slope_dur: "2h"
 kdevops_run_ssd_steady_state: False
+
+# Prefill configuration defaults
+ssd_steady_state_prefill_verbose: False
+ssd_steady_state_prefill_max_size: ""
+ssd_steady_state_prefill_physical_bs: ""
+ssd_steady_state_prefill_ioengine: "io_uring"
+ssd_steady_state_prefill_direct: "1"
+ssd_steady_state_prefill_alloc_size: "1048576"
+ssd_steady_state_prefill_extra_args: ""
diff --git a/playbooks/roles/steady_state/tasks/main.yaml b/playbooks/roles/steady_state/tasks/main.yaml
index d94ff987..ba76e715 100644
--- a/playbooks/roles/steady_state/tasks/main.yaml
+++ b/playbooks/roles/steady_state/tasks/main.yaml
@@ -11,6 +11,16 @@
   failed_when: false
   tags: vars
 
+- include_role:
+    name: create_data_partition
+  tags: [ 'data_partition' ]
+
+- name: Ensure fio is installed
+  become: yes
+  package:
+    name: fio
+    state: present
+
 - name: Ensure steady state directory exists
   become: yes
   become_method: sudo
@@ -31,36 +41,315 @@
     - ss_bw.ini
   tags: ['setup']
 
-- name: Run prefill helper and execute fio commands
+- name: Resolve real path of target block device
+  become: yes
+  become_method: sudo
+  ansible.builtin.command: realpath {{ ssd_steady_state_device }}
+  register: real_device_path
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Inform user of target block device for steady_state_device
+  become: yes
+  become_method: sudo
+  ansible.builtin.debug:
+    msg:
+      - "Target device is {{ ssd_steady_state_device }} real device is: {{ real_device_path.stdout }}"
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Check if target is a block device
+  become: yes
+  become_method: sudo
+  ansible.builtin.stat:
+    path: "{{ real_device_path.stdout }}"
+  register: device_stat
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Fail if target is not a block device
+  become: yes
+  become_method: sudo
+  ansible.builtin.fail:
+    msg: "Target {{ ssd_steady_state_device }} must be a block device"
+  when:  # a missing path has no isblk key, so default(false) rejects it as well
+    - kdevops_run_ssd_steady_state | bool
+    - device_stat.stat is defined
+    - not (device_stat.stat.isblk | default(false))
+  tags: ['prefill']
+
+- name: Get device basename from resolved symlink
+  become: yes
+  become_method: sudo
+  set_fact:
+    device_basename: "{{ real_device_path.stdout | basename }}"
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Inform user of short basename of target device
+  become: yes
+  become_method: sudo
+  ansible.builtin.debug:
+    msg:
+      - "Target device base name is {{ device_basename }}"
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Read block device properties from sysfs
+  become: yes
+  become_method: sudo
+  ansible.builtin.slurp:
+    src: "{{ item.path }}"
+  loop:
+    - { path: "/sys/block/{{ device_basename }}/queue/physical_block_size", var: "device_pbs_raw" }
+    - { path: "/sys/block/{{ device_basename }}/queue/optimal_io_size", var: "device_optimal_io_raw" }
+  register: device_sysfs_read
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Get device capacity
+  become: yes
+  become_method: sudo
+  ansible.builtin.command: /usr/sbin/blockdev --getsize64 {{ ssd_steady_state_device }}
+  register: device_capacity_raw
+  changed_when: false
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Set block device properties as facts
+  become: yes
+  become_method: sudo
+  set_fact:
+    device_physical_block_size: >-
+      {{
+        (device_sysfs_read.results |
+         selectattr('item.var', 'equalto', 'device_pbs_raw') |
+         first).content | b64decode | trim | int
+      }}
+    device_optimal_io_size: >-
+      {{
+        (device_sysfs_read.results |
+         selectattr('item.var', 'equalto', 'device_optimal_io_raw') |
+         first).content | b64decode | trim | int
+      }}
+    device_capacity: "{{ device_capacity_raw.stdout | int }}"
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Calculate effective configuration values
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:  # explicit overrides win, otherwise fall back to the probed device values
+    effective_physical_bs: "{{ ssd_steady_state_prefill_physical_bs | default(device_physical_block_size, true) | int }}"
+    effective_max_size: "{{ (ssd_steady_state_prefill_max_size | human_to_bytes) if (ssd_steady_state_prefill_max_size | default('', true)) else (device_capacity | int) }}"
+    effective_blocksize: >-  # one expression: no stray whitespace, and optimal_io_size is compared as an integer
+      {{
+        (ssd_steady_state_prefill_blocksize | human_to_bytes)
+        if ssd_steady_state_prefill_blocksize
+        else ((device_optimal_io_size | int)
+              if (device_optimal_io_size | int) != 0
+              else (device_physical_block_size | int))
+      }}
+    effective_jobs: "{{ ssd_steady_state_numjobs | default(ansible_processor_vcpus, true) | int }}"
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+# Guard and tag these like every other prefill step: both tasks reference
+# facts that are only set when kdevops_run_ssd_steady_state is enabled.
+- name: Fix syntax for effective_blocksize
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:
+    effective_blocksize: "{{ effective_blocksize | trim | int }}"
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Debug type of device_optimal_io_size
+  debug:
+    msg: "device_optimal_io_size={{ device_optimal_io_size }} ({{ device_optimal_io_size | type_debug }})"
+  when: kdevops_run_ssd_steady_state | bool
+  tags: ['prefill']
+
+- name: Validate capacity alignment to physical block size
+  become: yes
+  become_method: sudo
+  ansible.builtin.fail:
+    msg: "Device capacity {{ effective_max_size }} not aligned to physical block size {{ effective_physical_bs }}"
+  when:
+    - kdevops_run_ssd_steady_state|bool
+    - (effective_max_size | int) % (effective_physical_bs | int) != 0
+  tags: ['prefill']
+
+- name: Validate blocksize alignment to physical block size
+  become: yes
+  become_method: sudo
+  ansible.builtin.fail:
+    msg: "Block size {{ effective_blocksize }} not aligned to physical block size {{ effective_physical_bs }}"
+  when:
+    - kdevops_run_ssd_steady_state|bool
+    - (effective_blocksize | int) % (effective_physical_bs | int) != 0
+  tags: ['prefill']
+
+- name: Calculate prefill job distribution
   become: yes
   become_method: sudo
-  shell: |
-    {{ topdir_path }}/scripts/workflows/precondition/prefill-fio-jobs.sh \
-      --target {{ steady_state_device }} \
-      --blocksize {{ precondition_blocksize }} \
-      --jobs {{ precondition_numjobs }} \
-      --verbose > {{ steady_state_data }}/prefill.cmd
-    grep '^fio' {{ steady_state_data }}/prefill.cmd | bash
-  args:
-    executable: /bin/bash
+  ansible.builtin.set_fact:
+    aligned_pbs_blocks: "{{ (effective_max_size | int) // (effective_physical_bs | int) }}"
+    aligned_bs_per_block: "{{ (effective_blocksize | int) // (effective_physical_bs | int) }}"
   when: kdevops_run_ssd_steady_state|bool
   tags: ['prefill']
 
+- name: Calculate aligned blocks and job distribution
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:
+    aligned_blocks: "{{ aligned_pbs_blocks | int // aligned_bs_per_block | int }}"
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Calculate job alignment
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:
+    blocks_per_job_remainder: "{{ aligned_blocks | int % effective_jobs | int }}"
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Set aligned job distribution
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:  # cast before comparing: a fact holding the string "0" never equals integer 0
+    aligned_jobs: "{{ effective_jobs if (blocks_per_job_remainder | int) == 0 else (effective_jobs | int - 1) }}"
+    unaligned_jobs: "{{ 0 if (blocks_per_job_remainder | int) == 0 else 1 }}"
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Set aligned blocks per job
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:  # divide across aligned_jobs only when one job is reserved for the remainder
+    aligned_blocks_per_job: "{{ aligned_blocks | int // aligned_jobs | int if (blocks_per_job_remainder | int) != 0 else aligned_blocks | int // effective_jobs | int }}"
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Calculate byte distribution for jobs
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:
+    aligned_block_bytes: "{{ aligned_blocks_per_job | int * effective_blocksize | int }}"
+    total_aligned_block_bytes: "{{ aligned_blocks_per_job | int * effective_blocksize | int * aligned_jobs | int }}"
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Calculate remainder bytes for unaligned job
+  become: yes
+  become_method: sudo
+  ansible.builtin.set_fact:
+    remainder_block_bytes: "{{ effective_max_size | int - total_aligned_block_bytes | int }}"
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Validate job byte alignment to blocksize
+  become: yes
+  become_method: sudo
+  ansible.builtin.fail:
+    msg: "Aligned job bytes {{ aligned_block_bytes }} not aligned to blocksize {{ effective_blocksize }}"
+  when:
+    - kdevops_run_ssd_steady_state|bool
+    - (aligned_block_bytes | int) % (effective_blocksize | int) != 0
+  tags: ['prefill']
+
+- name: Validate job byte alignment to physical block size
+  become: yes
+  become_method: sudo
+  ansible.builtin.fail:
+    msg: "Aligned job bytes {{ aligned_block_bytes }} not aligned to physical block size {{ effective_physical_bs }}"
+  when:
+    - kdevops_run_ssd_steady_state|bool
+    - (aligned_block_bytes | int) % (effective_physical_bs | int) != 0
+  tags: ['prefill']
+
+- name: Validate remainder alignment to physical block size
+  become: yes
+  become_method: sudo
+  ansible.builtin.fail:
+    msg: "Remainder bytes {{ remainder_block_bytes }} not aligned to physical block size {{ effective_physical_bs }}"
+  when:
+    - kdevops_run_ssd_steady_state|bool
+    - remainder_block_bytes | int != 0
+    - (remainder_block_bytes | int) % (effective_physical_bs | int) != 0
+  tags: ['prefill']
+
+- name: Run main prefill fio job for aligned data
+  become: yes
+  become_method: sudo
+  ansible.builtin.command: >-
+    fio --filename={{ ssd_steady_state_device }}
+    --direct={{ ssd_steady_state_prefill_direct }}
+    --name=drive-pre-fill-aligned-to-bs
+    --readwrite=write
+    --ioengine={{ ssd_steady_state_prefill_ioengine }}
+    --group_reporting=1
+    --alloc-size={{ ssd_steady_state_prefill_alloc_size }}
+    --numjobs={{ aligned_jobs }}
+    --offset_increment={{ aligned_block_bytes }}
+    --size={{ aligned_block_bytes }}
+    --blocksize={{ effective_blocksize }}
+    {{ ssd_steady_state_prefill_extra_args }}
+  when:
+    - kdevops_run_ssd_steady_state|bool
+    - aligned_jobs | int > 0
+  tags: ['prefill']
+
+- name: Run remainder prefill fio job for unaligned data
+  become: yes
+  become_method: sudo
+  ansible.builtin.command: >-
+    fio --filename={{ ssd_steady_state_device }}
+    --direct={{ ssd_steady_state_prefill_direct }}
+    --name=drive-pre-fill-aligned-to-pbs
+    --readwrite=write
+    --ioengine={{ ssd_steady_state_prefill_ioengine }}
+    --group_reporting=1
+    --offset={{ total_aligned_block_bytes }}
+    --size={{ remainder_block_bytes }}
+    --blocksize={{ effective_physical_bs }}
+    {{ ssd_steady_state_prefill_extra_args }}
+  when:
+    - kdevops_run_ssd_steady_state|bool
+    - unaligned_jobs | int > 0
+    - remainder_block_bytes | int > 0
+  tags: ['prefill']
+
 - name: Run fio steady state for iops
   become: yes
   become_method: sudo
-  command: fio {{ steady_state_data }}/ss_iops.ini
+  ansible.builtin.command: >-
+    fio {{ steady_state_data }}/ss_iops.ini
+    --blocksize={{ effective_blocksize }}
+    --warnings-fatal
+    --output-format=json+
+    --output={{ steady_state_data }}/ss_iops.json
   when: kdevops_run_ssd_steady_state|bool
   tags: ['steady_state']
 
 - name: Run fio steady state for bw
   become: yes
   become_method: sudo
-  command: fio {{ steady_state_data }}/ss_bw.ini
+  ansible.builtin.command: >-
+    fio {{ steady_state_data }}/ss_bw.ini
+    --blocksize={{ effective_blocksize }}
+    --warnings-fatal
+    --output-format=json+
+    --output={{ steady_state_data }}/ss_bw.json
   when: kdevops_run_ssd_steady_state|bool
   tags: ['steady_state']
 
+- name: Create local results directory
+  delegate_to: localhost
+  ansible.builtin.file:
+    path: "{{ topdir_path }}/workflows/steady_state/results/{{ inventory_hostname }}/"
+    state: directory
+    mode: '0755'
+  run_once: false
+  tags: ['results']
+
 - name: Copy steady state results to controller
+  delegate_to: localhost
   ansible.posix.synchronize:
     src: "{{ steady_state_data }}/"
     dest: "{{ topdir_path }}/workflows/steady_state/results/{{ inventory_hostname }}/"
@@ -68,6 +357,5 @@
     recursive: true
     rsync_opts:
       - "--ignore-existing"
-  delegate_to: localhost
   when: kdevops_run_ssd_steady_state|bool
   tags: ['results']
diff --git a/playbooks/roles/steady_state/templates/ss_bw.ini.j2 b/playbooks/roles/steady_state/templates/ss_bw.ini.j2
index e54eb968..84dc1346 100644
--- a/playbooks/roles/steady_state/templates/ss_bw.ini.j2
+++ b/playbooks/roles/steady_state/templates/ss_bw.ini.j2
@@ -9,23 +9,27 @@ buffered=0
 norandommap
 refill_buffers
 
-bs={{ precondition_blocksize }}
-iodepth={{ precondition_iodepth }}
-numjobs={{ precondition_numjobs }}
-filename={{ steady_state_device }}
+# We actually compute this at run time and override it on the command line.
+# We keep the value you had only if you set SSD_STEADY_STATE_PREFILL_BLOCKSIZE
+# to something not empty, your value: {{ ssd_steady_state_prefill_blocksize }}
+# bs={{ ssd_steady_state_prefill_blocksize }}
+
+iodepth={{ ssd_steady_state_iodepth }}
+numjobs={{ ssd_steady_state_numjobs }}
+filename={{ ssd_steady_state_device }}
 
 exitall_on_error
 continue_on_error=none
 
 rw=randwrite
 
-runtime={{ steady_state_runtime }}
+runtime={{ ssd_steady_state_runtime }}
 [steady-state-mean-bw]
-ss=bw:{{ steady_state_bw_mean_limit }}
-ss_dur={{ steady_state_bw_mean_dur }}
+ss=bw:{{ ssd_steady_state_bw_mean_limit }}
+ss_dur={{ ssd_steady_state_bw_mean_dur }}
 
 [steady-state-slope-bw]
 new_group
 group_reporting
-ss=bw_slope:{{ steady_state_bw_slope }}
-ss_dur={{ steady_state_bw_slope_dur }}
+ss=bw_slope:{{ ssd_steady_state_bw_slope }}
+ss_dur={{ ssd_steady_state_bw_slope_dur }}
diff --git a/playbooks/roles/steady_state/templates/ss_iops.ini.j2 b/playbooks/roles/steady_state/templates/ss_iops.ini.j2
index 8410fefa..b8407fa2 100644
--- a/playbooks/roles/steady_state/templates/ss_iops.ini.j2
+++ b/playbooks/roles/steady_state/templates/ss_iops.ini.j2
@@ -9,23 +9,27 @@ buffered=0
 norandommap
 refill_buffers
 
-bs={{ precondition_blocksize }}
-iodepth={{ precondition_iodepth }}
-numjobs={{ precondition_numjobs }}
-filename={{ steady_state_device }}
+# We actually compute this at run time and override it on the command line.
+# We keep the value you had only if you set SSD_STEADY_STATE_PREFILL_BLOCKSIZE
+# to something not empty, your value: {{ ssd_steady_state_prefill_blocksize }}
+# bs={{ ssd_steady_state_prefill_blocksize }}
+
+iodepth={{ ssd_steady_state_iodepth }}
+numjobs={{ ssd_steady_state_numjobs }}
+filename={{ ssd_steady_state_device }}
 
 exitall_on_error
 continue_on_error=none
 
 rw=randwrite
 
-runtime={{ steady_state_runtime }}
+runtime={{ ssd_steady_state_runtime }}
 [steady-state-mean-iops]
-ss=iops:{{ steady_state_iops_mean_limit }}
-ss_dur={{ steady_state_iops_mean_dur }}
+ss=iops:{{ ssd_steady_state_iops_mean_limit }}
+ss_dur={{ ssd_steady_state_iops_mean_dur }}
 
 [steady-state-slope-iops]
 new_group
 group_reporting
-ss=iops_slope:{{ steady_state_iops_slope }}
-ss_dur={{ steady_state_iops_slope_dur }}
+ss=iops_slope:{{ ssd_steady_state_iops_slope }}
+ss_dur={{ ssd_steady_state_iops_slope_dur }}
diff --git a/scripts/workflows/precondition/prefill-fio-jobs.sh b/scripts/workflows/precondition/prefill-fio-jobs.sh
deleted file mode 100755
index 49abb6e7..00000000
--- a/scripts/workflows/precondition/prefill-fio-jobs.sh
+++ /dev/null
@@ -1,390 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: copyleft-next-0.3.1
-
-# Where we stuff the extra arguments we don't understand to fio
-declare -a EXTRA_FIO_ARGS
-
-VERBOSE=""
-
-BLOCKSIZE=""
-PHYSICAL_BLOCK_SIZE=""
-MAX_SIZE=""
-TARGET=""
-JOBS=""
-
-ALIGNED_PBS_BLOCKS=""
-ALIGNED_BS_PER_BLOCK=""
-
-ALIGNED_BLOCKS=""
-ALIGNED_BLOCKS_JOBS=""
-UNALIGNED_BLOCKS_JOBS=""
-
-ALIGNED_JOBS=""
-UNALIGNED_JOBS=""
-
-ALIGNED_BLOCK_BYTES=""
-TOTAL_ALIGNED_BLOCK_BYTES=""
-REMAINDER_BLOCK_BYTES=""
-
-# For the default IO we use we want the optimal IO, it should already be
-# aligned to the logical block size. We should review what userspace has
-# used since the old days first though.
-#
-# We have stat-size.h in gnulib:
-#
-# https://github.com/coreutils/gnulib/blob/master/lib/stat-size.h
-#
-# In userspace gnulib is used by coreutils stat binary. In gnulib stat-size.h
-# first there is DEV_BSIZE with BSIZE and BBSIZE inheritence from unix but this
-# is ancient and has a default of 4096 bytes if neither of these are defined.
-# Then we have STP_BLKSIZE(stat) which gets us the st_blksize with a current
-# arbitrary limitation of SIZE_MAX / 8 + 1 (512 MiB 64-bit). That is with
-# stat --format="%o". This suffices for us today, even if you use a large block
-# size filesystem, you get what you expect, the filesystem block size instead of
-# the logical block size. So stat() and using st_blksize suffices for files.
-#
-# The same stat call on block devices will get the logical block size and so we
-# must cat the actual queue's physical block size to ensure to avoid any
-# read-modify-write implications, but we can do better for block devices if
-# want to use a default target block size: the real optiomal IO. Even though
-# userspace stat says that $(stat --format="%o" gets the optimal IO size, this
-# is not that accurate. Since userspace has adopted st_blksize for optimal IO
-# though we should evaluate with the community if we should have parity and also
-# use the physical block size for block devices when the physical block device >
-# logical block device so to bring parity on strategy for both block device and
-# files.  An example existing set of devices would be devices which are exposed
-# in the cloud with a large atomic and smaller logical block size. On NVMe it
-# would be those NVMe drives with a larger atomic as well.
-#
-# We also have the new direct IO STATX_DIOALIGN but is useless as it returns 512
-# bytes even on files when the block size on the filesystem is larger on XFS.
-# It is however is useful to tell if a block device or filesystem supports
-# direct IO, but only for filesystems which support it. So we should fix it.
-#
-# The physical block size is always larger and a power of 2 than the logical
-# block size and so always aligned to the logical block size. It also ensures
-# we avoid read-modify-writes, and so we refer to it for the alignment
-# constraint we want to consider.
-
-usage()
-{
-	echo "$0 - wrapper for fio to prefill"
-	echo "--help           - Shows this menu"
-	echo "--target         - Required either truncated file or target block device file such as /dev/nvme0n1"
-	echo "--blocksize      - Use this as the max blocksize in fio"
-	echo "--max-size       - Use this as the either the drive's max capacity or file size instead"
-	echo "--physical-bs    - Use this as the drive's physical block size instead"
-	echo "--verbose        - Be verbose when debugging"
-	echo "--jobs           - How many threads to consider"
-	echo "--help           - Print help menu"
-	echo ""
-	echo "Examples:"
-	echo ""
-	echo "Uses 2 MiB to pre-fill an NVMe drive:"
-	echo "$0 --target /dev/nvme0n1 --blocksize 2097152"
-	echo ""
-	echo "The physical block size represents the size to align writes to"
-	echo "so to avoid read-modify-writes on flash storage."
-	echo ""
-	echo "If a block device is given then by default:"
-	echo "  - we use the drive's queue/optimal_io_size for blocksize if"
-	echo "    it is not zero, otherwize we use the physical block size"
-	echo "  - we use the drive's capacity as the max size."
-	echo "  - we use the drive's physical block size for its physical block size"
-	echo ""
-	echo "If a regular file is given then by default:"
-	echo "  - we use the file's returned st_blksize from stat() for both the"
-	echo "    block size and physical block as gathered with:"
-	echo "    stat printf=\"%o\" file"
-	echo "  - we compute the max size as the file's size."
-	echo ""
-	echo "Note that all parameters which we do not understand we'll just"
-	echo "pass long to fio so it can use them."
-}
-
-copy_to_fio_extra_args()
-{
-	FIO_EXTRA_ARGS+=" $1"
-}
-
-debug()
-{
-	echo "target: $TARGET"
-	echo "pbs: $PHYSICAL_BLOCK_SIZE"
-	echo "size: $MAX_SIZE"
-	echo "bs: $BLOCKSIZE"
-	echo "jobs: $JOBS"
-	echo ""
-	echo "aligned-pbs-blocks: $ALIGNED_PBS_BLOCKS"
-	echo "aligned-bs-per-block: $ALIGNED_BS_PER_BLOCK"
-	echo ""
-	echo "aligned-blocks: $ALIGNED_BLOCKS"
-	echo "aligned-block-jobs: $ALIGNED_BLOCKS_JOBS"
-	echo "unaligned-block-jobs: $UNALIGNED_BLOCKS_JOBS"
-	echo ""
-	echo "aligned-jobs: $ALIGNED_JOBS"
-	echo "unaligned-jobs: $UNALIGNED_JOBS"
-	echo ""
-	echo "aligned-bytes-per-job: $ALIGNED_BLOCK_BYTES"
-	echo "total-aligned-bytes: $TOTAL_ALIGNED_BLOCK_BYTES"
-	echo "remainder--block-bytes: $REMAINDER_BLOCK_BYTES"
-	if [[ "$((TOTAL_ALIGNED_BLOCK_BYTES + REMAINDER_BLOCK_BYTES))" != "$MAX_SIZE" ]]; then
-		echo "total-aligned-bytes + remainder-block-bytes != size ------> BUG!"
-	else
-		echo "total-aligned-bytes + remainder-block-bytes == size ------> OK!"
-	fi
-	echo ""
-	echo ""
-}
-
-parse_args()
-{
-	while [[ ${#1} -gt 0 ]]; do
-		key="$1"
-
-		case $key in
-		--target)
-			TARGET="$2"
-			shift
-			shift
-			;;
-		--blocksize)
-			BLOCKSIZE="$2"
-			shift
-			shift
-			;;
-		--max-size)
-			MAX_SIZE="$2"
-			shift
-			shift
-			;;
-		--physical-bs)
-			PHYSICAL_BLOCK_SIZE="$2"
-			shift
-			shift
-			;;
-		--verbose)
-			VERBOSE="true"
-			shift
-			;;
-		--jobs)
-			JOBS="$2"
-			shift
-			;;
-		--help)
-			usage
-			exit 0
-			;;
-		*)
-			copy_to_fio_extra_args $key
-			shift
-			;;
-		esac
-	done
-}
-
-parse_args $@
-
-if [[ "$TARGET" == "" ]]; then
-	echo "You need to specify --target"
-	echo ""
-	usage
-	exit 1
-fi
-
-if [[ "$JOBS" == "" ]]; then
-	JOBS="$(nproc)"
-fi
-
-if [[ -b $TARGET ]] then
-	if [[ $(id -u) != "0" ]]; then
-		echo "Must be root to work on block devices"
-		exit
-	fi
-
-	DEVNAME="$(basename $TARGET)"
-	PBS_FILE="/sys/block/$DEVNAME/queue/physical_block_size"
-	MAX_IO_FILE="/sys/block/$DEVNAME/queue/optimal_io_size"
-
-	if [[ ! -f $PBS_FILE ]]; then
-		echo "Not found: $PBS_FILE"
-		exit 1
-	fi
-
-	if [[ ! -f $MAX_IO_FILE ]]; then
-		echo "Not found: $MAX_IO_FILE"
-		exit 1
-	fi
-
-	BDEV_PHYSICAL_BLOCK_SIZE="$(cat $PBS_FILE)"
-	BDEV_MAX_IO="$(cat $MAX_IO_FILE)"
-	BDEV_MAX_SIZE="$(/usr/sbin/blockdev --getsize64 $TARGET)"
-
-	if [[ "$PHYSICAL_BLOCK_SIZE" == "" ]]; then
-		PHYSICAL_BLOCK_SIZE=$BDEV_PHYSICAL_BLOCK_SIZE
-	fi
-
-	if [[ "$BLOCKSIZE" == "" ]]; then
-		if [[ "$BDEV_MAX_IO" != "0" ]]; then
-			BLOCKSIZE=$BDEV_MAX_IO
-		else
-			BLOCKSIZE=$PHYSICAL_BLOCK_SIZE
-		fi
-	fi
-
-	if [[ "$MAX_SIZE" == "" ]]; then
-		MAX_SIZE=$BDEV_MAX_SIZE
-	fi
-elif [[ -f $TARGET ]] then
-	echo "Implement me"
-	exit 1
-else
-	echo "Target must be a block device or file"
-	echo ""
-	usage
-	exit 1
-fi
-
-# These get natural numbers on purpose, we round down
-ALIGNED_PBS_BLOCKS="$((MAX_SIZE / PHYSICAL_BLOCK_SIZE))"
-UNALIGNED_PBS_BLOCKS="$((MAX_SIZE % PHYSICAL_BLOCK_SIZE))"
-if [[ "$UNALIGNED_PBS_BLOCKS" != "0" ]]; then
-	echo "Odd, capacity not aligned to physical block size"
-	echo ""
-	echo "$MAX_SIZE % $PHYSICAL_BLOCK_SIZE = $UNALIGNED_PBS_BLOCKS"
-	echo ""
-	echo "It should be:"
-	echo "$MAX_SIZE % $PHYSICAL_BLOCK_SIZE = 0"
-	exit 1
-fi
-
-ALIGNED_BS_PER_BLOCK="$((BLOCKSIZE / PHYSICAL_BLOCK_SIZE))"
-UNALIGNED_BS_PER_BLOCK="$((BLOCKSIZE % PHYSICAL_BLOCK_SIZE))"
-if [[ "$UNALIGNED_BS_PER_BLOCK" != "0" ]]; then
-	echo "Odd, block size not aligned to physical block size. We have:"
-	echo ""
-	echo "$BLOCKSIZE % $PHYSICAL_BLOCK_SIZE = $UNALIGNED_BS_PER_BLOCK"
-	echo ""
-	echo "It should be:"
-	echo "$BLOCKSIZE % $PHYSICAL_BLOCK_SIZE = 0"
-	exit 1
-fi
-
-# These are the number of blocks at blocksize which are aligned to the
-# physical block size. We need next to divid this by the number of jobs
-# we have been asked to use.
-ALIGNED_BLOCKS="$((ALIGNED_PBS_BLOCKS / ALIGNED_BS_PER_BLOCK))"
-
-# We expect capacity / jobs to not always be aligned to the target block size
-# we want to operate, and so we must work with a different block size which
-# does align to the block device or file for that task. Only one thread is used
-# here by default as it should be a small amount of data.
-CHECK_UNALIGNED_BLOCKS_JOBS_MAX="$((ALIGNED_BLOCKS % JOBS))"
-if [[ "$CHECK_UNALIGNED_BLOCKS_JOBS_MAX" == 0 ]]; then
-	ALIGNED_BLOCKS_JOBS="$CHECK_UNALIGNED_BLOCKS_JOBS_MAX"
-	UNALIGNED_BLOCKS_JOBS=""
-	ALIGNED_JOBS=$JOBS
-	UNALIGNED_JOBS="0"
-else
-	UNALIGNED_JOBS="1"
-	ALIGNED_JOBS="$((JOBS - UNALIGNED_JOBS))"
-	ALIGNED_BLOCKS_JOBS="$((ALIGNED_BLOCKS / ALIGNED_JOBS))"
-	UNALIGNED_BLOCKS_JOBS="$((ALIGNED_BLOCKS % ALIGNED_JOBS))"
-fi
-
-# This is the amount of bytes each thread which is working on aligned blocksize.
-# The $ALIGNED_JOBS can be used for this.
-ALIGNED_BLOCK_BYTES="$((ALIGNED_BLOCKS_JOBS * BLOCKSIZE))"
-
-TOTAL_ALIGNED_BLOCK_BYTES="$((ALIGNED_BLOCK_BYTES * $ALIGNED_JOBS))"
-
-# This should be dealt with on a separate fio task with $UNALIGNED_JOBS jobs.
-# It may be that we don't need it, if the stars aligned too.
-REMAINDER_BLOCK_BYTES="$((MAX_SIZE - $TOTAL_ALIGNED_BLOCK_BYTES))"
-
-CHECK_UNALIGNED_BLOCK_BYTES="$((ALIGNED_BLOCK_BYTES % BLOCKSIZE))"
-if [[ "$CHECK_UNALIGNED_BLOCK_BYTES" != "0" ]]; then
-	echo "The entire job each thread should work on should be aligned to the target blocksize"
-	echo "We got:"
-	echo "$ALIGNED_BLOCK_BYTES % $BLOCKSIZE = $CHECK_UNALIGNED_BLOCK_BYTES"
-	exit 1
-fi
-
-CHECK_UNALIGNED_BLOCK_BYTES_PBS="$((ALIGNED_BLOCK_BYTES % PHYSICAL_BLOCK_SIZE))"
-if [[ "$CHECK_UNALIGNED_BLOCK_BYTES_PBS" != "0" ]]; then
-	echo "The entire job each thread should work on should be aligned to the target physical block size"
-	echo "We got:"
-	echo "$ALIGNED_BLOCK_BYTES % $PHYSICAL_BLOCK_SIZE = $CHECK_UNALIGNED_BLOCK_BYTES_PBS"
-	exit 1
-fi
-
-if [[ "$REMAINDER_BLOCK_BYTES" != "0" ]]; then
-	# We only care about aligning to the physical block size, as the
-	# data remaining could be smaller than the desired block size.
-	CHECK_UNALIGNED_REMAINDER_BLOCK_BYTES_PBS="$((REMAINDER_BLOCK_BYTES % PHYSICAL_BLOCK_SIZE))"
-	if [[ "$CHECK_UNALIGNED_REMAINDER_BLOCK_BYTES_PBS" != "0" ]]; then
-		echo "The entire job each thread should work on should be aligned to the target physical block size"
-		echo "We got:"
-		echo "$REMAINDER_BLOCK_BYTES % $PHYSICAL_BLOCK_SIZE = $CHECK_UNALIGNED_REMAINDER_BLOCK_BYTES_PBS"
-		exit 1
-	fi
-fi
-
-if [[ "$VERBOSE" == "true" ]]; then
-	debug
-fi
-
-if [[ "$UNALIGNED_JOBS" == "0" && "$REMAINDER_BLOCK_BYTES" != "0" ]]; then
-	echo "If we have no unaligned jobs to run the remainder block bytes should be 0 too."
-	echo
-	debug
-	exit 1
-fi
-
-if [[ "$((TOTAL_ALIGNED_BLOCK_BYTES + REMAINDER_BLOCK_BYTES))" != "$MAX_SIZE" ]]; then
-	echo "Unexpected computation, this $0 is buggy..."
-	echo
-	debug
-	exit 1
-fi
-
-# This would be the manual math we'd do with the fio output to verify
-# correctness, so just do that before giving the output.
-FIO_SUM_TOTAL_BYTES="$(((ALIGNED_BLOCK_BYTES * ALIGNED_JOBS) + REMAINDER_BLOCK_BYTES))"
-if [[ "$FIO_SUM_TOTAL_BYTES" != "$MAX_SIZE" ]]; then
-	echo "Unexpected final result $0 is buggy..."
-	echo "We expected ( $ALIGNED_BLOCK_BYTES * $ALIGNED_JOBS) + $REMAINDER_BLOCK_BYTES == $MAX_SIZE"
-	echo "We got ( $ALIGNED_BLOCK_BYTES * $ALIGNED_JOBS) + $REMAINDER_BLOCK_BYTES == $FIO_SUM_TOTAL_BYTES"
-	echo
-	debug
-	exit 1
-fi
-
-if [[ "$VERBOSE" == "true" ]]; then
-	echo "Fio command for all aligned data at blocksize $BLOCKSIZE using $ALIGNED_JOBS jobs":
-	echo ""
-fi
-
-# XXX: use only memory  for --alloc-size based on the amount of memory actually
-# needed per job. This default of 1 GiB should get us up to 128 threads for now.
-FIO_CMD="fio --filename=$TARGET -direct=1 -name drive-pre-fill-aligned-to-bs "
-FIO_CMD="$FIO_CMD --readwrite=write --ioengine=io_uring --group_reporting=1 "
-FIO_CMD="$FIO_CMD --alloc-size=1048576 --numjobs=$ALIGNED_JOBS"
-FIO_CMD="$FIO_CMD --offset_increment=$ALIGNED_BLOCK_BYTES --size=$ALIGNED_BLOCK_BYTES "
-FIO_CMD="$FIO_CMD --blocksize=$BLOCKSIZE "
-
-echo $FIO_CMD $EXTRA_FIO_ARGS
-
-if [[ "$VERBOSE" == "true" ]]; then
-	echo ""
-	echo "Fio command for all remaining data which needs to be issued at $PHYSICAL_BLOCK_SIZE":
-	echo ""
-fi
-
-if [[ "$UNALIGNED_JOBS" != "0" ]]; then
-	FIO_CMD="fio --filename=$TARGET -direct=1 -name drive-pre-fill-aligned-to-pbs "
-	FIO_CMD="$FIO_CMD --readwrite=write --ioengine=io_uring --group_reporting=1 "
-	FIO_CMD="$FIO_CMD --offset=$TOTAL_ALIGNED_BLOCK_BYTES --size=$REMAINDER_BLOCK_BYTES"
-	FIO_CMD="$FIO_CMD --blocksize=$PHYSICAL_BLOCK_SIZE"
-	echo $FIO_CMD $EXTRA_FIO_ARGS
-fi
diff --git a/workflows/steady_state/Kconfig b/workflows/steady_state/Kconfig
index 24810406..fd31ff0a 100644
--- a/workflows/steady_state/Kconfig
+++ b/workflows/steady_state/Kconfig
@@ -15,10 +15,14 @@ config SSD_STEADY_STATE_DEVICE
 config SSD_STEADY_STATE_PREFILL_BLOCKSIZE
     string "Prefill blocksize"
     output yaml
-    default "128k"
+    default ""
     help
       Block size used during the prefill step before steady
-      state verification.
+      state verification. If empty we will try to infer this based first on
+      the device queue/optimal_io_size and if that is not available fall back
+      to the queue/physical_block_size which should always exist. The final
+      computed block size is kept at run time on the effective_blocksize
+      variable and computed on the task 'Calculate effective configuration values'
 
 config SSD_STEADY_STATE_IODEPTH
     string "Prefill iodepth"
@@ -103,3 +107,72 @@ config SSD_STEADY_STATE_BW_SLOPE_DUR
     default "2h"
     help
       Duration the throughput slope criterion must hold.
+
+menu "Prefill configuration options"
+
+config SSD_STEADY_STATE_PREFILL_VERBOSE
+    bool "Enable verbose prefill output"
+    output yaml
+    default n
+    help
+      Enable verbose debug output during the prefill operation.
+      This provides detailed information about alignment calculations
+      and fio command generation.
+
+config SSD_STEADY_STATE_PREFILL_MAX_SIZE
+    string "Maximum size for prefill operation"
+    output yaml
+    default ""
+    help
+      Override the automatic detection of device capacity.
+      If empty, the prefill step will use the full capacity of block devices
+      or file size for regular files. Use suffixes like "1G", "100M", etc.
+
+config SSD_STEADY_STATE_PREFILL_PHYSICAL_BS
+    string "Physical block size override"
+    output yaml
+    default ""
+    help
+      Override the automatic detection of physical block size.
+      If empty, the prefill step will read from /sys/block/*/queue/physical_block_size
+      for block devices. Must be a power of 2 and align with the blocksize.
+
+config SSD_STEADY_STATE_PREFILL_IOENGINE
+    string "FIO I/O engine for prefill"
+    output yaml
+    default "io_uring"
+    help
+      The FIO I/O engine to use for prefill operations.
+      Common options: io_uring, libaio, psync, sync.
+      io_uring provides the best performance on modern kernels.
+
+config SSD_STEADY_STATE_PREFILL_DIRECT
+    string "FIO direct I/O setting"
+    output yaml
+    default "1"
+    help
+      Enable or disable direct I/O for prefill operations.
+      1 = direct I/O (bypass page cache)
+      0 = buffered I/O (use page cache)
+      Direct I/O is recommended for accurate device testing.
+
+config SSD_STEADY_STATE_PREFILL_ALLOC_SIZE
+    string "FIO memory allocation size per job"
+    output yaml
+    default "1048576"
+    help
+      Memory allocation size for fio, in KiB (the unit of fio's --alloc-size).
+      This controls how much shared memory fio sets aside for its own use.
+      Default of 1048576 KiB (1 GiB) should support up to 128 threads.
+      Increase if using more jobs or experiencing memory pressure.
+
+config SSD_STEADY_STATE_PREFILL_EXTRA_ARGS
+    string "Additional FIO arguments for prefill"
+    output yaml
+    default ""
+    help
+      Additional arguments to pass to FIO during prefill operations.
+      These will be appended to the generated FIO command line.
+      Example: "--gtod_reduce=1 --norandommap"
+
+endmenu
diff --git a/workflows/steady_state/Makefile b/workflows/steady_state/Makefile
index 6d395021..514bd474 100644
--- a/workflows/steady_state/Makefile
+++ b/workflows/steady_state/Makefile
@@ -1,13 +1,35 @@
-PHONY += steady-state steady-state-help-menu
+PHONY += steady-state
+PHONY += steady-state-files
+PHONY += steady-state-prefill
+PHONY += steady-state-run
+PHONY += steady-state-help-menu
 
 SSD_STEADY_STATE_DYNAMIC_RUNTIME_VARS := "kdevops_run_ssd_steady_state": True
 
+steady-state-files:
+	$(Q)ansible-playbook $(ANSIBLE_VERBOSE) \
+	-i $(KDEVOPS_HOSTFILE) playbooks/steady_state.yml \
+	--extra-vars=@./extra_vars.yaml --tags vars,setup $(LIMIT_HOSTS)
+
+steady-state-prefill:
+	$(Q)ansible-playbook $(ANSIBLE_VERBOSE) \
+	-i $(KDEVOPS_HOSTFILE) playbooks/steady_state.yml \
+	--extra-vars=@./extra_vars.yaml --tags vars,prefill $(LIMIT_HOSTS)
+
+steady-state-run:
+	$(Q)ansible-playbook $(ANSIBLE_VERBOSE) \
+	-i $(KDEVOPS_HOSTFILE) playbooks/steady_state.yml \
+	--extra-vars=@./extra_vars.yaml --tags vars,steady_state $(LIMIT_HOSTS)
+
 steady-state:
 	$(Q)ansible-playbook $(ANSIBLE_VERBOSE) \
 	-i $(KDEVOPS_HOSTFILE) playbooks/steady_state.yml \
 	--extra-vars '{ $(SSD_STEADY_STATE_DYNAMIC_RUNTIME_VARS) }' $(LIMIT_HOSTS)
 
 steady-state-help-menu:
+	@echo "steady-state-files                - Install steady state template files only"
+	@echo "steady-state-prefill              - Run prefill operations only"
+	@echo "steady-state-run                  - Run fio steady state tests only"
 	@echo "steady-state                      - Prefill and run fio steady state"
 	@echo ""
 
-- 
2.47.2


      parent reply	other threads:[~2025-07-18  4:06 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-18  4:05 [PATCH 0/4] steady-state: few fixes for steady state support Luis Chamberlain
2025-07-18  4:05 ` [PATCH 1/4] steady-state: use extra sparse drives when on libvirt Luis Chamberlain
2025-07-18  4:05 ` [PATCH 2/4] steady-state: fix identation on Makefile Luis Chamberlain
2025-07-18  4:05 ` [PATCH 3/4] steady-state: remove extra menu Luis Chamberlain
2025-07-18  4:05 ` Luis Chamberlain [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250718040557.2774976-5-mcgrof@kernel.org \
    --to=mcgrof@kernel.org \
    --cc=cel@kernel.org \
    --cc=da.gomez@kruces.com \
    --cc=kdevops@lists.linux.dev \
    --cc=slava@dubeyko.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox