From: Pierre Morel <pmorel@linux.ibm.com>
To: Nina Schoetterl-Glausch <nsg@linux.ibm.com>, linux-s390@vger.kernel.org
Cc: frankja@linux.ibm.com, thuth@redhat.com, kvm@vger.kernel.org,
imbrenda@linux.ibm.com, david@redhat.com, nrb@linux.ibm.com
Subject: Re: [kvm-unit-tests PATCH v6 2/2] s390x: topology: Checking Configuration Topology Information
Date: Wed, 15 Feb 2023 14:07:36 +0100 [thread overview]
Message-ID: <2ee40f92-bf9f-ad9b-77dc-a442ebb0440f@linux.ibm.com> (raw)
In-Reply-To: <fcb32e7dea2a94306a1ec773822ba3a24b50f371.camel@linux.ibm.com>
On 2/10/23 16:39, Nina Schoetterl-Glausch wrote:
> On Thu, 2023-02-02 at 10:28 +0100, Pierre Morel wrote:
>> STSI with function code 15 is used to store the CPU configuration
>> topology.
>>
>> We retrieve the maximum nested level with SCLP and use the
>> topology tree provided by the drawers, books, sockets, cores
>> arguments.
>>
>> We check :
>> - if the topology stored is coherent between the QEMU -smp
>> parameters and kernel parameters.
>> - the number of CPUs
>> - the maximum number of CPUs
>> - the number of containers of each levels for every STSI(15.1.x)
>> instruction allowed by the machine.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>> lib/s390x/sclp.h | 3 +-
>> lib/s390x/stsi.h | 44 ++++++++
>> s390x/topology.c | 244 ++++++++++++++++++++++++++++++++++++++++++++
>> s390x/unittests.cfg | 1 +
>> 4 files changed, 291 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/s390x/sclp.h b/lib/s390x/sclp.h
>> index 853529b..6ecfb0a 100644
>> --- a/lib/s390x/sclp.h
>> +++ b/lib/s390x/sclp.h
>> @@ -150,7 +150,8 @@ typedef struct ReadInfo {
>> SCCBHeader h;
>> uint16_t rnmax;
>> uint8_t rnsize;
>> - uint8_t _reserved1[16 - 11]; /* 11-15 */
>> + uint8_t _reserved1[15 - 11]; /* 11-14 */
>> + uint8_t stsi_parm; /* 15-15 */
>> uint16_t entries_cpu; /* 16-17 */
>> uint16_t offset_cpu; /* 18-19 */
>> uint8_t _reserved2[24 - 20]; /* 20-23 */
>> diff --git a/lib/s390x/stsi.h b/lib/s390x/stsi.h
>> index bebc492..8dbbfc2 100644
>> --- a/lib/s390x/stsi.h
>> +++ b/lib/s390x/stsi.h
>> @@ -29,4 +29,48 @@ struct sysinfo_3_2_2 {
>> uint8_t ext_names[8][256];
>> };
>>
>> +struct topology_core {
>> + uint8_t nl;
>> + uint8_t reserved1[3];
>> + uint8_t reserved4:5;
>> + uint8_t d:1;
>> + uint8_t pp:2;
>> + uint8_t type;
>> + uint16_t origin;
>> + uint64_t mask;
>> +};
>> +
>> +struct topology_container {
>> + uint8_t nl;
>> + uint8_t reserved[6];
>> + uint8_t id;
>> +};
>> +
>> +union topology_entry {
>> + uint8_t nl;
>> + struct topology_core cpu;
>> + struct topology_container container;
>> +};
>> +
>> +#define CPU_TOPOLOGY_MAX_LEVEL 6
>> +struct sysinfo_15_1_x {
>> + uint8_t reserved0[2];
>> + uint16_t length;
>> + uint8_t mag[CPU_TOPOLOGY_MAX_LEVEL];
>> + uint8_t reserved0a;
>> + uint8_t mnest;
>> + uint8_t reserved0c[4];
>> + union topology_entry tle[0];
>
> I'd use [] since it's C99.
OK
>
>> +};
>> +
>> +static inline int cpus_in_tle_mask(uint64_t val)
>> +{
>> + int i, n;
>> +
>> + for (i = 0, n = 0; i < 64; i++, val >>= 1)
>> + if (val & 0x01)
>> + n++;
>> + return n;
>> +}
>> +
>> #endif /* _S390X_STSI_H_ */
>> diff --git a/s390x/topology.c b/s390x/topology.c
>> index 20f7ba2..f21c653 100644
>> --- a/s390x/topology.c
>> +++ b/s390x/topology.c
>> @@ -16,6 +16,18 @@
>> #include <smp.h>
>> #include <sclp.h>
>> #include <s390x/hardware.h>
>> +#include <s390x/stsi.h>
>> +
>> +static uint8_t pagebuf[PAGE_SIZE * 2] __attribute__((aligned(PAGE_SIZE * 2)));
>> +
>> +static int max_nested_lvl;
>> +static int number_of_cpus;
>> +static int max_cpus = 1;
>
> IMO you shouldn't initialize this, it's a bit confusing because it gets modified later.
> I think it's good to initialize globals where one would expect it, so in main, or
> at the very top of the individual test case.
OK
>
>> +
>> +/* Topology level as defined by architecture */
>> +static int arch_topo_lvl[CPU_TOPOLOGY_MAX_LEVEL];
>> +/* Topology nested level as reported in STSI */
>> +static int stsi_nested_lvl[CPU_TOPOLOGY_MAX_LEVEL];
>>
>> #define PTF_REQ_HORIZONTAL 0
>> #define PTF_REQ_VERTICAL 1
>> @@ -122,11 +134,241 @@ end:
>> report_prefix_pop();
>> }
>>
>> +/*
>> + * stsi_check_maxcpus
>> + * @info: Pointer to the stsi information
>> + *
>> + * The product of the numbers of containers per level
>> + * is the maximum number of CPU allowed by the machine.
>> + */
>> +static void stsi_check_maxcpus(struct sysinfo_15_1_x *info)
>> +{
>> + int n, i;
>> +
>> + report_prefix_push("maximum cpus");
>> +
>> + for (i = 0, n = 1; i < CPU_TOPOLOGY_MAX_LEVEL; i++) {
>> + report_info("Mag%d: %d", CPU_TOPOLOGY_MAX_LEVEL - i, info->mag[i]);
>> + n *= info->mag[i] ? info->mag[i] : 1;
>> + }
>> + report(n == max_cpus, "Maximum CPUs %d expected %d", n, max_cpus);
>> +
>> + report_prefix_pop();
>> +}
>> +
>> +/*
>> + * stsi_check_tle_coherency
>> + * @info: Pointer to the stsi information
>> + * @sel2: Topology level to check.
>> + *
>> + * We verify that we get the expected number of Topology List Entry
>> + * containers for a specific level.
>> + */
>> +static void stsi_check_tle_coherency(struct sysinfo_15_1_x *info, int sel2)
>> +{
>> + struct topology_container *tc, *end;
>> + struct topology_core *cpus;
>> + int n = 0;
>> + int i;
>> +
>> + report_prefix_push("TLE coherency");
>> +
>> + tc = &info->tle[0].container;
>> + end = (struct topology_container *)((unsigned long)info + info->length);
>> +
>> + for (i = 0; i < CPU_TOPOLOGY_MAX_LEVEL; i++)
>> + stsi_nested_lvl[i] = 0;
>> +
>> + while (tc < end) {
>> + if (tc->nl > 5) {
>> + report_abort("Unexpected TL Entry: tle->nl: %d", tc->nl);
>> + return;
>> + }
>> + if (tc->nl == 0) {
>> + cpus = (struct topology_core *)tc;
>> + n += cpus_in_tle_mask(cpus->mask);
>> + report_info("cpu type %02x d: %d pp: %d", cpus->type, cpus->d, cpus->pp);
>> + report_info("origin : %04x mask %016lx", cpus->origin, cpus->mask);
>> + }
>> +
>> + stsi_nested_lvl[tc->nl]++;
>> + report_info("level %d: lvl: %d id: %d cnt: %d",
>> + tc->nl, tc->nl, tc->id, stsi_nested_lvl[tc->nl]);
>> +
>> + /* trick: CPU TLEs are twice the size of containers TLE */
>> + if (tc->nl == 0)
>> + tc++;
>> + tc++;
>> + }
>> + report(n == number_of_cpus, "Number of CPUs : %d expect %d", n, number_of_cpus);
>> + /*
>> + * For KVM we accept
>> + * - only 1 type of CPU
>> + * - only horizontal topology
>> + * - only dedicated CPUs
>> + * This leads to expect the number of entries of level 0 CPU
>> + * Topology Level Entry (TLE) to be:
>> + * 1 + (number_of_cpus - 1) / arch_topo_lvl[0]
>> + *
>> + * For z/VM or LPAR this number can only be greater if different
>> + * polarity, CPU types because there may be a nested level 0 CPU TLE
>> + * for each of the CPU/polarity/sharing types in a level 1 container TLE.
>> + */
>
> IMO you should only test what is defined by the architecture. This behavior isn't
> even dependent on KVM, but on user space, ok effectively this is QEMU but you never know.
> And that behavior could change in the future.
Right, I will let fall this check as I did not find any specification
about how the levels collapse in the documentation.
And the measures on LPAR gives unexpected results.
>
>> + n = (number_of_cpus - 1) / arch_topo_lvl[0];
>> + report(stsi_nested_lvl[0] >= n + 1,
>> + "CPU Type TLE : %d expect %d", stsi_nested_lvl[0], n + 1);
>> +
>> + /* For each level found in STSI */
>> + for (i = 1; i < CPU_TOPOLOGY_MAX_LEVEL; i++) {
>> + /*
>> + * For non QEMU/KVM hypervisor the concatenation of the levels
>> + * above level 1 are architecture dependent.
>> + * Skip these checks.
>> + */
>> + if (!host_is_kvm() && sel2 != 2)
>> + continue;
>> +
>> + /* For QEMU/KVM we expect a simple calculation */
>> + if (sel2 > i) {
>> + report(stsi_nested_lvl[i] == n + 1,
>> + "Container TLE %d: %d expect %d", i, stsi_nested_lvl[i], n + 1);
>> + n /= arch_topo_lvl[i];
>> + }
>> + }
>> +
>> + report_prefix_pop();
>> +}
>
> I think you could make this test stricter and more readable by using recursion.
> You have a function check_container which:
> * checks that each child has nesting level one level lower
> * calls check_container on the child
> * gets back the number of cpus in the child
> * gets back the next child
> * if the current "child" isn't actually a child but has one higher nesting level we're done
> * check if sum of child cpus makes sense
> * error out on any anomaly (e.g required 0 bits)
> * return "child" and sum of cpus
>
> For CPU entries you have a special checking function:
> * check as much as you can
> * calculate number of cpus in mask
> * you can also test if the actual cpus exist by:
> * building a bitmap of all cpus first before the test
> * checking if the bits in the mask are a subset of existing cpus
That is very interesting however I wonder if we should not propose the
test without these checks first, because it is quite finish then, and
expand the test in a second series.
IMO having a test now even not complete and get more intense checking
later is better than to have nothing until we finally have all tests we
can think about in the framework.
>> +
>> +/*
>> + * check_sysinfo_15_1_x
>> + * @info: pointer to the STSI info structure
>> + * @sel2: the selector giving the topology level to check
>> + *
>> + * Check if the validity of the STSI instruction and then
>> + * calls specific checks on the information buffer.
>> + */
>> +static void check_sysinfo_15_1_x(struct sysinfo_15_1_x *info, int sel2)
>> +{
>> + int ret;
>> +
>> + report_prefix_pushf("mnested %d 15_1_%d", max_nested_lvl, sel2);
>> +
>> + ret = stsi(pagebuf, 15, 1, sel2);
>> + if (max_nested_lvl >= sel2) {
>> + report(!ret, "Valid stsi instruction");
>> + } else {
>> + report(ret, "Invalid stsi instruction");
>> + goto end;
>> + }
>> +
>> + stsi_check_maxcpus(info);
>> + stsi_check_tle_coherency(info, sel2);
>> +
>> +end:
>> + report_prefix_pop();
>> +}
>> +
>> +static int sclp_get_mnest(void)
>> +{
>> + ReadInfo *sccb = (void *)_sccb;
>> +
>> + sclp_mark_busy();
>> + memset(_sccb, 0, PAGE_SIZE);
>> + sccb->h.length = PAGE_SIZE;
>> +
>> + sclp_service_call(SCLP_CMDW_READ_SCP_INFO, sccb);
>> + assert(sccb->h.response_code == SCLP_RC_NORMAL_READ_COMPLETION);
>> +
>> + return sccb->stsi_parm;
>> +}
>
> I think it would be better if you just add a function sclp_get_stsi_max_sel
> to lib/s390x/sclp.[ch] that reads the value from read_info and calculates
> the maximum selector value.
OK
>
>> +
>> +/*
>> + * test_stsi
>> + *
>> + * Retrieves the maximum nested topology level supported by the architecture
>> + * and the number of CPUs.
>> + * Calls the checking for the STSI instruction in sel2 reverse level order
>> + * from 6 (CPU_TOPOLOGY_MAX_LEVEL) to 2 to have the most interesting level,
>> + * the one triggering a topology-change-report-pending condition, level 2,
>> + * at the end of the report.
>> + *
>> + */
>> +static void test_stsi(void)
>> +{
>> + int sel2;
>> +
>> + max_nested_lvl = sclp_get_mnest();
>> + report_info("SCLP maximum nested level : %d", max_nested_lvl);
>> +
>> + number_of_cpus = sclp_get_cpu_num();
>> + report_info("SCLP number of CPU: %d", number_of_cpus);
>> +
>> + /* STSI selector 2 can takes values between 2 and 6 */
>> + for (sel2 = 6; sel2 >= 2; sel2--)
>> + check_sysinfo_15_1_x((struct sysinfo_15_1_x *)pagebuf, sel2);
>> +}
>> +
>> +/*
>> + * parse_topology_args
>> + * @argc: number of arguments
>> + * @argv: argument array
>> + *
>> + * This function initialize the architecture topology levels
>> + * which should be the same as the one provided by the hypervisor.
>> + *
>> + * We use the current names found in IBM/Z literature, Linux and QEMU:
>> + * cores, sockets/packages, books, drawers and nodes to facilitate the
>> + * human machine interface but store the result in a machine abstract
>> + * array of architecture topology levels.
>> + * Note that when QEMU uses socket as a name for the topology level 1
>> + * Linux uses package or physical_package.
>> + */
>> +static void parse_topology_args(int argc, char **argv)
>> +{
>> + int i;
>> +
>> + report_info("%d arguments", argc);
>> + for (i = 1; i < argc; i++) {
>> + if (!strcmp("-cores", argv[i])) {
>> + i++;
>> + if (i >= argc)
>> + report_abort("-cores needs a parameter");
>> + arch_topo_lvl[0] = atol(argv[i]);
>
> You don't do any error checking of the number parsing, and don't check the sign.
> You could use parse_unsigned in spec_ex.c, should maybe be in a lib somewhere instead.
OK
>
>> + report_info("cores: %d", arch_topo_lvl[0]);
>> + } else if (!strcmp("-sockets", argv[i])) {
>> + i++;
>> + if (i >= argc)
>> + report_abort("-sockets needs a parameter");
>> + arch_topo_lvl[1] = atol(argv[i]);
>> + report_info("sockets: %d", arch_topo_lvl[1]);
>> + } else if (!strcmp("-books", argv[i])) {
>> + i++;
>> + if (i >= argc)
>> + report_abort("-books needs a parameter");
>> + arch_topo_lvl[2] = atol(argv[i]);
>> + report_info("books: %d", arch_topo_lvl[2]);
>> + } else if (!strcmp("-drawers", argv[i])) {
>> + i++;
>> + if (i >= argc)
>> + report_abort("-drawers needs a parameter");
>> + arch_topo_lvl[3] = atol(argv[i]);
>> + report_info("drawers: %d", arch_topo_lvl[3]);
>> + }
>
> Might be nice to error out if no flags match.
> I'm guessing, currently, -sockets -cores 1 would "parse", but the result is nonsense
OK
>
>> + }
>
> You can also get rid of some code redundancy:
> char *levels[] = { "cores", "sockets", "books", "drawers" };
> for (...) {
> char *flag = argv[i];
> int level;
>
> if (flag[0] != '-')
> report_abort("Expected ...");
> flag++;
> for (level = 0; ARRAY_SIZE(levels); level++) {
> if (!strcmp(levels[level]), flag)
> break;
> }
> if (level == ARRAY_SIZE(levels))
> report_abort("Unknown ...");
>
> arch_topo_lvl[level] = atol(argv[++i]);
> report_info("%s: %d", levels[level], arch_topo_lvl[level]);
> }
OK, I take it.
>> +
>> + for (i = 0; i < CPU_TOPOLOGY_MAX_LEVEL; i++) {
>> + if (!arch_topo_lvl[i])
>> + arch_topo_lvl[i] = 1;
>> + max_cpus *= arch_topo_lvl[i];
>> + }
>
> Might be nice to split this function up into two, one that parses the values
> into an array and one that calculates max_cpus.
> Then you can do max_cpus = calculate_max_cpus or whatever and when someone is
> looking for where max_cpus is defined searching for "max_cpus =" works.
OK
Regards,
Pierre
--
Pierre Morel
IBM Lab Boeblingen
prev parent reply other threads:[~2023-02-15 13:08 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-02 9:28 [kvm-unit-tests PATCH v6 0/2] S390x: CPU Topology Information Pierre Morel
2023-02-02 9:28 ` [kvm-unit-tests PATCH v6 1/2] s390x: topology: Check the Perform Topology Function Pierre Morel
2023-02-08 11:06 ` Thomas Huth
2023-02-10 14:38 ` Pierre Morel
2023-02-10 14:51 ` Nina Schoetterl-Glausch
2023-02-15 8:20 ` Pierre Morel
2023-02-02 9:28 ` [kvm-unit-tests PATCH v6 2/2] s390x: topology: Checking Configuration Topology Information Pierre Morel
2023-02-08 11:53 ` Thomas Huth
2023-02-10 14:49 ` Pierre Morel
2023-02-10 15:39 ` Nina Schoetterl-Glausch
2023-02-15 13:07 ` Pierre Morel [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2ee40f92-bf9f-ad9b-77dc-a442ebb0440f@linux.ibm.com \
--to=pmorel@linux.ibm.com \
--cc=david@redhat.com \
--cc=frankja@linux.ibm.com \
--cc=imbrenda@linux.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=nrb@linux.ibm.com \
--cc=nsg@linux.ibm.com \
--cc=thuth@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox