LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH v2] powerpc/fadump: Add timeout to RTAS busy-wait loops
From: Adriano Vero @ 2026-05-07  2:52 UTC (permalink / raw)
  To: Sourabh Jain; +Cc: maddy, mpe, npiggin, chleroy, linuxppc-dev, linux-kernel
In-Reply-To: <7bef2ce2-1fd3-4648-8c78-d3c5e07a1f9f@linux.ibm.com>

Hi Sourabh,

Thank you for the review. v3 with your Reviewed-by tag has been sent.

Adriano


On Wed, May 6, 2026 at 9:22 PM Sourabh Jain <sourabhjain@linux.ibm.com> wrote:
>
>
>
> On 19/04/26 12:20, Adriano Vero wrote:
> > The ibm,configure-kernel-dump RTAS call sites in
> > rtas_fadump_register(), rtas_fadump_unregister(), and
> > rtas_fadump_invalidate() polled indefinitely while firmware returned
> > a busy status. A misbehaving or hung firmware could stall these paths
> > forever, blocking fadump registration at boot or preventing clean
> > teardown.
> >
> > Introduce rtas_fadump_call(), a helper that wraps the common
> > busy-wait pattern shared by all three sites. The helper accumulates
> > the total delay and returns -ETIMEDOUT if firmware keeps returning a
> > busy status beyond RTAS_FADUMP_MAX_WAIT_MS (60 seconds). A pr_debug()
> > message is emitted on each busy iteration to aid diagnosis when the
> > timeout is hit.
> >
> > Signed-off-by: Adriano Vero <adri.vero.dev@gmail.com>
> > ---
> >   arch/powerpc/platforms/pseries/rtas-fadump.c | 80 ++++++++++++--------
> >   arch/powerpc/platforms/pseries/rtas-fadump.h |  6 ++
> >   2 files changed, 53 insertions(+), 33 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
> > index eceb32893..3bb4ac2ab 100644
> > --- a/arch/powerpc/platforms/pseries/rtas-fadump.c
> > +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
> > @@ -179,9 +179,42 @@ static u64 rtas_fadump_get_bootmem_min(void)
> >       return RTAS_FADUMP_MIN_BOOT_MEM;
> >   }
> >
> > +/*
> > + * Helper to make an ibm,configure-kernel-dump RTAS call with a bounded
> > + * busy-wait loop. Returns the RTAS return code on completion, or
> > + * -ETIMEDOUT if firmware keeps returning a busy status beyond
> > + * RTAS_FADUMP_MAX_WAIT_MS milliseconds.
> > + */
> > +static int rtas_fadump_call(struct fw_dump *fadump_conf, int operation,
> > +                         void *fdm_ptr, unsigned int fdm_size,
> > +                         const char *op_name)
> > +{
> > +     unsigned int wait_time, total_wait = 0;
> > +     int rc;
> > +
> > +     do {
> > +             rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > +                            NULL, operation, fdm_ptr, fdm_size);
> > +             wait_time = rtas_busy_delay_time(rc);
> > +             if (wait_time) {
> > +                     pr_debug("Firmware busy during fadump %s, waiting %ums (total %ums)\n",
> > +                              op_name, wait_time, total_wait);
> > +                     if (total_wait >= RTAS_FADUMP_MAX_WAIT_MS) {
> > +                             pr_err("Timed out waiting for firmware to complete fadump %s\n",
> > +                                    op_name);
> > +                             return -ETIMEDOUT;
> > +                     }
> > +                     total_wait += wait_time;
> > +                     mdelay(wait_time);
> > +             }
> > +     } while (wait_time);
> > +
> > +     return rc;
> > +}
> > +
> >   static int rtas_fadump_register(struct fw_dump *fadump_conf)
> >   {
> > -     unsigned int wait_time, fdm_size;
> > +     unsigned int fdm_size;
> >       int rc, err = -EIO;
> >
> >       /*
> > @@ -192,16 +225,10 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
> >       fdm_size = sizeof(struct rtas_fadump_section_header);
> >       fdm_size += be16_to_cpu(fdm.header.dump_num_sections) * sizeof(struct rtas_fadump_section);
> >
> > -     /* TODO: Add upper time limit for the delay */
> > -     do {
> > -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > -                             NULL, FADUMP_REGISTER, &fdm, fdm_size);
> > -
> > -             wait_time = rtas_busy_delay_time(rc);
> > -             if (wait_time)
> > -                     mdelay(wait_time);
> > -
> > -     } while (wait_time);
> > +     rc = rtas_fadump_call(fadump_conf, FADUMP_REGISTER, &fdm, fdm_size,
> > +                           "register");
> > +     if (rc == -ETIMEDOUT)
> > +             return -ETIMEDOUT;
> >
> >       switch (rc) {
> >       case 0:
> > @@ -234,19 +261,12 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
> >
> >   static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
> >   {
> > -     unsigned int wait_time;
> >       int rc;
> >
> > -     /* TODO: Add upper time limit for the delay */
> > -     do {
> > -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > -                             NULL, FADUMP_UNREGISTER, &fdm,
> > -                             sizeof(struct rtas_fadump_mem_struct));
> > -
> > -             wait_time = rtas_busy_delay_time(rc);
> > -             if (wait_time)
> > -                     mdelay(wait_time);
> > -     } while (wait_time);
> > +     rc = rtas_fadump_call(fadump_conf, FADUMP_UNREGISTER, &fdm,
> > +                           sizeof(struct rtas_fadump_mem_struct), "unregister");
> > +     if (rc == -ETIMEDOUT)
> > +             return -ETIMEDOUT;
> >
> >       if (rc) {
> >               pr_err("Failed to un-register - unexpected error(%d).\n", rc);
> > @@ -259,19 +279,13 @@ static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
> >
> >   static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
> >   {
> > -     unsigned int wait_time;
> >       int rc;
> >
> > -     /* TODO: Add upper time limit for the delay */
> > -     do {
> > -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > -                             NULL, FADUMP_INVALIDATE, fdm_active,
> > -                             sizeof(struct rtas_fadump_mem_struct));
> > -
> > -             wait_time = rtas_busy_delay_time(rc);
> > -             if (wait_time)
> > -                     mdelay(wait_time);
> > -     } while (wait_time);
> > +     rc = rtas_fadump_call(fadump_conf, FADUMP_INVALIDATE,
> > +                           (void *)fdm_active,
> > +                           sizeof(struct rtas_fadump_mem_struct), "invalidate");
> > +     if (rc == -ETIMEDOUT)
> > +             return -ETIMEDOUT;
> >
> >       if (rc) {
> >               pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
> > diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
> > index c109abf6b..65fdab7b5 100644
> > --- a/arch/powerpc/platforms/pseries/rtas-fadump.h
> > +++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
> > @@ -41,6 +41,12 @@
> >   #define MAX_SECTIONS                                10
> >   #define RTAS_FADUMP_MAX_BOOT_MEM_REGS               7
> >
> > +/*
> > + * Maximum time to wait for firmware to respond to an
> > + * ibm,configure-kernel-dump RTAS call before giving up.
> > + */
> > +#define RTAS_FADUMP_MAX_WAIT_MS                      60000U
> > +
> >   /* Kernel Dump section info */
> >   struct rtas_fadump_section {
> >       __be32  request_flag;
>
> Changes look good to me.
>
> Feel free to add:
> Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
>
> While testing this patch I realized that fadump prints below message
> when registered is successfully
> <rtas/opal> fadump: Registration is successful
>
> But there is no message when fadump is unregistered successfully. I
> think it is good to
> log that event.  I will send a separate patch for this.
>
> - Sourabh Jain


^ permalink raw reply

* Re: [PATCH v2] powerpc/fadump: Add timeout to RTAS busy-wait loops
From: Sourabh Jain @ 2026-05-07  4:07 UTC (permalink / raw)
  To: Adriano Vero; +Cc: maddy, mpe, npiggin, chleroy, linuxppc-dev, linux-kernel
In-Reply-To: <CAGKWbKuxRCfqZaixqo7L3uT0gtOssDo8-cWWvsVJkKEsaTumcg@mail.gmail.com>



On 07/05/26 08:22, Adriano Vero wrote:
> Hi Sourabh,
>
> Thank you for the review. v3 with your Reviewed-by tag has been sent.

Thanks, but a new version just to include tags is not really needed.
Maintainers generally take care of including them.

- Sourabh Jain

>
> Adriano
>
>
> On Wed, May 6, 2026 at 9:22 PM Sourabh Jain <sourabhjain@linux.ibm.com> wrote:
>>
>>
>> On 19/04/26 12:20, Adriano Vero wrote:
>>> The ibm,configure-kernel-dump RTAS call sites in
>>> rtas_fadump_register(), rtas_fadump_unregister(), and
>>> rtas_fadump_invalidate() polled indefinitely while firmware returned
>>> a busy status. A misbehaving or hung firmware could stall these paths
>>> forever, blocking fadump registration at boot or preventing clean
>>> teardown.
>>>
>>> Introduce rtas_fadump_call(), a helper that wraps the common
>>> busy-wait pattern shared by all three sites. The helper accumulates
>>> the total delay and returns -ETIMEDOUT if firmware keeps returning a
>>> busy status beyond RTAS_FADUMP_MAX_WAIT_MS (60 seconds). A pr_debug()
>>> message is emitted on each busy iteration to aid diagnosis when the
>>> timeout is hit.
>>>
>>> Signed-off-by: Adriano Vero <adri.vero.dev@gmail.com>
>>> ---
>>>    arch/powerpc/platforms/pseries/rtas-fadump.c | 80 ++++++++++++--------
>>>    arch/powerpc/platforms/pseries/rtas-fadump.h |  6 ++
>>>    2 files changed, 53 insertions(+), 33 deletions(-)
>>>
>>> diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
>>> index eceb32893..3bb4ac2ab 100644
>>> --- a/arch/powerpc/platforms/pseries/rtas-fadump.c
>>> +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
>>> @@ -179,9 +179,42 @@ static u64 rtas_fadump_get_bootmem_min(void)
>>>        return RTAS_FADUMP_MIN_BOOT_MEM;
>>>    }
>>>
>>> +/*
>>> + * Helper to make an ibm,configure-kernel-dump RTAS call with a bounded
>>> + * busy-wait loop. Returns the RTAS return code on completion, or
>>> + * -ETIMEDOUT if firmware keeps returning a busy status beyond
>>> + * RTAS_FADUMP_MAX_WAIT_MS milliseconds.
>>> + */
>>> +static int rtas_fadump_call(struct fw_dump *fadump_conf, int operation,
>>> +                         void *fdm_ptr, unsigned int fdm_size,
>>> +                         const char *op_name)
>>> +{
>>> +     unsigned int wait_time, total_wait = 0;
>>> +     int rc;
>>> +
>>> +     do {
>>> +             rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
>>> +                            NULL, operation, fdm_ptr, fdm_size);
>>> +             wait_time = rtas_busy_delay_time(rc);
>>> +             if (wait_time) {
>>> +                     pr_debug("Firmware busy during fadump %s, waiting %ums (total %ums)\n",
>>> +                              op_name, wait_time, total_wait);
>>> +                     if (total_wait >= RTAS_FADUMP_MAX_WAIT_MS) {
>>> +                             pr_err("Timed out waiting for firmware to complete fadump %s\n",
>>> +                                    op_name);
>>> +                             return -ETIMEDOUT;
>>> +                     }
>>> +                     total_wait += wait_time;
>>> +                     mdelay(wait_time);
>>> +             }
>>> +     } while (wait_time);
>>> +
>>> +     return rc;
>>> +}
>>> +
>>>    static int rtas_fadump_register(struct fw_dump *fadump_conf)
>>>    {
>>> -     unsigned int wait_time, fdm_size;
>>> +     unsigned int fdm_size;
>>>        int rc, err = -EIO;
>>>
>>>        /*
>>> @@ -192,16 +225,10 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
>>>        fdm_size = sizeof(struct rtas_fadump_section_header);
>>>        fdm_size += be16_to_cpu(fdm.header.dump_num_sections) * sizeof(struct rtas_fadump_section);
>>>
>>> -     /* TODO: Add upper time limit for the delay */
>>> -     do {
>>> -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
>>> -                             NULL, FADUMP_REGISTER, &fdm, fdm_size);
>>> -
>>> -             wait_time = rtas_busy_delay_time(rc);
>>> -             if (wait_time)
>>> -                     mdelay(wait_time);
>>> -
>>> -     } while (wait_time);
>>> +     rc = rtas_fadump_call(fadump_conf, FADUMP_REGISTER, &fdm, fdm_size,
>>> +                           "register");
>>> +     if (rc == -ETIMEDOUT)
>>> +             return -ETIMEDOUT;
>>>
>>>        switch (rc) {
>>>        case 0:
>>> @@ -234,19 +261,12 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
>>>
>>>    static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
>>>    {
>>> -     unsigned int wait_time;
>>>        int rc;
>>>
>>> -     /* TODO: Add upper time limit for the delay */
>>> -     do {
>>> -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
>>> -                             NULL, FADUMP_UNREGISTER, &fdm,
>>> -                             sizeof(struct rtas_fadump_mem_struct));
>>> -
>>> -             wait_time = rtas_busy_delay_time(rc);
>>> -             if (wait_time)
>>> -                     mdelay(wait_time);
>>> -     } while (wait_time);
>>> +     rc = rtas_fadump_call(fadump_conf, FADUMP_UNREGISTER, &fdm,
>>> +                           sizeof(struct rtas_fadump_mem_struct), "unregister");
>>> +     if (rc == -ETIMEDOUT)
>>> +             return -ETIMEDOUT;
>>>
>>>        if (rc) {
>>>                pr_err("Failed to un-register - unexpected error(%d).\n", rc);
>>> @@ -259,19 +279,13 @@ static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
>>>
>>>    static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
>>>    {
>>> -     unsigned int wait_time;
>>>        int rc;
>>>
>>> -     /* TODO: Add upper time limit for the delay */
>>> -     do {
>>> -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
>>> -                             NULL, FADUMP_INVALIDATE, fdm_active,
>>> -                             sizeof(struct rtas_fadump_mem_struct));
>>> -
>>> -             wait_time = rtas_busy_delay_time(rc);
>>> -             if (wait_time)
>>> -                     mdelay(wait_time);
>>> -     } while (wait_time);
>>> +     rc = rtas_fadump_call(fadump_conf, FADUMP_INVALIDATE,
>>> +                           (void *)fdm_active,
>>> +                           sizeof(struct rtas_fadump_mem_struct), "invalidate");
>>> +     if (rc == -ETIMEDOUT)
>>> +             return -ETIMEDOUT;
>>>
>>>        if (rc) {
>>>                pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
>>> diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
>>> index c109abf6b..65fdab7b5 100644
>>> --- a/arch/powerpc/platforms/pseries/rtas-fadump.h
>>> +++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
>>> @@ -41,6 +41,12 @@
>>>    #define MAX_SECTIONS                                10
>>>    #define RTAS_FADUMP_MAX_BOOT_MEM_REGS               7
>>>
>>> +/*
>>> + * Maximum time to wait for firmware to respond to an
>>> + * ibm,configure-kernel-dump RTAS call before giving up.
>>> + */
>>> +#define RTAS_FADUMP_MAX_WAIT_MS                      60000U
>>> +
>>>    /* Kernel Dump section info */
>>>    struct rtas_fadump_section {
>>>        __be32  request_flag;
>> Changes look good to me.
>>
>> Feel free to add:
>> Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
>>
>> While testing this patch I realized that fadump prints below message
>> when registered is successfully
>> <rtas/opal> fadump: Registration is successful
>>
>> But there is no message when fadump is unregistered successfully. I
>> think it is good to
>> log that event.  I will send a separate patch for this.
>>
>> - Sourabh Jain



^ permalink raw reply

* Re: [PATCH 1/5] ibmvfc: add basic FPIN support
From: Tyrel Datwyler @ 2026-05-07  4:12 UTC (permalink / raw)
  To: davemarq, James E.J. Bottomley, Martin K. Petersen,
	Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy (CS GROUP)
  Cc: linux-kernel, linux-scsi, linuxppc-dev, Brian King, Greg Joyce,
	Kyle Mahlkuch
In-Reply-To: <20260408-ibmvfc-fpin-support-v1-1-52b06c464e03@linux.ibm.com>

On 4/8/26 10:07 AM, Dave Marquardt via B4 Relay wrote:
> From: Dave Marquardt <davemarq@linux.ibm.com>
> 
> - Add FPIN event descriptor
> - Add congestion cleared status
> - Add code to handle basic FPIN async event
> - Add KUnit tests

You need a more detailed description of your changes here for the commit log body.

You will also need a signed off tag from yourself for this to even be merged.

https://www.kernel.org/doc/html/latest/process/submitting-patches.html

> ---
>  drivers/scsi/Kconfig                 |  10 ++
>  drivers/scsi/ibmvscsi/Makefile       |   1 +
>  drivers/scsi/ibmvscsi/ibmvfc.c       | 189 ++++++++++++++++++++++++++++++++++-
>  drivers/scsi/ibmvscsi/ibmvfc.h       |   9 ++
>  drivers/scsi/ibmvscsi/ibmvfc_kunit.c |  95 ++++++++++++++++++
>  5 files changed, 302 insertions(+), 2 deletions(-)

<snip>

> +static struct fc_els_fpin *
> +ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
> +			   __be32 period, __be32 threshold, __be32 event_count)
> +{
> +	struct fc_fn_peer_congn_desc *pdesc;
> +	struct fc_fn_congn_desc *cdesc;
> +	struct fc_fn_li_desc *ldesc;
> +	struct fc_els_fpin *fpin;
> +	size_t size;
> +
> +	size = ibmvfc_fpin_size_helper(fpin_status);
> +	if (size == 0)
> +		return NULL;
> +
> +	fpin = kzalloc(size, GFP_KERNEL);

This appears to be called by ibmvfc_handle_async() with runs in atomic context
and cannot therefore sleep. This allocation needs to be GFP_ATOMIC. Although
there is another issue below that might make this moot.

> +	if (fpin == NULL)
> +		return NULL;
> +
> +	fpin->fpin_cmd = ELS_FPIN;
> +
> +	switch (fpin_status) {
> +	case IBMVFC_AE_FPIN_CONGESTION_CLEARED:
> +	case IBMVFC_AE_FPIN_LINK_CONGESTED:
> +		fpin->desc_len = cpu_to_be32(sizeof(struct fc_fn_congn_desc));
> +		cdesc = (struct fc_fn_congn_desc *)fpin->fpin_desc;
> +		cdesc->desc_tag = cpu_to_be32(ELS_DTAG_CONGESTION);
> +		cdesc->desc_len = cpu_to_be32(FC_TLV_DESC_LENGTH_FROM_SZ(*cdesc));
> +		if (fpin_status == IBMVFC_AE_FPIN_CONGESTION_CLEARED)
> +			cdesc->event_type = cpu_to_be16(FPIN_CONGN_CLEAR);
> +		else
> +			cdesc->event_type = cpu_to_be16(FPIN_CONGN_DEVICE_SPEC);
> +		cdesc->event_modifier = modifier;
> +		cdesc->event_period = period;
> +		cdesc->severity = FPIN_CONGN_SEVERITY_WARNING;
> +		break;
> +	case IBMVFC_AE_FPIN_PORT_CONGESTED:
> +	case IBMVFC_AE_FPIN_PORT_CLEARED:
> +		fpin->desc_len = cpu_to_be32(sizeof(struct fc_fn_peer_congn_desc));
> +		pdesc = (struct fc_fn_peer_congn_desc *)fpin->fpin_desc;
> +		pdesc->desc_tag = cpu_to_be32(ELS_DTAG_PEER_CONGEST);
> +		pdesc->desc_len = cpu_to_be32(FC_TLV_DESC_LENGTH_FROM_SZ(*pdesc));
> +		if (fpin_status == IBMVFC_AE_FPIN_PORT_CLEARED)
> +			pdesc->event_type = cpu_to_be16(FPIN_CONGN_CLEAR);
> +		else
> +			pdesc->event_type = cpu_to_be16(FPIN_CONGN_DEVICE_SPEC);
> +		pdesc->event_modifier = modifier;
> +		pdesc->event_period = period;
> +		pdesc->detecting_wwpn = cpu_to_be64(0);
> +		pdesc->attached_wwpn = wwpn;
> +		pdesc->pname_count = cpu_to_be32(1);
> +		pdesc->pname_list[0] = wwpn;
> +		break;
> +	case IBMVFC_AE_FPIN_PORT_DEGRADED:
> +		fpin->desc_len = cpu_to_be32(sizeof(struct fc_fn_li_desc));
> +		ldesc = (struct fc_fn_li_desc *)fpin->fpin_desc;
> +		ldesc->desc_tag = cpu_to_be32(ELS_DTAG_LNK_INTEGRITY);
> +		ldesc->desc_len = cpu_to_be32(FC_TLV_DESC_LENGTH_FROM_SZ(*ldesc));
> +		ldesc->event_type = cpu_to_be16(FPIN_LI_UNKNOWN);
> +		ldesc->event_modifier = modifier;
> +		ldesc->event_threshold = threshold;
> +		ldesc->event_count = event_count;
> +		ldesc->detecting_wwpn = cpu_to_be64(0);
> +		ldesc->attached_wwpn = wwpn;
> +		ldesc->pname_count = cpu_to_be32(1);
> +		ldesc->pname_list[0] = wwpn;
> +		break;
> +	default:
> +		/* This should be caught above. */
> +		kfree(fpin);
> +		fpin = NULL;
> +		break;
> +	}
> +
> +	return fpin;
> +}
> +
> +/**
> + * ibmvfc_basic_fpin_to_desc(): allocate and populate a struct fc_els_fpin struct
> + * containing a descriptor.
> + * @ibmvfc_fpin: Pointer to async crq
> + *
> + * Allocate a struct fc_els_fpin containing a descriptor and populate
> + * based on data from *ibmvfc_fpin.
> + *
> + * Return:
> + * NULL     - unable to allocate structure
> + * non-NULL - pointer to populated struct fc_els_fpin
> + */
> +static struct fc_els_fpin *
> +/*XXX*/ibmvfc_basic_fpin_to_desc(struct ibmvfc_async_crq *crq)

What is with this /*XXX*/? I can't find it once I apply the patchset so I assume
its removed in a later patch, but it should be removed here.

> +{
> +	return ibmvfc_common_fpin_to_desc(crq->fpin_status, crq->wwpn,
> +					  cpu_to_be16(0),
> +					  cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_PERIOD),
> +					  cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_THRESHOLD),
> +					  cpu_to_be32(1));
> +}
> +
>  /**
>   * ibmvfc_handle_async - Handle an async event from the adapter
>   * @crq:	crq to process
>   * @vhost:	ibmvfc host struct
>   *
>   **/
> -static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
> -				struct ibmvfc_host *vhost)
> +VISIBLE_IF_KUNIT void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
> +					  struct ibmvfc_host *vhost)
>  {
>  	const struct ibmvfc_async_desc *desc = ibmvfc_get_ae_desc(be64_to_cpu(crq->event));
>  	struct ibmvfc_target *tgt;
> +	struct fc_els_fpin *fpin;
>  
>  	ibmvfc_log(vhost, desc->log_level, "%s event received. scsi_id: %llx, wwpn: %llx,"
>  		   " node_name: %llx%s\n", desc->desc, be64_to_cpu(crq->scsi_id),
> @@ -3269,11 +3422,37 @@ static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
>  	case IBMVFC_AE_HALT:
>  		ibmvfc_link_down(vhost, IBMVFC_HALTED);
>  		break;
> +	case IBMVFC_AE_FPIN:
> +		if (!crq->scsi_id && !crq->wwpn && !crq->node_name)
> +			break;
> +		list_for_each_entry(tgt, &vhost->targets, queue) {
> +			if (crq->scsi_id && cpu_to_be64(tgt->scsi_id) != crq->scsi_id)
> +				continue;
> +			if (crq->wwpn && cpu_to_be64(tgt->ids.port_name) != crq->wwpn)
> +				continue;
> +			if (crq->node_name && cpu_to_be64(tgt->ids.node_name) != crq->node_name)
> +				continue;
> +			if (!tgt->rport)
> +				continue;
> +			fpin = ibmvfc_basic_fpin_to_desc(crq);
> +			if (fpin) {
> +				fc_host_fpin_rcv(tgt->vhost->host,
> +						 sizeof(*fpin) +
> +						       be32_to_cpu(fpin->desc_len),
> +						 (char *)fpin, 0);

This call to fc_host_fpin_rcv() appears to be problematic as it assumes no locks
are held, but ibmvfc_handle_async() is called with the scsi host lock held. We
already do a lot more work than we probaly should in our interrupt handler. I
think we maybe need to pass the FPIN work off to a workqueue instead to be
handled in process context instead.

-Tyrel




^ permalink raw reply

* Re: [PATCH v3] powerpc/fadump: Add timeout to RTAS busy-wait loops
From: Sourabh Jain @ 2026-05-07  4:15 UTC (permalink / raw)
  To: Adriano Vero, maddy, mpe; +Cc: npiggin, chleroy, linuxppc-dev, linux-kernel
In-Reply-To: <20260506222024.30352-1-adri.vero.dev@gmail.com>

Hello Adriano,

On 07/05/26 03:50, Adriano Vero wrote:
> The ibm,configure-kernel-dump RTAS call sites in
> rtas_fadump_register(), rtas_fadump_unregister(), and
> rtas_fadump_invalidate() polled indefinitely while firmware returned
> a busy status. A misbehaving or hung firmware could stall these paths
> forever, blocking fadump registration at boot or preventing clean
> teardown.
>
> Introduce rtas_fadump_call(), a helper that wraps the common
> busy-wait pattern shared by all three sites. The helper accumulates
> the total delay and returns -ETIMEDOUT if firmware keeps returning a
> busy status beyond RTAS_FADUMP_MAX_WAIT_MS (60 seconds). A pr_debug()
> message is emitted on each busy iteration to aid diagnosis when the
> timeout is hit.
>
> Signed-off-by: Adriano Vero <adri.vero.dev@gmail.com>
>
> Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>

New line between tags is unnecessary.

Including a Changelog in subsequent versions of the same patch is
helpful for the maintainer and reviewers to understand what has changed
in this version compared to the previous version.

For example:
https://lore.kernel.org/all/20260407124349.1698552-1-sourabhjain@linux.ibm.com/

Thanks,
Sourabh Jain

> ---
>   arch/powerpc/platforms/pseries/rtas-fadump.c | 80 ++++++++++++--------
>   arch/powerpc/platforms/pseries/rtas-fadump.h |  6 ++
>   2 files changed, 53 insertions(+), 33 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
> index eceb3289383e..3bb4ac2ab6cc 100644
> --- a/arch/powerpc/platforms/pseries/rtas-fadump.c
> +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
> @@ -179,9 +179,42 @@ static u64 rtas_fadump_get_bootmem_min(void)
>   	return RTAS_FADUMP_MIN_BOOT_MEM;
>   }
>   
> +/*
> + * Helper to make an ibm,configure-kernel-dump RTAS call with a bounded
> + * busy-wait loop. Returns the RTAS return code on completion, or
> + * -ETIMEDOUT if firmware keeps returning a busy status beyond
> + * RTAS_FADUMP_MAX_WAIT_MS milliseconds.
> + */
> +static int rtas_fadump_call(struct fw_dump *fadump_conf, int operation,
> +			    void *fdm_ptr, unsigned int fdm_size,
> +			    const char *op_name)
> +{
> +	unsigned int wait_time, total_wait = 0;
> +	int rc;
> +
> +	do {
> +		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> +			       NULL, operation, fdm_ptr, fdm_size);
> +		wait_time = rtas_busy_delay_time(rc);
> +		if (wait_time) {
> +			pr_debug("Firmware busy during fadump %s, waiting %ums (total %ums)\n",
> +				 op_name, wait_time, total_wait);
> +			if (total_wait >= RTAS_FADUMP_MAX_WAIT_MS) {
> +				pr_err("Timed out waiting for firmware to complete fadump %s\n",
> +				       op_name);
> +				return -ETIMEDOUT;
> +			}
> +			total_wait += wait_time;
> +			mdelay(wait_time);
> +		}
> +	} while (wait_time);
> +
> +	return rc;
> +}
> +
>   static int rtas_fadump_register(struct fw_dump *fadump_conf)
>   {
> -	unsigned int wait_time, fdm_size;
> +	unsigned int fdm_size;
>   	int rc, err = -EIO;
>   
>   	/*
> @@ -192,16 +225,10 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
>   	fdm_size = sizeof(struct rtas_fadump_section_header);
>   	fdm_size += be16_to_cpu(fdm.header.dump_num_sections) * sizeof(struct rtas_fadump_section);
>   
> -	/* TODO: Add upper time limit for the delay */
> -	do {
> -		rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> -				NULL, FADUMP_REGISTER, &fdm, fdm_size);
> -
> -		wait_time = rtas_busy_delay_time(rc);
> -		if (wait_time)
> -			mdelay(wait_time);
> -
> -	} while (wait_time);
> +	rc = rtas_fadump_call(fadump_conf, FADUMP_REGISTER, &fdm, fdm_size,
> +			      "register");
> +	if (rc == -ETIMEDOUT)
> +		return -ETIMEDOUT;
>   
>   	switch (rc) {
>   	case 0:
> @@ -234,19 +261,12 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
>   
>   static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
>   {
> -	unsigned int wait_time;
>   	int rc;
>   
> -	/* TODO: Add upper time limit for the delay */
> -	do {
> -		rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> -				NULL, FADUMP_UNREGISTER, &fdm,
> -				sizeof(struct rtas_fadump_mem_struct));
> -
> -		wait_time = rtas_busy_delay_time(rc);
> -		if (wait_time)
> -			mdelay(wait_time);
> -	} while (wait_time);
> +	rc = rtas_fadump_call(fadump_conf, FADUMP_UNREGISTER, &fdm,
> +			      sizeof(struct rtas_fadump_mem_struct), "unregister");
> +	if (rc == -ETIMEDOUT)
> +		return -ETIMEDOUT;
>   
>   	if (rc) {
>   		pr_err("Failed to un-register - unexpected error(%d).\n", rc);
> @@ -259,19 +279,13 @@ static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
>   
>   static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
>   {
> -	unsigned int wait_time;
>   	int rc;
>   
> -	/* TODO: Add upper time limit for the delay */
> -	do {
> -		rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> -				NULL, FADUMP_INVALIDATE, fdm_active,
> -				sizeof(struct rtas_fadump_mem_struct));
> -
> -		wait_time = rtas_busy_delay_time(rc);
> -		if (wait_time)
> -			mdelay(wait_time);
> -	} while (wait_time);
> +	rc = rtas_fadump_call(fadump_conf, FADUMP_INVALIDATE,
> +			      (void *)fdm_active,
> +			      sizeof(struct rtas_fadump_mem_struct), "invalidate");
> +	if (rc == -ETIMEDOUT)
> +		return -ETIMEDOUT;
>   
>   	if (rc) {
>   		pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
> diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
> index c109abf6befd..65fdab7b5b8d 100644
> --- a/arch/powerpc/platforms/pseries/rtas-fadump.h
> +++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
> @@ -41,6 +41,12 @@
>   #define MAX_SECTIONS				10
>   #define RTAS_FADUMP_MAX_BOOT_MEM_REGS		7
>   
> +/*
> + * Maximum time to wait for firmware to respond to an
> + * ibm,configure-kernel-dump RTAS call before giving up.
> + */
> +#define RTAS_FADUMP_MAX_WAIT_MS			60000U
> +
>   /* Kernel Dump section info */
>   struct rtas_fadump_section {
>   	__be32	request_flag;



^ permalink raw reply

* Re: [PATCH 2/5] ibmvfc: Add NOOP command support
From: Tyrel Datwyler @ 2026-05-07  4:17 UTC (permalink / raw)
  To: davemarq, James E.J. Bottomley, Martin K. Petersen,
	Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy (CS GROUP)
  Cc: linux-kernel, linux-scsi, linuxppc-dev, Brian King, Greg Joyce,
	Kyle Mahlkuch
In-Reply-To: <20260408-ibmvfc-fpin-support-v1-2-52b06c464e03@linux.ibm.com>

On 4/8/26 10:07 AM, Dave Marquardt via B4 Relay wrote:
> From: Dave Marquardt <davemarq@linux.ibm.com>

As with patch 1 this requires a slightly more detailed commit log message and
developer sign off tag.

> 
> - Add VFC_NOOP command support
> - Add KUnit tests for VFC_NOOP command
> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c       | 23 +++++++++++++----------
>  drivers/scsi/ibmvscsi/ibmvfc.h       | 13 +++++++++++++
>  drivers/scsi/ibmvscsi/ibmvfc_kunit.c | 27 +++++++++++++++++++++++++++
>  3 files changed, 53 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index 3ac376ba2c62..808301fa452d 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -189,13 +189,6 @@ static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba,
>  	return rc;
>  }
>  
> -static int ibmvfc_check_caps(struct ibmvfc_host *vhost, unsigned long cap_flags)
> -{
> -	u64 host_caps = be64_to_cpu(vhost->login_buf->resp.capabilities);
> -
> -	return (host_caps & cap_flags) ? 1 : 0;
> -}
> -

It appears you are moving this to ibmvfc.h? Is there reasoning outside making it
visible to kunit?

>  static struct ibmvfc_fcp_cmd_iu *ibmvfc_get_fcp_iu(struct ibmvfc_host *vhost,
>  						   struct ibmvfc_cmd *vfc_cmd)
>  {
> @@ -1512,7 +1505,9 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
>  		login_info->flags |= cpu_to_be16(IBMVFC_CLIENT_MIGRATED);
>  
>  	login_info->max_cmds = cpu_to_be32(max_cmds);
> -	login_info->capabilities = cpu_to_be64(IBMVFC_CAN_MIGRATE | IBMVFC_CAN_SEND_VF_WWPN);
> +	login_info->capabilities =
> +		cpu_to_be64(IBMVFC_CAN_MIGRATE | IBMVFC_CAN_SEND_VF_WWPN |
> +			    IBMVFC_CAN_USE_NOOP_CMD);
>  
>  	if (vhost->mq_enabled || vhost->using_channels)
>  		login_info->capabilities |= cpu_to_be64(IBMVFC_CAN_USE_CHANNELS);
> @@ -3461,8 +3456,8 @@ EXPORT_SYMBOL_IF_KUNIT(ibmvfc_handle_async);
>   * @evt_doneq:	Event done queue
>   *
>  **/
> -static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
> -			      struct list_head *evt_doneq)
> +VISIBLE_IF_KUNIT void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
> +					struct list_head *evt_doneq)
>  {
>  	long rc;
>  	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
> @@ -3520,6 +3515,13 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
>  	if (crq->format == IBMVFC_ASYNC_EVENT)
>  		return;
>  
> +	if (crq->format == IBMVFC_VFC_NOOP) {
> +		if (!ibmvfc_check_caps(vhost, IBMVFC_SUPPORT_NOOP_CMD))
> +			dev_err(vhost->dev,
> +				"Received unexpected NOOP command from partner\n");

If we have a misbahaved VIOS partner we may want to ratelimit this dev_err so
that we don't flood the log. Probably a corner case, but I don't think it hurts.

-Tyrel

> +		return;
> +	}
> +
>  	/* The only kind of payload CRQs we should get are responses to
>  	 * things we send. Make sure this response is to something we
>  	 * actually sent




^ permalink raw reply

* Re: [PATCH] powerpc/pseries/iommu: Add TCEs for 16GB pages when RAM is pre-mapped
From: Harsh Prateek Bora @ 2026-05-07  4:44 UTC (permalink / raw)
  To: Gaurav Batra, maddy; +Cc: linuxppc-dev, ritesh.list, donettom, vaibhav, sbhat
In-Reply-To: <b03c0960-ee22-4de1-94bf-c1c9eec757af@linux.ibm.com>



On 07/05/26 12:57 am, Gaurav Batra wrote:
> 
> On 5/5/26 11:04 PM, Harsh Prateek Bora wrote:
>>
>>
>> On 06/05/26 2:31 am, Gaurav Batra wrote:
>>>
>>> On 5/5/26 1:49 PM, Harsh Prateek Bora wrote:
>>>>
>>>>
>>>> On 05/05/26 2:24 am, Gaurav Batra wrote:
>>>>> @@ -2431,7 +2437,7 @@ static int iommu_mem_notifier(struct 
>>>>> notifier_block *nb, unsigned long action,
>>>>>           spin_lock(&dma_win_list_lock);
>>>>>           list_for_each_entry(window, &dma_win_list, list) {
>>>>>               if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
>>>>> -                ddw_memory_hotplug_max()) {
>>>>> +                pseries_ddw_max_ram) {
>>>>>                   ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
>>>>>                           arg->nr_pages, window->prop);
>>>>
>>>> I think not only start_pfn, but end_pfn also needs to be within allowed
>>>> range, which may require clamping arg->nr_pages if crossing the limits.
>>> The reason to only check for start_pfn is because the range given 
>>> will either be in the RAM or pmemory. It can never cross the boundary
>>
>> Usually as an API, we do not trust the caller, hence the check for
>> start_pfn. However, if at all we need to go ahead with that assumption,
>> may be document that as a code comment and/or in the commit log also?
> 
> I will document it. I am wondering, if instead of capping it to 
> start+nrpages,
> 
> maybe, return an error. If in the future, we get an error, this needs to be
> 
> investigated.
> 

I am not sure erroring out for crossing RAM/pmem boundary is a right
thing to do here if it can hinder persistent memory onlining (since
there is an existing code comment which says it can get called when
onlining persistent memory as well). In that case, a WARN may be a
better choice.


>>
>>>>
>>>>>               }
>>>>> @@ -2444,7 +2450,7 @@ static int iommu_mem_notifier(struct 
>>>>> notifier_block *nb, unsigned long action,
>>>>>           spin_lock(&dma_win_list_lock);
>>>>>           list_for_each_entry(window, &dma_win_list, list) {
>>>>>               if (window->direct && (arg->start_pfn << PAGE_SHIFT) <
>>>>> -                ddw_memory_hotplug_max()) {
>>>>> +                pseries_ddw_max_ram) {
>>>>>                   ret |= tce_clearrange_multi_pSeriesLP(arg- 
>>>>> >start_pfn,
>>>>>                           arg->nr_pages, window->prop);
>>>>>               }
>>>>
>>



^ permalink raw reply

* Re: [PATCH 3/5] ibmvfc: make ibmvfc login to fabric
From: Tyrel Datwyler @ 2026-05-07  5:03 UTC (permalink / raw)
  To: davemarq, James E.J. Bottomley, Martin K. Petersen,
	Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy (CS GROUP)
  Cc: linux-kernel, linux-scsi, linuxppc-dev, Brian King, Greg Joyce,
	Kyle Mahlkuch
In-Reply-To: <20260408-ibmvfc-fpin-support-v1-3-52b06c464e03@linux.ibm.com>

On 4/8/26 10:07 AM, Dave Marquardt via B4 Relay wrote:
> From: Dave Marquardt <davemarq@linux.ibm.com>
> 
> Make ibmvfc login to fabric when NPIV login returns SUPPORT_SCSI or
> SUPPORT_NVMEOF capabilities.

Again better commit log message here and developer sign off tag.

> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c | 100 ++++++++++++++++++++++++++++++++++++++---
>  drivers/scsi/ibmvscsi/ibmvfc.h |  20 +++++++++
>  2 files changed, 115 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index 808301fa452d..803fc3caa14d 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -5205,6 +5205,89 @@ static void ibmvfc_discover_targets(struct ibmvfc_host *vhost)
>  		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
>  }
>  
> +static void ibmvfc_fabric_login_done(struct ibmvfc_event *evt)
> +{
> +	struct ibmvfc_fabric_login *rsp = &evt->xfer_iu->fabric_login;
> +	u32 mad_status = be16_to_cpu(rsp->common.status);
> +	struct ibmvfc_host *vhost = evt->vhost;
> +	int level = IBMVFC_DEFAULT_LOG_LEVEL;
> +
> +	ENTER;
> +
> +	switch (mad_status) {
> +	case IBMVFC_MAD_SUCCESS:
> +		vhost->logged_in = 1;

I'm not sure I see the point of setting logged_in here since we already set it
in npiv_login_done.

> +		vhost->fabric_capabilities = rsp->capabilities;

The way this is currently spec'd out there are no linux relevant capabilities
coming from fabric login. So, I'm not sure there is a reason to save them at
this point.

> +		fc_host_port_id(vhost->host) = be64_to_cpu(rsp->nport_id);
> +		ibmvfc_free_event(evt);
> +		break;
> +
> +	case IBMVFC_MAD_FAILED:
> +		if (ibmvfc_retry_cmd(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)))
> +			level += ibmvfc_retry_host_init(vhost);
> +		else
> +			ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
> +		ibmvfc_log(vhost, level, "Fabric Login failed: %s (%x:%x)\n",
> +			   ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)),
> +						be16_to_cpu(rsp->status), be16_to_cpu(rsp->error));
> +		ibmvfc_free_event(evt);
> +		LEAVE;
> +		return;
> +
> +	case IBMVFC_MAD_CRQ_ERROR:
> +		ibmvfc_retry_host_init(vhost);
> +		fallthrough;
> +
> +	case IBMVFC_MAD_DRIVER_FAILED:
> +		ibmvfc_free_event(evt);
> +		LEAVE;
> +		return;
> +
> +	default:
> +		dev_err(vhost->dev, "Invalid fabric Login response: 0x%x\n", mad_status);
> +		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
> +		ibmvfc_free_event(evt);
> +		LEAVE;
> +		return;
> +	}
> +
> +	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
> +	wake_up(&vhost->work_wait_q);
> +
> +	LEAVE;
> +}
> +
> +static void ibmvfc_fabric_login(struct ibmvfc_host *vhost)
> +{
> +	struct ibmvfc_fabric_login *mad;
> +	struct ibmvfc_event *evt = ibmvfc_get_reserved_event(&vhost->crq);
> +	int level = IBMVFC_DEFAULT_LOG_LEVEL;
> +
> +	if (!evt) {

I think we need to hard reset here or we are dead in the water if there are no
events.

> +		ibmvfc_log(vhost, level, "Fabric Login failed: no available events\n");
> +		return;
> +	}
> +
> +	ibmvfc_init_event(evt, ibmvfc_fabric_login_done, IBMVFC_MAD_FORMAT);
> +	mad = &evt->iu.fabric_login;
> +	memset(mad, 0, sizeof(*mad));
> +	if (vhost->scsi_scrqs.protocol == IBMVFC_PROTO_SCSI)
> +		mad->common.opcode = cpu_to_be32(IBMVFC_FABRIC_LOGIN);
> +	else if (vhost->scsi_scrqs.protocol == IBMVFC_PROTO_NVME)
> +		mad->common.opcode = cpu_to_be32(IBMVFC_NVMF_FABRIC_LOGIN);

The VIOS won't return NVMF support unless we advertise it. So, I think its best
to omit any NVMF releveant changes that are spec'd as they aren't being applied
in a proper workflow here anyways. If the driver advertised both SCSI and NVMF
support the current code would never do a NVMF fabric login as it would never
fall through here.

> +	else {
> +		ibmvfc_log(vhost, level, "Fabric Login failed: unknown protocol\n");
> +		return;
> +	}
> +	mad->common.version = cpu_to_be32(1);
> +	mad->common.length = cpu_to_be16(sizeof(*mad));
> +
> +	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT_WAIT);
> +
> +	if (ibmvfc_send_event(evt, vhost, default_timeout))
> +		ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
> +}
> +
>  static void ibmvfc_channel_setup_done(struct ibmvfc_event *evt)
>  {
>  	struct ibmvfc_host *vhost = evt->vhost;
> @@ -5251,8 +5334,12 @@ static void ibmvfc_channel_setup_done(struct ibmvfc_event *evt)
>  		return;
>  	}
>  
> -	ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
> -	wake_up(&vhost->work_wait_q);
> +	if (ibmvfc_check_caps(vhost, (IBMVFC_SUPPORT_SCSI | IBMVFC_SUPPORT_NVMEOF))) {
> +		ibmvfc_fabric_login(vhost);

Again drop the NVMEOF code.

> +	} else {
> +		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
> +		wake_up(&vhost->work_wait_q);
> +	}
>  }
>  
>  static void ibmvfc_channel_setup(struct ibmvfc_host *vhost)
> @@ -5443,9 +5530,12 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
>  	vhost->host->can_queue = be32_to_cpu(rsp->max_cmds) - IBMVFC_NUM_INTERNAL_REQ;
>  	vhost->host->max_sectors = npiv_max_sectors;
>  
> -	if (ibmvfc_check_caps(vhost, IBMVFC_CAN_SUPPORT_CHANNELS) && vhost->do_enquiry) {
> -		ibmvfc_channel_enquiry(vhost);
> -	} else {
> +	if (ibmvfc_check_caps(vhost, IBMVFC_CAN_SUPPORT_CHANNELS)) {
> +		if (vhost->do_enquiry)
> +			ibmvfc_channel_enquiry(vhost);

I'm not sure I understand expanding this code to a second if block as there is
no functional change.

> +	} else if (ibmvfc_check_caps(vhost, (IBMVFC_SUPPORT_SCSI | IBMVFC_SUPPORT_NVMEOF)))

Again drop NVMEOF and NVMF related changes.

> +		ibmvfc_fabric_login(vhost);
> +	else {
>  		vhost->do_enquiry = 0;
>  		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
>  		wake_up(&vhost->work_wait_q);
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
> index cd0917f70c6d..4f680c5d9558 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.h
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.h
> @@ -138,6 +138,8 @@ enum ibmvfc_mad_types {
>  	IBMVFC_CHANNEL_ENQUIRY	= 0x1000,
>  	IBMVFC_CHANNEL_SETUP	= 0x2000,
>  	IBMVFC_CONNECTION_INFO	= 0x4000,
> +	IBMVFC_FABRIC_LOGIN	= 0x8000,
> +	IBMVFC_NVMF_FABRIC_LOGIN	= 0x8001,
>  };
>  
>  struct ibmvfc_mad_common {
> @@ -227,6 +229,8 @@ struct ibmvfc_npiv_login_resp {
>  #define IBMVFC_MAD_VERSION_CAP		0x20
>  #define IBMVFC_HANDLE_VF_WWPN		0x40
>  #define IBMVFC_CAN_SUPPORT_CHANNELS	0x80
> +#define IBMVFC_SUPPORT_NVMEOF		0x100
> +#define IBMVFC_SUPPORT_SCSI		0x200
>  #define IBMVFC_SUPPORT_NOOP_CMD		0x1000
>  	__be32 max_cmds;
>  	__be32 scsi_id_sz;
> @@ -590,6 +594,19 @@ struct ibmvfc_connection_info {
>  	__be64 reserved[16];
>  } __packed __aligned(8);
>  
> +struct ibmvfc_fabric_login {
> +	struct ibmvfc_mad_common common;
> +	__be64 flags;
> +#define IBMVFC_STRIP_MERGE	0x1
> +#define IBMVFC_LINK_COMMANDS	0x2
> +	__be64 capabilities;
> +	__be64 nport_id;
> +	__be16 status;
> +	__be16 error;
> +	__be32 pad;
> +	__be64 reserved[16];
> +} __packed __aligned(8);
> +
>  struct ibmvfc_trace_start_entry {
>  	u32 xfer_len;
>  } __packed;
> @@ -709,6 +726,7 @@ union ibmvfc_iu {
>  	struct ibmvfc_channel_enquiry channel_enquiry;
>  	struct ibmvfc_channel_setup_mad channel_setup;
>  	struct ibmvfc_connection_info connection_info;
> +	struct ibmvfc_fabric_login fabric_login;
>  } __packed __aligned(8);
>  
>  enum ibmvfc_target_action {
> @@ -921,6 +939,8 @@ struct ibmvfc_host {
>  	struct work_struct rport_add_work_q;
>  	wait_queue_head_t init_wait_q;
>  	wait_queue_head_t work_wait_q;
> +	__be64 fabric_capabilities;
> +	unsigned int login_cap_index;

Lets drop these as they serve no purpose for Linux. If the spec changes to
introduce capabilites releveant to Linux we can add it then.

-Tyrel

>  };
>  
>  #define DBG_CMD(CMD) do { if (ibmvfc_debug) CMD; } while (0)
> 



^ permalink raw reply

* Re: [PATCH v2] KVM: PPC: Book3S HV: Add H_FAC_UNAVAIL mapping for tracing exits
From: Harsh Prateek Bora @ 2026-05-07  5:24 UTC (permalink / raw)
  To: Gautam Menghani, maddy, npiggin, mpe, chleroy
  Cc: Gautam Menghani, linuxppc-dev, kvm, linux-kernel
In-Reply-To: <20260428084534.49781-1-Gautam.Menghani@ibm.com>



On 28/04/26 2:15 pm, Gautam Menghani wrote:
> From: Gautam Menghani <gautam@linux.ibm.com>
> 
> The macro kvm_trace_symbol_exit is used for providing the mappings
> for the trap vectors and their names. Add mapping for H_FAC_UNAVAIL so that
> trap reason is displayed as string instead of a vector number when using
> the kvm_guest_exit tracepoint.
> 
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
> v2:
> 1. Remove the trailing comma after last element
> 
>   arch/powerpc/kvm/trace_book3s.h | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
> index 9260ddbd557f..5d272c115331 100644
> --- a/arch/powerpc/kvm/trace_book3s.h
> +++ b/arch/powerpc/kvm/trace_book3s.h
> @@ -28,6 +28,7 @@
>   	{0xea0, "H_VIRT"}, \
>   	{0xf00, "PERFMON"}, \
>   	{0xf20, "ALTIVEC"}, \
> -	{0xf40, "VSX"}
> +	{0xf40, "VSX"}, \
> +	{0xf80, "H_FAC_UNAVAIL"}

Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com>


>   
>   #endif



^ permalink raw reply

* Re: [PATCH 4/5] ibmvfc: use async sub-queue for FPIN messages
From: Tyrel Datwyler @ 2026-05-07  5:41 UTC (permalink / raw)
  To: davemarq, James E.J. Bottomley, Martin K. Petersen,
	Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy (CS GROUP)
  Cc: linux-kernel, linux-scsi, linuxppc-dev, Brian King, Greg Joyce,
	Kyle Mahlkuch
In-Reply-To: <20260408-ibmvfc-fpin-support-v1-4-52b06c464e03@linux.ibm.com>

On 4/8/26 10:07 AM, Dave Marquardt via B4 Relay wrote:
> From: Dave Marquardt <davemarq@linux.ibm.com>
> 
> - allocate async sub-queue
> - allocate interrupt and set up handler
> - negotiate use of async sub-queue with NPIV (VIOS)
> - refactor ibmvfc_basic_fpin_to_desc() and ibmvfc_full_fpin_to_desc()
>   into common routine
> - add KUnit test to verify async sub-queue is allocated

Again more descriptive commit log message required here. Also, this looks like a
lot of things being implmented. Can this be broken into multiple patches? It
sure looks like a bunch of functional changes that build on each other.

> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c       | 325 ++++++++++++++++++++++++++++++++---
>  drivers/scsi/ibmvscsi/ibmvfc.h       |  29 +++-
>  drivers/scsi/ibmvscsi/ibmvfc_kunit.c |  52 +++---
>  3 files changed, 363 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index 803fc3caa14d..26e39b367022 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -1471,6 +1471,13 @@ static void ibmvfc_gather_partition_info(struct ibmvfc_host *vhost)
>  	of_node_put(rootdn);
>  }
>  
> +static __be64 ibmvfc_npiv_chan_caps[] = {
> +	cpu_to_be64(IBMVFC_CAN_USE_CHANNELS | IBMVFC_USE_ASYNC_SUBQ |
> +		    IBMVFC_YES_SCSI | IBMVFC_CAN_HANDLE_FPIN),
> +	cpu_to_be64(IBMVFC_CAN_USE_CHANNELS),
> +};
> +#define IBMVFC_NPIV_CHAN_CAPS_SIZE (sizeof(ibmvfc_npiv_chan_caps)/sizeof(__be64))
> +

I really don't understand what you are doing here? You seem to be definig
various sets of capabilities, but how does the driver decide which set to use?
As far as I can tell the index is increased and the capabilities decrease each
time a transport event is received. This looks like maybe its just a testing hack.

>  /**
>   * ibmvfc_set_login_info - Setup info for NPIV login
>   * @vhost:	ibmvfc host struct
> @@ -1486,6 +1493,8 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
>  	const char *location;
>  	u16 max_cmds;
>  
> +	ENTER;
> +
>  	max_cmds = scsi_qdepth + IBMVFC_NUM_INTERNAL_REQ;
>  	if (mq_enabled)
>  		max_cmds += (scsi_qdepth + IBMVFC_NUM_INTERNAL_SUBQ_REQ) *
> @@ -1509,8 +1518,12 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
>  		cpu_to_be64(IBMVFC_CAN_MIGRATE | IBMVFC_CAN_SEND_VF_WWPN |
>  			    IBMVFC_CAN_USE_NOOP_CMD);
>  
> -	if (vhost->mq_enabled || vhost->using_channels)
> -		login_info->capabilities |= cpu_to_be64(IBMVFC_CAN_USE_CHANNELS);
> +	if (vhost->mq_enabled || vhost->using_channels) {
> +		if (vhost->login_cap_index >= IBMVFC_NPIV_CHAN_CAPS_SIZE)
> +			login_info->capabilities |= cpu_to_be64(IBMVFC_CAN_USE_CHANNELS);
> +		else
> +			login_info->capabilities |= ibmvfc_npiv_chan_caps[vhost->login_cap_index];
> +	}
>  
>  	login_info->async.va = cpu_to_be64(vhost->async_crq.msg_token);
>  	login_info->async.len = cpu_to_be32(async_crq->size *
> @@ -1524,6 +1537,8 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
>  	location = of_get_property(of_node, "ibm,loc-code", NULL);
>  	location = location ? location : dev_name(vhost->dev);
>  	strscpy(login_info->drc_name, location, sizeof(login_info->drc_name));
> +
> +	LEAVE;
>  }
>  
>  /**
> @@ -3323,7 +3338,7 @@ ibmvfc_common_fpin_to_desc(u8 fpin_status, __be64 wwpn, __be16 modifier,
>   * non-NULL - pointer to populated struct fc_els_fpin
>   */
>  static struct fc_els_fpin *
> -/*XXX*/ibmvfc_basic_fpin_to_desc(struct ibmvfc_async_crq *crq)

I mentioned this /*XXX*/ in an earlier patch. This needs to be fixed in that patch.

> +ibmvfc_basic_fpin_to_desc(struct ibmvfc_async_crq *crq)
>  {
>  	return ibmvfc_common_fpin_to_desc(crq->fpin_status, crq->wwpn,
>  					  cpu_to_be16(0),
> @@ -3332,6 +3347,29 @@ static struct fc_els_fpin *
>  					  cpu_to_be32(1));
>  }
>  
> +/**
> + * ibmvfc_full_fpin_to_desc(): allocate and populate a struct fc_els_fpin struct
> + * containing a descriptor.
> + * @ibmvfc_fpin: Pointer to async subq FPIN data
> + *
> + * Allocate a struct fc_els_fpin containing a descriptor and populate
> + * based on data from *ibmvfc_fpin.
> + *
> + * Return:
> + * NULL     - unable to allocate structure
> + * non-NULL - pointer to populated struct fc_els_fpin
> + */
> +static struct fc_els_fpin *
> +ibmvfc_full_fpin_to_desc(struct ibmvfc_async_subq *ibmvfc_fpin)
> +{
> +	return ibmvfc_common_fpin_to_desc(ibmvfc_fpin->fpin_status,
> +					  ibmvfc_fpin->wwpn,
> +					  cpu_to_be16(0),
> +					  cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_PERIOD),
> +					  cpu_to_be32(IBMVFC_FPIN_DEFAULT_EVENT_THRESHOLD),
> +					  cpu_to_be32(1));
> +}
> +
>  /**
>   * ibmvfc_handle_async - Handle an async event from the adapter
>   * @crq:	crq to process
> @@ -3449,6 +3487,120 @@ VISIBLE_IF_KUNIT void ibmvfc_handle_async(struct ibmvfc_async_crq *crq,
>  }
>  EXPORT_SYMBOL_IF_KUNIT(ibmvfc_handle_async);
>  
> +VISIBLE_IF_KUNIT void ibmvfc_handle_asyncq(struct ibmvfc_crq *crq_instance,
> +					   struct ibmvfc_host *vhost,
> +					   struct list_head *evt_doneq)
> +{
> +	struct ibmvfc_async_subq *crq = (struct ibmvfc_async_subq *)crq_instance;
> +	const struct ibmvfc_async_desc *desc = ibmvfc_get_ae_desc(be16_to_cpu(crq->event));
> +	struct ibmvfc_target *tgt;
> +	struct fc_els_fpin *fpin;
> +
> +	ibmvfc_log(vhost, desc->log_level,
> +		   "%s event received. wwpn: %llx, node_name: %llx%s event 0x%x\n",
> +		   desc->desc, be64_to_cpu(crq->wwpn), be64_to_cpu(crq->id.node_name),
> +		   ibmvfc_get_link_state(crq->link_state), be16_to_cpu(crq->event));

Was there no way to not copy/paste what looks like basically ibmvfc_handle_async
into ibmvfc_handle_asyncq? This is a bunch of unnecessary code bloat. The major
difference seems that crq->event is be64 on the standard CRQ and be16 on a
sub-crq and accessing certain fields differently.

Again I think maybe we need to consider moving all the async work into a workqueue.

> +
> +	switch (be16_to_cpu(crq->event)) {
> +	case IBMVFC_AE_RESUME:
> +		switch (crq->link_state) {
> +		case IBMVFC_AE_LS_LINK_DOWN:
> +			ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
> +			break;
> +		case IBMVFC_AE_LS_LINK_DEAD:
> +			ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
> +			break;
> +		case IBMVFC_AE_LS_LINK_UP:
> +		case IBMVFC_AE_LS_LINK_BOUNCED:
> +		default:
> +			vhost->events_to_log |= IBMVFC_AE_LINKUP;
> +			vhost->delay_init = 1;
> +			__ibmvfc_reset_host(vhost);
> +			break;
> +		}
> +
> +		break;
> +	case IBMVFC_AE_LINK_UP:
> +		vhost->events_to_log |= IBMVFC_AE_LINKUP;
> +		vhost->delay_init = 1;
> +		__ibmvfc_reset_host(vhost);
> +		break;
> +	case IBMVFC_AE_SCN_FABRIC:
> +	case IBMVFC_AE_SCN_DOMAIN:
> +		vhost->events_to_log |= IBMVFC_AE_RSCN;
> +		if (vhost->state < IBMVFC_HALTED) {
> +			vhost->delay_init = 1;
> +			__ibmvfc_reset_host(vhost);
> +		}
> +		break;
> +	case IBMVFC_AE_SCN_NPORT:
> +	case IBMVFC_AE_SCN_GROUP:
> +		vhost->events_to_log |= IBMVFC_AE_RSCN;
> +		ibmvfc_reinit_host(vhost);
> +		break;
> +	case IBMVFC_AE_ELS_LOGO:
> +	case IBMVFC_AE_ELS_PRLO:
> +	case IBMVFC_AE_ELS_PLOGI:
> +		list_for_each_entry(tgt, &vhost->targets, queue) {
> +			if (!crq->wwpn && !crq->id.node_name)
> +				break;
> +	#ifdef notyet
> +			if (cpu_to_be64(tgt->scsi_id) != acrq->scsi_id)
> +				continue;
> +	#endif
> +			if (crq->wwpn && cpu_to_be64(tgt->ids.port_name) != crq->wwpn)
> +				continue;
> +			if (crq->id.node_name &&
> +			    cpu_to_be64(tgt->ids.node_name) != crq->id.node_name)
> +				continue;
> +			if (tgt->need_login && be64_to_cpu(crq->event) == IBMVFC_AE_ELS_LOGO)
> +				tgt->logo_rcvd = 1;
> +			if (!tgt->need_login || be64_to_cpu(crq->event) == IBMVFC_AE_ELS_PLOGI) {
> +				ibmvfc_del_tgt(tgt);
> +				ibmvfc_reinit_host(vhost);
> +			}
> +		}
> +		break;
> +	case IBMVFC_AE_LINK_DOWN:
> +	case IBMVFC_AE_ADAPTER_FAILED:
> +		ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
> +		break;
> +	case IBMVFC_AE_LINK_DEAD:
> +		ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD);
> +		break;
> +	case IBMVFC_AE_HALT:
> +		ibmvfc_link_down(vhost, IBMVFC_HALTED);
> +		break;
> +	case IBMVFC_AE_FPIN:
> +		if (!crq->wwpn && !crq->id.node_name)
> +			break;
> +		list_for_each_entry(tgt, &vhost->targets, queue) {
> +			if (crq->wwpn && cpu_to_be64(tgt->ids.port_name) != crq->wwpn)
> +				continue;
> +			if (crq->id.node_name &&
> +			    cpu_to_be64(tgt->ids.node_name) != crq->id.node_name)
> +				continue;
> +			if (!tgt->rport)
> +				continue;
> +			fpin = ibmvfc_full_fpin_to_desc(crq);
> +			if (fpin) {
> +				fc_host_fpin_rcv(tgt->vhost->host,
> +						 sizeof(*fpin) + be32_to_cpu(fpin->desc_len),
> +						 (char *)fpin, 0);
> +				kfree(fpin);
> +			} else
> +				dev_err(vhost->dev,
> +					"FPIN event %u received, unable to process\n",
> +					crq->fpin_status);
> +		}
> +		break;
> +	default:
> +		dev_err(vhost->dev, "Unknown async event received: %d\n", crq->event);
> +		break;
> +	}
> +}
> +EXPORT_SYMBOL_IF_KUNIT(ibmvfc_handle_asyncq);
> +
>  /**
>   * ibmvfc_handle_crq - Handles and frees received events in the CRQ
>   * @crq:	Command/Response queue
> @@ -3500,6 +3652,7 @@ VISIBLE_IF_KUNIT void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_ho
>  			dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format);
>  			ibmvfc_purge_requests(vhost, DID_ERROR);
>  			ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN);
> +			vhost->login_cap_index++;
>  			ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_RESET);
>  		} else {
>  			dev_err(vhost->dev, "Received unknown transport event from partner (rc=%d)\n", crq->format);
> @@ -4078,6 +4231,13 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost
>  	spin_unlock(&evt->queue->l_lock);
>  }
>  
> +/**
> + * ibmvfc_next_scrq - Returns the next entry in message subqueue
> + * @scrq:	Pointer to message subqueue
> + *
> + * Returns:
> + *	Pointer to next entry in queue / NULL if empty
> + **/
>  static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_queue *scrq)
>  {
>  	struct ibmvfc_crq *crq;
> @@ -4093,6 +4253,65 @@ static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_queue *scrq)
>  	return crq;
>  }
>  
> +static void ibmvfc_drain_async_subq(struct ibmvfc_queue *scrq)
> +{
> +	struct ibmvfc_crq *crq;
> +	struct ibmvfc_event *evt, *temp;
> +	unsigned long flags;
> +	int done = 0;
> +	LIST_HEAD(evt_doneq);
> +
> +	ENTER;
> +
> +	spin_lock_irqsave(scrq->q_lock, flags);
> +	while (!done) {
> +		while ((crq = ibmvfc_next_scrq(scrq)) != NULL) {
> +			ibmvfc_handle_asyncq(crq, scrq->vhost, &evt_doneq);
> +			crq->valid = 0;
> +			wmb();
> +		}
> +
> +		ibmvfc_toggle_scrq_irq(scrq, 1);
> +		crq = ibmvfc_next_scrq(scrq);
> +		if (crq != NULL) {
> +			ibmvfc_toggle_scrq_irq(scrq, 0);
> +			ibmvfc_handle_asyncq(crq, scrq->vhost, &evt_doneq);
> +			crq->valid = 0;
> +			wmb();
> +		} else
> +			done = 1;
> +	}
> +	spin_unlock_irqrestore(scrq->q_lock, flags);
> +
> +	list_for_each_entry_safe(evt, temp, &evt_doneq, queue_list) {
> +		timer_delete(&evt->timer);
> +		list_del(&evt->queue_list);
> +		ibmvfc_trc_end(evt);
> +		evt->done(evt);
> +	}
> +	LEAVE;
> +}
> +
> +/**
> + * ibmvfc_interrupt_asyncq - Handle an async event from the adapter
> + * @irq:           interrupt request
> + * @scrq_instance: async subq
> + *
> + **/
> +static irqreturn_t ibmvfc_interrupt_asyncq(int irq, void *scrq_instance)
> +{
> +	struct ibmvfc_queue *scrq = (struct ibmvfc_queue *)scrq_instance;
> +
> +	ENTER;
> +
> +	ibmvfc_toggle_scrq_irq(scrq, 0);
> +	ibmvfc_drain_async_subq(scrq);
> +
> +	LEAVE;
> +
> +	return IRQ_HANDLED;
> +}
> +
>  static void ibmvfc_drain_sub_crq(struct ibmvfc_queue *scrq)
>  {
>  	struct ibmvfc_crq *crq;
> @@ -5316,6 +5535,8 @@ static void ibmvfc_channel_setup_done(struct ibmvfc_event *evt)
>  			for (i = 0; i < active_queues; i++)
>  				scrqs->scrqs[i].vios_cookie =
>  					be64_to_cpu(setup->channel_handles[i]);
> +			scrqs->async_scrq->vios_cookie =
> +				be64_to_cpu(setup->asyncSubqHandle);
>  
>  			ibmvfc_dbg(vhost, "Using %u channels\n",
>  				   vhost->scsi_scrqs.active_queues);
> @@ -5366,6 +5587,7 @@ static void ibmvfc_channel_setup(struct ibmvfc_host *vhost)
>  		setup_buf->num_scsi_subq_channels = cpu_to_be32(num_channels);
>  		for (i = 0; i < num_channels; i++)
>  			setup_buf->channel_handles[i] = cpu_to_be64(scrqs->scrqs[i].cookie);
> +		setup_buf->asyncSubqHandle = cpu_to_be64(scrqs->async_scrq->cookie);
>  	}
>  
>  	ibmvfc_init_event(evt, ibmvfc_channel_setup_done, IBMVFC_MAD_FORMAT);
> @@ -5461,6 +5683,8 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
>  	unsigned int npiv_max_sectors;
>  	int level = IBMVFC_DEFAULT_LOG_LEVEL;
>  
> +	ENTER;
> +
>  	switch (mad_status) {
>  	case IBMVFC_MAD_SUCCESS:
>  		ibmvfc_free_event(evt);
> @@ -5540,6 +5764,8 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt)
>  		ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_QUERY);
>  		wake_up(&vhost->work_wait_q);
>  	}
> +
> +	LEAVE;
>  }
>  
>  /**
> @@ -6188,14 +6414,26 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost)
>  	return retrc;
>  }
>  
> -static int ibmvfc_register_channel(struct ibmvfc_host *vhost,
> -				   struct ibmvfc_channels *channels,
> -				   int index)
> +static inline char *ibmvfc_channel_index(struct ibmvfc_channels *channels,
> +					 struct ibmvfc_queue *scrq,
> +					 char *buf, size_t bufsize)
> +{
> +	if (scrq < channels->scrqs || scrq >= channels->scrqs + channels->active_queues)
> +		strscpy(buf, "async", 6);
> +	else
> +		snprintf(buf, bufsize, "%ld", scrq - channels->scrqs);
> +	return buf;
> +}
> +
> +static int ibmvfc_register_channel_handler(struct ibmvfc_host *vhost,
> +					   struct ibmvfc_channels *channels,
> +					   struct ibmvfc_queue *scrq,
> +					   irq_handler_t irq)
>  {
>  	struct device *dev = vhost->dev;
>  	struct vio_dev *vdev = to_vio_dev(dev);
> -	struct ibmvfc_queue *scrq = &channels->scrqs[index];
>  	int rc = -ENOMEM;
> +	char buf[16];
>  
>  	ENTER;
>  
> @@ -6214,20 +6452,23 @@ static int ibmvfc_register_channel(struct ibmvfc_host *vhost,
>  
>  	if (!scrq->irq) {
>  		rc = -EINVAL;
> -		dev_err(dev, "Error mapping sub-crq[%d] irq\n", index);
> +		dev_err(dev, "Error mapping sub-crq[%s] irq\n",
> +			ibmvfc_channel_index(channels, scrq, buf, sizeof(buf)));
>  		goto irq_failed;
>  	}
>  
>  	switch (channels->protocol) {
>  	case IBMVFC_PROTO_SCSI:
> -		snprintf(scrq->name, sizeof(scrq->name), "ibmvfc-%x-scsi%d",
> -			 vdev->unit_address, index);
> -		scrq->handler = ibmvfc_interrupt_mq;
> +		snprintf(scrq->name, sizeof(scrq->name), "ibmvfc-%x-scsi%s",
> +			 vdev->unit_address,
> +			 ibmvfc_channel_index(channels, scrq, buf, sizeof(buf)));
> +		scrq->handler = irq;
>  		break;
>  	case IBMVFC_PROTO_NVME:
> -		snprintf(scrq->name, sizeof(scrq->name), "ibmvfc-%x-nvmf%d",
> -			 vdev->unit_address, index);
> -		scrq->handler = ibmvfc_interrupt_mq;
> +		snprintf(scrq->name, sizeof(scrq->name), "ibmvfc-%x-nvmf%s",
> +			 vdev->unit_address,
> +			 ibmvfc_channel_index(channels, scrq, buf, sizeof(buf)));
> +		scrq->handler = irq;
>  		break;
>  	default:
>  		dev_err(dev, "Unknown channel protocol (%d)\n",
> @@ -6238,12 +6479,14 @@ static int ibmvfc_register_channel(struct ibmvfc_host *vhost,
>  	rc = request_irq(scrq->irq, scrq->handler, 0, scrq->name, scrq);
>  
>  	if (rc) {
> -		dev_err(dev, "Couldn't register sub-crq[%d] irq\n", index);
> +		dev_err(dev, "Couldn't register sub-crq[%s] irq\n",
> +			ibmvfc_channel_index(channels, scrq, buf, sizeof(buf)));
>  		irq_dispose_mapping(scrq->irq);
>  		goto irq_failed;
>  	}
>  
> -	scrq->hwq_id = index;
> +	if (scrq >= channels->scrqs && scrq < channels->scrqs + channels->active_queues)
> +		scrq->hwq_id = scrq - channels->scrqs;
>  
>  	LEAVE;
>  	return 0;
> @@ -6257,13 +6500,21 @@ static int ibmvfc_register_channel(struct ibmvfc_host *vhost,
>  	return rc;
>  }
>  
> +static inline int
> +ibmvfc_register_channel(struct ibmvfc_host *vhost,
> +			struct ibmvfc_channels *channels,
> +			struct ibmvfc_queue *scrq)
> +{
> +	return ibmvfc_register_channel_handler(vhost, channels, scrq, ibmvfc_interrupt_mq);
> +}
> +
>  static void ibmvfc_deregister_channel(struct ibmvfc_host *vhost,
>  				      struct ibmvfc_channels *channels,
> -				      int index)
> +				      struct ibmvfc_queue *scrq)
>  {
>  	struct device *dev = vhost->dev;
>  	struct vio_dev *vdev = to_vio_dev(dev);
> -	struct ibmvfc_queue *scrq = &channels->scrqs[index];
> +	char buf[16];
>  	long rc;
>  
>  	ENTER;
> @@ -6278,7 +6529,8 @@ static void ibmvfc_deregister_channel(struct ibmvfc_host *vhost,
>  	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
>  
>  	if (rc)
> -		dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc);
> +		dev_err(dev, "Failed to free sub-crq[%s]: rc=%ld\n",
> +			ibmvfc_channel_index(channels, scrq, buf, sizeof(buf)), rc);
>  
>  	/* Clean out the queue */
>  	memset(scrq->msgs.crq, 0, PAGE_SIZE);
> @@ -6296,10 +6548,19 @@ static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *vhost,
>  	if (!vhost->mq_enabled || !channels->scrqs)
>  		return;
>  
> +	if (ibmvfc_register_channel_handler(vhost, channels,
> +					    channels->async_scrq,
> +					    ibmvfc_interrupt_asyncq))
> +		return;
> +
>  	for (i = 0; i < channels->max_queues; i++) {
> -		if (ibmvfc_register_channel(vhost, channels, i)) {
> +		if (ibmvfc_register_channel(vhost, channels, &channels->scrqs[i])) {
>  			for (j = i; j > 0; j--)
> -				ibmvfc_deregister_channel(vhost, channels, j - 1);
> +				ibmvfc_deregister_channel(
> +					vhost, channels, &channels->scrqs[j - 1]);
> +			ibmvfc_deregister_channel(vhost, channels,
> +							channels->async_scrq);
> +
>  			vhost->do_enquiry = 0;
>  			return;
>  		}
> @@ -6318,7 +6579,8 @@ static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *vhost,
>  		return;
>  
>  	for (i = 0; i < channels->max_queues; i++)
> -		ibmvfc_deregister_channel(vhost, channels, i);
> +		ibmvfc_deregister_channel(vhost, channels, &channels->scrqs[i]);
> +	ibmvfc_deregister_channel(vhost, channels, channels->async_scrq);
>  
>  	LEAVE;
>  }
> @@ -6334,6 +6596,21 @@ static int ibmvfc_alloc_channels(struct ibmvfc_host *vhost,
>  	if (!channels->scrqs)
>  		return -ENOMEM;
>  
> +	channels->async_scrq = kzalloc_obj(*channels->async_scrq, GFP_KERNEL);
> +
> +	if (!channels->async_scrq) {
> +		kfree(channels->scrqs);
> +		return -ENOMEM;
> +	}
> +
> +	rc = ibmvfc_alloc_queue(vhost, channels->async_scrq,
> +				IBMVFC_SUB_CRQ_FMT);
> +	if (rc) {
> +		kfree(channels->scrqs);
> +		kfree(channels->async_scrq);
> +		return rc;
> +	}
> +
>  	for (i = 0; i < channels->max_queues; i++) {
>  		scrq = &channels->scrqs[i];
>  		rc = ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT);
> @@ -6345,6 +6622,9 @@ static int ibmvfc_alloc_channels(struct ibmvfc_host *vhost,
>  			kfree(channels->scrqs);
>  			channels->scrqs = NULL;
>  			channels->active_queues = 0;
> +			ibmvfc_free_queue(vhost, channels->async_scrq);
> +			kfree(channels->async_scrq);
> +			channels->async_scrq = NULL;
>  			return rc;
>  		}
>  	}
> @@ -6629,6 +6909,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
>  	vhost->using_channels = 0;
>  	vhost->do_enquiry = 1;
>  	vhost->scan_timeout = 0;
> +	vhost->login_cap_index = 0;
>  
>  	strcpy(vhost->partition_name, "UNKNOWN");
>  	init_waitqueue_head(&vhost->work_wait_q);
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
> index 4f680c5d9558..b9f22613d144 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.h
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.h
> @@ -182,6 +182,9 @@ struct ibmvfc_npiv_login {
>  #define IBMVFC_CAN_HANDLE_FPIN		0x04
>  #define IBMVFC_CAN_USE_MAD_VERSION	0x08
>  #define IBMVFC_CAN_SEND_VF_WWPN		0x10
> +#define IBMVFC_YES_NVMEOF		0x20
> +#define IBMVFC_YES_SCSI			0x40
> +#define IBMVFC_USE_ASYNC_SUBQ		0x100
>  #define IBMVFC_CAN_USE_NOOP_CMD		0x200
>  	__be64 node_name;
>  	struct srp_direct_buf async;
> @@ -231,6 +234,7 @@ struct ibmvfc_npiv_login_resp {
>  #define IBMVFC_CAN_SUPPORT_CHANNELS	0x80
>  #define IBMVFC_SUPPORT_NVMEOF		0x100
>  #define IBMVFC_SUPPORT_SCSI		0x200
> +#define IBMVFC_SUPPORT_ASYNC_SUBQ	0x800
>  #define IBMVFC_SUPPORT_NOOP_CMD		0x1000
>  	__be32 max_cmds;
>  	__be32 scsi_id_sz;
> @@ -565,7 +569,7 @@ struct ibmvfc_channel_setup_mad {
>  	struct srp_direct_buf buffer;
>  } __packed __aligned(8);
>  
> -#define IBMVFC_MAX_CHANNELS	502
> +#define IBMVFC_MAX_CHANNELS	501
>  
>  struct ibmvfc_channel_setup {
>  	__be32 flags;
> @@ -580,6 +584,7 @@ struct ibmvfc_channel_setup {
>  	struct srp_direct_buf buffer;
>  	__be64 reserved2[5];
>  	__be64 channel_handles[IBMVFC_MAX_CHANNELS];
> +	__be64 asyncSubqHandle;
>  } __packed __aligned(8);
>  
>  struct ibmvfc_connection_info {
> @@ -710,6 +715,25 @@ struct ibmvfc_async_crq {
>  	__be64 reserved;
>  } __packed __aligned(8);
>  
> +struct ibmvfc_async_subq {
> +	volatile u8 valid;
> +#define IBMVFC_ASYNC_ID_IS_ASSOC_ID	0x01
> +#define IBMVFC_FC_EEH			0x04
> +#define IBMVFC_FC_FW_UPDATE		0x08
> +#define IBMVFC_FC_FW_DUMP		0x10
> +	u8 flags;
> +	u8 link_state;
> +	u8 fpin_status;
> +	__be16 event;
> +	__be16 pad;
> +	volatile __be64 wwpn;
> +	volatile __be64 nport_id;
> +	union {
> +		__be64 node_name;
> +		__be64 assoc_id;
> +	} id;
> +} __packed __aligned(8);
> +
>  union ibmvfc_iu {
>  	struct ibmvfc_mad_common mad_common;
>  	struct ibmvfc_npiv_login_mad npiv_login;
> @@ -849,6 +873,7 @@ struct ibmvfc_queue {
>  
>  struct ibmvfc_channels {
>  	struct ibmvfc_queue *scrqs;
> +	struct ibmvfc_queue *async_scrq;
>  	enum ibmvfc_protocol protocol;
>  	unsigned int active_queues;
>  	unsigned int desired_queues;
> @@ -989,6 +1014,8 @@ static inline int ibmvfc_check_caps(struct ibmvfc_host *vhost, unsigned long cap
>  
>  #ifdef VISIBLE_IF_KUNIT
>  VISIBLE_IF_KUNIT void ibmvfc_handle_async(struct ibmvfc_async_crq *crq, struct ibmvfc_host *vhost);
> +VISIBLE_IF_KUNIT void ibmvfc_handle_asyncq(struct ibmvfc_crq *crq_instance,
> +					   struct ibmvfc_host *vhost, struct list_head *evt_doneq);
>  VISIBLE_IF_KUNIT struct list_head *ibmvfc_get_headp(void);
>  VISIBLE_IF_KUNIT void ibmvfc_handle_crq(struct ibmvfc_crq *crq,
>  					struct ibmvfc_host *vhost,
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc_kunit.c b/drivers/scsi/ibmvscsi/ibmvfc_kunit.c
> index 3359e4ebebe2..3a41127c4e81 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc_kunit.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc_kunit.c
> @@ -22,14 +22,14 @@ MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
>  static void ibmvfc_handle_fpin_event_test(struct kunit *test)
>  {
>  	u64 *stats[IBMVFC_AE_FPIN_CONGESTION_CLEARED + 1] = { NULL };
> -	u64 post[IBMVFC_AE_FPIN_CONGESTION_CLEARED + 1];
> -	u64 pre[IBMVFC_AE_FPIN_CONGESTION_CLEARED + 1];
>  	enum ibmvfc_ae_fpin_status fs;
> -	struct ibmvfc_async_crq crq;
> +	struct ibmvfc_async_subq crq;
>  	struct ibmvfc_target *tgt;
>  	struct ibmvfc_host *vhost;
>  	struct list_head *queue;
>  	struct list_head *headp;
> +	LIST_HEAD(evt_doneq);
> +	u64 pre, post;
>  
>  
>  	headp = ibmvfc_get_headp();
> @@ -52,31 +52,23 @@ static void ibmvfc_handle_fpin_event_test(struct kunit *test)
>  		crq.valid = 0x80;
>  		crq.link_state = IBMVFC_AE_LS_LINK_UP;
>  		crq.fpin_status = fs;
> -		crq.event = cpu_to_be64(IBMVFC_AE_FPIN);
> -		crq.scsi_id = cpu_to_be64(tgt->scsi_id);
> +		crq.event = cpu_to_be16(IBMVFC_AE_FPIN);
>  		crq.wwpn = cpu_to_be64(tgt->wwpn);
> -		crq.node_name = cpu_to_be64(tgt->ids.node_name);
> -		pre[fs] = *stats[fs];
> -		ibmvfc_handle_async(&crq, vhost);
> -		post[fs] = *stats[fs];
> -		KUNIT_EXPECT_EQ(test, post[fs], pre[fs]+1);
> +		crq.id.node_name = cpu_to_be64(tgt->ids.node_name);
> +		pre = *stats[fs];
> +		ibmvfc_handle_asyncq((struct ibmvfc_crq *)&crq, vhost, &evt_doneq);
> +		post = *stats[fs];
> +		KUNIT_EXPECT_EQ(test, post, pre+1);
>  	}
>  
>  	/* bad path */
> -	for (fs = IBMVFC_AE_FPIN_LINK_CONGESTED; fs <= IBMVFC_AE_FPIN_CONGESTION_CLEARED; fs++)
> -		pre[fs] = *stats[fs];
>  	crq.valid = 0x80;
>  	crq.link_state = IBMVFC_AE_LS_LINK_UP;
>  	crq.fpin_status = 0; /* bad value */
> -	crq.event = cpu_to_be64(IBMVFC_AE_FPIN);
> -	crq.scsi_id = cpu_to_be64(tgt->scsi_id);
> +	crq.event = cpu_to_be16(IBMVFC_AE_FPIN);
>  	crq.wwpn = cpu_to_be64(tgt->wwpn);
> -	crq.node_name = cpu_to_be64(tgt->ids.node_name);
> -	ibmvfc_handle_async(&crq, vhost);
> -	for (fs = IBMVFC_AE_FPIN_LINK_CONGESTED; fs <= IBMVFC_AE_FPIN_CONGESTION_CLEARED; fs++) {
> -		post[fs] = *stats[fs];
> -		KUNIT_EXPECT_EQ(test, pre[fs], post[fs]);
> -	}
> +	crq.id.node_name = cpu_to_be64(tgt->ids.node_name);
> +	ibmvfc_handle_asyncq((struct ibmvfc_crq *)&crq, vhost, &evt_doneq);
>  }
>  
>  /**
> @@ -105,9 +97,29 @@ static void ibmvfc_noop_test(struct kunit *test)
>  	ibmvfc_handle_crq(&crq, vhost, &evtq);
>  }
>  
> +/**
> + * ibmvfc_async_subq_test - unit test for allocating async subqueue
> + * @test: pointer to kunit structure
> + *
> + * Return: void
> + */
> +static void ibmvfc_async_subq_test(struct kunit *test)
> +{
> +	struct ibmvfc_host *vhost;
> +	struct list_head *queue;
> +	struct list_head *headp;
> +
> +	headp = ibmvfc_get_headp();
> +	queue = headp->next;
> +	vhost = container_of(queue, struct ibmvfc_host, queue);
> +
> +	KUNIT_EXPECT_NOT_NULL(test, vhost->scsi_scrqs.async_scrq);
> +}
> +
>  static struct kunit_case ibmvfc_fpin_test_cases[] = {
>  	KUNIT_CASE(ibmvfc_handle_fpin_event_test),
>  	KUNIT_CASE(ibmvfc_noop_test),
> +	KUNIT_CASE(ibmvfc_async_subq_test),
>  	{},
>  };
>  
> 



^ permalink raw reply

* Re: [PATCH 5/5] ibmvfc: handle extended FPIN events
From: Tyrel Datwyler @ 2026-05-07  5:48 UTC (permalink / raw)
  To: davemarq, James E.J. Bottomley, Martin K. Petersen,
	Madhavan Srinivasan, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy (CS GROUP)
  Cc: linux-kernel, linux-scsi, linuxppc-dev, Brian King, Greg Joyce,
	Kyle Mahlkuch
In-Reply-To: <20260408-ibmvfc-fpin-support-v1-5-52b06c464e03@linux.ibm.com>

On 4/8/26 10:07 AM, Dave Marquardt via B4 Relay wrote:
> From: Dave Marquardt <davemarq@linux.ibm.com>
> 
> - negotiate use of extended FPIN events with NPIV (VIOS)
> - add code to parse and handle extended FPIN events
> - add KUnit test to test extended FPIN event handling

Same nit here as the previous 4 patches.

> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c       | 45 ++++++++++++++---
>  drivers/scsi/ibmvscsi/ibmvfc.h       | 31 ++++++++++++
>  drivers/scsi/ibmvscsi/ibmvfc_kunit.c | 97 +++++++++++++++++++++++++++++++++---
>  3 files changed, 161 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index 26e39b367022..5b2b861a34c2 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -1472,6 +1472,9 @@ static void ibmvfc_gather_partition_info(struct ibmvfc_host *vhost)
>  }
>  
>  static __be64 ibmvfc_npiv_chan_caps[] = {
> +	cpu_to_be64(IBMVFC_CAN_USE_CHANNELS | IBMVFC_USE_ASYNC_SUBQ |
> +		    IBMVFC_YES_SCSI | IBMVFC_CAN_HANDLE_FPIN |
> +		    IBMVFC_CAN_HANDLE_FPIN_EXT),
>  	cpu_to_be64(IBMVFC_CAN_USE_CHANNELS | IBMVFC_USE_ASYNC_SUBQ |
>  		    IBMVFC_YES_SCSI | IBMVFC_CAN_HANDLE_FPIN),
>  	cpu_to_be64(IBMVFC_CAN_USE_CHANNELS),
> @@ -3370,6 +3373,28 @@ ibmvfc_full_fpin_to_desc(struct ibmvfc_async_subq *ibmvfc_fpin)
>  					  cpu_to_be32(1));
>  }
>  
> +/**
> + * ibmvfc_ext_fpin_to_desc(): allocate and populate a struct fc_els_fpin struct
> + * containing a descriptor.
> + * @ibmvfc_fpin: Pointer to async subq FPIN data
> + *
> + * Allocate a struct fc_els_fpin containing a descriptor and populate
> + * based on data from *ibmvfc_fpin.
> + *
> + * Return:
> + * NULL     - unable to allocate structure
> + * non-NULL - pointer to populated struct fc_els_fpin
> + */
> +static struct fc_els_fpin *
> +ibmvfc_ext_fpin_to_desc(struct ibmvfc_async_subq_fpin *ibmvfc_fpin)
> +{
> +	return ibmvfc_common_fpin_to_desc(ibmvfc_fpin->fpin_status, ibmvfc_fpin->wwpn,
> +					  ibmvfc_fpin->fpin_data.event_type_modifier,
> +					  ibmvfc_fpin->fpin_data.event_threshold,
> +					  ibmvfc_fpin->fpin_data.event_threshold,

I see mention of threshold and period previously. Why in this case is it just
the threshold value passed for both?

-Tyrel

> +					  ibmvfc_fpin->fpin_data.event_data.event_count);
> +}
> +
>  /**
>   * ibmvfc_handle_async - Handle an async event from the adapter
>   * @crq:	crq to process
> @@ -3491,10 +3516,12 @@ VISIBLE_IF_KUNIT void ibmvfc_handle_asyncq(struct ibmvfc_crq *crq_instance,
>  					   struct ibmvfc_host *vhost,
>  					   struct list_head *evt_doneq)
>  {
> +	struct ibmvfc_async_subq_fpin *sqfpin = (struct ibmvfc_async_subq_fpin *)crq_instance;
>  	struct ibmvfc_async_subq *crq = (struct ibmvfc_async_subq *)crq_instance;
>  	const struct ibmvfc_async_desc *desc = ibmvfc_get_ae_desc(be16_to_cpu(crq->event));
>  	struct ibmvfc_target *tgt;
>  	struct fc_els_fpin *fpin;
> +	u8 fpin_status;
>  
>  	ibmvfc_log(vhost, desc->log_level,
>  		   "%s event received. wwpn: %llx, node_name: %llx%s event 0x%x\n",
> @@ -3582,7 +3609,17 @@ VISIBLE_IF_KUNIT void ibmvfc_handle_asyncq(struct ibmvfc_crq *crq_instance,
>  				continue;
>  			if (!tgt->rport)
>  				continue;
> -			fpin = ibmvfc_full_fpin_to_desc(crq);
> +			if ((crq->flags & IBMVFC_ASYNC_IS_FPIN_EXT) == 0) {
> +				fpin = ibmvfc_full_fpin_to_desc(crq);
> +				fpin_status = crq->fpin_status;
> +			} else if (!(sqfpin->fpin_data.flags & IBMVFC_FPIN_EVENT_TYPE_VALID))
> +				dev_err(vhost->dev, "Invalid extended FPIN event received");
> +			else if (!ibmvfc_check_caps(vhost, IBMVFC_SUPPORT_FPIN_EXT))
> +				dev_err(vhost->dev, "Unexpected extended FPIN event received");
> +			else {
> +				fpin = ibmvfc_ext_fpin_to_desc(sqfpin);
> +				fpin_status = sqfpin->fpin_status;
> +			}
>  			if (fpin) {
>  				fc_host_fpin_rcv(tgt->vhost->host,
>  						 sizeof(*fpin) + be32_to_cpu(fpin->desc_len),
> @@ -3591,12 +3628,8 @@ VISIBLE_IF_KUNIT void ibmvfc_handle_asyncq(struct ibmvfc_crq *crq_instance,
>  			} else
>  				dev_err(vhost->dev,
>  					"FPIN event %u received, unable to process\n",
> -					crq->fpin_status);
> +					fpin_status);
>  		}
> -		break;
> -	default:
> -		dev_err(vhost->dev, "Unknown async event received: %d\n", crq->event);
> -		break;
>  	}
>  }
>  EXPORT_SYMBOL_IF_KUNIT(ibmvfc_handle_asyncq);
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
> index b9f22613d144..c67db8d7b3fc 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.h
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.h
> @@ -186,6 +186,7 @@ struct ibmvfc_npiv_login {
>  #define IBMVFC_YES_SCSI			0x40
>  #define IBMVFC_USE_ASYNC_SUBQ		0x100
>  #define IBMVFC_CAN_USE_NOOP_CMD		0x200
> +#define IBMVFC_CAN_HANDLE_FPIN_EXT	0x800
>  	__be64 node_name;
>  	struct srp_direct_buf async;
>  	u8 partition_name[IBMVFC_MAX_NAME];
> @@ -236,6 +237,7 @@ struct ibmvfc_npiv_login_resp {
>  #define IBMVFC_SUPPORT_SCSI		0x200
>  #define IBMVFC_SUPPORT_ASYNC_SUBQ	0x800
>  #define IBMVFC_SUPPORT_NOOP_CMD		0x1000
> +#define IBMVFC_SUPPORT_FPIN_EXT		0x2000
>  	__be32 max_cmds;
>  	__be32 scsi_id_sz;
>  	__be64 max_dma_len;
> @@ -718,6 +720,7 @@ struct ibmvfc_async_crq {
>  struct ibmvfc_async_subq {
>  	volatile u8 valid;
>  #define IBMVFC_ASYNC_ID_IS_ASSOC_ID	0x01
> +#define IBMVFC_ASYNC_IS_FPIN_EXT	0x02
>  #define IBMVFC_FC_EEH			0x04
>  #define IBMVFC_FC_FW_UPDATE		0x08
>  #define IBMVFC_FC_FW_DUMP		0x10
> @@ -734,6 +737,34 @@ struct ibmvfc_async_subq {
>  	} id;
>  } __packed __aligned(8);
>  
> +struct ibmvfc_fpin_data {
> +#define IBMVFC_FPIN_EVENT_TYPE_VALID	0x01
> +#define IBMVFC_FPIN_MODIFIER_VALID	0x02
> +#define IBMVFC_FPIN_THRESHOLD_VALID	0x04
> +#define IBMVFC_FPIN_SEVERITY_VALID	0x08
> +#define IBMVFC_FPIN_EVENT_COUNT_VALID	0x10
> +	u8 flags;
> +	u8 reserved[3];
> +	__be16 event_type;
> +	__be16 event_type_modifier;
> +	__be32 event_threshold;
> +	union {
> +		u8 severity;
> +		__be32 event_count;
> +	} event_data;
> +} __packed __aligned(8);
> +
> +struct ibmvfc_async_subq_fpin {
> +	volatile u8 valid;
> +	u8 flags;
> +	u8 link_state;
> +	u8 fpin_status;
> +	__be16 event;
> +	__be16 pad;
> +	volatile __be64 wwpn;
> +	struct ibmvfc_fpin_data fpin_data;
> +} __packed __aligned(8);
> +
>  union ibmvfc_iu {
>  	struct ibmvfc_mad_common mad_common;
>  	struct ibmvfc_npiv_login_mad npiv_login;
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc_kunit.c b/drivers/scsi/ibmvscsi/ibmvfc_kunit.c
> index 3a41127c4e81..6c2a7d6f8042 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc_kunit.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc_kunit.c
> @@ -3,6 +3,7 @@
>  #include <kunit/visibility.h>
>  #include <scsi/scsi_device.h>
>  #include <scsi/scsi_transport_fc.h>
> +#include <scsi/fc/fc_els.h>
>  #include <linux/list.h>
>  #include "ibmvfc.h"
>  
> @@ -71,6 +72,25 @@ static void ibmvfc_handle_fpin_event_test(struct kunit *test)
>  	ibmvfc_handle_asyncq((struct ibmvfc_crq *)&crq, vhost, &evt_doneq);
>  }
>  
> +/**
> + * ibmvfc_async_subq_test - unit test for allocating async subqueue
> + * @test: pointer to kunit structure
> + *
> + * Return: void
> + */
> +static void ibmvfc_async_subq_test(struct kunit *test)
> +{
> +	struct ibmvfc_host *vhost;
> +	struct list_head *queue;
> +	struct list_head *headp;
> +
> +	headp = ibmvfc_get_headp();
> +	queue = headp->next;
> +	vhost = container_of(queue, struct ibmvfc_host, queue);
> +
> +	KUNIT_EXPECT_NOT_NULL(test, vhost->scsi_scrqs.async_scrq);
> +}
> +
>  /**
>   * ibmvfc_noop_test - unit test for VFC_NOOP command
>   * @test: pointer to kunit structure
> @@ -97,29 +117,94 @@ static void ibmvfc_noop_test(struct kunit *test)
>  	ibmvfc_handle_crq(&crq, vhost, &evtq);
>  }
>  
> +#define IBMVFC_TEST_FPIN_EXT(fs, ev, stat) {			\
> +	crq.valid = 0x80;					\
> +	crq.flags = IBMVFC_ASYNC_IS_FPIN_EXT;			\
> +	crq.link_state = IBMVFC_AE_LS_LINK_UP;			\
> +	crq.fpin_status = (fs);					\
> +	crq.event = cpu_to_be16(IBMVFC_AE_FPIN);		\
> +	crq.wwpn = cpu_to_be64(tgt->wwpn);			\
> +	crq.fpin_data.flags = IBMVFC_FPIN_EVENT_TYPE_VALID;	\
> +	crq.fpin_data.event_type = cpu_to_be16((ev));		\
> +	pre = READ_ONCE(tgt->rport->fpin_stats.stat);		\
> +	ibmvfc_handle_asyncq((struct ibmvfc_crq *)&crq, vhost,	\
> +			     &evt_doneq);			\
> +	post = READ_ONCE(tgt->rport->fpin_stats.stat);		\
> +}
> +
>  /**
> - * ibmvfc_async_subq_test - unit test for allocating async subqueue
> + * ibmvfc_extended_fpin_test - unit test for extended FPIN events
>   * @test: pointer to kunit structure
>   *
> + * Tests
> + *
>   * Return: void
>   */
> -static void ibmvfc_async_subq_test(struct kunit *test)
> +static void ibmvfc_extended_fpin_test(struct kunit *test)
>  {
> +	enum ibmvfc_ae_fpin_status fs;
> +	struct ibmvfc_async_subq_fpin crq;
> +	struct fc_fpin_stats stats;
> +	struct ibmvfc_target *tgt;
>  	struct ibmvfc_host *vhost;
> -	struct list_head *queue;
>  	struct list_head *headp;
> +	LIST_HEAD(evt_doneq);
> +	u64 pre, post;
>  
>  	headp = ibmvfc_get_headp();
> -	queue = headp->next;
> -	vhost = container_of(queue, struct ibmvfc_host, queue);
> +	vhost = list_first_entry(headp, struct ibmvfc_host, queue);
> +	KUNIT_ASSERT_NOT_NULL_MSG(test, vhost, "No vhost");
>  
> -	KUNIT_EXPECT_NOT_NULL(test, vhost->scsi_scrqs.async_scrq);
> +	KUNIT_ASSERT_GE(test, vhost->num_targets, 1);
> +	tgt = list_first_entry(&vhost->targets, struct ibmvfc_target, queue);
> +	KUNIT_ASSERT_NOT_NULL(test, tgt->rport);
> +
> +	stats = tgt->rport->fpin_stats;
> +
> +	for (fs = IBMVFC_AE_FPIN_LINK_CONGESTED; fs <= IBMVFC_AE_FPIN_CONGESTION_CLEARED; fs++) {
> +		switch (fs) {
> +		case IBMVFC_AE_FPIN_PORT_CLEARED:
> +		case IBMVFC_AE_FPIN_CONGESTION_CLEARED:
> +			crq.valid = 0x80;
> +			crq.flags = IBMVFC_ASYNC_IS_FPIN_EXT;
> +			crq.link_state = IBMVFC_AE_LS_LINK_UP;
> +			crq.fpin_status = fs;
> +			crq.event = cpu_to_be16(IBMVFC_AE_FPIN);
> +			crq.wwpn = cpu_to_be64(tgt->wwpn);
> +			crq.fpin_data.flags = IBMVFC_FPIN_EVENT_TYPE_VALID;
> +			crq.fpin_data.event_type = cpu_to_be16(0);
> +			pre = READ_ONCE(tgt->rport->fpin_stats.cn_clear);
> +			ibmvfc_handle_asyncq((struct ibmvfc_crq *)&crq, vhost,
> +					     &evt_doneq);
> +			post = READ_ONCE(tgt->rport->fpin_stats.cn_clear);
> +			break;
> +		case IBMVFC_AE_FPIN_LINK_CONGESTED:
> +		case IBMVFC_AE_FPIN_PORT_CONGESTED:
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_CONGN_CLEAR, cn_clear);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_CONGN_LOST_CREDIT, cn_lost_credit);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_CONGN_CREDIT_STALL, cn_credit_stall);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_CONGN_OVERSUBSCRIPTION, cn_oversubscription);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_CONGN_DEVICE_SPEC, cn_device_specific);
> +			break;
> +		case IBMVFC_AE_FPIN_PORT_DEGRADED:
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_UNKNOWN, li_failure_unknown);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_LINK_FAILURE, li_link_failure_count);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_LOSS_OF_SYNC, li_loss_of_sync_count);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_LOSS_OF_SIG, li_loss_of_signals_count);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_PRIM_SEQ_ERR, li_prim_seq_err_count);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_INVALID_TX_WD, li_invalid_tx_word_count);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_INVALID_CRC, li_invalid_crc_count);
> +			IBMVFC_TEST_FPIN_EXT(fs, FPIN_LI_DEVICE_SPEC, li_device_specific);
> +			break;
> +		}
> +	}
>  }
>  
>  static struct kunit_case ibmvfc_fpin_test_cases[] = {
>  	KUNIT_CASE(ibmvfc_handle_fpin_event_test),
>  	KUNIT_CASE(ibmvfc_noop_test),
>  	KUNIT_CASE(ibmvfc_async_subq_test),
> +	KUNIT_CASE(ibmvfc_extended_fpin_test),
>  	{},
>  };
>  
> 



^ permalink raw reply

* Re: [PATCH V5 1/2] powerpc tools perf: Initialize error code in auxtrace_record_init function
From: Namhyung Kim @ 2026-05-07  6:13 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: acme, jolsa, adrian.hunter, mpetlan, tmricht, maddy, irogers,
	linux-perf-users, linuxppc-dev, hbathini, Tejas.Manhas1,
	Tanushree.Shah, shivani
In-Reply-To: <20260504151321.12346-1-atrajeev@linux.ibm.com>

On Mon, May 04, 2026 at 08:43:20PM +0530, Athira Rajeev wrote:
> perf trace record fails some cases in powerpc
> 
>  # ./perf test "perf trace record and replay"
>  128: perf trace record and replay                                    : FAILED!
> 
>  # ./perf trace record sleep 1
>  # echo $?
>    32
> 
> This is happening because of non-zero err value from
> auxtrace_record__init() function.
> 
>  static int record__auxtrace_init(struct record *rec)
>  {
>         int err;
> 
>         if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
>             && record__threads_enabled(rec)) {
>                 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
>                 return -EINVAL;
>         }
> 
>         if (!rec->itr) {
>                 rec->itr = auxtrace_record__init(rec->evlist, &err);
>                 if (err)
>                         return err;
>         }
> 
> Here "int err" is not initialised. The code expects "err" to be set
> from auxtrace_record__init() function.
> 
> Update auxtrace_record__init() in arch/powerpc/util/auxtrace.c to clear
> err value in the beginning.
> 
> - Clear err value in beginning of function. Any fail later will
> set appropriate return code to err.
> - Even if we haven't found any event for auxtrace, perf record
> should continue for other events. NULL return
> will indicate that there is no auxtrace record initialized.
> - Not having "err" set here will affect monitoring of other events
> also because perf record will fail seeing random value in err.
> 
> Set err to -EINVAL before invoking auxtrace_record__init() in
> builtin-record.c
> 
> With the fix,
> 
>  # ./perf trace record sleep 1
>  [ perf record: Woken up 2 times to write data ]
>  [ perf record: Captured and wrote 0.033 MB perf.data (228 samples) ]
> 
> Fixes: 1dbfaf94cf66 ("perf powerpc: Add basic CONFIG_AUXTRACE support for VPA pmu on powerpc")
> Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
> Signed-off-by: Athira Rajeev <atrajeev@linux.ibm.com>

For both patches,

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung

> ---
> Changelog:
> v4:
> Added Reviewed-by from Adrian
> 
> v1 -> v2
> Addressed review comment from Adrian:
> - Set err to -EINVAL before invoking auxtrace_record__init() in
>   builtin-record.c
> - Added kernel-doc to auxtrace_record__init() in tools/perf/util/auxtrace.c
> Addressed review comment from Namhyung:
> - Added fixes tag
> 
>  tools/perf/arch/powerpc/util/auxtrace.c | 6 ++++++
>  tools/perf/builtin-record.c             | 1 +
>  2 files changed, 7 insertions(+)
> 
> diff --git a/tools/perf/arch/powerpc/util/auxtrace.c b/tools/perf/arch/powerpc/util/auxtrace.c
> index e39deff6c857..4600a1661b4f 100644
> --- a/tools/perf/arch/powerpc/util/auxtrace.c
> +++ b/tools/perf/arch/powerpc/util/auxtrace.c
> @@ -71,6 +71,12 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
>  	struct evsel *pos;
>  	int found = 0;
>  
> +	/*
> +	 * Set err value to zero here. Any fail later
> +	 * will set appropriate return code to err.
> +	 */
> +	*err = 0;
> +
>  	evlist__for_each_entry(evlist, pos) {
>  		if (strstarts(pos->name, "vpa_dtl")) {
>  			found = 1;
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 4a5eba498c02..708825747af5 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -865,6 +865,7 @@ static int record__auxtrace_init(struct record *rec)
>  	}
>  
>  	if (!rec->itr) {
> +		err = -EINVAL;
>  		rec->itr = auxtrace_record__init(rec->evlist, &err);
>  		if (err)
>  			return err;
> -- 
> 2.47.3
> 


^ permalink raw reply

* Re: [PATCH v3] powerpc/fadump: Add timeout to RTAS busy-wait loops
From: Adriano Vero @ 2026-05-07  8:08 UTC (permalink / raw)
  To: Sourabh Jain; +Cc: maddy, mpe, npiggin, chleroy, linuxppc-dev, linux-kernel
In-Reply-To: <a582e27b-fb51-473a-802c-6ffb83e7ce63@linux.ibm.com>

Hi Sourabh,

Understood, I'll keep that in mind for future patches. Thanks for
the guidance.

Adriano

On Thu, May 7, 2026 at 6:16 AM Sourabh Jain <sourabhjain@linux.ibm.com> wrote:
>
> Hello Adriano,
>
> On 07/05/26 03:50, Adriano Vero wrote:
> > The ibm,configure-kernel-dump RTAS call sites in
> > rtas_fadump_register(), rtas_fadump_unregister(), and
> > rtas_fadump_invalidate() polled indefinitely while firmware returned
> > a busy status. A misbehaving or hung firmware could stall these paths
> > forever, blocking fadump registration at boot or preventing clean
> > teardown.
> >
> > Introduce rtas_fadump_call(), a helper that wraps the common
> > busy-wait pattern shared by all three sites. The helper accumulates
> > the total delay and returns -ETIMEDOUT if firmware keeps returning a
> > busy status beyond RTAS_FADUMP_MAX_WAIT_MS (60 seconds). A pr_debug()
> > message is emitted on each busy iteration to aid diagnosis when the
> > timeout is hit.
> >
> > Signed-off-by: Adriano Vero <adri.vero.dev@gmail.com>
> >
> > Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
>
> New line between tags is unnecessary.
>
> Including a Changelog in subsequent versions of the same patch is
> helpful for the maintainer and reviewers to understand what has changed
> in this version compared to the previous version.
>
> For example:
> https://lore.kernel.org/all/20260407124349.1698552-1-sourabhjain@linux.ibm.com/
>
> Thanks,
> Sourabh Jain
>
> > ---
> >   arch/powerpc/platforms/pseries/rtas-fadump.c | 80 ++++++++++++--------
> >   arch/powerpc/platforms/pseries/rtas-fadump.h |  6 ++
> >   2 files changed, 53 insertions(+), 33 deletions(-)
> >
> > diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
> > index eceb3289383e..3bb4ac2ab6cc 100644
> > --- a/arch/powerpc/platforms/pseries/rtas-fadump.c
> > +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
> > @@ -179,9 +179,42 @@ static u64 rtas_fadump_get_bootmem_min(void)
> >       return RTAS_FADUMP_MIN_BOOT_MEM;
> >   }
> >
> > +/*
> > + * Helper to make an ibm,configure-kernel-dump RTAS call with a bounded
> > + * busy-wait loop. Returns the RTAS return code on completion, or
> > + * -ETIMEDOUT if firmware keeps returning a busy status beyond
> > + * RTAS_FADUMP_MAX_WAIT_MS milliseconds.
> > + */
> > +static int rtas_fadump_call(struct fw_dump *fadump_conf, int operation,
> > +                         void *fdm_ptr, unsigned int fdm_size,
> > +                         const char *op_name)
> > +{
> > +     unsigned int wait_time, total_wait = 0;
> > +     int rc;
> > +
> > +     do {
> > +             rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > +                            NULL, operation, fdm_ptr, fdm_size);
> > +             wait_time = rtas_busy_delay_time(rc);
> > +             if (wait_time) {
> > +                     pr_debug("Firmware busy during fadump %s, waiting %ums (total %ums)\n",
> > +                              op_name, wait_time, total_wait);
> > +                     if (total_wait >= RTAS_FADUMP_MAX_WAIT_MS) {
> > +                             pr_err("Timed out waiting for firmware to complete fadump %s\n",
> > +                                    op_name);
> > +                             return -ETIMEDOUT;
> > +                     }
> > +                     total_wait += wait_time;
> > +                     mdelay(wait_time);
> > +             }
> > +     } while (wait_time);
> > +
> > +     return rc;
> > +}
> > +
> >   static int rtas_fadump_register(struct fw_dump *fadump_conf)
> >   {
> > -     unsigned int wait_time, fdm_size;
> > +     unsigned int fdm_size;
> >       int rc, err = -EIO;
> >
> >       /*
> > @@ -192,16 +225,10 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
> >       fdm_size = sizeof(struct rtas_fadump_section_header);
> >       fdm_size += be16_to_cpu(fdm.header.dump_num_sections) * sizeof(struct rtas_fadump_section);
> >
> > -     /* TODO: Add upper time limit for the delay */
> > -     do {
> > -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > -                             NULL, FADUMP_REGISTER, &fdm, fdm_size);
> > -
> > -             wait_time = rtas_busy_delay_time(rc);
> > -             if (wait_time)
> > -                     mdelay(wait_time);
> > -
> > -     } while (wait_time);
> > +     rc = rtas_fadump_call(fadump_conf, FADUMP_REGISTER, &fdm, fdm_size,
> > +                           "register");
> > +     if (rc == -ETIMEDOUT)
> > +             return -ETIMEDOUT;
> >
> >       switch (rc) {
> >       case 0:
> > @@ -234,19 +261,12 @@ static int rtas_fadump_register(struct fw_dump *fadump_conf)
> >
> >   static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
> >   {
> > -     unsigned int wait_time;
> >       int rc;
> >
> > -     /* TODO: Add upper time limit for the delay */
> > -     do {
> > -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > -                             NULL, FADUMP_UNREGISTER, &fdm,
> > -                             sizeof(struct rtas_fadump_mem_struct));
> > -
> > -             wait_time = rtas_busy_delay_time(rc);
> > -             if (wait_time)
> > -                     mdelay(wait_time);
> > -     } while (wait_time);
> > +     rc = rtas_fadump_call(fadump_conf, FADUMP_UNREGISTER, &fdm,
> > +                           sizeof(struct rtas_fadump_mem_struct), "unregister");
> > +     if (rc == -ETIMEDOUT)
> > +             return -ETIMEDOUT;
> >
> >       if (rc) {
> >               pr_err("Failed to un-register - unexpected error(%d).\n", rc);
> > @@ -259,19 +279,13 @@ static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
> >
> >   static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
> >   {
> > -     unsigned int wait_time;
> >       int rc;
> >
> > -     /* TODO: Add upper time limit for the delay */
> > -     do {
> > -             rc =  rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
> > -                             NULL, FADUMP_INVALIDATE, fdm_active,
> > -                             sizeof(struct rtas_fadump_mem_struct));
> > -
> > -             wait_time = rtas_busy_delay_time(rc);
> > -             if (wait_time)
> > -                     mdelay(wait_time);
> > -     } while (wait_time);
> > +     rc = rtas_fadump_call(fadump_conf, FADUMP_INVALIDATE,
> > +                           (void *)fdm_active,
> > +                           sizeof(struct rtas_fadump_mem_struct), "invalidate");
> > +     if (rc == -ETIMEDOUT)
> > +             return -ETIMEDOUT;
> >
> >       if (rc) {
> >               pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
> > diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
> > index c109abf6befd..65fdab7b5b8d 100644
> > --- a/arch/powerpc/platforms/pseries/rtas-fadump.h
> > +++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
> > @@ -41,6 +41,12 @@
> >   #define MAX_SECTIONS                                10
> >   #define RTAS_FADUMP_MAX_BOOT_MEM_REGS               7
> >
> > +/*
> > + * Maximum time to wait for firmware to respond to an
> > + * ibm,configure-kernel-dump RTAS call before giving up.
> > + */
> > +#define RTAS_FADUMP_MAX_WAIT_MS                      60000U
> > +
> >   /* Kernel Dump section info */
> >   struct rtas_fadump_section {
> >       __be32  request_flag;
>


^ permalink raw reply

* Re: [PATCH v2 0/3] KVM: Fix and clean up kvm_vcpu_map[_readonly]() usages
From: Peter Fang @ 2026-05-07  8:18 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: David Woodhouse, Paolo Bonzini, Madhavan Srinivasan,
	Nicholas Piggin, Fred Griffoul, Yosry Ahmed, Ritesh Harjani,
	Michael Ellerman, Christophe Leroy (CS GROUP), Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	kvm, linuxppc-dev, linux-kernel
In-Reply-To: <afjeahNaSG0zsMgx@google.com>

On Mon, May 04, 2026 at 10:59:06AM -0700, Sean Christopherson wrote:
> On Mon, Apr 27, 2026, Peter Fang wrote:
> 
> > Thanks David!
> > 
> > I think I'd need at least input from the maintainers on this but just by
> > code inspection, the kvm_vcpu_map() usage in sev.c seems a bit tricky.
> > Unmapping doesn't happen until right before switching to the guest, so
> > this might fall into the "keep the mapping around for a longer time"
> > category [1].
> 
> It definitely falls into that category.  But that code is also rather gross, i.e.
> could use some cleanup no matter what, so I don't think it's a good argument for
> keeping kvm_vcpu_map() around.
> 
> To avoid a bunch of pointless work and churn, let's hold off on hardening and/or
> renaming kvm_vcpu_map() for now.  I'll take this v2 as-is; even though taking a
> gpa instead of a gfn will conflict with the nVMX series, it's dead simple and a
> worthwhile cleanup even if some of the conversions get discarded shortly after.

Makes sense to me. Thanks for the review Sean!


^ permalink raw reply

* Re: [PATCH v3] powerpc/g5: Enable all windfarms by default
From: Segher Boessenkool @ 2026-05-07  8:52 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Linus Walleij, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), linuxppc-dev,
	linux-kernel
In-Reply-To: <5a28c484-5500-4b68-b837-28d30648e592@kernel.org>

On Wed, May 06, 2026 at 09:41:51PM +1000, Michael Ellerman wrote:
> Sorry it didn't get picked up.
> 
> I don't have any G5s anymore,

I don't have any functioning ones either :-(

> but the ones I did have used the other
> windfarm drivers so I never hit this.

Yeah, the oldest PowerMac G5 and RackMac were not super big sellers
either, relatively to the newer models.  And of course all Apple
hardware of the time (as well as all by other manufacturers) had failing
power supplies, so there "naturally" be fewer working machines over\
time.  Problems that solve themselves!

> You might be the last person on earth booting Linux on those :)

Booting, yes, but it won't run for even a minute ;-)

> > diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
> > index 04bbb37f5978..f74ccc06f4c1 100644
> > --- a/arch/powerpc/configs/g5_defconfig
> > +++ b/arch/powerpc/configs/g5_defconfig
> > @@ -85,6 +85,8 @@ CONFIG_PMAC_SMU=y
> >   CONFIG_MAC_EMUMOUSEBTN=y
> >   CONFIG_WINDFARM=y
> >   CONFIG_WINDFARM_PM81=y
> > +CONFIG_WINDFARM_PM72=y
> > +CONFIG_WINDFARM_RM31=y
> >   CONFIG_WINDFARM_PM91=y
> >   CONFIG_WINDFARM_PM112=y
> >   CONFIG_WINDFARM_PM121=y
> > 
> 
> Ack, LGTM.
> 
> Maddy can you pick this one up for next?

Is this all supported models?  Might be better to do that immediately,
not wait for patches to trickle in over the years :-)

Thanks everyone!


Segher


^ permalink raw reply

* Re: [PATCH v3] powerpc/g5: Enable all windfarms by default
From: Linus Walleij @ 2026-05-07  8:58 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Michael Ellerman, Madhavan Srinivasan, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy (CS GROUP), linuxppc-dev,
	linux-kernel
In-Reply-To: <afxS5S86eynCQaTd@gate>

On Thu, May 7, 2026 at 10:53 AM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
> On Wed, May 06, 2026 at 09:41:51PM +1000, Michael Ellerman wrote:

> > You might be the last person on earth booting Linux on those :)
>
> Booting, yes, but it won't run for even a minute ;-)

Yeah what I think happen with Debian (which works) is that they have
this driver in the initramfs or rootfs as a module so it gets probed soon
enough, but it feels way safer to compile in vital stuff like this.

Yours,
Linus Walleij


^ permalink raw reply

* Re: [PATCH v2] KVM: PPC: Book3S HV: Add H_FAC_UNAVAIL mapping for tracing exits
From: Amit Machhiwal @ 2026-05-07  9:42 UTC (permalink / raw)
  To: Gautam Menghani
  Cc: maddy, npiggin, mpe, chleroy, Gautam Menghani, linuxppc-dev, kvm,
	linux-kernel
In-Reply-To: <20260428084534.49781-1-Gautam.Menghani@ibm.com>

On 2026/04/28 02:15 PM, Gautam Menghani wrote:
> From: Gautam Menghani <gautam@linux.ibm.com>
> 
> The macro kvm_trace_symbol_exit is used for providing the mappings
> for the trap vectors and their names. Add mapping for H_FAC_UNAVAIL so that
> trap reason is displayed as string instead of a vector number when using
> the kvm_guest_exit tracepoint.
> 
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
> v2:
> 1. Remove the trailing comma after last element
> 
>  arch/powerpc/kvm/trace_book3s.h | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
> index 9260ddbd557f..5d272c115331 100644
> --- a/arch/powerpc/kvm/trace_book3s.h
> +++ b/arch/powerpc/kvm/trace_book3s.h
> @@ -28,6 +28,7 @@
>  	{0xea0, "H_VIRT"}, \
>  	{0xf00, "PERFMON"}, \
>  	{0xf20, "ALTIVEC"}, \
> -	{0xf40, "VSX"}
> +	{0xf40, "VSX"}, \
> +	{0xf80, "H_FAC_UNAVAIL"}

While we are at it, should we also consider adding 0xf60 for Facility
Unavailable? Anyways, LGTM.

Reviewed-by: Amit Machhiwal <amachhiw@linux.ibm.com>

>  
>  #endif
> -- 
> 2.52.0
> 
> 


^ permalink raw reply

* Re: [PATCH v3] powerpc/g5: Enable all windfarms by default
From: Michael Ellerman @ 2026-05-07 12:50 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Madhavan Srinivasan, Nicholas Piggin, Christophe Leroy (CS GROUP),
	linuxppc-dev, linux-kernel
In-Reply-To: <CAD++jLmBn4moNJKp8TLmMvdmSVaT4qnY10OuBMEQmaZnO8D1JQ@mail.gmail.com>

On 7/5/2026 03:30, Linus Walleij wrote:
> On Wed, May 6, 2026 at 1:41 PM Michael Ellerman <mpe@kernel.org> wrote:
> 
>> I don't have any G5s anymore, but the ones I did have used the other
>> windfarm drivers so I never hit this.
>>
>> You might be the last person on earth booting Linux on those :)
> 
> Not really, Sean (Action Retro) did this thing and follow-ups on
> YouTube:
> https://www.youtube.com/watch?v=g-Ugfqj1ank
> 
> Including installing a contemporary graphics card:
> https://www.youtube.com/watch?v=WPDd1Y0flSg

Nice.

But the driver you enabled, PM72, is for PowerMac7,2, which is 2-3 years 
older, and does not have PCI-E.
So if you have one of those it's even more special.

 > This likely means he inspired a whole bunch of geeks to
 > go and do the same. :)

Hopefully one of them volunteers to be powermac maintainer ;)

cheers


^ permalink raw reply

* Re: [PATCH v5 net-next 11/15] net: dsa: netc: add phylink MAC operations
From: Maxime Chevallier @ 2026-05-07 12:44 UTC (permalink / raw)
  To: Wei Fang, claudiu.manoil, vladimir.oltean, xiaoning.wang,
	andrew+netdev, davem, edumazet, kuba, pabeni, robh, krzk+dt,
	conor+dt, f.fainelli, frank.li, chleroy, horms, linux
  Cc: netdev, linux-kernel, devicetree, linuxppc-dev, linux-arm-kernel,
	imx
In-Reply-To: <20260430024945.3413973-12-wei.fang@nxp.com>

Hi,

On 30/04/2026 04:49, Wei Fang wrote:
> Different versions of NETC switches have different numbers of ports and
> MAC capabilities. Add .phylink_get_caps() to struct netc_switch_info,
> allowing each NETC switch version to implement its own callback for
> obtaining MAC capabilities.
> 
> Implement the phylink_mac_ops callbacks: .mac_config(), .mac_link_up(),
> and .mac_link_down(). Note that flow-control configuration is not yet
> supported in .mac_link_up(), but will be implemented in a subsequent
> patch.
> 
> Signed-off-by: Wei Fang <wei.fang@nxp.com>

The phylink part looks good to me,

Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>

Maxime

> ---
>  drivers/net/dsa/netc/netc_main.c      | 243 ++++++++++++++++++++++++++
>  drivers/net/dsa/netc/netc_platform.c  |  38 ++++
>  drivers/net/dsa/netc/netc_switch.h    |   4 +
>  drivers/net/dsa/netc/netc_switch_hw.h |  26 +++
>  4 files changed, 311 insertions(+)
> 
> diff --git a/drivers/net/dsa/netc/netc_main.c b/drivers/net/dsa/netc/netc_main.c
> index 90a2d8cfd3d2..edf50cb32cb6 100644
> --- a/drivers/net/dsa/netc/netc_main.c
> +++ b/drivers/net/dsa/netc/netc_main.c
> @@ -44,6 +44,26 @@ static void netc_mac_port_wr(struct netc_port *np, u32 reg, u32 val)
>  		netc_port_wr(np, reg + NETC_PMAC_OFFSET, val);
>  }
>  
> +static void netc_mac_port_rmw(struct netc_port *np, u32 reg,
> +			      u32 mask, u32 val)
> +{
> +	u32 old, new;
> +
> +	if (is_netc_pseudo_port(np))
> +		return;
> +
> +	WARN_ON((mask | val) != mask);
> +
> +	old = netc_port_rd(np, reg);
> +	new = (old & ~mask) | val;
> +	if (new == old)
> +		return;
> +
> +	netc_port_wr(np, reg, new);
> +	if (np->caps.pmac)
> +		netc_port_wr(np, reg + NETC_PMAC_OFFSET, new);
> +}
> +
>  static void netc_port_get_capability(struct netc_port *np)
>  {
>  	u32 val;
> @@ -522,10 +542,232 @@ static void netc_switch_get_ip_revision(struct netc_switch *priv)
>  	priv->revision = FIELD_GET(IPBRR0_IP_REV, val);
>  }
>  
> +static void netc_phylink_get_caps(struct dsa_switch *ds, int port,
> +				  struct phylink_config *config)
> +{
> +	struct netc_switch *priv = ds->priv;
> +
> +	priv->info->phylink_get_caps(port, config);
> +}
> +
> +static void netc_port_set_mac_mode(struct netc_port *np,
> +				   unsigned int mode,
> +				   phy_interface_t phy_mode)
> +{
> +	u32 mask = PM_IF_MODE_IFMODE | PM_IF_MODE_REVMII;
> +	u32 val = 0;
> +
> +	switch (phy_mode) {
> +	case PHY_INTERFACE_MODE_RGMII:
> +	case PHY_INTERFACE_MODE_RGMII_ID:
> +	case PHY_INTERFACE_MODE_RGMII_RXID:
> +	case PHY_INTERFACE_MODE_RGMII_TXID:
> +		val |= IFMODE_RGMII;
> +		break;
> +	case PHY_INTERFACE_MODE_RMII:
> +		val |= IFMODE_RMII;
> +		break;
> +	case PHY_INTERFACE_MODE_REVMII:
> +		val |= PM_IF_MODE_REVMII;
> +		fallthrough;
> +	case PHY_INTERFACE_MODE_MII:
> +		val |= IFMODE_MII;
> +		break;
> +	case PHY_INTERFACE_MODE_SGMII:
> +	case PHY_INTERFACE_MODE_2500BASEX:
> +		val |= IFMODE_SGMII;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	netc_mac_port_rmw(np, NETC_PM_IF_MODE(0), mask, val);
> +}
> +
> +static void netc_mac_config(struct phylink_config *config, unsigned int mode,
> +			    const struct phylink_link_state *state)
> +{
> +	struct dsa_port *dp = dsa_phylink_to_port(config);
> +
> +	netc_port_set_mac_mode(NETC_PORT(dp->ds, dp->index), mode,
> +			       state->interface);
> +}
> +
> +static void netc_port_set_speed(struct netc_port *np, int speed)
> +{
> +	netc_port_rmw(np, NETC_PCR, PCR_PSPEED, PSPEED_SET_VAL(speed));
> +}
> +
> +static void netc_port_set_rgmii_mac(struct netc_port *np,
> +				    int speed, int duplex)
> +{
> +	u32 mask, val;
> +
> +	mask = PM_IF_MODE_SSP | PM_IF_MODE_HD | PM_IF_MODE_M10;
> +
> +	switch (speed) {
> +	default:
> +	case SPEED_1000:
> +		val = FIELD_PREP(PM_IF_MODE_SSP, SSP_1G);
> +		break;
> +	case SPEED_100:
> +		val = FIELD_PREP(PM_IF_MODE_SSP, SSP_100M);
> +		break;
> +	case SPEED_10:
> +		val = FIELD_PREP(PM_IF_MODE_SSP, SSP_10M);
> +		break;
> +	}
> +
> +	if (duplex != DUPLEX_FULL)
> +		val |= PM_IF_MODE_HD;
> +
> +	netc_mac_port_rmw(np, NETC_PM_IF_MODE(0), mask, val);
> +}
> +
> +static void netc_port_set_rmii_mii_mac(struct netc_port *np,
> +				       int speed, int duplex)
> +{
> +	u32 mask, val = 0;
> +
> +	mask = PM_IF_MODE_SSP | PM_IF_MODE_HD | PM_IF_MODE_M10;
> +
> +	if (speed == SPEED_10)
> +		val |= PM_IF_MODE_M10;
> +
> +	if (duplex != DUPLEX_FULL)
> +		val |= PM_IF_MODE_HD;
> +
> +	netc_mac_port_rmw(np, NETC_PM_IF_MODE(0), mask, val);
> +}
> +
> +static void netc_port_mac_rx_enable(struct netc_port *np)
> +{
> +	netc_port_rmw(np, NETC_POR, POR_RXDIS, 0);
> +	netc_mac_port_rmw(np, NETC_PM_CMD_CFG(0), PM_CMD_CFG_RX_EN,
> +			  PM_CMD_CFG_RX_EN);
> +}
> +
> +static void netc_port_wait_rx_empty(struct netc_port *np, int mac)
> +{
> +	u32 val;
> +
> +	/* PM_IEVENT_RX_EMPTY is a read-only bit, it is automatically set by
> +	 * hardware if RX FIFO is empty and no RX packet receive in process.
> +	 * And it is automatically cleared if RX FIFO is not empty or RX
> +	 * packet receive in process.
> +	 */
> +	if (read_poll_timeout(netc_port_rd, val, val & PM_IEVENT_RX_EMPTY,
> +			      100, 10000, false, np, NETC_PM_IEVENT(mac)))
> +		dev_warn(np->switch_priv->dev,
> +			 "swp%d MAC%d: RX is not idle\n", np->dp->index, mac);
> +}
> +
> +static void netc_port_mac_rx_graceful_stop(struct netc_port *np)
> +{
> +	u32 val;
> +
> +	if (is_netc_pseudo_port(np))
> +		goto rx_disable;
> +
> +	if (np->caps.pmac) {
> +		netc_port_rmw(np, NETC_PM_CMD_CFG(1), PM_CMD_CFG_RX_EN, 0);
> +		netc_port_wait_rx_empty(np, 1);
> +	}
> +
> +	netc_port_rmw(np, NETC_PM_CMD_CFG(0), PM_CMD_CFG_RX_EN, 0);
> +	netc_port_wait_rx_empty(np, 0);
> +
> +	if (read_poll_timeout(netc_port_rd, val, !(val & PSR_RX_BUSY),
> +			      100, 10000, false, np, NETC_PSR))
> +		dev_warn(np->switch_priv->dev, "swp%d RX is busy\n",
> +			 np->dp->index);
> +
> +rx_disable:
> +	netc_port_rmw(np, NETC_POR, POR_RXDIS, POR_RXDIS);
> +}
> +
> +static void netc_port_mac_tx_enable(struct netc_port *np)
> +{
> +	netc_mac_port_rmw(np, NETC_PM_CMD_CFG(0), PM_CMD_CFG_TX_EN,
> +			  PM_CMD_CFG_TX_EN);
> +	netc_port_rmw(np, NETC_POR, POR_TXDIS, 0);
> +}
> +
> +static void netc_port_wait_tx_empty(struct netc_port *np, int mac)
> +{
> +	u32 val;
> +
> +	/* PM_IEVENT_TX_EMPTY is a read-only bit, it is automatically set by
> +	 * hardware if TX FIFO is empty. And it is automatically cleared if
> +	 * TX FIFO is not empty.
> +	 */
> +	if (read_poll_timeout(netc_port_rd, val, val & PM_IEVENT_TX_EMPTY,
> +			      100, 10000, false, np, NETC_PM_IEVENT(mac)))
> +		dev_warn(np->switch_priv->dev,
> +			 "swp%d MAC%d: TX FIFO is not empty\n",
> +			 np->dp->index, mac);
> +}
> +
> +static void netc_port_mac_tx_graceful_stop(struct netc_port *np)
> +{
> +	netc_port_rmw(np, NETC_POR, POR_TXDIS, POR_TXDIS);
> +
> +	if (is_netc_pseudo_port(np))
> +		return;
> +
> +	netc_port_wait_tx_empty(np, 0);
> +	if (np->caps.pmac)
> +		netc_port_wait_tx_empty(np, 1);
> +
> +	netc_mac_port_rmw(np, NETC_PM_CMD_CFG(0), PM_CMD_CFG_TX_EN, 0);
> +}
> +
> +static void netc_mac_link_up(struct phylink_config *config,
> +			     struct phy_device *phy, unsigned int mode,
> +			     phy_interface_t interface, int speed,
> +			     int duplex, bool tx_pause, bool rx_pause)
> +{
> +	struct dsa_port *dp = dsa_phylink_to_port(config);
> +	struct netc_port *np;
> +
> +	np = NETC_PORT(dp->ds, dp->index);
> +	netc_port_set_speed(np, speed);
> +
> +	if (phy_interface_mode_is_rgmii(interface))
> +		netc_port_set_rgmii_mac(np, speed, duplex);
> +
> +	if (interface == PHY_INTERFACE_MODE_RMII ||
> +	    interface == PHY_INTERFACE_MODE_REVMII ||
> +	    interface == PHY_INTERFACE_MODE_MII)
> +		netc_port_set_rmii_mii_mac(np, speed, duplex);
> +
> +	netc_port_mac_tx_enable(np);
> +	netc_port_mac_rx_enable(np);
> +}
> +
> +static void netc_mac_link_down(struct phylink_config *config,
> +			       unsigned int mode,
> +			       phy_interface_t interface)
> +{
> +	struct dsa_port *dp = dsa_phylink_to_port(config);
> +	struct netc_port *np;
> +
> +	np = NETC_PORT(dp->ds, dp->index);
> +	netc_port_mac_rx_graceful_stop(np);
> +	netc_port_mac_tx_graceful_stop(np);
> +}
> +
> +static const struct phylink_mac_ops netc_phylink_mac_ops = {
> +	.mac_config		= netc_mac_config,
> +	.mac_link_up		= netc_mac_link_up,
> +	.mac_link_down		= netc_mac_link_down,
> +};
> +
>  static const struct dsa_switch_ops netc_switch_ops = {
>  	.get_tag_protocol		= netc_get_tag_protocol,
>  	.setup				= netc_setup,
>  	.teardown			= netc_teardown,
> +	.phylink_get_caps		= netc_phylink_get_caps,
>  };
>  
>  static int netc_switch_probe(struct pci_dev *pdev,
> @@ -564,6 +806,7 @@ static int netc_switch_probe(struct pci_dev *pdev,
>  	ds->num_ports = priv->info->num_ports;
>  	ds->num_tx_queues = NETC_TC_NUM;
>  	ds->ops = &netc_switch_ops;
> +	ds->phylink_mac_ops = &netc_phylink_mac_ops;
>  	ds->priv = priv;
>  	priv->ds = ds;
>  
> diff --git a/drivers/net/dsa/netc/netc_platform.c b/drivers/net/dsa/netc/netc_platform.c
> index abd599ea9c8d..bb4f92d238cb 100644
> --- a/drivers/net/dsa/netc/netc_platform.c
> +++ b/drivers/net/dsa/netc/netc_platform.c
> @@ -11,8 +11,46 @@ struct netc_switch_platform {
>  	const struct netc_switch_info *info;
>  };
>  
> +static void imx94_switch_phylink_get_caps(int port,
> +					  struct phylink_config *config)
> +{
> +	config->mac_capabilities = MAC_1000FD;
> +
> +	switch (port) {
> +	case 0 ... 1:
> +		__set_bit(PHY_INTERFACE_MODE_SGMII,
> +			  config->supported_interfaces);
> +		__set_bit(PHY_INTERFACE_MODE_2500BASEX,
> +			  config->supported_interfaces);
> +		config->mac_capabilities |= MAC_2500FD;
> +		fallthrough;
> +	case 2:
> +		config->mac_capabilities |= MAC_10 | MAC_100;
> +		__set_bit(PHY_INTERFACE_MODE_MII,
> +			  config->supported_interfaces);
> +		__set_bit(PHY_INTERFACE_MODE_RMII,
> +			  config->supported_interfaces);
> +		/* Port 0 and 1 do not support REVMII */
> +		if (port == 2)
> +			__set_bit(PHY_INTERFACE_MODE_REVMII,
> +				  config->supported_interfaces);
> +
> +		phy_interface_set_rgmii(config->supported_interfaces);
> +		break;
> +	case 3: /* CPU port */
> +		__set_bit(PHY_INTERFACE_MODE_INTERNAL,
> +			  config->supported_interfaces);
> +		config->mac_capabilities |= MAC_10FD | MAC_100FD |
> +					    MAC_2500FD;
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
>  static const struct netc_switch_info imx94_info = {
>  	.num_ports = 4,
> +	.phylink_get_caps = imx94_switch_phylink_get_caps,
>  };
>  
>  static const struct netc_switch_platform netc_platforms[] = {
> diff --git a/drivers/net/dsa/netc/netc_switch.h b/drivers/net/dsa/netc/netc_switch.h
> index dac19bfba02b..eb65c36ecead 100644
> --- a/drivers/net/dsa/netc/netc_switch.h
> +++ b/drivers/net/dsa/netc/netc_switch.h
> @@ -34,6 +34,7 @@ struct netc_switch;
>  
>  struct netc_switch_info {
>  	u32 num_ports;
> +	void (*phylink_get_caps)(int port, struct phylink_config *config);
>  };
>  
>  struct netc_port_caps {
> @@ -70,6 +71,9 @@ struct netc_switch {
>  	struct ntmp_user ntmp;
>  };
>  
> +#define NETC_PRIV(ds)			((struct netc_switch *)((ds)->priv))
> +#define NETC_PORT(ds, port_id)		(NETC_PRIV(ds)->ports[(port_id)])
> +
>  /* Write/Read Switch base registers */
>  #define netc_base_rd(r, o)		netc_read((r)->base + (o))
>  #define netc_base_wr(r, o, v)		netc_write((r)->base + (o), v)
> diff --git a/drivers/net/dsa/netc/netc_switch_hw.h b/drivers/net/dsa/netc/netc_switch_hw.h
> index 0419f7f9207e..7d9afb493053 100644
> --- a/drivers/net/dsa/netc/netc_switch_hw.h
> +++ b/drivers/net/dsa/netc/netc_switch_hw.h
> @@ -67,6 +67,14 @@
>  #define  PQOSMR_VQMP			GENMASK(19, 16)
>  #define  PQOSMR_QVMP			GENMASK(23, 20)
>  
> +#define NETC_POR			0x100
> +#define  POR_TXDIS			BIT(0)
> +#define  POR_RXDIS			BIT(1)
> +
> +#define NETC_PSR			0x104
> +#define  PSR_TX_BUSY			BIT(0)
> +#define  PSR_RX_BUSY			BIT(1)
> +
>  #define NETC_PTCTMSDUR(a)		(0x208 + (a) * 0x20)
>  #define  PTCTMSDUR_MAXSDU		GENMASK(15, 0)
>  #define  PTCTMSDUR_SDU_TYPE		GENMASK(17, 16)
> @@ -123,6 +131,24 @@ enum netc_mfo {
>  #define NETC_PM_MAXFRM(a)		(0x1014 + (a) * 0x400)
>  #define  PM_MAXFRAM			GENMASK(15, 0)
>  
> +#define NETC_PM_IEVENT(a)		(0x1040 + (a) * 0x400)
> +#define  PM_IEVENT_TX_EMPTY		BIT(5)
> +#define  PM_IEVENT_RX_EMPTY		BIT(6)
> +
> +#define NETC_PM_IF_MODE(a)		(0x1300 + (a) * 0x400)
> +#define  PM_IF_MODE_IFMODE		GENMASK(2, 0)
> +#define   IFMODE_MII			1
> +#define   IFMODE_RMII			3
> +#define   IFMODE_RGMII			4
> +#define   IFMODE_SGMII			5
> +#define  PM_IF_MODE_REVMII		BIT(3)
> +#define  PM_IF_MODE_M10			BIT(4)
> +#define  PM_IF_MODE_HD			BIT(6)
> +#define  PM_IF_MODE_SSP			GENMASK(14, 13)
> +#define   SSP_100M			0
> +#define   SSP_10M			1
> +#define   SSP_1G			2
> +
>  #define NETC_PEMDIOCR			0x1c00
>  #define NETC_EMDIO_BASE			NETC_PEMDIOCR
>  



^ permalink raw reply

* Re: [PATCH 1/6] KVM: PPC: Book3S HV: Validate arch_compat against host compatibility mode
From: Amit Machhiwal @ 2026-05-07 13:25 UTC (permalink / raw)
  To: Harsh Prateek Bora
  Cc: Amit Machhiwal, linuxppc-dev, Madhavan Srinivasan, Vaibhav Jain,
	Nicholas Piggin, Michael Ellerman, Christophe Leroy (CS GROUP),
	kvm, linux-kernel
In-Reply-To: <0ca0e84a-ec56-428f-ac6a-22dd52ad4a1a@linux.ibm.com>

Hi Harsh,

Thanks for looking at the series and your review. Please find my comments
inline.

On 2026/05/05 12:14 PM, Harsh Prateek Bora wrote:
> 
> 
> On 30/04/26 11:19 am, Amit Machhiwal wrote:
> > On IBM POWER systems, newer processor generations can operate in
> > compatibility modes corresponding to earlier generations. This becomes
> > relevant for nested virtualization, where nested KVM guests may need to
> > run with a specific processor compatibility level.
> > 
> > Currently, when running a nested KVM guest (L2) inside a Power11 pSeries
> > logical partition (L1) booted in Power10 compatibility mode, the guest
> > fails to boot while setting 'arch_compat'. This happens because the CPU
> > class is derived from the hardware PVR (via mfspr()), which reflects the
> > physical processor generation (Power11), rather than the effective
> > compatibility mode (Power10).
> > 
> > As a result, userspace may request a Power11 arch_compat for the L2
> > guest. However, the L1 partition, running in Power10 compatibility, has
> > only negotiated support up to Power10 with the Power Hypervisor (L0).
> > When H_SET_STATE is invoked with a Power11 Logical PVR, the hypervisor
> > rejects the request, leading to a late guest boot failure:
> > 
> >    KVM-NESTEDv2: couldn't set guest wide elements
> >    [..KVM reg dump..]
> > 
> > This situation should be detected earlier. Rejecting unsupported
> > 'arch_compat' values in 'kvmppc_set_arch_compat()' avoids issuing an
> > invalid H_SET_STATE hcall and provides a clearer failure mode.
> > 
> > Add a check to reject Power11 'arch_compat' requests when the host is
> > running in Power10 compatibility mode, returning -EINVAL early instead
> > of deferring the failure to the hypervisor.
> > 
> > Signed-off-by: Amit Machhiwal <amachhiw@linux.ibm.com>
> > ---
> >   arch/powerpc/kvm/book3s_hv.c | 6 ++++++
> >   1 file changed, 6 insertions(+)
> > 
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index 61dbeea317f3..948c6b099a29 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -446,7 +446,13 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
> >   			guest_pcr_bit = PCR_ARCH_300;
> >   			break;
> >   		case PVR_ARCH_31:
> > +			guest_pcr_bit = PCR_ARCH_31;
> > +			break;
> >   		case PVR_ARCH_31_P11:
> > +			if ((PVR_ARCH_31 & cur_cpu_spec->pvr_mask) ==
> > +				cur_cpu_spec->pvr_value) {
> > +				return -EINVAL;
> > +			}
> 
> Is it possible to keep the check generic for applicable ISA versions (ISA
> 3.1 onwards?) instead of keeping it specific for P11 ?
> Also need to add a comment, why it's not applicable for older ISAs.

I believe the check is only required for ISA 3.1, the reason being that Power10
and Power11 share the same PCR value. For newer ISA versions, that might not
necessarily be the case. For any subsequent ISA versions, this check would
already be taken care of by the below check in kvmppc_set_arch_compat():

	if (guest_pcr_bit > host_pcr_bit)
		return -EINVAL;

Its mostly implicit but I'm okay to include a comment mentioning the same.

Thanks,
Amit

> 
> >   			guest_pcr_bit = PCR_ARCH_31;
> >   			break;
> >   		default:
> 


^ permalink raw reply

* Re: [PATCH 2/6] KVM: PPC: Introduce KVM_CAP_PPC_COMPAT_CAPS and KVM_PPC_GET_COMPAT_CAPS
From: Amit Machhiwal @ 2026-05-07 13:36 UTC (permalink / raw)
  To: Harsh Prateek Bora
  Cc: Amit Machhiwal, linuxppc-dev, Madhavan Srinivasan, Vaibhav Jain,
	Paolo Bonzini, Nicholas Piggin, Michael Ellerman,
	Christophe Leroy (CS GROUP), kvm, linux-kernel
In-Reply-To: <8c72bee5-664d-4ecc-b4c5-03f825febc75@linux.ibm.com>

On 2026/05/05 02:01 PM, Harsh Prateek Bora wrote:
> 
> 
> On 30/04/26 11:19 am, Amit Machhiwal wrote:
> > Introduce a new capability and ioctl to expose CPU compatibility modes
> > supported by the host processor for nested guests.
> > 
> > Introduce a new KVM capability, KVM_CAP_PPC_COMPAT_CAPS, and a
> > corresponding vm ioctl, KVM_PPC_GET_COMPAT_CAPS, to expose processor
> > compatibility modes supported by the host.
> > 
> > On IBM POWER systems, newer processor generations (N) can operate in
> > compatibility modes corresponding to earlier generations, like (N-1) and
> > (N-2). This is particularly relevant for nested virtualization, where
> > nested KVM guests may need to run with a specific processor compatibility
> > level.
> > 
> > The new ioctl returns a bitmap describing the compatibility modes
> > supported by the host in respective bit numbers. This allows userspace
> > to select an appropriate compatibility level when configuring nested KVM
> > guests.
> > 
> > Signed-off-by: Amit Machhiwal <amachhiw@linux.ibm.com>
> > ---
> >   arch/powerpc/include/uapi/asm/kvm.h | 6 ++++++
> >   include/uapi/linux/kvm.h            | 4 ++++
> >   2 files changed, 10 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> > index 077c5437f521..a38dff7a8aea 100644
> > --- a/arch/powerpc/include/uapi/asm/kvm.h
> > +++ b/arch/powerpc/include/uapi/asm/kvm.h
> > @@ -437,6 +437,12 @@ struct kvm_ppc_cpu_char {
> >   	__u64	behaviour_mask;		/* valid bits in behaviour */
> >   };
> > +/* For KVM_PPC_GET_COMPAT_CAPS */
> > +struct kvm_ppc_compat_caps {
> > +	__u32	flags;
> 
> You may either want flags to be __u64, or rearrange to keep variables in
> descending order of size or add padding as appropriate.
> Also, if we are not using flags with this series, we need to document
> comment as /* reserved for future use */

Good catch. Sure, will do in the next version.

> 
> > +	__u64	compat_capabilities;	/* Capabilities supported by the host */
> > +};
> > +
> >   /*
> >    * Values for character and character_mask.
> >    * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 6c8afa2047bf..1788a0068662 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -996,6 +996,7 @@ struct kvm_enable_cap {
> >   #define KVM_CAP_S390_USER_OPEREXEC 246
> >   #define KVM_CAP_S390_KEYOP 247
> >   #define KVM_CAP_S390_VSIE_ESAMODE 248
> > +#define KVM_CAP_PPC_COMPAT_CAPS 249
> >   struct kvm_irq_routing_irqchip {
> >   	__u32 irqchip;
> > @@ -1349,6 +1350,9 @@ struct kvm_s390_keyop {
> >   #define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
> >   #define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
> > +/* Available with KVM_CAP_PPC_COMPAT_CAPS */
> > +#define KVM_PPC_GET_COMPAT_CAPS	_IOR(KVMIO,  0xe4, struct kvm_ppc_compat_caps)
> > +
> 
> 
> This patch can actually be squashed with next patch where the struct, macros
> are actually being used.

Sure, I can do that. I had split the uAPI addition and ioctl wiring as separate
logical steps, but they are tightly coupled and the split may not add much
value. I’ll squash patches 2 and 3 in the next revision.”

Thanks,
Amit

> >   /*
> >    * ioctls for vcpu fds
> >    */
> 


^ permalink raw reply

* Re: [PATCH 1/3] powerpc/time: remove preempt_disable/enable from arch_irq_work_raise()
From: Shrikanth Hegde @ 2026-05-07 13:36 UTC (permalink / raw)
  To: Sayali Patil, linuxppc-dev, maddy
  Cc: linux-kernel, Ritesh Harjani, Mahesh Salgaonkar, chleroy
In-Reply-To: <a64fa7d86da51f78743bee26e16ae155c43016c7.1778057685.git.sayalip@linux.ibm.com>



On 5/6/26 2:36 PM, Sayali Patil wrote:
> A kernel panic is observed when handling machine check exceptions from
> real mode.
> 
>    BUG: Unable to handle kernel data access on read at 0xc00000006be21300
>    Oops: Kernel access of bad area, sig: 11 [#1]
>    NIP [c000000000029e40] arch_irq_work_raise+0x10/0x70
>    LR [c00000000003ffc8] machine_check_queue_event+0xa8/0x150
>    Call Trace:
>    [c0000000179d3c70] [c00000000003ff64] machine_check_queue_event+0x44/0x150
>    [c0000000179d3d30] [c0000000000084e0] machine_check_early_common+0x1f0/0x2c0
> 
> The crash occurs because arch_irq_work_raise() calls preempt_disable()
> from machine check exception (MCE) handlers running in real mode. In
> this context, accessing the preempt_count can fault, leading to the panic.
> 
> The preempt_disable()/preempt_enable() pair in arch_irq_work_raise()
> was originally added by commit 0fe1ac48bef0 ("powerpc/perf_event: Fix
> oops due to perf_event_do_pending call") to avoid races while raising
> irq work from exception context.
> 
> Later, commit 471ba0e686cb ("irq_work: Do not raise an IPI when
> queueing work on the local CPU") added preemption protection in
> irq_work_queue() path, while commit 20b876918c06 ("irq_work: Use per
> cpu atomics instead of regular atomics") added equivalent
> protection in irq_work_queue_on() before reaching arch_irq_work_raise():
> 
>    irq_work_queue() / irq_work_queue_on()
>      -> preempt_disable()
>        -> __irq_work_queue_local()
>          -> irq_work_raise()
>            -> arch_irq_work_raise()
> 
> As a result, callers other than mce_irq_work_raise() already execute
> with preemption disabled, making the additional
> preempt_disable()/preempt_enable() pair in arch_irq_work_raise()
> redundant.
> 
> Remove it to avoid accessing preempt_count from real mode context.

I assume interrupt is disabled here. So it should be functionally safe
to remove it.

> 
> Fixes: cc15ff327569 ("powerpc/mce: Avoid using irq_work_queue() in realmode")
> Suggested-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Signed-off-by: Sayali Patil <sayalip@linux.ibm.com>
> ---
>   arch/powerpc/kernel/time.c | 2 --
>   1 file changed, 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index 4bbeb8644d3d..a99eb43f6ce9 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -471,10 +471,8 @@ void arch_irq_work_raise(void)

Could you please add a comment for the function that it expects to
be called with preemption_disabled?

>   	 * which could get tangled up if we're messing with the same state
>   	 * here.
>   	 */
> -	preempt_disable();
>   	set_irq_work_pending_flag();
>   	set_dec(1);
> -	preempt_enable();
>   }
>   
>   static void set_dec_or_work(u64 val)

Acked-by: Shrikanth Hegde <sshegde@linux.ibm.com>


^ permalink raw reply

* Re: [PATCH 3/6] KVM: PPC: Wire up KVM_PPC_GET_COMPAT_CAPS ioctl
From: Amit Machhiwal @ 2026-05-07 14:18 UTC (permalink / raw)
  To: Harsh Prateek Bora
  Cc: Amit Machhiwal, linuxppc-dev, Madhavan Srinivasan, Vaibhav Jain,
	Nicholas Piggin, Michael Ellerman, Christophe Leroy (CS GROUP),
	kvm, linux-kernel
In-Reply-To: <1d2e6647-1fbf-48bf-8bb9-f4f3fb80677b@linux.ibm.com>

On 2026/05/05 02:16 PM, Harsh Prateek Bora wrote:
> 
> 
> On 30/04/26 11:19 am, Amit Machhiwal wrote:
> > Add handling for KVM_PPC_GET_COMPAT_CAPS in kvm_arch_vm_ioctl() and
> > advertise support via KVM_CAP_PPC_COMPAT_CAPS.
> > 
> > The ioctl retrieves host CPU compatibility capabilities via a
> > PowerPC-specific backend implementation when available. If the
> > capability is not supported, the ioctl returns success with no
> > capabilities set, allowing userspace to fall back gracefully.
> > 
> > Signed-off-by: Amit Machhiwal <amachhiw@linux.ibm.com>
> > ---
> >   arch/powerpc/include/asm/kvm_ppc.h |  1 +
> >   arch/powerpc/kvm/powerpc.c         | 19 +++++++++++++++++++
> >   2 files changed, 20 insertions(+)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> > index 0953f2daa466..cadfb839e836 100644
> > --- a/arch/powerpc/include/asm/kvm_ppc.h
> > +++ b/arch/powerpc/include/asm/kvm_ppc.h
> > @@ -319,6 +319,7 @@ struct kvmppc_ops {
> >   	bool (*hash_v3_possible)(void);
> >   	int (*create_vm_debugfs)(struct kvm *kvm);
> >   	int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
> > +	int (*get_compat_cpu_ver)(struct kvm_ppc_compat_caps *host_caps);
> >   };
> >   extern struct kvmppc_ops *kvmppc_hv_ops;
> > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> > index 00302399fc37..f35017d83d77 100644
> > --- a/arch/powerpc/kvm/powerpc.c
> > +++ b/arch/powerpc/kvm/powerpc.c
> > @@ -697,6 +697,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> >   			}
> >   		}
> >   		break;
> > +#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
> > +	case KVM_CAP_PPC_COMPAT_CAPS:
> > +		if (kvmhv_on_pseries())
> 
> What about PowerNV ?

I'm not sure if understand the question completely. The goal is to handle the
compatibility mode for a nested guest, i.e., an L2 KVM guest booted inside a
pSeries KVM L1 booted on a PowerNV host with HV compatibilities and as well as
on a Logical Parition booted on PowerVM, hence the check. Does that answer your
question?

> Also, can't we just check if get_compat_cpu_ver is initialized and return
> accordingly ?
> 
> > +			r = 1;
> > +		break;
> > +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> >   	default:
> >   		r = 0;
> >   		break;
> > @@ -2463,6 +2469,19 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
> >   		r = kvm->arch.kvm_ops->svm_off(kvm);
> >   		break;
> >   	}
> > +	case KVM_PPC_GET_COMPAT_CAPS: {
> > +		struct kvm_ppc_compat_caps host_caps;
> > +
> > +		memset(&host_caps, 0, sizeof(host_caps));
> > +		if (!kvm->arch.kvm_ops->get_compat_cpu_ver)
> > +			goto out;
> 
> I guess we want to init r = 0 before returning in this case.

Sure, will do in the next version.

> Also prefer break over goto unless really needed.

I used goto out; to stay consistent with the existing exit paths in this
function, where similar branches go through the same out label. I do not have a
strong preference either way. Please let me know your views.

Thanks,
Amit

> 
> > +
> > +		r = kvm->arch.kvm_ops->get_compat_cpu_ver(&host_caps);
> > +		if (!r && copy_to_user(argp, &host_caps,
> > +				     sizeof(host_caps)))
> > +			r = -EFAULT;
> > +		break;
> > +	}
> >   	default: {
> >   		struct kvm *kvm = filp->private_data;
> >   		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
> 


^ permalink raw reply

* Re: [PATCH 4/6] KVM: PPC: Book3S HV: Implement compat CPU capability retrieval for KVM on PowerVM
From: Amit Machhiwal @ 2026-05-07 14:54 UTC (permalink / raw)
  To: Harsh Prateek Bora
  Cc: Amit Machhiwal, linuxppc-dev, Madhavan Srinivasan, Vaibhav Jain,
	Nicholas Piggin, Michael Ellerman, Christophe Leroy (CS GROUP),
	kvm, linux-kernel
In-Reply-To: <52c78fd5-e987-4140-bb37-f358fc25c7e6@linux.ibm.com>

On 2026/05/05 02:55 PM, Harsh Prateek Bora wrote:
> 
> 
> On 30/04/26 11:19 am, Amit Machhiwal wrote:
> > On POWER systems, the host CPU may run in a compatibility mode (e.g., a
> > Power11 processor operating in Power10 compatibility mode). In such
> > cases, the effective CPU level exposed to guests differs from the
> > physical processor generation.
> > 
> > When running nested KVM guests, QEMU derives the host CPU type using
> > mfpvr(), which reflects the physical processor version. This can result
> > in a mismatch between the CPU model selected by QEMU and the
> > compatibility mode enforced by the host, leading to guest boot failures.
> > 
> > For example, booting a nested guest on a Power11 LPAR configured in
> > Power10 compatibility mode fails with:
> > 
> >    KVM-NESTEDv2: couldn't set guest wide elements
> >    [..KVM reg dump..]
> > 
> > This occurs because QEMU selects a CPU model corresponding to the
> > physical processor (via mfpvr()), while the host operates in a lower
> > compatibility mode. As a result, KVM rejects the requested compatibility
> > level during guest initialization.
> > 
> > Add support for retrieving host CPU compatibility capabilities for
> > nested guests on PowerVM (PAPR nested API v2). The hypervisor provides
> > the effective compatibility levels via the H_GUEST_GET_CAPABILITIES
> > hcall, which reflects the processor modes negotiated between the Power
> > hypervisor (L0) and the host partition (L1).
> > 
> > On pseries systems, obtain the capability bitmap using
> > plpar_guest_get_capabilities() and return it via struct
> > kvm_ppc_compat_caps. This information is then exposed to userspace
> > through the KVM_PPC_GET_COMPAT_CAPS ioctl.
> > 
> > Hook the implementation into the Book3S HV kvmppc_ops so that it can be
> > invoked by the generic KVM ioctl handling code.
> > 
> > Signed-off-by: Amit Machhiwal <amachhiw@linux.ibm.com>
> > ---
> >   arch/powerpc/kvm/book3s_hv.c | 17 +++++++++++++++++
> >   1 file changed, 17 insertions(+)
> > 
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index 948c6b099a29..d602d90111d1 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -6516,6 +6516,22 @@ static bool kvmppc_hash_v3_possible(void)
> >   	return true;
> >   }
> > +
> > +static int kvmppc_get_compat_cpu_caps(struct kvm_ppc_compat_caps *host_caps)
> > +{
> > +
> > +	unsigned long capabilities = 0;
> > +	long rc = -EINVAL;
> > +
> > +	if (kvmhv_on_pseries()) {
> > +		if (kvmhv_is_nestedv2())
> 
> If this is only intended for nested APIv2, it should reflect in commit
> log/title, otherwise, we need to return success with capabilities set to 0
> for non-nestedv2 case.

This patch is specific to nested API v2. I tried to convey that through the “KVM
on PowerVM” in the commit title, and I also described the API v2 scope in the
commit log. If that is still not explicit enough, I can update the title and
commit log in the next revision to make the limitation clearer.

> 
> Also, better to have combined check for pseries and nestedv2 if we are not
> handling other cases.

We are, in the next patch.

> 
> > +			rc = plpar_guest_get_capabilities(0, &capabilities);
> > +		host_caps->compat_capabilities = capabilities;
> 
> Do we need to take care of PowerNV case ?

Next patch takes care of it.

Thanks,
Amit

> > +	}
> > +
> > +	return rc;
> > +}
> > +
> >   static struct kvmppc_ops kvm_ops_hv = {
> >   	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
> >   	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
> > @@ -6558,6 +6574,7 @@ static struct kvmppc_ops kvm_ops_hv = {
> >   	.hash_v3_possible = kvmppc_hash_v3_possible,
> >   	.create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
> >   	.create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
> > +	.get_compat_cpu_ver = kvmppc_get_compat_cpu_caps,
> >   };
> >   static int kvm_init_subcore_bitmap(void)
> 


^ permalink raw reply

* Re: [PATCH 5/6] KVM: PPC: Book3S HV: Add support for compat CPU capabilities for KVM on PowerNV
From: Amit Machhiwal @ 2026-05-07 15:06 UTC (permalink / raw)
  To: Harsh Prateek Bora
  Cc: Amit Machhiwal, linuxppc-dev, Madhavan Srinivasan, Vaibhav Jain,
	Nicholas Piggin, Michael Ellerman, Christophe Leroy (CS GROUP),
	kvm, linux-kernel
In-Reply-To: <a85ed3d9-2a7e-4e63-93ef-c38c4b9a4305@linux.ibm.com>

On 2026/05/05 03:19 PM, Harsh Prateek Bora wrote:
> 
> 
> On 30/04/26 11:19 am, Amit Machhiwal wrote:
> > Currently, when booting a compatibility-mode KVM guest (L1) on a PowerNV
> > hypervisor (L0), the guest runs with the expected processor
> > compatibility level. However, when booting a nested KVM guest (L2)
> > inside the L1, QEMU derives the CPU model from the raw host PVR and
> > attempts to run the nested guest at that level, instead of honoring the
> > compatibility mode of the L1.
> > 
> > Extend host CPU compatibility capability reporting to support nested
> > virtualization on PowerNV systems (PAPR nested API v1).
> > 
> > For nested API v2 (PowerVM), compatibility capabilities are obtained
> > from the hypervisor via the H_GUEST_GET_CAPABILITIES hcall. This
> > information is not available on PowerNV systems.
> > 
> > For nested API v1, derive the compatibility capabilities from the L1
> > guest by reading the "cpu-version" property from the device tree, which
> > reflects the effective (logical) processor compatibility level. Map this
> > value to the corresponding compatibility capability bitmap.
> > 
> > Introduce a helper to translate CPU version values into compatibility
> > capability bits and integrate it into kvmppc_get_compat_cpu_caps().
> > 
> > This allows userspace to query host CPU compatibility modes on both
> > PowerVM and PowerNV platforms via the KVM_PPC_GET_COMPAT_CAPS ioctl.
> > 
> > Signed-off-by: Amit Machhiwal <amachhiw@linux.ibm.com>
> > ---
> >   arch/powerpc/kvm/book3s_hv.c | 37 +++++++++++++++++++++++++++++++++++-
> >   1 file changed, 36 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index d602d90111d1..25d05f1ccb72 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -6516,16 +6516,51 @@ static bool kvmppc_hash_v3_possible(void)
> >   	return true;
> >   }
> > +static int kvmppc_map_compat_capabilities(const __be32 cpu_version,
> > +				      unsigned long *capabilities)
> > +{
> > +	switch (cpu_version) {
> > +	case PVR_ARCH_31_P11:
> > +		*capabilities |= H_GUEST_CAP_POWER11;
> 
> Should this be:
> 
> +               *capabilities |= H_GUEST_CAP_POWER11 |
> +                                H_GUEST_CAP_POWER10 |
> +                                H_GUEST_CAP_POWER9;
> 
> Likewise, the remaining as applicable?

Not necessarily. This function will be called only when we are booting a nested
KVM guest (L2) on KVM on PowerNV. So, when the L1 KVM guest is booted in a
compat mode, L2 is supposed to boot with the same PVR version as that of the L1
which is already taken care of with the current changes, unless L2 as well
booted with a max-cpu-compat - which would anyway take a different path for
setting the compat.

> 
> > +		break;
> > +	case PVR_ARCH_31:
> > +		*capabilities |= H_GUEST_CAP_POWER10;
> > +		break;
> > +	case PVR_ARCH_300:
> > +		*capabilities |= H_GUEST_CAP_POWER9;
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> >   static int kvmppc_get_compat_cpu_caps(struct kvm_ppc_compat_caps *host_caps)
> >   {
> > +	struct device_node *np;
> >   	unsigned long capabilities = 0;
> > +	const __be32 *prop = NULL;
> >   	long rc = -EINVAL;
> > +	u32 cpu_version;
> >   	if (kvmhv_on_pseries()) {
> 
> Commit title says PowerNV, but here were are doing only for PowerVM?

Not really. This would take of both the cases. The else part specifically takes
care of the API v1 nested guests on PowerNV platforms.

Thanks,
Amit

> > -		if (kvmhv_is_nestedv2())
> > +		if (kvmhv_is_nestedv2()) {
> >   			rc = plpar_guest_get_capabilities(0, &capabilities);
> > +		} else {
> > +			for_each_node_by_type(np, "cpu") {
> > +				prop = of_get_property(np, "cpu-version", NULL);
> > +				if (prop) {
> > +					cpu_version = be32_to_cpup(prop);
> > +					break;
> > +				}
> > +			}
> > +			if (!prop)
> > +				return -EINVAL;
> > +			rc = kvmppc_map_compat_capabilities(cpu_version,
> > +								&capabilities);
> > +		}
> >   		host_caps->compat_capabilities = capabilities;
> >   	}
> 


^ permalink raw reply

* RE: [PATCH v2 net] net: wan: fsl_ucc_hdlc: free tx_skbuff in uhdlc_memclean
From: Holger Brunck @ 2026-05-07 15:07 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	andrew+netdev@lunn.ch, chleroy@kernel.org, qiang.zhao@nxp.com,
	horms@kernel.org
In-Reply-To: <20260506161629.5488a643@kernel.org>

> On Wed,  6 May 2026 13:15:29 +0200 Holger Brunck wrote:
> > When the device is removed all allocated resources should be freed.
> > In uhdlc_memclean the netdev transmit queue was already stopped. But
> > at this point we may have pending skb in the transmit queue which must
> > be freed. Therefore iterate over the tx_skbuff pointers and free all
> > pending skb. The issue was discovered by sashiko.
> 
> And you tested this how?
> 
> Given the questionable v1 I'm highly hesitant to accept patches from you if you
> can't test them.

I tested the patch on a ls1043a board running HDLC in busmode on kernel 6.12

I was able to queue some packets in the TX part simply in removing the TX clock
for my setup. When I then shutdown the interface and remove the module I can
see that thel sk_buff pointers stored in the priv->tx_skbuff array, are not freed
 without the patch in question.
I am currently not able to run my setup on latest master, but the changes in the
driver compared to master are minimal.
 
Best regards
Holger  


^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox