* [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s
2024-02-05 13:53 [PATCH 0/7] VT-d: SATC handling and ATS tidying Jan Beulich
@ 2024-02-05 13:55 ` Jan Beulich
2024-02-08 9:17 ` Roger Pau Monné
2024-02-05 13:55 ` [PATCH 2/7] IOMMU: rename and re-type ats_enabled Jan Beulich
` (5 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-05 13:55 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Kevin Tian, Roger Pau Monné, Paul Durrant
This is a prereq to us, in particular, respecting the "ATC required"
flag.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Should we check scope entries for appropriate types? (If so, then also
for e.g. ATSR.)
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -47,6 +47,7 @@ LIST_HEAD_READ_MOSTLY(acpi_drhd_units);
LIST_HEAD_READ_MOSTLY(acpi_rmrr_units);
static LIST_HEAD_READ_MOSTLY(acpi_atsr_units);
static LIST_HEAD_READ_MOSTLY(acpi_rhsa_units);
+static LIST_HEAD_READ_MOSTLY(acpi_satc_units);
static struct acpi_table_header *__read_mostly dmar_table;
static int __read_mostly dmar_flags;
@@ -764,6 +765,95 @@ acpi_parse_one_rhsa(struct acpi_dmar_hea
return ret;
}
+static int __init register_one_satc(struct acpi_satc_unit *satcu)
+{
+ bool ignore = false;
+ unsigned int i = 0;
+ int ret = 0;
+
+ /* Skip checking if segment is not accessible yet. */
+ if ( !pci_known_segment(satcu->segment) )
+ i = UINT_MAX;
+
+ for ( ; i < satcu->scope.devices_cnt; i++ )
+ {
+ uint8_t b = PCI_BUS(satcu->scope.devices[i]);
+ uint8_t d = PCI_SLOT(satcu->scope.devices[i]);
+ uint8_t f = PCI_FUNC(satcu->scope.devices[i]);
+
+ if ( pci_device_detect(satcu->segment, b, d, f) == 0 )
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ " Non-existent device (%pp) is reported in SATC scope!\n",
+ &PCI_SBDF(satcu->segment, b, d, f));
+ ignore = true;
+ }
+ else
+ {
+ ignore = false;
+ break;
+ }
+ }
+
+ if ( ignore )
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ " Ignore SATC for seg %04x as no device under its scope is PCI discoverable!\n",
+ satcu->segment);
+ scope_devices_free(&satcu->scope);
+ xfree(satcu);
+ return 1;
+ }
+
+ if ( iommu_verbose )
+ printk(VTDPREFIX " ATC required: %d\n", satcu->atc_required);
+
+ list_add(&satcu->list, &acpi_satc_units);
+
+ return ret;
+}
+
+static int __init
+acpi_parse_one_satc(const struct acpi_dmar_header *header)
+{
+ const struct acpi_dmar_satc *satc =
+ container_of(header, const struct acpi_dmar_satc, header);
+ struct acpi_satc_unit *satcu;
+ const void *dev_scope_start, *dev_scope_end;
+ int ret;
+
+ if ( (ret = acpi_dmar_check_length(header, sizeof(*satc))) != 0 )
+ return ret;
+
+ satcu = xzalloc(struct acpi_satc_unit);
+ if ( !satcu )
+ return -ENOMEM;
+
+ satcu->segment = satc->segment;
+ satcu->atc_required = satc->flags & 1;
+
+ dev_scope_start = (const void *)(satc + 1);
+ dev_scope_end = (const void *)satc + header->length;
+ ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end,
+ &satcu->scope, SATC_TYPE, satc->segment);
+
+ if ( !ret && satcu->scope.devices_cnt )
+ {
+ ret = register_one_satc(satcu);
+ /*
+ * register_one_satc() returns greater than 0 when a specified
+ * PCIe device cannot be detected. To prevent VT-d from being
+ * disabled in such cases, reset the return value to 0 here.
+ */
+ if ( ret > 0 )
+ ret = 0;
+ }
+ else
+ xfree(satcu);
+
+ return ret;
+}
+
static int __init cf_check acpi_parse_dmar(struct acpi_table_header *table)
{
struct acpi_table_dmar *dmar;
@@ -817,6 +907,11 @@ static int __init cf_check acpi_parse_dm
printk(VTDPREFIX "found ACPI_DMAR_RHSA:\n");
ret = acpi_parse_one_rhsa(entry_header);
break;
+ case ACPI_DMAR_TYPE_SATC:
+ if ( iommu_verbose )
+ printk(VTDPREFIX "found ACPI_DMAR_SATC:\n");
+ ret = acpi_parse_one_satc(entry_header);
+ break;
default:
dprintk(XENLOG_WARNING VTDPREFIX,
"Ignore unknown DMAR structure type (%#x)\n",
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -91,6 +91,13 @@ struct acpi_rhsa_unit {
u32 proximity_domain;
};
+struct acpi_satc_unit {
+ struct dmar_scope scope;
+ struct list_head list;
+ uint16_t segment;
+ bool atc_required:1;
+};
+
#define for_each_drhd_unit(drhd) \
list_for_each_entry(drhd, &acpi_drhd_units, list)
@@ -106,6 +113,7 @@ struct acpi_atsr_unit *acpi_find_matched
#define DMAR_TYPE 1
#define RMRR_TYPE 2
#define ATSR_TYPE 3
+#define SATC_TYPE 4
#define DMAR_OPERATION_TIMEOUT MILLISECS(1000)
--- a/xen/include/acpi/actbl2.h
+++ b/xen/include/acpi/actbl2.h
@@ -345,7 +345,8 @@ enum acpi_dmar_type {
ACPI_DMAR_TYPE_RESERVED_MEMORY = 1,
ACPI_DMAR_TYPE_ATSR = 2,
ACPI_DMAR_HARDWARE_AFFINITY = 3,
- ACPI_DMAR_TYPE_RESERVED = 4 /* 4 and greater are reserved */
+ ACPI_DMAR_TYPE_SATC = 5,
+ ACPI_DMAR_TYPE_RESERVED = 7 /* 7 and greater are reserved */
};
/* DMAR Device Scope structure */
@@ -427,6 +428,15 @@ struct acpi_dmar_rhsa {
u32 proximity_domain;
};
+/* 5: SOC Integrated Address Translation Cache Reporting Structure */
+
+struct acpi_dmar_satc {
+ struct acpi_dmar_header header;
+ uint8_t flags;
+ uint8_t reserved;
+ uint16_t segment;
+};
+
/*******************************************************************************
*
* HPET - High Precision Event Timer table
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s
2024-02-05 13:55 ` [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s Jan Beulich
@ 2024-02-08 9:17 ` Roger Pau Monné
2024-02-08 15:29 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-08 9:17 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 05, 2024 at 02:55:17PM +0100, Jan Beulich wrote:
> This is a prereq to us, in particular, respecting the "ATC required"
> flag.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Should we check scope entries for appropriate types? (If so, then also
> for e.g. ATSR.)
Hm, I guess we could do so in acpi_parse_dev_scope() since that
function already gets passed a 'type' argument.
>
> --- a/xen/drivers/passthrough/vtd/dmar.c
> +++ b/xen/drivers/passthrough/vtd/dmar.c
> @@ -47,6 +47,7 @@ LIST_HEAD_READ_MOSTLY(acpi_drhd_units);
> LIST_HEAD_READ_MOSTLY(acpi_rmrr_units);
> static LIST_HEAD_READ_MOSTLY(acpi_atsr_units);
> static LIST_HEAD_READ_MOSTLY(acpi_rhsa_units);
> +static LIST_HEAD_READ_MOSTLY(acpi_satc_units);
>
> static struct acpi_table_header *__read_mostly dmar_table;
> static int __read_mostly dmar_flags;
> @@ -764,6 +765,95 @@ acpi_parse_one_rhsa(struct acpi_dmar_hea
> return ret;
> }
>
> +static int __init register_one_satc(struct acpi_satc_unit *satcu)
> +{
> + bool ignore = false;
> + unsigned int i = 0;
> + int ret = 0;
> +
> + /* Skip checking if segment is not accessible yet. */
> + if ( !pci_known_segment(satcu->segment) )
> + i = UINT_MAX;
> +
> + for ( ; i < satcu->scope.devices_cnt; i++ )
> + {
> + uint8_t b = PCI_BUS(satcu->scope.devices[i]);
> + uint8_t d = PCI_SLOT(satcu->scope.devices[i]);
> + uint8_t f = PCI_FUNC(satcu->scope.devices[i]);
> +
> + if ( pci_device_detect(satcu->segment, b, d, f) == 0 )
Any reason to explicitly compare against 0?
if ( !pci_device_detect(satcu->segment, b, d, f) )
...
The function returns a boolean.
> + {
> + dprintk(XENLOG_WARNING VTDPREFIX,
> + " Non-existent device (%pp) is reported in SATC scope!\n",
> + &PCI_SBDF(satcu->segment, b, d, f));
> + ignore = true;
This is kind of reporting is incomplete: as soon as one device is
found the loop is exited and no further detection happens. If we want
to print such information, we should do the full scan and avoid
exiting early when a populated device is detected.
> + }
> + else
> + {
> + ignore = false;
> + break;
> + }
> + }
> +
> + if ( ignore )
> + {
> + dprintk(XENLOG_WARNING VTDPREFIX,
> + " Ignore SATC for seg %04x as no device under its scope is PCI discoverable!\n",
(I would drop the '!' at the end, here and above, I don't think they
add much to the error message) I would also use the '#' flag to avoid
confusion, as in the weird case we have a segment '1234' then without
the zero padding I wouldn't really know whether it's decimal or hex.
> + satcu->segment);
> + scope_devices_free(&satcu->scope);
> + xfree(satcu);
> + return 1;
> + }
> +
> + if ( iommu_verbose )
> + printk(VTDPREFIX " ATC required: %d\n", satcu->atc_required);
> +
> + list_add(&satcu->list, &acpi_satc_units);
> +
> + return ret;
> +}
> +
> +static int __init
> +acpi_parse_one_satc(const struct acpi_dmar_header *header)
> +{
> + const struct acpi_dmar_satc *satc =
> + container_of(header, const struct acpi_dmar_satc, header);
> + struct acpi_satc_unit *satcu;
> + const void *dev_scope_start, *dev_scope_end;
> + int ret;
> +
> + if ( (ret = acpi_dmar_check_length(header, sizeof(*satc))) != 0 )
> + return ret;
A very stupid nit, but I would rather prefer:
int ret = acpi_dmar_check_length(header, sizeof(*satc));
if ( ret )
return ret;
Which has the same number of lines and is IMO easier to read.
> +
> + satcu = xzalloc(struct acpi_satc_unit);
> + if ( !satcu )
> + return -ENOMEM;
> +
> + satcu->segment = satc->segment;
> + satcu->atc_required = satc->flags & 1;
I would add this as a define in actbl2.h:
#define ACPI_DMAR_ATC_REQUIRED (1U << 0)
Or some such (maybe just using plain 1 is also fine).
> +
> + dev_scope_start = (const void *)(satc + 1);
> + dev_scope_end = (const void *)satc + header->length;
> + ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end,
> + &satcu->scope, SATC_TYPE, satc->segment);
> +
> + if ( !ret && satcu->scope.devices_cnt )
> + {
> + ret = register_one_satc(satcu);
> + /*
> + * register_one_satc() returns greater than 0 when a specified
> + * PCIe device cannot be detected. To prevent VT-d from being
> + * disabled in such cases, reset the return value to 0 here.
> + */
> + if ( ret > 0 )
> + ret = 0;
> + }
> + else
> + xfree(satcu);
You can safely use scope_devices_free() even if acpi_parse_dev_scope()
failed, so you could unify the freeing here, instead of doing it in
register_one_satc() also.
Also why not make register_one_satc() return a boolean instead of 0/1?
if ( ret || !satcu->scope.devices_cnt || register_one_satc(satcu) )
{
scope_devices_free(&satcu->scope);
xfree(satcu);
}
> +
> + return ret;
> +}
> +
> static int __init cf_check acpi_parse_dmar(struct acpi_table_header *table)
> {
> struct acpi_table_dmar *dmar;
> @@ -817,6 +907,11 @@ static int __init cf_check acpi_parse_dm
> printk(VTDPREFIX "found ACPI_DMAR_RHSA:\n");
> ret = acpi_parse_one_rhsa(entry_header);
> break;
> + case ACPI_DMAR_TYPE_SATC:
> + if ( iommu_verbose )
> + printk(VTDPREFIX "found ACPI_DMAR_SATC:\n");
> + ret = acpi_parse_one_satc(entry_header);
> + break;
I know the surrounding code doesn't use it, but readability would
benefit from adding a blank line after the break statement.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s
2024-02-08 9:17 ` Roger Pau Monné
@ 2024-02-08 15:29 ` Jan Beulich
2024-02-09 9:00 ` Roger Pau Monné
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-08 15:29 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On 08.02.2024 10:17, Roger Pau Monné wrote:
> On Mon, Feb 05, 2024 at 02:55:17PM +0100, Jan Beulich wrote:
>> This is a prereq to us, in particular, respecting the "ATC required"
>> flag.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Should we check scope entries for appropriate types? (If so, then also
>> for e.g. ATSR.)
>
> Hm, I guess we could do so in acpi_parse_dev_scope() since that
> function already gets passed a 'type' argument.
Right, I transiently had it there, but then dropped it again for being
inconsistent with what we have right now. I'll try to remember to add
another patch.
>> @@ -764,6 +765,95 @@ acpi_parse_one_rhsa(struct acpi_dmar_hea
>> return ret;
>> }
>>
>> +static int __init register_one_satc(struct acpi_satc_unit *satcu)
>> +{
>> + bool ignore = false;
>> + unsigned int i = 0;
>> + int ret = 0;
>> +
>> + /* Skip checking if segment is not accessible yet. */
>> + if ( !pci_known_segment(satcu->segment) )
>> + i = UINT_MAX;
>> +
>> + for ( ; i < satcu->scope.devices_cnt; i++ )
>> + {
>> + uint8_t b = PCI_BUS(satcu->scope.devices[i]);
>> + uint8_t d = PCI_SLOT(satcu->scope.devices[i]);
>> + uint8_t f = PCI_FUNC(satcu->scope.devices[i]);
>> +
>> + if ( pci_device_detect(satcu->segment, b, d, f) == 0 )
>
> Any reason to explicitly compare against 0?
>
> if ( !pci_device_detect(satcu->segment, b, d, f) )
> ...
>
> The function returns a boolean.
Hmm, right - simply a result of copy-and-paste.
>> + {
>> + dprintk(XENLOG_WARNING VTDPREFIX,
>> + " Non-existent device (%pp) is reported in SATC scope!\n",
>> + &PCI_SBDF(satcu->segment, b, d, f));
>> + ignore = true;
>
> This is kind of reporting is incomplete: as soon as one device is
> found the loop is exited and no further detection happens. If we want
> to print such information, we should do the full scan and avoid
> exiting early when a populated device is detected.
Not sure I follow, but first of all - these are dprintk()s only, so
meant to only help in dev environments. Specifically ...
>> + }
>> + else
>> + {
>> + ignore = false;
>> + break;
>> + }
>> + }
>> +
>> + if ( ignore )
>> + {
>> + dprintk(XENLOG_WARNING VTDPREFIX,
>> + " Ignore SATC for seg %04x as no device under its scope is PCI discoverable!\n",
... this message is then issued only bogus entries were found. IOW
when a real device was found, there's no real reason to report N
other bogus ones, I think.
Plus, whatever we change here ought to also / first change in
register_one_rmrr().
> (I would drop the '!' at the end, here and above, I don't think they
> add much to the error message)
Sure; copy-and-paste again.
> I would also use the '#' flag to avoid
> confusion, as in the weird case we have a segment '1234' then without
> the zero padding I wouldn't really know whether it's decimal or hex.
Not really, no. If there's any place we log segment numbers in
decimal, we should change that. They ought to be possible to match
with the usual ssss:bb:dd.f coordinates we log.
>> + satcu = xzalloc(struct acpi_satc_unit);
>> + if ( !satcu )
>> + return -ENOMEM;
>> +
>> + satcu->segment = satc->segment;
>> + satcu->atc_required = satc->flags & 1;
>
> I would add this as a define in actbl2.h:
>
> #define ACPI_DMAR_ATC_REQUIRED (1U << 0)
>
> Or some such (maybe just using plain 1 is also fine).
I intended to do so, but strictly staying in line with what Linux has.
To my surprise they use a literal number and have no #define. Hence I
didn't add any either.
>> +
>> + dev_scope_start = (const void *)(satc + 1);
>> + dev_scope_end = (const void *)satc + header->length;
>> + ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end,
>> + &satcu->scope, SATC_TYPE, satc->segment);
>> +
>> + if ( !ret && satcu->scope.devices_cnt )
>> + {
>> + ret = register_one_satc(satcu);
>> + /*
>> + * register_one_satc() returns greater than 0 when a specified
>> + * PCIe device cannot be detected. To prevent VT-d from being
>> + * disabled in such cases, reset the return value to 0 here.
>> + */
>> + if ( ret > 0 )
>> + ret = 0;
>> + }
>> + else
>> + xfree(satcu);
>
> You can safely use scope_devices_free() even if acpi_parse_dev_scope()
> failed, so you could unify the freeing here, instead of doing it in
> register_one_satc() also.
Moving that out of acpi_parse_dev_scope() would feel wrong - if a
function fails, it would better not leave cleanup to its caller(s).
> Also why not make register_one_satc() return a boolean instead of 0/1?
To leave room to also return errors, like register_one_rmrr() does.
For both of these aspects you raise: I'd really like to avoid these
sibling functions going too much out of sync.
>> @@ -817,6 +907,11 @@ static int __init cf_check acpi_parse_dm
>> printk(VTDPREFIX "found ACPI_DMAR_RHSA:\n");
>> ret = acpi_parse_one_rhsa(entry_header);
>> break;
>> + case ACPI_DMAR_TYPE_SATC:
>> + if ( iommu_verbose )
>> + printk(VTDPREFIX "found ACPI_DMAR_SATC:\n");
>> + ret = acpi_parse_one_satc(entry_header);
>> + break;
>
> I know the surrounding code doesn't use it, but readability would
> benefit from adding a blank line after the break statement.
Well, yes, done so. I'm not generally in favor of introducing such
inconsistencies, but it looks tolerable here. (In cases like this
I do - and did here - consider this as an option, but in most cases
I end up valuing uniform look slightly higher.)
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s
2024-02-08 15:29 ` Jan Beulich
@ 2024-02-09 9:00 ` Roger Pau Monné
2024-02-12 9:32 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-09 9:00 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Thu, Feb 08, 2024 at 04:29:34PM +0100, Jan Beulich wrote:
> On 08.02.2024 10:17, Roger Pau Monné wrote:
> > On Mon, Feb 05, 2024 at 02:55:17PM +0100, Jan Beulich wrote:
> >> This is a prereq to us, in particular, respecting the "ATC required"
> >> flag.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> Should we check scope entries for appropriate types? (If so, then also
> >> for e.g. ATSR.)
> >
> > Hm, I guess we could do so in acpi_parse_dev_scope() since that
> > function already gets passed a 'type' argument.
>
> Right, I transiently had it there, but then dropped it again for being
> inconsistent with what we have right now. I'll try to remember to add
> another patch.
No strict requirement - but since it's on the spec we might as well
try to honor it.
> >> + {
> >> + dprintk(XENLOG_WARNING VTDPREFIX,
> >> + " Non-existent device (%pp) is reported in SATC scope!\n",
> >> + &PCI_SBDF(satcu->segment, b, d, f));
> >> + ignore = true;
> >
> > This is kind of reporting is incomplete: as soon as one device is
> > found the loop is exited and no further detection happens. If we want
> > to print such information, we should do the full scan and avoid
> > exiting early when a populated device is detected.
>
> Not sure I follow, but first of all - these are dprintk()s only, so
> meant to only help in dev environments. Specifically ...
>
> >> + }
> >> + else
> >> + {
> >> + ignore = false;
> >> + break;
> >> + }
> >> + }
> >> +
> >> + if ( ignore )
> >> + {
> >> + dprintk(XENLOG_WARNING VTDPREFIX,
> >> + " Ignore SATC for seg %04x as no device under its scope is PCI discoverable!\n",
>
> ... this message is then issued only bogus entries were found. IOW
> when a real device was found, there's no real reason to report N
> other bogus ones, I think.
I guess it's a question of taste. I do find it odd (asymmetric
maybe?) that we stop reporting non-existing devices once a valid
device is found. Makes me wonder what's the point of reporting them
in the first place, if the list of non-existing devices is not
complete?
> Plus, whatever we change here ought to also / first change in
> register_one_rmrr().
I could live with those looking differently, or register_one_rmrr()
can be adjusted later. Existing examples shouldn't be an argument to
not make new additions better.
But that's only if you agree the suggested changes make the code
better, if you prefer the current implementation then there's no point
in arguing whether we should keep register_one_rmrr() and the newly
introduce function similar enough.
> >> + satcu = xzalloc(struct acpi_satc_unit);
> >> + if ( !satcu )
> >> + return -ENOMEM;
> >> +
> >> + satcu->segment = satc->segment;
> >> + satcu->atc_required = satc->flags & 1;
> >
> > I would add this as a define in actbl2.h:
> >
> > #define ACPI_DMAR_ATC_REQUIRED (1U << 0)
> >
> > Or some such (maybe just using plain 1 is also fine).
>
> I intended to do so, but strictly staying in line with what Linux has.
> To my surprise they use a literal number and have no #define. Hence I
> didn't add any either.
I would prefer the define unless you have strong objections, even if
that means diverging from Linux.
> >> +
> >> + dev_scope_start = (const void *)(satc + 1);
> >> + dev_scope_end = (const void *)satc + header->length;
> >> + ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end,
> >> + &satcu->scope, SATC_TYPE, satc->segment);
> >> +
> >> + if ( !ret && satcu->scope.devices_cnt )
> >> + {
> >> + ret = register_one_satc(satcu);
> >> + /*
> >> + * register_one_satc() returns greater than 0 when a specified
> >> + * PCIe device cannot be detected. To prevent VT-d from being
> >> + * disabled in such cases, reset the return value to 0 here.
> >> + */
> >> + if ( ret > 0 )
> >> + ret = 0;
> >> + }
> >> + else
> >> + xfree(satcu);
> >
> > You can safely use scope_devices_free() even if acpi_parse_dev_scope()
> > failed, so you could unify the freeing here, instead of doing it in
> > register_one_satc() also.
>
> Moving that out of acpi_parse_dev_scope() would feel wrong - if a
> function fails, it would better not leave cleanup to its caller(s).
Given that the caller here is the one that did the allocation my
preference would be to also do the cleanup there - register_one_satc()
has no need to know what needs freeing, and allows unifying the
cleanup in a single place.
> > Also why not make register_one_satc() return a boolean instead of 0/1?
>
> To leave room to also return errors, like register_one_rmrr() does.
>
> For both of these aspects you raise: I'd really like to avoid these
> sibling functions going too much out of sync.
I could live with those going out-of-sync (or adjusted later), but only
if you think the suggestions are an improvement.
> >> @@ -817,6 +907,11 @@ static int __init cf_check acpi_parse_dm
> >> printk(VTDPREFIX "found ACPI_DMAR_RHSA:\n");
> >> ret = acpi_parse_one_rhsa(entry_header);
> >> break;
> >> + case ACPI_DMAR_TYPE_SATC:
> >> + if ( iommu_verbose )
> >> + printk(VTDPREFIX "found ACPI_DMAR_SATC:\n");
> >> + ret = acpi_parse_one_satc(entry_header);
> >> + break;
> >
> > I know the surrounding code doesn't use it, but readability would
> > benefit from adding a blank line after the break statement.
>
> Well, yes, done so. I'm not generally in favor of introducing such
> inconsistencies, but it looks tolerable here. (In cases like this
> I do - and did here - consider this as an option, but in most cases
> I end up valuing uniform look slightly higher.)
Yeah, overall I think it's an improvement if we go switching those as
we modify the code for other reasons.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s
2024-02-09 9:00 ` Roger Pau Monné
@ 2024-02-12 9:32 ` Jan Beulich
2024-02-12 10:06 ` Roger Pau Monné
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-12 9:32 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On 09.02.2024 10:00, Roger Pau Monné wrote:
> On Thu, Feb 08, 2024 at 04:29:34PM +0100, Jan Beulich wrote:
>> On 08.02.2024 10:17, Roger Pau Monné wrote:
>>> On Mon, Feb 05, 2024 at 02:55:17PM +0100, Jan Beulich wrote:
>>>> + {
>>>> + dprintk(XENLOG_WARNING VTDPREFIX,
>>>> + " Non-existent device (%pp) is reported in SATC scope!\n",
>>>> + &PCI_SBDF(satcu->segment, b, d, f));
>>>> + ignore = true;
>>>
>>> This is kind of reporting is incomplete: as soon as one device is
>>> found the loop is exited and no further detection happens. If we want
>>> to print such information, we should do the full scan and avoid
>>> exiting early when a populated device is detected.
>>
>> Not sure I follow, but first of all - these are dprintk()s only, so
>> meant to only help in dev environments. Specifically ...
>>
>>>> + }
>>>> + else
>>>> + {
>>>> + ignore = false;
>>>> + break;
>>>> + }
>>>> + }
>>>> +
>>>> + if ( ignore )
>>>> + {
>>>> + dprintk(XENLOG_WARNING VTDPREFIX,
>>>> + " Ignore SATC for seg %04x as no device under its scope is PCI discoverable!\n",
>>
>> ... this message is then issued only bogus entries were found. IOW
>> when a real device was found, there's no real reason to report N
>> other bogus ones, I think.
>
> I guess it's a question of taste. I do find it odd (asymmetric
> maybe?) that we stop reporting non-existing devices once a valid
> device is found. Makes me wonder what's the point of reporting them
> in the first place, if the list of non-existing devices is not
> complete?
Since you look to not be taking this into account, let me re-emphasize
that these are dprintk() only. In the event of an issue, seeing the
log messages you at least get a hint of one device that poses a
problem. That may or may not be enough of an indication for figuring
what's wrong. Making the loop run for longer than necessary when
especially in a release build there's not going to be any change (but
the logic would become [slightly] more complex, as after setting
"ignore" to true we'd need to avoid clearing it back to false) is just
pointless imo. IOW I view this 1st message as merely a courtesy for
the case where the 2nd one would end up also being logged.
>> Plus, whatever we change here ought to also / first change in
>> register_one_rmrr().
>
> I could live with those looking differently, or register_one_rmrr()
> can be adjusted later. Existing examples shouldn't be an argument to
> not make new additions better.
While I generally agree with this principle, in cases like this one it
needs weighing against consistency. Which I consider more important
here, to reduce the chance of making mistakes when fiddling with this
code later again.
>>>> + satcu = xzalloc(struct acpi_satc_unit);
>>>> + if ( !satcu )
>>>> + return -ENOMEM;
>>>> +
>>>> + satcu->segment = satc->segment;
>>>> + satcu->atc_required = satc->flags & 1;
>>>
>>> I would add this as a define in actbl2.h:
>>>
>>> #define ACPI_DMAR_ATC_REQUIRED (1U << 0)
>>>
>>> Or some such (maybe just using plain 1 is also fine).
>>
>> I intended to do so, but strictly staying in line with what Linux has.
>> To my surprise they use a literal number and have no #define. Hence I
>> didn't add any either.
>
> I would prefer the define unless you have strong objections, even if
> that means diverging from Linux.
I could probably accept such a #define living in one of dmar.[ch]. I'd
rather not see it go into actbl2.h.
>>>> +
>>>> + dev_scope_start = (const void *)(satc + 1);
>>>> + dev_scope_end = (const void *)satc + header->length;
>>>> + ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end,
>>>> + &satcu->scope, SATC_TYPE, satc->segment);
>>>> +
>>>> + if ( !ret && satcu->scope.devices_cnt )
>>>> + {
>>>> + ret = register_one_satc(satcu);
>>>> + /*
>>>> + * register_one_satc() returns greater than 0 when a specified
>>>> + * PCIe device cannot be detected. To prevent VT-d from being
>>>> + * disabled in such cases, reset the return value to 0 here.
>>>> + */
>>>> + if ( ret > 0 )
>>>> + ret = 0;
>>>> + }
>>>> + else
>>>> + xfree(satcu);
>>>
>>> You can safely use scope_devices_free() even if acpi_parse_dev_scope()
>>> failed, so you could unify the freeing here, instead of doing it in
>>> register_one_satc() also.
>>
>> Moving that out of acpi_parse_dev_scope() would feel wrong - if a
>> function fails, it would better not leave cleanup to its caller(s).
>
> Given that the caller here is the one that did the allocation my
> preference would be to also do the cleanup there - register_one_satc()
> has no need to know what needs freeing, and allows unifying the
> cleanup in a single place.
Hmm, you're right about the odd freeing behavior. I guess I really
ought to change that, but then first for register_one_rmrr() (seeing
that DRHD and ATSR parsing also do it that way). Which then of course
means also touching add_one_user_rmrr().
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s
2024-02-12 9:32 ` Jan Beulich
@ 2024-02-12 10:06 ` Roger Pau Monné
0 siblings, 0 replies; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-12 10:06 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 12, 2024 at 10:32:00AM +0100, Jan Beulich wrote:
> On 09.02.2024 10:00, Roger Pau Monné wrote:
> > On Thu, Feb 08, 2024 at 04:29:34PM +0100, Jan Beulich wrote:
> >> On 08.02.2024 10:17, Roger Pau Monné wrote:
> >>> On Mon, Feb 05, 2024 at 02:55:17PM +0100, Jan Beulich wrote:
> >>>> + {
> >>>> + dprintk(XENLOG_WARNING VTDPREFIX,
> >>>> + " Non-existent device (%pp) is reported in SATC scope!\n",
> >>>> + &PCI_SBDF(satcu->segment, b, d, f));
> >>>> + ignore = true;
> >>>
> >>> This is kind of reporting is incomplete: as soon as one device is
> >>> found the loop is exited and no further detection happens. If we want
> >>> to print such information, we should do the full scan and avoid
> >>> exiting early when a populated device is detected.
> >>
> >> Not sure I follow, but first of all - these are dprintk()s only, so
> >> meant to only help in dev environments. Specifically ...
> >>
> >>>> + }
> >>>> + else
> >>>> + {
> >>>> + ignore = false;
> >>>> + break;
> >>>> + }
> >>>> + }
> >>>> +
> >>>> + if ( ignore )
> >>>> + {
> >>>> + dprintk(XENLOG_WARNING VTDPREFIX,
> >>>> + " Ignore SATC for seg %04x as no device under its scope is PCI discoverable!\n",
> >>
> >> ... this message is then issued only bogus entries were found. IOW
> >> when a real device was found, there's no real reason to report N
> >> other bogus ones, I think.
> >
> > I guess it's a question of taste. I do find it odd (asymmetric
> > maybe?) that we stop reporting non-existing devices once a valid
> > device is found. Makes me wonder what's the point of reporting them
> > in the first place, if the list of non-existing devices is not
> > complete?
>
> Since you look to not be taking this into account, let me re-emphasize
> that these are dprintk() only. In the event of an issue, seeing the
> log messages you at least get a hint of one device that poses a
> problem. That may or may not be enough of an indication for figuring
> what's wrong. Making the loop run for longer than necessary when
> especially in a release build there's not going to be any change (but
> the logic would become [slightly] more complex, as after setting
> "ignore" to true we'd need to avoid clearing it back to false) is just
> pointless imo. IOW I view this 1st message as merely a courtesy for
> the case where the 2nd one would end up also being logged.
I will not insist anymore.
> >>>> + satcu = xzalloc(struct acpi_satc_unit);
> >>>> + if ( !satcu )
> >>>> + return -ENOMEM;
> >>>> +
> >>>> + satcu->segment = satc->segment;
> >>>> + satcu->atc_required = satc->flags & 1;
> >>>
> >>> I would add this as a define in actbl2.h:
> >>>
> >>> #define ACPI_DMAR_ATC_REQUIRED (1U << 0)
> >>>
> >>> Or some such (maybe just using plain 1 is also fine).
> >>
> >> I intended to do so, but strictly staying in line with what Linux has.
> >> To my surprise they use a literal number and have no #define. Hence I
> >> didn't add any either.
> >
> > I would prefer the define unless you have strong objections, even if
> > that means diverging from Linux.
>
> I could probably accept such a #define living in one of dmar.[ch]. I'd
> rather not see it go into actbl2.h.
Fine. I think the current open coding of 1 in Linux is wrong. Other
flag fields in DMAR structures have the related defines.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 2/7] IOMMU: rename and re-type ats_enabled
2024-02-05 13:53 [PATCH 0/7] VT-d: SATC handling and ATS tidying Jan Beulich
2024-02-05 13:55 ` [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s Jan Beulich
@ 2024-02-05 13:55 ` Jan Beulich
2024-02-08 11:49 ` Roger Pau Monné
2024-02-05 13:56 ` [PATCH 3/7] VT-d: respect ACPI SATC's ATC_REQUIRED flag Jan Beulich
` (4 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-05 13:55 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Kevin Tian, Roger Pau Monné, Paul Durrant, Andrew Cooper
Make the variable a tristate, with (as done elsewhere) a negative value
meaning "default". Since all use sites need looking at, also rename it
to match our usual "opt_*" pattern. While touching it, also move it to
.data.ro_after_init.
The only place it retains boolean nature is pci_ats_device(), for now.
In AMD code re-order conditionals to have the config space accesses
after (cheaper) flag checks.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
In domain_context_mapping_one() I'm a little puzzled that translation
type is selected based on only IOMMU and global properties, i.e. not
taking the device itself into account.
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -282,7 +282,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
struct amd_iommu *iommu;
unsigned int req_id, queueid, maxpend;
- if ( !ats_enabled )
+ if ( opt_ats <= 0 )
return;
if ( !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) )
@@ -340,7 +340,7 @@ static void _amd_iommu_flush_pages(struc
flush_command_buffer(iommu, 0);
}
- if ( ats_enabled )
+ if ( opt_ats > 0 )
{
amd_iommu_flush_all_iotlbs(d, daddr, order);
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -185,10 +185,11 @@ static int __must_check amd_iommu_setup_
dte->ex = ivrs_dev->dte_allow_exclusion;
dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
- if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
+ if ( opt_ats > 0 &&
!ivrs_dev->block_ats &&
- iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
- dte->i = ats_enabled;
+ iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
+ pci_ats_device(iommu->seg, bus, pdev->devfn) )
+ dte->i = true;
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -248,10 +249,11 @@ static int __must_check amd_iommu_setup_
ASSERT(dte->sys_mgt == MASK_EXTR(ivrs_dev->device_flags,
ACPI_IVHD_SYSTEM_MGMT));
- if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
+ if ( opt_ats > 0 &&
!ivrs_dev->block_ats &&
- iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
- ASSERT(dte->i == ats_enabled);
+ iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
+ pci_ats_device(iommu->seg, bus, pdev->devfn) )
+ ASSERT(dte->i);
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -268,9 +270,10 @@ static int __must_check amd_iommu_setup_
ASSERT(pcidevs_locked());
- if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
+ if ( opt_ats > 0 &&
!ivrs_dev->block_ats &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
+ pci_ats_device(iommu->seg, bus, pdev->devfn) &&
!pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
{
if ( devfn == pdev->devfn )
--- a/xen/drivers/passthrough/ats.c
+++ b/xen/drivers/passthrough/ats.c
@@ -18,8 +18,8 @@
#include <xen/pci_regs.h>
#include "ats.h"
-bool __read_mostly ats_enabled;
-boolean_param("ats", ats_enabled);
+int8_t __ro_after_init opt_ats = -1;
+boolean_param("ats", opt_ats);
int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list)
{
--- a/xen/drivers/passthrough/ats.h
+++ b/xen/drivers/passthrough/ats.h
@@ -22,7 +22,7 @@
#define ATS_QUEUE_DEPTH_MASK 0x1f
#define ATS_ENABLE (1<<15)
-extern bool ats_enabled;
+extern int8_t opt_ats;
int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list);
void disable_ats_device(struct pci_dev *pdev);
@@ -43,7 +43,7 @@ static inline int pci_ats_enabled(int se
static inline int pci_ats_device(int seg, int bus, int devfn)
{
- if ( !ats_enabled )
+ if ( !opt_ats )
return 0;
return pci_find_ext_capability(PCI_SBDF(seg, bus, devfn),
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1543,7 +1543,7 @@ int domain_context_mapping_one(
}
context_set_address_root(lctxt, root);
- if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) )
+ if ( opt_ats > 0 && ecap_dev_iotlb(iommu->ecap) )
context_set_translation_type(lctxt, CONTEXT_TT_DEV_IOTLB);
else
context_set_translation_type(lctxt, CONTEXT_TT_MULTI_LEVEL);
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -46,7 +46,7 @@ int ats_device(const struct pci_dev *pde
struct acpi_drhd_unit *ats_drhd;
int pos;
- if ( !ats_enabled || !iommu_qinval )
+ if ( opt_ats <= 0 || !iommu_qinval )
return 0;
if ( !ecap_queued_inval(drhd->iommu->ecap) ||
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 2/7] IOMMU: rename and re-type ats_enabled
2024-02-05 13:55 ` [PATCH 2/7] IOMMU: rename and re-type ats_enabled Jan Beulich
@ 2024-02-08 11:49 ` Roger Pau Monné
2024-02-08 15:49 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-08 11:49 UTC (permalink / raw)
To: Jan Beulich
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant,
Andrew Cooper
On Mon, Feb 05, 2024 at 02:55:43PM +0100, Jan Beulich wrote:
> Make the variable a tristate, with (as done elsewhere) a negative value
> meaning "default". Since all use sites need looking at, also rename it
> to match our usual "opt_*" pattern. While touching it, also move it to
> .data.ro_after_init.
>
> The only place it retains boolean nature is pci_ats_device(), for now.
Why does it retain the boolean nature in pci_ats_device()?
I assume this is to avoid having to touch the line again in a further
patch, as given the current logic pci_ats_device() would also want to
treat -1 as ATS disabled.
I think this is all fine because you add additional opt_ats > 0 checks
before the call to pci_ats_device(), but would be good to know this is
the intention.
> In AMD code re-order conditionals to have the config space accesses
> after (cheaper) flag checks.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> In domain_context_mapping_one() I'm a little puzzled that translation
> type is selected based on only IOMMU and global properties, i.e. not
> taking the device itself into account.
That seems like a bug to me, we should check that the device supports
ATS (and has it enabled) before setting the translation type to
CONTEXT_TT_DEV_IOTLB unconditionally. We should likely use
ats_device() instead of ats_enabled in domain_context_mapping_one().
There's also IMO a second bug here, which is that we possibly attempt
to flush the device IOTLB before having ATS enabled. We flush the
device TLB in domain_context_mapping_one(), yet ATS is enabled by the
caller afterwards (see domain_context_mapping()).
>
> --- a/xen/drivers/passthrough/amd/iommu_cmd.c
> +++ b/xen/drivers/passthrough/amd/iommu_cmd.c
> @@ -282,7 +282,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
> struct amd_iommu *iommu;
> unsigned int req_id, queueid, maxpend;
>
> - if ( !ats_enabled )
> + if ( opt_ats <= 0 )
> return;
>
> if ( !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) )
> @@ -340,7 +340,7 @@ static void _amd_iommu_flush_pages(struc
> flush_command_buffer(iommu, 0);
> }
>
> - if ( ats_enabled )
> + if ( opt_ats > 0 )
> {
> amd_iommu_flush_all_iotlbs(d, daddr, order);
>
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -185,10 +185,11 @@ static int __must_check amd_iommu_setup_
> dte->ex = ivrs_dev->dte_allow_exclusion;
> dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>
> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> + if ( opt_ats > 0 &&
> !ivrs_dev->block_ats &&
> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> - dte->i = ats_enabled;
> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
> + dte->i = true;
>
> spin_unlock_irqrestore(&iommu->lock, flags);
>
> @@ -248,10 +249,11 @@ static int __must_check amd_iommu_setup_
> ASSERT(dte->sys_mgt == MASK_EXTR(ivrs_dev->device_flags,
> ACPI_IVHD_SYSTEM_MGMT));
>
> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> + if ( opt_ats > 0 &&
> !ivrs_dev->block_ats &&
> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> - ASSERT(dte->i == ats_enabled);
> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
> + ASSERT(dte->i);
>
> spin_unlock_irqrestore(&iommu->lock, flags);
>
> @@ -268,9 +270,10 @@ static int __must_check amd_iommu_setup_
>
> ASSERT(pcidevs_locked());
>
> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> + if ( opt_ats > 0 &&
> !ivrs_dev->block_ats &&
> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> + pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
Seeing that this same set of conditions is used in 3 different checks,
could we add a wrapper for it?
opt_ats > 0 && !ivrs_dev->block_ats &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
pci_ats_device(iommu->seg, bus, pdev->devfn)
pci_device_ats_capable()? or some such.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 2/7] IOMMU: rename and re-type ats_enabled
2024-02-08 11:49 ` Roger Pau Monné
@ 2024-02-08 15:49 ` Jan Beulich
2024-02-12 9:39 ` Roger Pau Monné
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-08 15:49 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant,
Andrew Cooper
On 08.02.2024 12:49, Roger Pau Monné wrote:
> On Mon, Feb 05, 2024 at 02:55:43PM +0100, Jan Beulich wrote:
>> Make the variable a tristate, with (as done elsewhere) a negative value
>> meaning "default". Since all use sites need looking at, also rename it
>> to match our usual "opt_*" pattern. While touching it, also move it to
>> .data.ro_after_init.
>>
>> The only place it retains boolean nature is pci_ats_device(), for now.
>
> Why does it retain the boolean nature in pci_ats_device()?
>
> I assume this is to avoid having to touch the line again in a further
> patch, as given the current logic pci_ats_device() would also want to
> treat -1 as ATS disabled.
No, then I would need to touch the line. The function wants to treat
-1 as "maybe enabled", so the caller can know whether a device is an
ATS device regardless of whether ATS use is fully off, or only
"soft-off".
> I think this is all fine because you add additional opt_ats > 0 checks
> before the call to pci_ats_device(), but would be good to know this is
> the intention.
Note how amd_iommu_disable_domain_device() does not gain such a
check, for exactly the reason named above: The function would better
turn off ATS whenever enabled, no matter for what reason.
And of course - none of this "soft-off" vs "fully off" matters for
AMD (which is the only user of the function) right now anyway, seeing
they don't have an equivalent of the ATC_REQUIRED flag.
>> In AMD code re-order conditionals to have the config space accesses
>> after (cheaper) flag checks.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> In domain_context_mapping_one() I'm a little puzzled that translation
>> type is selected based on only IOMMU and global properties, i.e. not
>> taking the device itself into account.
>
> That seems like a bug to me, we should check that the device supports
> ATS (and has it enabled) before setting the translation type to
> CONTEXT_TT_DEV_IOTLB unconditionally. We should likely use
> ats_device() instead of ats_enabled in domain_context_mapping_one().
Will try to remember to add yet another patch then.
> There's also IMO a second bug here, which is that we possibly attempt
> to flush the device IOTLB before having ATS enabled. We flush the
> device TLB in domain_context_mapping_one(), yet ATS is enabled by the
> caller afterwards (see domain_context_mapping()).
You may be right with this; I'd need to read up on whether such
flushing is permissible.
>> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
>> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
>> @@ -185,10 +185,11 @@ static int __must_check amd_iommu_setup_
>> dte->ex = ivrs_dev->dte_allow_exclusion;
>> dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>>
>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>> + if ( opt_ats > 0 &&
>> !ivrs_dev->block_ats &&
>> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
>> - dte->i = ats_enabled;
>> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
>> + dte->i = true;
>>
>> spin_unlock_irqrestore(&iommu->lock, flags);
>>
>> @@ -248,10 +249,11 @@ static int __must_check amd_iommu_setup_
>> ASSERT(dte->sys_mgt == MASK_EXTR(ivrs_dev->device_flags,
>> ACPI_IVHD_SYSTEM_MGMT));
>>
>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>> + if ( opt_ats > 0 &&
>> !ivrs_dev->block_ats &&
>> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
>> - ASSERT(dte->i == ats_enabled);
>> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
>> + ASSERT(dte->i);
>>
>> spin_unlock_irqrestore(&iommu->lock, flags);
>>
>> @@ -268,9 +270,10 @@ static int __must_check amd_iommu_setup_
>>
>> ASSERT(pcidevs_locked());
>>
>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>> + if ( opt_ats > 0 &&
>> !ivrs_dev->block_ats &&
>> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>> + pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>> !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
>
> Seeing that this same set of conditions is used in 3 different checks,
> could we add a wrapper for it?
>
> opt_ats > 0 && !ivrs_dev->block_ats &&
> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> pci_ats_device(iommu->seg, bus, pdev->devfn)
>
> pci_device_ats_capable()? or some such.
I was pondering that, yes (iirc already once when adding block_ats).
Problem is the name. "capable" isn't quite right when considering
the tristate opt_ats. And pci_device_may_use_ats() reads, well,
clumsy to me. If you have any good idea for a name that's fully
applicable and not odd or overly long, I can certainly introduce
such a helper.
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/7] IOMMU: rename and re-type ats_enabled
2024-02-08 15:49 ` Jan Beulich
@ 2024-02-12 9:39 ` Roger Pau Monné
2024-02-12 10:45 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-12 9:39 UTC (permalink / raw)
To: Jan Beulich
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant,
Andrew Cooper
On Thu, Feb 08, 2024 at 04:49:46PM +0100, Jan Beulich wrote:
> On 08.02.2024 12:49, Roger Pau Monné wrote:
> > On Mon, Feb 05, 2024 at 02:55:43PM +0100, Jan Beulich wrote:
> >> Make the variable a tristate, with (as done elsewhere) a negative value
> >> meaning "default". Since all use sites need looking at, also rename it
> >> to match our usual "opt_*" pattern. While touching it, also move it to
> >> .data.ro_after_init.
> >>
> >> The only place it retains boolean nature is pci_ats_device(), for now.
> >
> > Why does it retain the boolean nature in pci_ats_device()?
> >
> > I assume this is to avoid having to touch the line again in a further
> > patch, as given the current logic pci_ats_device() would also want to
> > treat -1 as ATS disabled.
>
> No, then I would need to touch the line. The function wants to treat
> -1 as "maybe enabled", so the caller can know whether a device is an
> ATS device regardless of whether ATS use is fully off, or only
> "soft-off".
I have to admit I'm slightly concerned about this soft-off. Given the
current status of ATS itself in Xen, and the technology itself, I
think a user should always opt-in to ATS usage.
> > I think this is all fine because you add additional opt_ats > 0 checks
> > before the call to pci_ats_device(), but would be good to know this is
> > the intention.
>
> Note how amd_iommu_disable_domain_device() does not gain such a
> check, for exactly the reason named above: The function would better
> turn off ATS whenever enabled, no matter for what reason.
>
> And of course - none of this "soft-off" vs "fully off" matters for
> AMD (which is the only user of the function) right now anyway, seeing
> they don't have an equivalent of the ATC_REQUIRED flag.
>
> >> In AMD code re-order conditionals to have the config space accesses
> >> after (cheaper) flag checks.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> In domain_context_mapping_one() I'm a little puzzled that translation
> >> type is selected based on only IOMMU and global properties, i.e. not
> >> taking the device itself into account.
> >
> > That seems like a bug to me, we should check that the device supports
> > ATS (and has it enabled) before setting the translation type to
> > CONTEXT_TT_DEV_IOTLB unconditionally. We should likely use
> > ats_device() instead of ats_enabled in domain_context_mapping_one().
>
> Will try to remember to add yet another patch then.
>
> > There's also IMO a second bug here, which is that we possibly attempt
> > to flush the device IOTLB before having ATS enabled. We flush the
> > device TLB in domain_context_mapping_one(), yet ATS is enabled by the
> > caller afterwards (see domain_context_mapping()).
>
> You may be right with this; I'd need to read up on whether such
> flushing is permissible.
>
> >> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> >> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> >> @@ -185,10 +185,11 @@ static int __must_check amd_iommu_setup_
> >> dte->ex = ivrs_dev->dte_allow_exclusion;
> >> dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
> >>
> >> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >> + if ( opt_ats > 0 &&
> >> !ivrs_dev->block_ats &&
> >> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> >> - dte->i = ats_enabled;
> >> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> >> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
> >> + dte->i = true;
> >>
> >> spin_unlock_irqrestore(&iommu->lock, flags);
> >>
> >> @@ -248,10 +249,11 @@ static int __must_check amd_iommu_setup_
> >> ASSERT(dte->sys_mgt == MASK_EXTR(ivrs_dev->device_flags,
> >> ACPI_IVHD_SYSTEM_MGMT));
> >>
> >> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >> + if ( opt_ats > 0 &&
> >> !ivrs_dev->block_ats &&
> >> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> >> - ASSERT(dte->i == ats_enabled);
> >> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> >> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
> >> + ASSERT(dte->i);
> >>
> >> spin_unlock_irqrestore(&iommu->lock, flags);
> >>
> >> @@ -268,9 +270,10 @@ static int __must_check amd_iommu_setup_
> >>
> >> ASSERT(pcidevs_locked());
> >>
> >> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >> + if ( opt_ats > 0 &&
> >> !ivrs_dev->block_ats &&
> >> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> >> + pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >> !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
> >
> > Seeing that this same set of conditions is used in 3 different checks,
> > could we add a wrapper for it?
> >
> > opt_ats > 0 && !ivrs_dev->block_ats &&
> > iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> > pci_ats_device(iommu->seg, bus, pdev->devfn)
> >
> > pci_device_ats_capable()? or some such.
>
> I was pondering that, yes (iirc already once when adding block_ats).
> Problem is the name. "capable" isn't quite right when considering
> the tristate opt_ats. And pci_device_may_use_ats() reads, well,
> clumsy to me. If you have any good idea for a name that's fully
> applicable and not odd or overly long, I can certainly introduce
> such a helper.
But if ATS is soft-disabled (-1) or hard disabled (0), it's fine to
consider the devices as not ATS capable for the context here?
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/7] IOMMU: rename and re-type ats_enabled
2024-02-12 9:39 ` Roger Pau Monné
@ 2024-02-12 10:45 ` Jan Beulich
2024-02-12 15:38 ` Roger Pau Monné
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-12 10:45 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant,
Andrew Cooper
On 12.02.2024 10:39, Roger Pau Monné wrote:
> On Thu, Feb 08, 2024 at 04:49:46PM +0100, Jan Beulich wrote:
>> On 08.02.2024 12:49, Roger Pau Monné wrote:
>>> On Mon, Feb 05, 2024 at 02:55:43PM +0100, Jan Beulich wrote:
>>>> Make the variable a tristate, with (as done elsewhere) a negative value
>>>> meaning "default". Since all use sites need looking at, also rename it
>>>> to match our usual "opt_*" pattern. While touching it, also move it to
>>>> .data.ro_after_init.
>>>>
>>>> The only place it retains boolean nature is pci_ats_device(), for now.
>>>
>>> Why does it retain the boolean nature in pci_ats_device()?
>>>
>>> I assume this is to avoid having to touch the line again in a further
>>> patch, as given the current logic pci_ats_device() would also want to
>>> treat -1 as ATS disabled.
>>
>> No, then I would need to touch the line. The function wants to treat
>> -1 as "maybe enabled", so the caller can know whether a device is an
>> ATS device regardless of whether ATS use is fully off, or only
>> "soft-off".
>
> I have to admit I'm slightly concerned about this soft-off. Given the
> current status of ATS itself in Xen, and the technology itself, I
> think a user should always opt-in to ATS usage.
The plan is to follow your suggestion in patch 3 and require explicit
enabling for passing through of such devices. For Dom0, however, I
think it is important that we respect the firmware request by default.
The only viable(?!) alternative would be to panic() instead.
>>>> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
>>>> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
>>>> @@ -185,10 +185,11 @@ static int __must_check amd_iommu_setup_
>>>> dte->ex = ivrs_dev->dte_allow_exclusion;
>>>> dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>>>>
>>>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>>>> + if ( opt_ats > 0 &&
>>>> !ivrs_dev->block_ats &&
>>>> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
>>>> - dte->i = ats_enabled;
>>>> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>>>> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
>>>> + dte->i = true;
>>>>
>>>> spin_unlock_irqrestore(&iommu->lock, flags);
>>>>
>>>> @@ -248,10 +249,11 @@ static int __must_check amd_iommu_setup_
>>>> ASSERT(dte->sys_mgt == MASK_EXTR(ivrs_dev->device_flags,
>>>> ACPI_IVHD_SYSTEM_MGMT));
>>>>
>>>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>>>> + if ( opt_ats > 0 &&
>>>> !ivrs_dev->block_ats &&
>>>> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
>>>> - ASSERT(dte->i == ats_enabled);
>>>> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>>>> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
>>>> + ASSERT(dte->i);
>>>>
>>>> spin_unlock_irqrestore(&iommu->lock, flags);
>>>>
>>>> @@ -268,9 +270,10 @@ static int __must_check amd_iommu_setup_
>>>>
>>>> ASSERT(pcidevs_locked());
>>>>
>>>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>>>> + if ( opt_ats > 0 &&
>>>> !ivrs_dev->block_ats &&
>>>> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>>>> + pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>>>> !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
>>>
>>> Seeing that this same set of conditions is used in 3 different checks,
>>> could we add a wrapper for it?
>>>
>>> opt_ats > 0 && !ivrs_dev->block_ats &&
>>> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>>> pci_ats_device(iommu->seg, bus, pdev->devfn)
>>>
>>> pci_device_ats_capable()? or some such.
>>
>> I was pondering that, yes (iirc already once when adding block_ats).
>> Problem is the name. "capable" isn't quite right when considering
>> the tristate opt_ats. And pci_device_may_use_ats() reads, well,
>> clumsy to me. If you have any good idea for a name that's fully
>> applicable and not odd or overly long, I can certainly introduce
>> such a helper.
>
> But if ATS is soft-disabled (-1) or hard disabled (0), it's fine to
> consider the devices as not ATS capable for the context here?
I don't like mixing capability and policy aspects into a resulting
"capable".
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/7] IOMMU: rename and re-type ats_enabled
2024-02-12 10:45 ` Jan Beulich
@ 2024-02-12 15:38 ` Roger Pau Monné
2024-02-12 15:59 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-12 15:38 UTC (permalink / raw)
To: Jan Beulich
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant,
Andrew Cooper
On Mon, Feb 12, 2024 at 11:45:33AM +0100, Jan Beulich wrote:
> On 12.02.2024 10:39, Roger Pau Monné wrote:
> > On Thu, Feb 08, 2024 at 04:49:46PM +0100, Jan Beulich wrote:
> >> On 08.02.2024 12:49, Roger Pau Monné wrote:
> >>> On Mon, Feb 05, 2024 at 02:55:43PM +0100, Jan Beulich wrote:
> >>>> Make the variable a tristate, with (as done elsewhere) a negative value
> >>>> meaning "default". Since all use sites need looking at, also rename it
> >>>> to match our usual "opt_*" pattern. While touching it, also move it to
> >>>> .data.ro_after_init.
> >>>>
> >>>> The only place it retains boolean nature is pci_ats_device(), for now.
> >>>
> >>> Why does it retain the boolean nature in pci_ats_device()?
> >>>
> >>> I assume this is to avoid having to touch the line again in a further
> >>> patch, as given the current logic pci_ats_device() would also want to
> >>> treat -1 as ATS disabled.
> >>
> >> No, then I would need to touch the line. The function wants to treat
> >> -1 as "maybe enabled", so the caller can know whether a device is an
> >> ATS device regardless of whether ATS use is fully off, or only
> >> "soft-off".
> >
> > I have to admit I'm slightly concerned about this soft-off. Given the
> > current status of ATS itself in Xen, and the technology itself, I
> > think a user should always opt-in to ATS usage.
>
> The plan is to follow your suggestion in patch 3 and require explicit
> enabling for passing through of such devices. For Dom0, however, I
> think it is important that we respect the firmware request by default.
> The only viable(?!) alternative would be to panic() instead.
Or assign to domIO?
> >>>> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> >>>> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> >>>> @@ -185,10 +185,11 @@ static int __must_check amd_iommu_setup_
> >>>> dte->ex = ivrs_dev->dte_allow_exclusion;
> >>>> dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
> >>>>
> >>>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >>>> + if ( opt_ats > 0 &&
> >>>> !ivrs_dev->block_ats &&
> >>>> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> >>>> - dte->i = ats_enabled;
> >>>> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> >>>> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
> >>>> + dte->i = true;
> >>>>
> >>>> spin_unlock_irqrestore(&iommu->lock, flags);
> >>>>
> >>>> @@ -248,10 +249,11 @@ static int __must_check amd_iommu_setup_
> >>>> ASSERT(dte->sys_mgt == MASK_EXTR(ivrs_dev->device_flags,
> >>>> ACPI_IVHD_SYSTEM_MGMT));
> >>>>
> >>>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >>>> + if ( opt_ats > 0 &&
> >>>> !ivrs_dev->block_ats &&
> >>>> - iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
> >>>> - ASSERT(dte->i == ats_enabled);
> >>>> + iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> >>>> + pci_ats_device(iommu->seg, bus, pdev->devfn) )
> >>>> + ASSERT(dte->i);
> >>>>
> >>>> spin_unlock_irqrestore(&iommu->lock, flags);
> >>>>
> >>>> @@ -268,9 +270,10 @@ static int __must_check amd_iommu_setup_
> >>>>
> >>>> ASSERT(pcidevs_locked());
> >>>>
> >>>> - if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >>>> + if ( opt_ats > 0 &&
> >>>> !ivrs_dev->block_ats &&
> >>>> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> >>>> + pci_ats_device(iommu->seg, bus, pdev->devfn) &&
> >>>> !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
> >>>
> >>> Seeing that this same set of conditions is used in 3 different checks,
> >>> could we add a wrapper for it?
> >>>
> >>> opt_ats > 0 && !ivrs_dev->block_ats &&
> >>> iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
> >>> pci_ats_device(iommu->seg, bus, pdev->devfn)
> >>>
> >>> pci_device_ats_capable()? or some such.
> >>
> >> I was pondering that, yes (iirc already once when adding block_ats).
> >> Problem is the name. "capable" isn't quite right when considering
> >> the tristate opt_ats. And pci_device_may_use_ats() reads, well,
> >> clumsy to me. If you have any good idea for a name that's fully
> >> applicable and not odd or overly long, I can certainly introduce
> >> such a helper.
> >
> > But if ATS is soft-disabled (-1) or hard disabled (0), it's fine to
> > consider the devices as not ATS capable for the context here?
>
> I don't like mixing capability and policy aspects into a resulting
> "capable".
IMO we should prefer avoiding code repetition, even if at the cost
of having a handler that have a maybe not ideal naming, but I can't
force you to do that.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/7] IOMMU: rename and re-type ats_enabled
2024-02-12 15:38 ` Roger Pau Monné
@ 2024-02-12 15:59 ` Jan Beulich
0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2024-02-12 15:59 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant,
Andrew Cooper
On 12.02.2024 16:38, Roger Pau Monné wrote:
> On Mon, Feb 12, 2024 at 11:45:33AM +0100, Jan Beulich wrote:
>> On 12.02.2024 10:39, Roger Pau Monné wrote:
>>> On Thu, Feb 08, 2024 at 04:49:46PM +0100, Jan Beulich wrote:
>>>> On 08.02.2024 12:49, Roger Pau Monné wrote:
>>>>> On Mon, Feb 05, 2024 at 02:55:43PM +0100, Jan Beulich wrote:
>>>>>> Make the variable a tristate, with (as done elsewhere) a negative value
>>>>>> meaning "default". Since all use sites need looking at, also rename it
>>>>>> to match our usual "opt_*" pattern. While touching it, also move it to
>>>>>> .data.ro_after_init.
>>>>>>
>>>>>> The only place it retains boolean nature is pci_ats_device(), for now.
>>>>>
>>>>> Why does it retain the boolean nature in pci_ats_device()?
>>>>>
>>>>> I assume this is to avoid having to touch the line again in a further
>>>>> patch, as given the current logic pci_ats_device() would also want to
>>>>> treat -1 as ATS disabled.
>>>>
>>>> No, then I would need to touch the line. The function wants to treat
>>>> -1 as "maybe enabled", so the caller can know whether a device is an
>>>> ATS device regardless of whether ATS use is fully off, or only
>>>> "soft-off".
>>>
>>> I have to admit I'm slightly concerned about this soft-off. Given the
>>> current status of ATS itself in Xen, and the technology itself, I
>>> think a user should always opt-in to ATS usage.
>>
>> The plan is to follow your suggestion in patch 3 and require explicit
>> enabling for passing through of such devices. For Dom0, however, I
>> think it is important that we respect the firmware request by default.
>> The only viable(?!) alternative would be to panic() instead.
>
> Or assign to domIO?
Not behind Dom0's back - I can't see how that would work if then Dom0
tried to load a driver for the device.
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 3/7] VT-d: respect ACPI SATC's ATC_REQUIRED flag
2024-02-05 13:53 [PATCH 0/7] VT-d: SATC handling and ATS tidying Jan Beulich
2024-02-05 13:55 ` [PATCH 1/7] VT-d: parse ACPI "SoC Integrated Address Translation Cache Reporting Structure"s Jan Beulich
2024-02-05 13:55 ` [PATCH 2/7] IOMMU: rename and re-type ats_enabled Jan Beulich
@ 2024-02-05 13:56 ` Jan Beulich
2024-02-08 12:42 ` Roger Pau Monné
2024-02-05 13:56 ` [PATCH 4/7] VT-d: replace find_ats_dev_drhd() Jan Beulich
` (3 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-05 13:56 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Kevin Tian, Roger Pau Monné, Paul Durrant
When the flag is set, permit Dom0 to control the device (no worse than
what we had before and in line with other "best effort" behavior we use
when it comes to Dom0), but suppress passing through to DomU-s unless
ATS can actually be enabled for such devices.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Is ats_device() using acpi_find_matched_atsr_unit() unconditionally
actually correct? Shouldn't that check be skipped for root complex
integrated devices?
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -225,7 +225,10 @@ exceptions (watchdog NMIs and unexpected
> Default: `false`
Permits Xen to set up and use PCI Address Translation Services. This is a
-performance optimisation for PCI Passthrough.
+performance optimisation for PCI Passthrough. Note that firmware may indicate
+that certain devices need to have ATS enabled for proper operation. For such
+devices ATS will be enabled by default, unless the option is used in its
+negative form.
**WARNING: Xen cannot currently safely use ATS because of its synchronous wait
loops for Queued Invalidation completions.**
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -254,6 +254,24 @@ struct acpi_atsr_unit *acpi_find_matched
return all_ports;
}
+const struct acpi_satc_unit *acpi_find_matched_satc_unit(
+ const struct pci_dev *pdev)
+{
+ const struct acpi_satc_unit *satc;
+
+ list_for_each_entry ( satc, &acpi_satc_units, list )
+ {
+ if ( satc->segment != pdev->seg )
+ continue;
+
+ for ( unsigned int i = 0; i < satc->scope.devices_cnt; ++i )
+ if ( satc->scope.devices[i] == pdev->sbdf.bdf )
+ return satc;
+ }
+
+ return NULL;
+}
+
struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd)
{
struct acpi_rhsa_unit *rhsa;
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -109,6 +109,8 @@ struct acpi_satc_unit {
struct acpi_drhd_unit *acpi_find_matched_drhd_unit(const struct pci_dev *);
struct acpi_atsr_unit *acpi_find_matched_atsr_unit(const struct pci_dev *);
+const struct acpi_satc_unit *acpi_find_matched_satc_unit(
+ const struct pci_dev *pdev);
#define DMAR_TYPE 1
#define RMRR_TYPE 2
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2364,6 +2364,25 @@ static int cf_check intel_iommu_add_devi
if ( ret )
dprintk(XENLOG_ERR VTDPREFIX, "%pd: context mapping failed\n",
pdev->domain);
+ else if ( !pdev->broken )
+ {
+ const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
+ const struct acpi_satc_unit *satc = acpi_find_matched_satc_unit(pdev);
+
+ /*
+ * Prevent the device from getting assigned to an unprivileged domain
+ * when firmware indicates ATS is required, but ATS could not be enabled
+ * (e.g. because of being suppressed via command line option).
+ */
+ if ( satc && satc->atc_required &&
+ (!drhd || ats_device(pdev, drhd) <= 0 ||
+ !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn)) )
+ {
+ printk(XENLOG_WARNING "ATS: %pp is not eligible for pass-through\n",
+ &pdev->sbdf);
+ pdev->broken = true;
+ }
+ }
return ret;
}
@@ -2375,12 +2394,27 @@ static int cf_check intel_iommu_enable_d
pci_vtd_quirk(pdev);
- if ( ret <= 0 )
- return ret;
+ if ( ret <= 0 ||
+ (ret = enable_ats_device(pdev, &drhd->iommu->ats_devices)) < 0 )
+ {
+ const struct acpi_satc_unit *satc = acpi_find_matched_satc_unit(pdev);
+
+ /*
+ * Prevent the device from getting assigned to an unprivileged domain
+ * when firmware indicates ATS is required, but ATS use was disabled
+ * via command line option.
+ */
+ if ( satc && satc->atc_required && !pdev->broken )
+ {
+ printk(XENLOG_WARNING "ATS: %pp is not eligible for pass-through\n",
+ &pdev->sbdf);
+ pdev->broken = true;
+ }
- ret = enable_ats_device(pdev, &drhd->iommu->ats_devices);
+ return ret;
+ }
- return ret >= 0 ? 0 : ret;
+ return 0;
}
static int cf_check intel_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -44,9 +44,10 @@ struct acpi_drhd_unit *find_ats_dev_drhd
int ats_device(const struct pci_dev *pdev, const struct acpi_drhd_unit *drhd)
{
struct acpi_drhd_unit *ats_drhd;
+ const struct acpi_satc_unit *satc;
int pos;
- if ( opt_ats <= 0 || !iommu_qinval )
+ if ( !opt_ats || !iommu_qinval )
return 0;
if ( !ecap_queued_inval(drhd->iommu->ecap) ||
@@ -56,6 +57,10 @@ int ats_device(const struct pci_dev *pde
if ( !acpi_find_matched_atsr_unit(pdev) )
return 0;
+ satc = acpi_find_matched_satc_unit(pdev);
+ if ( opt_ats < 0 && (!satc || !satc->atc_required) )
+ return 0;
+
ats_drhd = find_ats_dev_drhd(drhd->iommu);
pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS);
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 3/7] VT-d: respect ACPI SATC's ATC_REQUIRED flag
2024-02-05 13:56 ` [PATCH 3/7] VT-d: respect ACPI SATC's ATC_REQUIRED flag Jan Beulich
@ 2024-02-08 12:42 ` Roger Pau Monné
2024-02-12 11:06 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-08 12:42 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 05, 2024 at 02:56:14PM +0100, Jan Beulich wrote:
> When the flag is set, permit Dom0 to control the device (no worse than
> what we had before and in line with other "best effort" behavior we use
> when it comes to Dom0), but suppress passing through to DomU-s unless
> ATS can actually be enabled for such devices.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Is ats_device() using acpi_find_matched_atsr_unit() unconditionally
> actually correct? Shouldn't that check be skipped for root complex
> integrated devices?
Yes, I think so, ATSR only lists root ports supporting ATS, because
the root complex is assumed to always be ATS capable.
None of this seems to be working then for PCIe endpoints directly in
the root complex, as ats_device() will always return 0?
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -225,7 +225,10 @@ exceptions (watchdog NMIs and unexpected
> > Default: `false`
>
> Permits Xen to set up and use PCI Address Translation Services. This is a
> -performance optimisation for PCI Passthrough.
> +performance optimisation for PCI Passthrough. Note that firmware may indicate
> +that certain devices need to have ATS enabled for proper operation. For such
> +devices ATS will be enabled by default, unless the option is used in its
> +negative form.
I'm kind of worried that we add this support while maintaining the
WARNING below. If I was an admin I would certainly be worried whether
my system could lock-up during normal operations, even with the
devices assigned to dom0 and not a malicious domain.
I know that enabling ATS is forced on us from DMAR, but still.
> **WARNING: Xen cannot currently safely use ATS because of its synchronous wait
> loops for Queued Invalidation completions.**
> --- a/xen/drivers/passthrough/vtd/dmar.c
> +++ b/xen/drivers/passthrough/vtd/dmar.c
> @@ -254,6 +254,24 @@ struct acpi_atsr_unit *acpi_find_matched
> return all_ports;
> }
>
> +const struct acpi_satc_unit *acpi_find_matched_satc_unit(
> + const struct pci_dev *pdev)
> +{
> + const struct acpi_satc_unit *satc;
> +
> + list_for_each_entry ( satc, &acpi_satc_units, list )
> + {
> + if ( satc->segment != pdev->seg )
> + continue;
> +
> + for ( unsigned int i = 0; i < satc->scope.devices_cnt; ++i )
> + if ( satc->scope.devices[i] == pdev->sbdf.bdf )
> + return satc;
> + }
> +
> + return NULL;
> +}
> +
> struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd)
> {
> struct acpi_rhsa_unit *rhsa;
> --- a/xen/drivers/passthrough/vtd/dmar.h
> +++ b/xen/drivers/passthrough/vtd/dmar.h
> @@ -109,6 +109,8 @@ struct acpi_satc_unit {
>
> struct acpi_drhd_unit *acpi_find_matched_drhd_unit(const struct pci_dev *);
> struct acpi_atsr_unit *acpi_find_matched_atsr_unit(const struct pci_dev *);
> +const struct acpi_satc_unit *acpi_find_matched_satc_unit(
> + const struct pci_dev *pdev);
>
> #define DMAR_TYPE 1
> #define RMRR_TYPE 2
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -2364,6 +2364,25 @@ static int cf_check intel_iommu_add_devi
> if ( ret )
> dprintk(XENLOG_ERR VTDPREFIX, "%pd: context mapping failed\n",
> pdev->domain);
> + else if ( !pdev->broken )
> + {
> + const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
> + const struct acpi_satc_unit *satc = acpi_find_matched_satc_unit(pdev);
> +
> + /*
> + * Prevent the device from getting assigned to an unprivileged domain
> + * when firmware indicates ATS is required, but ATS could not be enabled
> + * (e.g. because of being suppressed via command line option).
> + */
I think a safer policy would be to prevent assigning any device that
has atc_required set unless opt_ats > 1 (ie: the user has explicitly
opted-in to the usage of ATS).
While we can't likely avoid ATS being enabled for devices having the
ATC_REQUIRED flag, we shouldn't allow passthrough to possibly
untrusted guests without notice.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 3/7] VT-d: respect ACPI SATC's ATC_REQUIRED flag
2024-02-08 12:42 ` Roger Pau Monné
@ 2024-02-12 11:06 ` Jan Beulich
0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2024-02-12 11:06 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On 08.02.2024 13:42, Roger Pau Monné wrote:
> On Mon, Feb 05, 2024 at 02:56:14PM +0100, Jan Beulich wrote:
>> When the flag is set, permit Dom0 to control the device (no worse than
>> what we had before and in line with other "best effort" behavior we use
>> when it comes to Dom0), but suppress passing through to DomU-s unless
>> ATS can actually be enabled for such devices.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Is ats_device() using acpi_find_matched_atsr_unit() unconditionally
>> actually correct? Shouldn't that check be skipped for root complex
>> integrated devices?
>
> Yes, I think so, ATSR only lists root ports supporting ATS, because
> the root complex is assumed to always be ATS capable.
>
> None of this seems to be working then for PCIe endpoints directly in
> the root complex, as ats_device() will always return 0?
That's my understanding. I've now added a bugfix patch near the front of
the series.
>> --- a/docs/misc/xen-command-line.pandoc
>> +++ b/docs/misc/xen-command-line.pandoc
>> @@ -225,7 +225,10 @@ exceptions (watchdog NMIs and unexpected
>> > Default: `false`
>>
>> Permits Xen to set up and use PCI Address Translation Services. This is a
>> -performance optimisation for PCI Passthrough.
>> +performance optimisation for PCI Passthrough. Note that firmware may indicate
>> +that certain devices need to have ATS enabled for proper operation. For such
>> +devices ATS will be enabled by default, unless the option is used in its
>> +negative form.
>
> I'm kind of worried that we add this support while maintaining the
> WARNING below. If I was an admin I would certainly be worried whether
> my system could lock-up during normal operations, even with the
> devices assigned to dom0 and not a malicious domain.
>
> I know that enabling ATS is forced on us from DMAR, but still.
I'm with you; see below.
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -2364,6 +2364,25 @@ static int cf_check intel_iommu_add_devi
>> if ( ret )
>> dprintk(XENLOG_ERR VTDPREFIX, "%pd: context mapping failed\n",
>> pdev->domain);
>> + else if ( !pdev->broken )
>> + {
>> + const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
>> + const struct acpi_satc_unit *satc = acpi_find_matched_satc_unit(pdev);
>> +
>> + /*
>> + * Prevent the device from getting assigned to an unprivileged domain
>> + * when firmware indicates ATS is required, but ATS could not be enabled
>> + * (e.g. because of being suppressed via command line option).
>> + */
>
> I think a safer policy would be to prevent assigning any device that
> has atc_required set unless opt_ats > 1 (ie: the user has explicitly
> opted-in to the usage of ATS).
>
> While we can't likely avoid ATS being enabled for devices having the
> ATC_REQUIRED flag, we shouldn't allow passthrough to possibly
> untrusted guests without notice.
Switched to that model, including respective wording in the cmdline doc.
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 4/7] VT-d: replace find_ats_dev_drhd()
2024-02-05 13:53 [PATCH 0/7] VT-d: SATC handling and ATS tidying Jan Beulich
` (2 preceding siblings ...)
2024-02-05 13:56 ` [PATCH 3/7] VT-d: respect ACPI SATC's ATC_REQUIRED flag Jan Beulich
@ 2024-02-05 13:56 ` Jan Beulich
2024-02-08 17:31 ` Roger Pau Monné
2024-02-05 13:56 ` [PATCH 5/7] VT-d: move ats_device() to the sole file it's used from Jan Beulich
` (2 subsequent siblings)
6 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-05 13:56 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Kevin Tian, Roger Pau Monné, Paul Durrant
All callers only care about boolean outcome. For this there's no point
in allocating a duplicate of the respective DRHD structure; a simple
boolean suffices (which eventually may wantg to become a count, such
that the "any ATS devices assigned state" can also clear again). With
that boolean, remove respective parameters from internal helper
functions right away, as those have access to the flag through another
parameter.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -65,8 +65,6 @@ struct acpi_drhd_unit *ioapic_to_drhd(un
struct acpi_drhd_unit *hpet_to_drhd(unsigned int hpet_id);
struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd);
-struct acpi_drhd_unit *find_ats_dev_drhd(struct vtd_iommu *iommu);
-
int ats_device(const struct pci_dev *, const struct acpi_drhd_unit *);
int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -624,8 +624,7 @@ int cf_check vtd_flush_iotlb_reg(
}
static int __must_check iommu_flush_iotlb_global(struct vtd_iommu *iommu,
- bool flush_non_present_entry,
- bool flush_dev_iotlb)
+ bool flush_non_present_entry)
{
int status;
@@ -633,7 +632,7 @@ static int __must_check iommu_flush_iotl
vtd_ops_preamble_quirk(iommu);
status = iommu->flush.iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
- flush_non_present_entry, flush_dev_iotlb);
+ flush_non_present_entry, iommu->flush_dev_iotlb);
/* undo platform specific errata workarounds */
vtd_ops_postamble_quirk(iommu);
@@ -642,8 +641,7 @@ static int __must_check iommu_flush_iotl
}
static int __must_check iommu_flush_iotlb_dsi(struct vtd_iommu *iommu, u16 did,
- bool flush_non_present_entry,
- bool flush_dev_iotlb)
+ bool flush_non_present_entry)
{
int status;
@@ -651,7 +649,7 @@ static int __must_check iommu_flush_iotl
vtd_ops_preamble_quirk(iommu);
status = iommu->flush.iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
- flush_non_present_entry, flush_dev_iotlb);
+ flush_non_present_entry, iommu->flush_dev_iotlb);
/* undo platform specific errata workarounds */
vtd_ops_postamble_quirk(iommu);
@@ -661,26 +659,23 @@ static int __must_check iommu_flush_iotl
static int __must_check iommu_flush_iotlb_psi(struct vtd_iommu *iommu, u16 did,
u64 addr, unsigned int order,
- bool flush_non_present_entry,
- bool flush_dev_iotlb)
+ bool flush_non_present_entry)
{
int status;
/* Fallback to domain selective flush if no PSI support */
if ( !cap_pgsel_inv(iommu->cap) )
- return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry,
- flush_dev_iotlb);
+ return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry);
/* Fallback to domain selective flush if size is too big */
if ( order > cap_max_amask_val(iommu->cap) )
- return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry,
- flush_dev_iotlb);
+ return iommu_flush_iotlb_dsi(iommu, did, flush_non_present_entry);
/* apply platform specific errata workarounds */
vtd_ops_preamble_quirk(iommu);
status = iommu->flush.iotlb(iommu, did, addr, order, DMA_TLB_PSI_FLUSH,
- flush_non_present_entry, flush_dev_iotlb);
+ flush_non_present_entry, iommu->flush_dev_iotlb);
/* undo platform specific errata workarounds */
vtd_ops_postamble_quirk(iommu);
@@ -692,7 +687,6 @@ static int __must_check iommu_flush_all(
{
struct acpi_drhd_unit *drhd;
struct vtd_iommu *iommu;
- bool flush_dev_iotlb;
int rc = 0;
flush_local(FLUSH_CACHE);
@@ -703,8 +697,7 @@ static int __must_check iommu_flush_all(
iommu = drhd->iommu;
context_rc = iommu_flush_context_global(iommu, 0);
- flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
- iotlb_rc = iommu_flush_iotlb_global(iommu, 0, flush_dev_iotlb);
+ iotlb_rc = iommu_flush_iotlb_global(iommu, 0);
/*
* The current logic for returns:
@@ -734,7 +727,6 @@ static int __must_check cf_check iommu_f
struct domain_iommu *hd = dom_iommu(d);
struct acpi_drhd_unit *drhd;
struct vtd_iommu *iommu;
- bool flush_dev_iotlb;
int iommu_domid;
int ret = 0;
@@ -762,21 +754,18 @@ static int __must_check cf_check iommu_f
if ( !test_bit(iommu->index, hd->arch.vtd.iommu_bitmap) )
continue;
- flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
iommu_domid = get_iommu_did(d->domain_id, iommu, !d->is_dying);
if ( iommu_domid == -1 )
continue;
if ( !page_count || (page_count & (page_count - 1)) ||
dfn_eq(dfn, INVALID_DFN) || !IS_ALIGNED(dfn_x(dfn), page_count) )
- rc = iommu_flush_iotlb_dsi(iommu, iommu_domid,
- 0, flush_dev_iotlb);
+ rc = iommu_flush_iotlb_dsi(iommu, iommu_domid, 0);
else
rc = iommu_flush_iotlb_psi(iommu, iommu_domid,
dfn_to_daddr(dfn),
get_order_from_pages(page_count),
- !(flush_flags & IOMMU_FLUSHF_modified),
- flush_dev_iotlb);
+ !(flush_flags & IOMMU_FLUSHF_modified));
if ( rc > 0 )
iommu_flush_write_buffer(iommu);
@@ -1488,7 +1477,6 @@ int domain_context_mapping_one(
uint16_t seg = iommu->drhd->segment, prev_did = 0;
struct domain *prev_dom = NULL;
int rc, ret;
- bool flush_dev_iotlb;
if ( QUARANTINE_SKIP(domain, pgd_maddr) )
return 0;
@@ -1637,8 +1625,7 @@ int domain_context_mapping_one(
rc = iommu_flush_context_device(iommu, prev_did, PCI_BDF(bus, devfn),
DMA_CCMD_MASK_NOBIT, !prev_dom);
- flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
- ret = iommu_flush_iotlb_dsi(iommu, prev_did, !prev_dom, flush_dev_iotlb);
+ ret = iommu_flush_iotlb_dsi(iommu, prev_did, !prev_dom);
/*
* The current logic for returns:
@@ -1882,7 +1869,6 @@ int domain_context_unmap_one(
struct context_entry *context, *context_entries;
u64 maddr;
int iommu_domid, rc, ret;
- bool flush_dev_iotlb;
ASSERT(pcidevs_locked());
spin_lock(&iommu->lock);
@@ -1908,8 +1894,7 @@ int domain_context_unmap_one(
PCI_BDF(bus, devfn),
DMA_CCMD_MASK_NOBIT, 0);
- flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
- ret = iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb);
+ ret = iommu_flush_iotlb_dsi(iommu, iommu_domid, 0);
/*
* The current logic for returns:
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -484,6 +484,7 @@ struct vtd_iommu {
spinlock_t register_lock; /* protect iommu register handling */
u64 root_maddr; /* root entry machine address */
nodeid_t node;
+ bool flush_dev_iotlb;
struct msi_desc msi;
struct acpi_drhd_unit *drhd;
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -28,22 +28,8 @@
#include "../extern.h"
#include "../../ats.h"
-static LIST_HEAD(ats_dev_drhd_units);
-
-struct acpi_drhd_unit *find_ats_dev_drhd(struct vtd_iommu *iommu)
-{
- struct acpi_drhd_unit *drhd;
- list_for_each_entry ( drhd, &ats_dev_drhd_units, list )
- {
- if ( drhd->iommu == iommu )
- return drhd;
- }
- return NULL;
-}
-
int ats_device(const struct pci_dev *pdev, const struct acpi_drhd_unit *drhd)
{
- struct acpi_drhd_unit *ats_drhd;
const struct acpi_satc_unit *satc;
int pos;
@@ -61,17 +47,10 @@ int ats_device(const struct pci_dev *pde
if ( opt_ats < 0 && (!satc || !satc->atc_required) )
return 0;
- ats_drhd = find_ats_dev_drhd(drhd->iommu);
pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS);
+ if ( pos )
+ drhd->iommu->flush_dev_iotlb = true;
- if ( pos && (ats_drhd == NULL) )
- {
- ats_drhd = xmalloc(struct acpi_drhd_unit);
- if ( !ats_drhd )
- return -ENOMEM;
- *ats_drhd = *drhd;
- list_add_tail(&ats_drhd->list, &ats_dev_drhd_units);
- }
return pos;
}
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 4/7] VT-d: replace find_ats_dev_drhd()
2024-02-05 13:56 ` [PATCH 4/7] VT-d: replace find_ats_dev_drhd() Jan Beulich
@ 2024-02-08 17:31 ` Roger Pau Monné
2024-02-09 7:06 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-08 17:31 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 05, 2024 at 02:56:36PM +0100, Jan Beulich wrote:
> All callers only care about boolean outcome. For this there's no point
> in allocating a duplicate of the respective DRHD structure; a simple
> boolean suffices (which eventually may wantg to become a count, such
^ want
> that the "any ATS devices assigned state" can also clear again). With
> that boolean, remove respective parameters from internal helper
> functions right away, as those have access to the flag through another
> parameter.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
AFAICT the intention is that this is a non-functional change?
>
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -65,8 +65,6 @@ struct acpi_drhd_unit *ioapic_to_drhd(un
> struct acpi_drhd_unit *hpet_to_drhd(unsigned int hpet_id);
> struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd);
>
> -struct acpi_drhd_unit *find_ats_dev_drhd(struct vtd_iommu *iommu);
> -
> int ats_device(const struct pci_dev *, const struct acpi_drhd_unit *);
>
> int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -624,8 +624,7 @@ int cf_check vtd_flush_iotlb_reg(
> }
>
> static int __must_check iommu_flush_iotlb_global(struct vtd_iommu *iommu,
> - bool flush_non_present_entry,
> - bool flush_dev_iotlb)
> + bool flush_non_present_entry)
> {
> int status;
>
> @@ -633,7 +632,7 @@ static int __must_check iommu_flush_iotl
> vtd_ops_preamble_quirk(iommu);
>
> status = iommu->flush.iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
> - flush_non_present_entry, flush_dev_iotlb);
> + flush_non_present_entry, iommu->flush_dev_iotlb);
Any reason to not also remove the parameter from here also? As the handler
gets iommu passed as the first parameter anyway.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 4/7] VT-d: replace find_ats_dev_drhd()
2024-02-08 17:31 ` Roger Pau Monné
@ 2024-02-09 7:06 ` Jan Beulich
2024-02-09 8:11 ` Roger Pau Monné
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-09 7:06 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On 08.02.2024 18:31, Roger Pau Monné wrote:
> On Mon, Feb 05, 2024 at 02:56:36PM +0100, Jan Beulich wrote:
>> All callers only care about boolean outcome. For this there's no point
>> in allocating a duplicate of the respective DRHD structure; a simple
>> boolean suffices (which eventually may wantg to become a count, such
> ^ want
>> that the "any ATS devices assigned state" can also clear again). With
>> that boolean, remove respective parameters from internal helper
>> functions right away, as those have access to the flag through another
>> parameter.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> AFAICT the intention is that this is a non-functional change?
No functional effect intended, yes. Added such a sentence.
>> --- a/xen/drivers/passthrough/vtd/extern.h
>> +++ b/xen/drivers/passthrough/vtd/extern.h
>> @@ -65,8 +65,6 @@ struct acpi_drhd_unit *ioapic_to_drhd(un
>> struct acpi_drhd_unit *hpet_to_drhd(unsigned int hpet_id);
>> struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd);
>>
>> -struct acpi_drhd_unit *find_ats_dev_drhd(struct vtd_iommu *iommu);
>> -
>> int ats_device(const struct pci_dev *, const struct acpi_drhd_unit *);
>>
>> int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
>> --- a/xen/drivers/passthrough/vtd/iommu.c
>> +++ b/xen/drivers/passthrough/vtd/iommu.c
>> @@ -624,8 +624,7 @@ int cf_check vtd_flush_iotlb_reg(
>> }
>>
>> static int __must_check iommu_flush_iotlb_global(struct vtd_iommu *iommu,
>> - bool flush_non_present_entry,
>> - bool flush_dev_iotlb)
>> + bool flush_non_present_entry)
>> {
>> int status;
>>
>> @@ -633,7 +632,7 @@ static int __must_check iommu_flush_iotl
>> vtd_ops_preamble_quirk(iommu);
>>
>> status = iommu->flush.iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
>> - flush_non_present_entry, flush_dev_iotlb);
>> + flush_non_present_entry, iommu->flush_dev_iotlb);
>
> Any reason to not also remove the parameter from here also? As the handler
> gets iommu passed as the first parameter anyway.
Indeed, yet then the patch would have grown quite a bit. I think I
meant to have a respective post-commit-message remark, but then
forgot to actually put one there. Once (if) this change has gone in,
a follow-on patch could further tidy tings. (The "right away" in the
description was kind of meant to indicate that.)
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 4/7] VT-d: replace find_ats_dev_drhd()
2024-02-09 7:06 ` Jan Beulich
@ 2024-02-09 8:11 ` Roger Pau Monné
0 siblings, 0 replies; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-09 8:11 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Fri, Feb 09, 2024 at 08:06:07AM +0100, Jan Beulich wrote:
> On 08.02.2024 18:31, Roger Pau Monné wrote:
> > On Mon, Feb 05, 2024 at 02:56:36PM +0100, Jan Beulich wrote:
> >> All callers only care about boolean outcome. For this there's no point
> >> in allocating a duplicate of the respective DRHD structure; a simple
> >> boolean suffices (which eventually may wantg to become a count, such
> > ^ want
> >> that the "any ATS devices assigned state" can also clear again). With
> >> that boolean, remove respective parameters from internal helper
> >> functions right away, as those have access to the flag through another
> >> parameter.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >
> > AFAICT the intention is that this is a non-functional change?
>
> No functional effect intended, yes. Added such a sentence.
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>
> >> --- a/xen/drivers/passthrough/vtd/extern.h
> >> +++ b/xen/drivers/passthrough/vtd/extern.h
> >> @@ -65,8 +65,6 @@ struct acpi_drhd_unit *ioapic_to_drhd(un
> >> struct acpi_drhd_unit *hpet_to_drhd(unsigned int hpet_id);
> >> struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd);
> >>
> >> -struct acpi_drhd_unit *find_ats_dev_drhd(struct vtd_iommu *iommu);
> >> -
> >> int ats_device(const struct pci_dev *, const struct acpi_drhd_unit *);
> >>
> >> int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
> >> --- a/xen/drivers/passthrough/vtd/iommu.c
> >> +++ b/xen/drivers/passthrough/vtd/iommu.c
> >> @@ -624,8 +624,7 @@ int cf_check vtd_flush_iotlb_reg(
> >> }
> >>
> >> static int __must_check iommu_flush_iotlb_global(struct vtd_iommu *iommu,
> >> - bool flush_non_present_entry,
> >> - bool flush_dev_iotlb)
> >> + bool flush_non_present_entry)
> >> {
> >> int status;
> >>
> >> @@ -633,7 +632,7 @@ static int __must_check iommu_flush_iotl
> >> vtd_ops_preamble_quirk(iommu);
> >>
> >> status = iommu->flush.iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
> >> - flush_non_present_entry, flush_dev_iotlb);
> >> + flush_non_present_entry, iommu->flush_dev_iotlb);
> >
> > Any reason to not also remove the parameter from here also? As the handler
> > gets iommu passed as the first parameter anyway.
>
> Indeed, yet then the patch would have grown quite a bit. I think I
> meant to have a respective post-commit-message remark, but then
> forgot to actually put one there. Once (if) this change has gone in,
> a follow-on patch could further tidy tings. (The "right away" in the
> description was kind of meant to indicate that.)
Would you mind adding a sentence to the commit message that the
vtd_iommu hooks are not modified in order to avoid the patch growing
too much? Otherwise it it's not clear why those are not also
converted.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 5/7] VT-d: move ats_device() to the sole file it's used from
2024-02-05 13:53 [PATCH 0/7] VT-d: SATC handling and ATS tidying Jan Beulich
` (3 preceding siblings ...)
2024-02-05 13:56 ` [PATCH 4/7] VT-d: replace find_ats_dev_drhd() Jan Beulich
@ 2024-02-05 13:56 ` Jan Beulich
2024-02-09 8:15 ` Roger Pau Monné
2024-02-05 13:57 ` [PATCH 6/7] VT-d: move dev_invalidate_iotlb() " Jan Beulich
2024-02-05 13:57 ` [PATCH 7/7] VT-d: move {,un}map_vtd_domain_page() Jan Beulich
6 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-05 13:56 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Kevin Tian, Roger Pau Monné, Paul Durrant
..., thus allowing it to become static, and thus reducing scope overlap
between it and pci_ats_device(). There's nothing x86-specific about this
function anyway.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -65,8 +65,6 @@ struct acpi_drhd_unit *ioapic_to_drhd(un
struct acpi_drhd_unit *hpet_to_drhd(unsigned int hpet_id);
struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd);
-int ats_device(const struct pci_dev *, const struct acpi_drhd_unit *);
-
int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
u64 addr, unsigned int size_order, u64 type);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1455,6 +1455,33 @@ static void __hwdom_init cf_check intel_
}
}
+static int ats_device(const struct pci_dev *pdev,
+ const struct acpi_drhd_unit *drhd)
+{
+ const struct acpi_satc_unit *satc;
+ int pos;
+
+ if ( !opt_ats || !iommu_qinval )
+ return 0;
+
+ if ( !ecap_queued_inval(drhd->iommu->ecap) ||
+ !ecap_dev_iotlb(drhd->iommu->ecap) )
+ return 0;
+
+ if ( !acpi_find_matched_atsr_unit(pdev) )
+ return 0;
+
+ satc = acpi_find_matched_satc_unit(pdev);
+ if ( opt_ats < 0 && (!satc || !satc->atc_required) )
+ return 0;
+
+ pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS);
+ if ( pos )
+ drhd->iommu->flush_dev_iotlb = true;
+
+ return pos;
+}
+
/*
* This function returns
* - a negative errno value upon error,
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -28,32 +28,6 @@
#include "../extern.h"
#include "../../ats.h"
-int ats_device(const struct pci_dev *pdev, const struct acpi_drhd_unit *drhd)
-{
- const struct acpi_satc_unit *satc;
- int pos;
-
- if ( !opt_ats || !iommu_qinval )
- return 0;
-
- if ( !ecap_queued_inval(drhd->iommu->ecap) ||
- !ecap_dev_iotlb(drhd->iommu->ecap) )
- return 0;
-
- if ( !acpi_find_matched_atsr_unit(pdev) )
- return 0;
-
- satc = acpi_find_matched_satc_unit(pdev);
- if ( opt_ats < 0 && (!satc || !satc->atc_required) )
- return 0;
-
- pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS);
- if ( pos )
- drhd->iommu->flush_dev_iotlb = true;
-
- return pos;
-}
-
static bool device_in_domain(const struct vtd_iommu *iommu,
const struct pci_dev *pdev, uint16_t did)
{
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 5/7] VT-d: move ats_device() to the sole file it's used from
2024-02-05 13:56 ` [PATCH 5/7] VT-d: move ats_device() to the sole file it's used from Jan Beulich
@ 2024-02-09 8:15 ` Roger Pau Monné
0 siblings, 0 replies; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-09 8:15 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 05, 2024 at 02:56:55PM +0100, Jan Beulich wrote:
> ..., thus allowing it to become static, and thus reducing scope overlap
> between it and pci_ats_device(). There's nothing x86-specific about this
> function anyway.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Roger Pau Monné zroger.pau@citrix.com>
FWIW, the function would better return a boolean, as none of the
callers care about the position of the ATS capability, and use it as
an indication that ATS is supported.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 6/7] VT-d: move dev_invalidate_iotlb() to the sole file it's used from
2024-02-05 13:53 [PATCH 0/7] VT-d: SATC handling and ATS tidying Jan Beulich
` (4 preceding siblings ...)
2024-02-05 13:56 ` [PATCH 5/7] VT-d: move ats_device() to the sole file it's used from Jan Beulich
@ 2024-02-05 13:57 ` Jan Beulich
2024-02-09 8:25 ` Roger Pau Monné
2024-02-05 13:57 ` [PATCH 7/7] VT-d: move {,un}map_vtd_domain_page() Jan Beulich
6 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-05 13:57 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Kevin Tian, Roger Pau Monné, Paul Durrant
..., thus allowing it and qinval_device_iotlb_sync() to become static.
There's nothing x86-specific about the function anyway. While moving,
adjust types to better match ./CODING_STYLE (albeit use of fixed-width
types for parameters is retained to limit the effective change).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -65,12 +65,6 @@ struct acpi_drhd_unit *ioapic_to_drhd(un
struct acpi_drhd_unit *hpet_to_drhd(unsigned int hpet_id);
struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd);
-int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
- u64 addr, unsigned int size_order, u64 type);
-
-int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu,
- struct pci_dev *pdev,
- u16 did, u16 size, u64 addr);
uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
void free_pgtable_maddr(u64 maddr);
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -251,8 +251,9 @@ static int __must_check dev_invalidate_s
return rc;
}
-int qinval_device_iotlb_sync(struct vtd_iommu *iommu, struct pci_dev *pdev,
- u16 did, u16 size, u64 addr)
+static int qinval_device_iotlb_sync(struct vtd_iommu *iommu,
+ struct pci_dev *pdev, uint16_t did,
+ uint16_t size, paddr_t addr)
{
unsigned long flags;
unsigned int index;
@@ -282,6 +283,101 @@ int qinval_device_iotlb_sync(struct vtd_
return dev_invalidate_sync(iommu, pdev, did);
}
+static bool device_in_domain(const struct vtd_iommu *iommu,
+ const struct pci_dev *pdev, uint16_t did)
+{
+ struct root_entry *root_entry;
+ struct context_entry *ctxt_entry = NULL;
+ unsigned int tt;
+ bool found = false;
+
+ if ( unlikely(!iommu->root_maddr) )
+ {
+ ASSERT_UNREACHABLE();
+ return false;
+ }
+
+ root_entry = map_vtd_domain_page(iommu->root_maddr);
+ if ( !root_present(root_entry[pdev->bus]) )
+ goto out;
+
+ ctxt_entry = map_vtd_domain_page(root_entry[pdev->bus].val);
+ if ( context_domain_id(ctxt_entry[pdev->devfn]) != did )
+ goto out;
+
+ tt = context_translation_type(ctxt_entry[pdev->devfn]);
+ if ( tt != CONTEXT_TT_DEV_IOTLB )
+ goto out;
+
+ found = true;
+ out:
+ if ( root_entry )
+ unmap_vtd_domain_page(root_entry);
+
+ if ( ctxt_entry )
+ unmap_vtd_domain_page(ctxt_entry);
+
+ return found;
+}
+
+static int dev_invalidate_iotlb(struct vtd_iommu *iommu, uint16_t did,
+ paddr_t addr, unsigned int size_order,
+ uint64_t type)
+{
+ struct pci_dev *pdev, *temp;
+ int ret = 0;
+
+ if ( !ecap_dev_iotlb(iommu->ecap) )
+ return ret;
+
+ list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
+ {
+ bool sbit;
+ int rc = 0;
+
+ switch ( type )
+ {
+ case DMA_TLB_DSI_FLUSH:
+ if ( !device_in_domain(iommu, pdev, did) )
+ break;
+ /* fall through if DSI condition met */
+ case DMA_TLB_GLOBAL_FLUSH:
+ /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
+ sbit = 1;
+ addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
+ rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
+ break;
+ case DMA_TLB_PSI_FLUSH:
+ if ( !device_in_domain(iommu, pdev, did) )
+ break;
+
+ /* if size <= 4K, set sbit = 0, else set sbit = 1 */
+ sbit = size_order ? 1 : 0;
+
+ /* clear lower bits */
+ addr &= ~0UL << PAGE_SHIFT_4K;
+
+ /* if sbit == 1, zero out size_order bit and set lower bits to 1 */
+ if ( sbit )
+ {
+ addr &= ~((u64)PAGE_SIZE_4K << (size_order - 1));
+ addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
+ }
+
+ rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
+ break;
+ default:
+ dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ( !ret )
+ ret = rc;
+ }
+
+ return ret;
+}
+
static int __must_check queue_invalidate_iec_sync(struct vtd_iommu *iommu,
u8 granu, u8 im, u16 iidx)
{
--- a/xen/drivers/passthrough/vtd/x86/Makefile
+++ b/xen/drivers/passthrough/vtd/x86/Makefile
@@ -1,2 +1 @@
-obj-y += ats.o
obj-y += vtd.o
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; If not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Allen Kay <allen.m.kay@intel.com>
- */
-
-#include <xen/sched.h>
-#include <xen/iommu.h>
-#include <xen/time.h>
-#include <xen/pci.h>
-#include <xen/pci_regs.h>
-#include <asm/msi.h>
-#include "../iommu.h"
-#include "../dmar.h"
-#include "../vtd.h"
-#include "../extern.h"
-#include "../../ats.h"
-
-static bool device_in_domain(const struct vtd_iommu *iommu,
- const struct pci_dev *pdev, uint16_t did)
-{
- struct root_entry *root_entry;
- struct context_entry *ctxt_entry = NULL;
- unsigned int tt;
- bool found = false;
-
- if ( unlikely(!iommu->root_maddr) )
- {
- ASSERT_UNREACHABLE();
- return false;
- }
-
- root_entry = map_vtd_domain_page(iommu->root_maddr);
- if ( !root_present(root_entry[pdev->bus]) )
- goto out;
-
- ctxt_entry = map_vtd_domain_page(root_entry[pdev->bus].val);
- if ( context_domain_id(ctxt_entry[pdev->devfn]) != did )
- goto out;
-
- tt = context_translation_type(ctxt_entry[pdev->devfn]);
- if ( tt != CONTEXT_TT_DEV_IOTLB )
- goto out;
-
- found = true;
-out:
- if ( root_entry )
- unmap_vtd_domain_page(root_entry);
-
- if ( ctxt_entry )
- unmap_vtd_domain_page(ctxt_entry);
-
- return found;
-}
-
-int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
- u64 addr, unsigned int size_order, u64 type)
-{
- struct pci_dev *pdev, *temp;
- int ret = 0;
-
- if ( !ecap_dev_iotlb(iommu->ecap) )
- return ret;
-
- list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
- {
- bool sbit;
- int rc = 0;
-
- switch ( type )
- {
- case DMA_TLB_DSI_FLUSH:
- if ( !device_in_domain(iommu, pdev, did) )
- break;
- /* fall through if DSI condition met */
- case DMA_TLB_GLOBAL_FLUSH:
- /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
- sbit = 1;
- addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
- rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
- break;
- case DMA_TLB_PSI_FLUSH:
- if ( !device_in_domain(iommu, pdev, did) )
- break;
-
- /* if size <= 4K, set sbit = 0, else set sbit = 1 */
- sbit = size_order ? 1 : 0;
-
- /* clear lower bits */
- addr &= ~0UL << PAGE_SHIFT_4K;
-
- /* if sbit == 1, zero out size_order bit and set lower bits to 1 */
- if ( sbit )
- {
- addr &= ~((u64)PAGE_SIZE_4K << (size_order - 1));
- addr |= (((u64)1 << (size_order - 1)) - 1) << PAGE_SHIFT_4K;
- }
-
- rc = qinval_device_iotlb_sync(iommu, pdev, did, sbit, addr);
- break;
- default:
- dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
- return -EOPNOTSUPP;
- }
-
- if ( !ret )
- ret = rc;
- }
-
- return ret;
-}
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 6/7] VT-d: move dev_invalidate_iotlb() to the sole file it's used from
2024-02-05 13:57 ` [PATCH 6/7] VT-d: move dev_invalidate_iotlb() " Jan Beulich
@ 2024-02-09 8:25 ` Roger Pau Monné
2024-02-09 14:44 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-09 8:25 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 05, 2024 at 02:57:12PM +0100, Jan Beulich wrote:
> ..., thus allowing it and qinval_device_iotlb_sync() to become static.
> There's nothing x86-specific about the function anyway. While moving,
> adjust types to better match ./CODING_STYLE (albeit use of fixed-width
> types for parameters is retained to limit the effective change).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -65,12 +65,6 @@ struct acpi_drhd_unit *ioapic_to_drhd(un
> struct acpi_drhd_unit *hpet_to_drhd(unsigned int hpet_id);
> struct acpi_rhsa_unit *drhd_to_rhsa(const struct acpi_drhd_unit *drhd);
>
> -int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
> - u64 addr, unsigned int size_order, u64 type);
> -
> -int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu,
> - struct pci_dev *pdev,
> - u16 did, u16 size, u64 addr);
>
> uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
> void free_pgtable_maddr(u64 maddr);
> --- a/xen/drivers/passthrough/vtd/qinval.c
> +++ b/xen/drivers/passthrough/vtd/qinval.c
> @@ -251,8 +251,9 @@ static int __must_check dev_invalidate_s
> return rc;
> }
>
> -int qinval_device_iotlb_sync(struct vtd_iommu *iommu, struct pci_dev *pdev,
> - u16 did, u16 size, u64 addr)
> +static int qinval_device_iotlb_sync(struct vtd_iommu *iommu,
> + struct pci_dev *pdev, uint16_t did,
> + uint16_t size, paddr_t addr)
> {
> unsigned long flags;
> unsigned int index;
> @@ -282,6 +283,101 @@ int qinval_device_iotlb_sync(struct vtd_
> return dev_invalidate_sync(iommu, pdev, did);
> }
>
> +static bool device_in_domain(const struct vtd_iommu *iommu,
> + const struct pci_dev *pdev, uint16_t did)
> +{
> + struct root_entry *root_entry;
> + struct context_entry *ctxt_entry = NULL;
> + unsigned int tt;
> + bool found = false;
> +
> + if ( unlikely(!iommu->root_maddr) )
> + {
> + ASSERT_UNREACHABLE();
> + return false;
> + }
> +
> + root_entry = map_vtd_domain_page(iommu->root_maddr);
> + if ( !root_present(root_entry[pdev->bus]) )
> + goto out;
> +
> + ctxt_entry = map_vtd_domain_page(root_entry[pdev->bus].val);
> + if ( context_domain_id(ctxt_entry[pdev->devfn]) != did )
> + goto out;
> +
> + tt = context_translation_type(ctxt_entry[pdev->devfn]);
> + if ( tt != CONTEXT_TT_DEV_IOTLB )
> + goto out;
> +
> + found = true;
> + out:
> + if ( root_entry )
> + unmap_vtd_domain_page(root_entry);
> +
> + if ( ctxt_entry )
> + unmap_vtd_domain_page(ctxt_entry);
> +
> + return found;
> +}
> +
> +static int dev_invalidate_iotlb(struct vtd_iommu *iommu, uint16_t did,
> + paddr_t addr, unsigned int size_order,
> + uint64_t type)
> +{
> + struct pci_dev *pdev, *temp;
> + int ret = 0;
> +
> + if ( !ecap_dev_iotlb(iommu->ecap) )
> + return ret;
> +
> + list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
> + {
> + bool sbit;
> + int rc = 0;
> +
> + switch ( type )
> + {
> + case DMA_TLB_DSI_FLUSH:
> + if ( !device_in_domain(iommu, pdev, did) )
> + break;
> + /* fall through if DSI condition met */
> + case DMA_TLB_GLOBAL_FLUSH:
> + /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
> + sbit = 1;
> + addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
Given the MISRA stuff, won't it be better to append 'UL' here while
moving?
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 6/7] VT-d: move dev_invalidate_iotlb() to the sole file it's used from
2024-02-09 8:25 ` Roger Pau Monné
@ 2024-02-09 14:44 ` Jan Beulich
0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2024-02-09 14:44 UTC (permalink / raw)
To: Roger Pau Monné
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On 09.02.2024 09:25, Roger Pau Monné wrote:
> On Mon, Feb 05, 2024 at 02:57:12PM +0100, Jan Beulich wrote:
>> ..., thus allowing it and qinval_device_iotlb_sync() to become static.
>> There's nothing x86-specific about the function anyway. While moving,
>> adjust types to better match ./CODING_STYLE (albeit use of fixed-width
>> types for parameters is retained to limit the effective change).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Thanks.
>> +static int dev_invalidate_iotlb(struct vtd_iommu *iommu, uint16_t did,
>> + paddr_t addr, unsigned int size_order,
>> + uint64_t type)
>> +{
>> + struct pci_dev *pdev, *temp;
>> + int ret = 0;
>> +
>> + if ( !ecap_dev_iotlb(iommu->ecap) )
>> + return ret;
>> +
>> + list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
>> + {
>> + bool sbit;
>> + int rc = 0;
>> +
>> + switch ( type )
>> + {
>> + case DMA_TLB_DSI_FLUSH:
>> + if ( !device_in_domain(iommu, pdev, did) )
>> + break;
>> + /* fall through if DSI condition met */
>> + case DMA_TLB_GLOBAL_FLUSH:
>> + /* invalidate all translations: sbit=1,bit_63=0,bit[62:12]=1 */
>> + sbit = 1;
>> + addr = (~0UL << PAGE_SHIFT_4K) & 0x7FFFFFFFFFFFFFFF;
>
> Given the MISRA stuff, won't it be better to append 'UL' here while
> moving?
Sure, done.
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 7/7] VT-d: move {,un}map_vtd_domain_page()
2024-02-05 13:53 [PATCH 0/7] VT-d: SATC handling and ATS tidying Jan Beulich
` (5 preceding siblings ...)
2024-02-05 13:57 ` [PATCH 6/7] VT-d: move dev_invalidate_iotlb() " Jan Beulich
@ 2024-02-05 13:57 ` Jan Beulich
2024-02-09 8:39 ` Roger Pau Monné
6 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-05 13:57 UTC (permalink / raw)
To: xen-devel@lists.xenproject.org
Cc: Kevin Tian, Roger Pau Monné, Paul Durrant
..., thus allowing them to become static. There's nothing x86-specific
about these functions anyway.
Since only the "iommu_inclusive_mapping" parameter declaration would be
left in the file, move that as well. There's nothing VT-d specific about
it (anymore?): "dom0-iommu=map-inclusive" is similarly generic, and
documentation also doesn't say anything.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/drivers/passthrough/vtd/Makefile
+++ b/xen/drivers/passthrough/vtd/Makefile
@@ -1,5 +1,3 @@
-obj-$(CONFIG_X86) += x86/
-
obj-y += iommu.o
obj-y += dmar.o
obj-y += utils.o
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -21,6 +21,7 @@
#define _VTD_EXTERN_H_
#include "dmar.h"
+#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#define VTDPREFIX "[VT-D]"
@@ -68,8 +69,6 @@ struct acpi_rhsa_unit *drhd_to_rhsa(cons
uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
void free_pgtable_maddr(u64 maddr);
-void *map_vtd_domain_page(u64 maddr);
-void unmap_vtd_domain_page(const void *va);
int domain_context_mapping_one(struct domain *domain, struct vtd_iommu *iommu,
uint8_t bus, uint8_t devfn,
const struct pci_dev *pdev, domid_t domid,
@@ -79,6 +78,16 @@ int domain_context_unmap_one(struct doma
int cf_check intel_iommu_get_reserved_device_memory(
iommu_grdm_t *func, void *ctxt);
+static inline void *map_vtd_domain_page(paddr_t maddr)
+{
+ return map_domain_page(_mfn(paddr_to_pfn(maddr)));
+}
+
+static inline void unmap_vtd_domain_page(const void *va)
+{
+ unmap_domain_page(va);
+}
+
unsigned int cf_check io_apic_read_remap_rte(
unsigned int apic, unsigned int reg);
void cf_check io_apic_write_remap_rte(
--- a/xen/drivers/passthrough/vtd/x86/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-y += vtd.o
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2008, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; If not, see <http://www.gnu.org/licenses/>.
- *
- * Copyright (C) Allen Kay <allen.m.kay@intel.com>
- * Copyright (C) Weidong Han <weidong.han@intel.com>
- */
-
-#include <xen/param.h>
-#include <xen/sched.h>
-#include <xen/softirq.h>
-#include <xen/domain_page.h>
-#include <asm/paging.h>
-#include <xen/iommu.h>
-#include <xen/irq.h>
-#include <xen/numa.h>
-#include <asm/fixmap.h>
-#include "../iommu.h"
-#include "../dmar.h"
-#include "../vtd.h"
-#include "../extern.h"
-
-/*
- * iommu_inclusive_mapping: when set, all memory below 4GB is included in dom0
- * 1:1 iommu mappings except xen and unusable regions.
- */
-boolean_param("iommu_inclusive_mapping", iommu_hwdom_inclusive);
-
-void *map_vtd_domain_page(u64 maddr)
-{
- return map_domain_page(_mfn(paddr_to_pfn(maddr)));
-}
-
-void unmap_vtd_domain_page(const void *va)
-{
- unmap_domain_page(va);
-}
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -19,6 +19,7 @@
#include <xen/paging.h>
#include <xen/guest_access.h>
#include <xen/event.h>
+#include <xen/param.h>
#include <xen/softirq.h>
#include <xen/vm_event.h>
#include <xsm/xsm.h>
@@ -36,6 +37,12 @@ bool __initdata iommu_superpages = true;
enum iommu_intremap __read_mostly iommu_intremap = iommu_intremap_full;
+/*
+ * iommu_inclusive_mapping: When set, all memory below 4GB is included in dom0
+ * 1:1 iommu mappings except xen and unusable regions.
+ */
+boolean_param("iommu_inclusive_mapping", iommu_hwdom_inclusive);
+
#ifdef CONFIG_PV
/* Possible unfiltered LAPIC/MSI messages from untrusted sources? */
bool __read_mostly untrusted_msi;
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 7/7] VT-d: move {,un}map_vtd_domain_page()
2024-02-05 13:57 ` [PATCH 7/7] VT-d: move {,un}map_vtd_domain_page() Jan Beulich
@ 2024-02-09 8:39 ` Roger Pau Monné
2024-02-12 9:46 ` Jan Beulich
0 siblings, 1 reply; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-09 8:39 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 05, 2024 at 02:57:30PM +0100, Jan Beulich wrote:
> ..., thus allowing them to become static. There's nothing x86-specific
> about these functions anyway.
>
> Since only the "iommu_inclusive_mapping" parameter declaration would be
> left in the file, move that as well. There's nothing VT-d specific about
> it (anymore?): "dom0-iommu=map-inclusive" is similarly generic, and
> documentation also doesn't say anything.
Hm, I guess documentation should at least say that
iommu_inclusive_mapping is x86 specific, because it's not parsed on
Arm and hence might give the wrong impression that it's actually
acknowledged there.
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Albeit I think it would be better to put the parsing in generic
iommu.c, so that the option gets parsed on Arm and
arch_iommu_hwdom_init() can print a warning message about it not
supported on Arm.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 7/7] VT-d: move {,un}map_vtd_domain_page()
2024-02-09 8:39 ` Roger Pau Monné
@ 2024-02-12 9:46 ` Jan Beulich
2024-02-12 10:11 ` Roger Pau Monné
0 siblings, 1 reply; 30+ messages in thread
From: Jan Beulich @ 2024-02-12 9:46 UTC (permalink / raw)
To: Roger Pau Monné, Julien Grall, Stefano Stabellini,
Bertrand Marquis, Michal Orzel
Cc: xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On 09.02.2024 09:39, Roger Pau Monné wrote:
> On Mon, Feb 05, 2024 at 02:57:30PM +0100, Jan Beulich wrote:
>> ..., thus allowing them to become static. There's nothing x86-specific
>> about these functions anyway.
>>
>> Since only the "iommu_inclusive_mapping" parameter declaration would be
>> left in the file, move that as well. There's nothing VT-d specific about
>> it (anymore?): "dom0-iommu=map-inclusive" is similarly generic, and
>> documentation also doesn't say anything.
>
> Hm, I guess documentation should at least say that
> iommu_inclusive_mapping is x86 specific, because it's not parsed on
> Arm and hence might give the wrong impression that it's actually
> acknowledged there.
In v2 I'm adding "(x86)" there.
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Thanks.
> Albeit I think it would be better to put the parsing in generic
> iommu.c, so that the option gets parsed on Arm and
> arch_iommu_hwdom_init() can print a warning message about it not
> supported on Arm.
Hmm, I would have considered doing things the other way around - make
that part of parsing in parse_dom0_iommu_param() x86-only. I would
feel odd to introduce an option to Arm, just to be able to report
that it's unsupported. The more when generic option parsing code will
already log unrecognized options (sadly such log messages aren't seen
in the serial log, for being issued too early). But let's ask Arm
folks what they'd prefer, by adding all of them to To:.
Jan
^ permalink raw reply [flat|nested] 30+ messages in thread* Re: [PATCH 7/7] VT-d: move {,un}map_vtd_domain_page()
2024-02-12 9:46 ` Jan Beulich
@ 2024-02-12 10:11 ` Roger Pau Monné
0 siblings, 0 replies; 30+ messages in thread
From: Roger Pau Monné @ 2024-02-12 10:11 UTC (permalink / raw)
To: Jan Beulich
Cc: Julien Grall, Stefano Stabellini, Bertrand Marquis, Michal Orzel,
xen-devel@lists.xenproject.org, Kevin Tian, Paul Durrant
On Mon, Feb 12, 2024 at 10:46:55AM +0100, Jan Beulich wrote:
> On 09.02.2024 09:39, Roger Pau Monné wrote:
> > On Mon, Feb 05, 2024 at 02:57:30PM +0100, Jan Beulich wrote:
> >> ..., thus allowing them to become static. There's nothing x86-specific
> >> about these functions anyway.
> >>
> >> Since only the "iommu_inclusive_mapping" parameter declaration would be
> >> left in the file, move that as well. There's nothing VT-d specific about
> >> it (anymore?): "dom0-iommu=map-inclusive" is similarly generic, and
> >> documentation also doesn't say anything.
> >
> > Hm, I guess documentation should at least say that
> > iommu_inclusive_mapping is x86 specific, because it's not parsed on
> > Arm and hence might give the wrong impression that it's actually
> > acknowledged there.
>
> In v2 I'm adding "(x86)" there.
Maybe part of this should be done as a separate patch so it can be
backported to stable branches?
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >
> > Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>
> Thanks.
>
> > Albeit I think it would be better to put the parsing in generic
> > iommu.c, so that the option gets parsed on Arm and
> > arch_iommu_hwdom_init() can print a warning message about it not
> > supported on Arm.
>
> Hmm, I would have considered doing things the other way around - make
> that part of parsing in parse_dom0_iommu_param() x86-only. I would
> feel odd to introduce an option to Arm, just to be able to report
> that it's unsupported. The more when generic option parsing code will
> already log unrecognized options (sadly such log messages aren't seen
> in the serial log, for being issued too early). But let's ask Arm
> folks what they'd prefer, by adding all of them to To:.
FWIW, that (moving part of the parsing of parse_dom0_iommu_param() to
x86-only code) would also be fine. As long as both
dom0-iommu=map-inclusive and iommu_inclusive_mapping are handled
equally and the documentation is updated to reflect that.
Thanks, Roger.
^ permalink raw reply [flat|nested] 30+ messages in thread