* [PATCH] x86: use VMLOAD for PV context switch
From: Jan Beulich @ 2018-07-10 10:14 UTC
To: xen-devel; +Cc: Andrew Cooper, Brian Woods, Suravee Suthikulpanit
Having noticed that VMLOAD alone is about as fast as a single one of the
involved WRMSRs, I thought it might be a reasonable idea to also use it
for PV. Measurements, however, have shown that an actual improvement can
be achieved only with an early prefetch of the VMCB (thanks to Andrew
for suggesting to try this), which I have to admit I can't really
explain. This way, on my Fam15 box, a context switch takes over 100
clocks less on average (the measured values vary heavily in all cases,
though).

This is intentionally not using a new hvm_funcs hook: For one, this is
all about PV, and something similar can hardly be done for VMX.
Furthermore, the indirect-to-direct call patching that is meant to be
applied to most hvm_funcs hooks would be ugly to make work with
functions having more than 6 parameters.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Besides the mentioned oddity with measured performance, I've also
noticed a significant difference (of at least 150 clocks) between
measuring immediately around the calls to svm_load_segs() and measuring
immediately inside the function.
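For reference, a minimal sketch (not part of the patch) of the kind of cycle
measurement referred to above; read_cycles() and MEASURE_CYCLES() are
hypothetical helpers rather than existing Xen interfaces, and RDTSCP is used
simply because it waits for earlier instructions to complete before reading
the TSC:

#include <stdint.h>

static inline uint64_t read_cycles(void)
{
    uint32_t lo, hi, aux;

    /* RDTSCP also writes TSC_AUX into ECX; it is ignored here. */
    asm volatile ( "rdtscp" : "=a" (lo), "=d" (hi), "=c" (aux) );
    return ((uint64_t)hi << 32) | lo;
}

/*
 * Measuring "immediately around the call":
 *     cycles = MEASURE_CYCLES(svm_load_segs(...));
 * Measuring "immediately inside the function" would instead place the two
 * read_cycles() calls at the entry and exit of svm_load_segs() itself.
 */
#define MEASURE_CYCLES(call) ({          \
    uint64_t t0_ = read_cycles();        \
    (call);                              \
    read_cycles() - t0_;                 \
})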
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -52,6 +52,7 @@
#include <asm/hvm/hvm.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/support.h>
+#include <asm/hvm/svm/svm.h>
#include <asm/hvm/viridian.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
@@ -1274,11 +1275,35 @@ static void load_segments(struct vcpu *n
struct cpu_user_regs *uregs = &n->arch.user_regs;
int all_segs_okay = 1;
unsigned int dirty_segment_mask, cpu = smp_processor_id();
+ bool fs_gs_done = false;
/* Load and clear the dirty segment mask. */
dirty_segment_mask = per_cpu(dirty_segment_mask, cpu);
per_cpu(dirty_segment_mask, cpu) = 0;
+#ifdef CONFIG_HVM
+ if ( !is_pv_32bit_vcpu(n) && !cpu_has_fsgsbase && cpu_has_svm &&
+ !((uregs->fs | uregs->gs) & ~3) &&
+ /*
+ * The remaining part is just for optimization: If only shadow GS
+ * needs loading, there's nothing to be gained here.
+ */
+ (n->arch.pv_vcpu.fs_base | n->arch.pv_vcpu.gs_base_user) )
+ {
+ fs_gs_done = n->arch.flags & TF_kernel_mode
+ ? svm_load_segs(n->arch.pv_vcpu.ldt_ents, LDT_VIRT_START(n),
+ uregs->fs, n->arch.pv_vcpu.fs_base,
+ uregs->gs, n->arch.pv_vcpu.gs_base_kernel,
+ n->arch.pv_vcpu.gs_base_user)
+ : svm_load_segs(n->arch.pv_vcpu.ldt_ents, LDT_VIRT_START(n),
+ uregs->fs, n->arch.pv_vcpu.fs_base,
+ uregs->gs, n->arch.pv_vcpu.gs_base_user,
+ n->arch.pv_vcpu.gs_base_kernel);
+ }
+#endif
+ if ( !fs_gs_done )
+ load_LDT(n);
+
/* Either selector != 0 ==> reload. */
if ( unlikely((dirty_segment_mask & DIRTY_DS) | uregs->ds) )
{
@@ -1294,7 +1319,7 @@ static void load_segments(struct vcpu *n
}
/* Either selector != 0 ==> reload. */
- if ( unlikely((dirty_segment_mask & DIRTY_FS) | uregs->fs) )
+ if ( unlikely((dirty_segment_mask & DIRTY_FS) | uregs->fs) && !fs_gs_done )
{
all_segs_okay &= loadsegment(fs, uregs->fs);
/* non-nul selector updates fs_base */
@@ -1303,7 +1328,7 @@ static void load_segments(struct vcpu *n
}
/* Either selector != 0 ==> reload. */
- if ( unlikely((dirty_segment_mask & DIRTY_GS) | uregs->gs) )
+ if ( unlikely((dirty_segment_mask & DIRTY_GS) | uregs->gs) && !fs_gs_done )
{
all_segs_okay &= loadsegment(gs, uregs->gs);
/* non-nul selector updates gs_base_user */
@@ -1311,7 +1336,7 @@ static void load_segments(struct vcpu *n
dirty_segment_mask &= ~DIRTY_GS_BASE;
}
- if ( !is_pv_32bit_vcpu(n) )
+ if ( !fs_gs_done && !is_pv_32bit_vcpu(n) )
{
/* This can only be non-zero if selector is NULL. */
if ( n->arch.pv_vcpu.fs_base | (dirty_segment_mask & DIRTY_FS_BASE) )
@@ -1646,6 +1671,12 @@ static void __context_switch(void)
write_ptbase(n);
+#if defined(CONFIG_PV) && defined(CONFIG_HVM)
+ if ( is_pv_domain(nd) && !is_pv_32bit_domain(nd) && !is_idle_domain(nd) &&
+ !cpu_has_fsgsbase && cpu_has_svm )
+ svm_load_segs(0, 0, 0, 0, 0, 0, 0);
+#endif
+
if ( need_full_gdt(nd) &&
((p->vcpu_id != n->vcpu_id) || !need_full_gdt(pd)) )
{
@@ -1707,10 +1738,7 @@ void context_switch(struct vcpu *prev, s
local_irq_enable();
if ( is_pv_domain(nextd) )
- {
- load_LDT(next);
load_segments(next);
- }
ctxt_switch_levelling(next);
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -81,6 +81,9 @@ static struct hvm_function_table svm_fun
*/
static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, hsa);
static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, host_vmcb);
+#ifdef CONFIG_PV
+static DEFINE_PER_CPU(struct vmcb_struct *, host_vmcb_va);
+#endif
static bool_t amd_erratum383_found __read_mostly;
@@ -1574,6 +1577,14 @@ static void svm_cpu_dead(unsigned int cp
*this_hsa = 0;
}
+#ifdef CONFIG_PV
+ if ( per_cpu(host_vmcb_va, cpu) )
+ {
+ unmap_domain_page_global(per_cpu(host_vmcb_va, cpu));
+ per_cpu(host_vmcb_va, cpu) = NULL;
+ }
+#endif
+
if ( *this_vmcb )
{
free_domheap_page(maddr_to_page(*this_vmcb));
@@ -1608,6 +1619,11 @@ static int svm_cpu_up_prepare(unsigned i
if ( !pg )
goto err;
+#ifdef CONFIG_PV
+ if ( !cpu_has_fsgsbase )
+ per_cpu(host_vmcb_va, cpu) = __map_domain_page_global(pg);
+#endif
+
clear_domain_page(page_to_mfn(pg));
*this_vmcb = page_to_maddr(pg);
}
@@ -1654,6 +1670,60 @@ static int svm_handle_osvw(struct vcpu *
return 0;
}
+#ifdef CONFIG_PV
+bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
+ unsigned int fs_sel, unsigned long fs_base,
+ unsigned int gs_sel, unsigned long gs_base,
+ unsigned long gs_shadow)
+{
+ unsigned int cpu = smp_processor_id();
+ struct vmcb_struct *vmcb = per_cpu(host_vmcb_va, cpu);
+
+ if ( unlikely(!vmcb) )
+ return false;
+
+ if ( !ldt_base )
+ {
+ asm volatile ( "prefetch %0" :: "m" (vmcb->ldtr) );
+ return true;
+ }
+
+ if ( likely(!ldt_ents) )
+ memset(&vmcb->ldtr, 0, sizeof(vmcb->ldtr));
+ else
+ {
+ /* Keep GDT in sync. */
+ struct desc_struct *desc = this_cpu(gdt_table) + LDT_ENTRY -
+ FIRST_RESERVED_GDT_ENTRY;
+
+ _set_tssldt_desc(desc, ldt_base, ldt_ents * 8 - 1, SYS_DESC_ldt);
+
+ vmcb->ldtr.sel = LDT_ENTRY << 3;
+ vmcb->ldtr.attr = SYS_DESC_ldt | (_SEGMENT_P >> 8);
+ vmcb->ldtr.limit = ldt_ents * 8 - 1;
+ vmcb->ldtr.base = ldt_base;
+ }
+
+ ASSERT(!(fs_sel & ~3));
+ vmcb->fs.sel = fs_sel;
+ vmcb->fs.attr = 0;
+ vmcb->fs.limit = 0;
+ vmcb->fs.base = fs_base;
+
+ ASSERT(!(gs_sel & ~3));
+ vmcb->gs.sel = gs_sel;
+ vmcb->gs.attr = 0;
+ vmcb->gs.limit = 0;
+ vmcb->gs.base = gs_base;
+
+ vmcb->kerngsbase = gs_shadow;
+
+ svm_vmload_pa(per_cpu(host_vmcb, cpu));
+
+ return true;
+}
+#endif
+
static int _svm_cpu_up(bool bsp)
{
uint64_t msr_content;
@@ -1706,6 +1776,8 @@ static int _svm_cpu_up(bool bsp)
/* Initialize OSVW bits to be used by guests */
svm_host_osvw_init();
+ svm_vmsave_pa(per_cpu(host_vmcb, cpu));
+
return 0;
}
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -53,6 +53,12 @@ unsigned long *svm_msrbit(unsigned long
void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
+/* PV context switch helper */
+bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
+ unsigned int fs_sel, unsigned long fs_base,
+ unsigned int gs_sel, unsigned long gs_base,
+ unsigned long gs_shadow);
+
extern u32 svm_feature_flags;
#define SVM_FEATURE_NPT 0 /* Nested page table support */
* Re: [PATCH] x86: use VMLOAD for PV context switch
From: Brian Woods @ 2018-08-16 22:04 UTC
To: Jan Beulich; +Cc: xen-devel, Brian Woods, Suravee Suthikulpanit, Andrew Cooper
On Tue, Jul 10, 2018 at 04:14:11AM -0600, Jan Beulich wrote:
> Having noticed that VMLOAD alone is about as fast as a single of the
> involved WRMSRs, I thought it might be a reasonable idea to also use it
> for PV. Measurements, however, have shown that an actual improvement can
> be achieved only with an early prefetch of the VMCB (thanks to Andrew
> for suggesting to try this), which I have to admit I can't really
> explain. This way on my Fam15 box context switch takes over 100 clocks
> less on average (the measured values are heavily varying in all cases,
> though).
>
> This is intentionally not using a new hvm_funcs hook: For one, this is
> all about PV, and something similar can hardly be done for VMX.
> Furthermore the indirect to direct call patching that is meant to be
> applied to most hvm_funcs hooks would be ugly to make work with
> functions having more than 6 parameters.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
I have confirmed with a senior hardware engineer that using VMLOAD in
this fashion, i.e. for PV context switching, is safe and recommended for
performance.
Acked-by: Brian Woods <brian.woods@amd.com>
--
Brian Woods
* Re: [PATCH] x86: use VMLOAD for PV context switch
From: Jan Beulich @ 2018-08-17 7:33 UTC
To: Brian Woods; +Cc: Andrew Cooper, Suravee Suthikulpanit, xen-devel
>>> On 17.08.18 at 00:04, <brian.woods@amd.com> wrote:
> On Tue, Jul 10, 2018 at 04:14:11AM -0600, Jan Beulich wrote:
>> Having noticed that VMLOAD alone is about as fast as a single of the
>> involved WRMSRs, I thought it might be a reasonable idea to also use it
>> for PV. Measurements, however, have shown that an actual improvement can
>> be achieved only with an early prefetch of the VMCB (thanks to Andrew
>> for suggesting to try this), which I have to admit I can't really
>> explain. This way on my Fam15 box context switch takes over 100 clocks
>> less on average (the measured values are heavily varying in all cases,
>> though).
>>
>> This is intentionally not using a new hvm_funcs hook: For one, this is
>> all about PV, and something similar can hardly be done for VMX.
>> Furthermore the indirect to direct call patching that is meant to be
>> applied to most hvm_funcs hooks would be ugly to make work with
>> functions having more than 6 parameters.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> I have confirmed it with a senior hardware engineer and using vmload in
> this fashion is safe and recommended for performance. As far as using
> vmload with PV.
>
> Acked-by: Brian Woods <brian.woods@amd.com>
Thanks. There's another aspect in this same area that I'd like to
improve, and hence seek clarification on up front: Currently SVM
code uses two pages per CPU, one for host_vmcb and the other
for hsa. Afaict the two uses are entirely disjoint: The host save
area looks to be simply yet another VMCB, and the parts accessed
during VMRUN / VM exit are fully separate from the ones used by
VMLOAD / VMSAVE. Therefore I think both could be folded,
reducing code size as well as memory (and perhaps cache) footprint.
I think this separation was done because the PM mentions both
data structures separately, but iirc there's nothing said anywhere
that the two structures indeed need to be distinct.
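A rough sketch of what such folding might look like, in the style of the
existing svm_cpu_up_prepare() / _svm_cpu_up() code. The key assumptions are
spelled out in the comment: that the host save area may indeed share a page
with the per-CPU host VMCB, and that MSR_K8_VM_HSAVE_PA is the constant Xen
uses for the VM_HSAVE_PA MSR.

/*
 * Sketch only: assumes the host save area may share the host_vmcb page
 * (the APM describes the two structures separately, but doesn't obviously
 * require them to be distinct), and that MSR_K8_VM_HSAVE_PA names the
 * VM_HSAVE_PA MSR (C001_0117h).
 */

/* In svm_cpu_up_prepare(): allocate a single page serving both purposes. */
struct page_info *pg = alloc_domheap_page(NULL, MEMF_node(cpu_to_node(cpu)));

if ( !pg )
    goto err;

clear_domain_page(page_to_mfn(pg));
per_cpu(host_vmcb, cpu) = page_to_maddr(pg);
/* ... the separate per_cpu(hsa, cpu) allocation would go away. */

/* In _svm_cpu_up(), on the target CPU: point the HSA at the same page. */
wrmsrl(MSR_K8_VM_HSAVE_PA, per_cpu(host_vmcb, cpu));
svm_vmsave_pa(per_cpu(host_vmcb, cpu));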
Jan
* Re: [PATCH] x86: use VMLOAD for PV context switch
From: Brian Woods @ 2018-08-17 14:55 UTC
To: Jan Beulich; +Cc: Andrew Cooper, Brian Woods, Suravee Suthikulpanit, xen-devel
On Fri, Aug 17, 2018 at 01:33:43AM -0600, Jan Beulich wrote:
> >>> On 17.08.18 at 00:04, <brian.woods@amd.com> wrote:
> > On Tue, Jul 10, 2018 at 04:14:11AM -0600, Jan Beulich wrote:
> >> Having noticed that VMLOAD alone is about as fast as a single of the
> >> involved WRMSRs, I thought it might be a reasonable idea to also use it
> >> for PV. Measurements, however, have shown that an actual improvement can
> >> be achieved only with an early prefetch of the VMCB (thanks to Andrew
> >> for suggesting to try this), which I have to admit I can't really
> >> explain. This way on my Fam15 box context switch takes over 100 clocks
> >> less on average (the measured values are heavily varying in all cases,
> >> though).
> >>
> >> This is intentionally not using a new hvm_funcs hook: For one, this is
> >> all about PV, and something similar can hardly be done for VMX.
> >> Furthermore the indirect to direct call patching that is meant to be
> >> applied to most hvm_funcs hooks would be ugly to make work with
> >> functions having more than 6 parameters.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >
> > I have confirmed it with a senior hardware engineer and using vmload in
> > this fashion is safe and recommended for performance. As far as using
> > vmload with PV.
> >
> > Acked-by: Brian Woods <brian.woods@amd.com>
>
> Thanks. There's another aspect in this same area that I'd like to
> improve, and hence seek clarification on up front: Currently SVM
> code uses two pages per CPU, one for host_vmcb and the other
> for hsa. Afaict the two uses are entirely dis-joint: The host save
> area looks to be simply yet another VMCB, and the parts accessed
> during VMRUN / VM exit are fully separate from the ones used by
> VMLOAD / VMSAVE. Therefore I think both could be folded,
> reducing code size as well as memory (and perhaps cache) footprint.
>
> I think this separation was done because the PM mentions both
> data structures separately, but iirc there's nothing said anywhere
> that the two structures indeed need to be distinct.
>
> Jan
From APM Vol 2
15.30.4 VM_HSAVE_PA MSR (C001_0117h)
The 64-bit read/write VM_HSAVE_PA MSR holds the physical address of a
4KB block of memory where VMRUN saves host state, and from which
#VMEXIT reloads host state. The VMM software is expected to set up this
register before issuing the first VMRUN instruction. Software must not
attempt to read or write the host save-state area directly.
Writing this MSR causes a #GP if:
• any of the low 12 bits of the address written are nonzero, or
• the address written is greater than or equal to the maximum
supported physical address for this implementation.
It seems that the HSA is needed for the state of the guest/host. I
don't see how they can be folded in together. Am I missing something?
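The constraints quoted above lend themselves to a small illustration;
vm_hsave_pa_valid() is a hypothetical helper, not existing Xen code:

/* Check the documented requirements before writing VM_HSAVE_PA. */
static bool vm_hsave_pa_valid(paddr_t pa, unsigned int maxphysaddr_bits)
{
    /* The low 12 bits must be zero, i.e. the 4KB block is page aligned. */
    if ( pa & 0xfff )
        return false;

    /* The address must lie below the maximum supported physical address. */
    if ( pa >> maxphysaddr_bits )
        return false;

    return true;
}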
--
Brian Woods
* Re: [PATCH] x86: use VMLOAD for PV context switch
From: Jan Beulich @ 2018-08-17 15:58 UTC
To: Brian Woods; +Cc: Andrew Cooper, Suravee Suthikulpanit, xen-devel
>>> On 17.08.18 at 16:55, <brian.woods@amd.com> wrote:
> On Fri, Aug 17, 2018 at 01:33:43AM -0600, Jan Beulich wrote:
>> >>> On 17.08.18 at 00:04, <brian.woods@amd.com> wrote:
>> > On Tue, Jul 10, 2018 at 04:14:11AM -0600, Jan Beulich wrote:
>> >> Having noticed that VMLOAD alone is about as fast as a single of the
>> >> involved WRMSRs, I thought it might be a reasonable idea to also use it
>> >> for PV. Measurements, however, have shown that an actual improvement can
>> >> be achieved only with an early prefetch of the VMCB (thanks to Andrew
>> >> for suggesting to try this), which I have to admit I can't really
>> >> explain. This way on my Fam15 box context switch takes over 100 clocks
>> >> less on average (the measured values are heavily varying in all cases,
>> >> though).
>> >>
>> >> This is intentionally not using a new hvm_funcs hook: For one, this is
>> >> all about PV, and something similar can hardly be done for VMX.
>> >> Furthermore the indirect to direct call patching that is meant to be
>> >> applied to most hvm_funcs hooks would be ugly to make work with
>> >> functions having more than 6 parameters.
>> >>
>> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> >
>> > I have confirmed it with a senior hardware engineer and using vmload in
>> > this fashion is safe and recommended for performance. As far as using
>> > vmload with PV.
>> >
>> > Acked-by: Brian Woods <brian.woods@amd.com>
>>
>> Thanks. There's another aspect in this same area that I'd like to
>> improve, and hence seek clarification on up front: Currently SVM
>> code uses two pages per CPU, one for host_vmcb and the other
>> for hsa. Afaict the two uses are entirely dis-joint: The host save
>> area looks to be simply yet another VMCB, and the parts accessed
>> during VMRUN / VM exit are fully separate from the ones used by
>> VMLOAD / VMSAVE. Therefore I think both could be folded,
>> reducing code size as well as memory (and perhaps cache) footprint.
>>
>> I think this separation was done because the PM mentions both
>> data structures separately, but iirc there's nothing said anywhere
>> that the two structures indeed need to be distinct.
>
> From APM Vol 2
>
> 15.30.4 VM_HSAVE_PA MSR (C001_0117h)
> The 64-bit read/write VM_HSAVE_PA MSR holds the physical address of a
> 4KB block of memory where VMRUN saves host state, and from which
> #VMEXIT reloads host state. The VMM software is expected to set up this
> register before issuing the first VMRUN instruction. Software must not
> attempt to read or write the host save-state area directly.
>
> Writing this MSR causes a #GP if:
> ● any of the low 12 bits of the address written are nonzero, or
> ● the address written is greater than or equal to the maximum
> supported physical address for this implementation.
>
>
>
> It seems that the HSA is needed for the state of the guest/host. I
> don't see how they can be folded in together. Am I missing something?
Well, as said, the set of elements saved/loaded by VMRUN / #VMEXIT
has nothing in common with the set of elements saved/loaded by
VMSAVE / VMLOAD.
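The two field sets, paraphrased from the APM vol. 2 and listed here only as
an illustration (not meant to be exhaustive), show why sharing one page
looks feasible:

/* State handled by VMSAVE / VMLOAD. */
static const char *const vmsave_vmload_state[] = {
    "FS", "GS", "TR", "LDTR (incl. hidden parts)", "KernelGsBase",
    "STAR", "LSTAR", "CSTAR", "SFMASK",
    "SYSENTER_CS", "SYSENTER_ESP", "SYSENTER_EIP",
};

/* Host state saved/restored by VMRUN / #VMEXIT. */
static const char *const vmrun_vmexit_host_state[] = {
    "ES/CS/SS/DS selectors", "GDTR", "IDTR",
    "EFER", "CR0", "CR3", "CR4", "RFLAGS", "RIP", "RSP", "RAX",
};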
Jan
* Ping: [PATCH] x86: use VMLOAD for PV context switch
From: Jan Beulich @ 2018-08-29 7:06 UTC
To: Andrew Cooper; +Cc: xen-devel, Brian Woods, Suravee Suthikulpanit
>>> On 17.08.18 at 00:04, <brian.woods@amd.com> wrote:
> On Tue, Jul 10, 2018 at 04:14:11AM -0600, Jan Beulich wrote:
>> Having noticed that VMLOAD alone is about as fast as a single of the
>> involved WRMSRs, I thought it might be a reasonable idea to also use it
>> for PV. Measurements, however, have shown that an actual improvement can
>> be achieved only with an early prefetch of the VMCB (thanks to Andrew
>> for suggesting to try this), which I have to admit I can't really
>> explain. This way on my Fam15 box context switch takes over 100 clocks
>> less on average (the measured values are heavily varying in all cases,
>> though).
>>
>> This is intentionally not using a new hvm_funcs hook: For one, this is
>> all about PV, and something similar can hardly be done for VMX.
>> Furthermore the indirect to direct call patching that is meant to be
>> applied to most hvm_funcs hooks would be ugly to make work with
>> functions having more than 6 parameters.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> I have confirmed it with a senior hardware engineer and using vmload in
> this fashion is safe and recommended for performance. As far as using
> vmload with PV.
>
> Acked-by: Brian Woods <brian.woods@amd.com>
Thanks. Andrew?
Jan