Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH net] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jason Wang @ 2018-03-02  6:25 UTC (permalink / raw)
  To: mst, virtualization, netdev, linux-kernel; +Cc: john.fastabend, brouer

XDP_REDIRECT support for mergeable buffer was removed since commit
7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
case"). This is because we don't reserve enough tailroom for struct
skb_shared_info which breaks XDP assumption. So this patch fixes this
by reserving enough tailroom and using fixed size of rx buffer.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 55 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bb9e56..11e48c5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 	page_off += *len;
 
 	while (--*num_buf) {
+		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		unsigned int buflen;
 		void *buf;
 		int off;
@@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 		/* guard against a misconfigured or uncooperative backend that
 		 * is sending packet larger than the MTU.
 		 */
-		if ((page_off + buflen) > PAGE_SIZE) {
+		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
 			put_page(p);
 			goto err_buf;
 		}
@@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	bool sent;
+	int err;
 
 	head_skb = NULL;
 
@@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		void *data;
 		u32 act;
 
-		/* This happens when rx buffer size is underestimated */
+		/* This happens when rx buffer size is underestimated
+		 * or headroom is not enough because of the buffer
+		 * was refilled before XDP is set. This should only
+		 * happen for the first several packets, so we don't
+		 * care much about its performance.
+		 */
 		if (unlikely(num_buf > 1 ||
 			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
@@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-		if (act != XDP_PASS)
-			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
 		switch (act) {
 		case XDP_PASS:
 			/* recalculate offset to account for any header
@@ -770,6 +774,19 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				goto err_xdp;
 			rcu_read_unlock();
 			goto xdp_xmit;
+		case XDP_REDIRECT:
+			err = xdp_do_redirect(dev, &xdp, xdp_prog);
+			if (err) {
+				trace_xdp_exception(vi->dev, xdp_prog, act);
+				if (unlikely(xdp_page != page))
+					put_page(xdp_page);
+				goto err_xdp;
+			}
+			*xdp_xmit = true;
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
+			rcu_read_unlock();
+			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
 		case XDP_ABORTED:
@@ -1013,13 +1030,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-					  struct ewma_pkt_len *avg_pkt_len)
+					  struct ewma_pkt_len *avg_pkt_len,
+					  unsigned int room)
 {
 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	unsigned int len;
 
-	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+	if (room)
+		return PAGE_SIZE - room;
+
+	len = hdr_len +	clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
 				rq->min_buf_len, PAGE_SIZE - hdr_len);
+
 	return ALIGN(len, L1_CACHE_BYTES);
 }
 
@@ -1028,21 +1050,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
 	char *buf;
 	void *ctx;
 	int err;
 	unsigned int len, hole;
 
-	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+	/* Extra tailroom is needed to satisfy XDP's assumption. This
+	 * means rx frags coalescing won't work, but consider we've
+	 * disabled GSO for XDP, it won't be a big issue.
+	 */
+	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
 		return -ENOMEM;
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
 	get_page(alloc_frag->page);
-	alloc_frag->offset += len + headroom;
+	alloc_frag->offset += len + room;
 	hole = alloc_frag->size - alloc_frag->offset;
-	if (hole < len + headroom) {
+	if (hole < len + room) {
 		/* To avoid internal fragmentation, if there is very likely not
 		 * enough space for another buffer, add the remaining space to
 		 * the current buffer.
@@ -2576,12 +2604,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
 	struct virtnet_info *vi = netdev_priv(queue->dev);
 	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
 	struct ewma_pkt_len *avg;
 
 	BUG_ON(queue_index >= vi->max_queue_pairs);
 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
 	return sprintf(buf, "%u\n",
-		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
+		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
+				       SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jason Wang @ 2018-03-02  4:20 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, brouer, john.fastabend, linux-kernel, virtualization
In-Reply-To: <20180301153441-mutt-send-email-mst@kernel.org>



On 2018年03月01日 21:36, Michael S. Tsirkin wrote:
> On Thu, Mar 01, 2018 at 11:19:04AM +0800, Jason Wang wrote:
>> XDP_REDIRECT support for mergeable buffer was removed since commit
>> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
>> case"). This is because we don't reserve enough tailroom for struct
>> skb_shared_info which breaks XDP assumption. Other complaints are, the
>> complex linearize logic and EWMA estimation may increase the
>> possibility of linearizing.
>>
>> Signed-off-by: Jason Wang<jasowang@redhat.com>
> How about reposting just this patch for net? It's pretty small
> and this way we don't have broken redirect there.
> Probably keeping the linearize in here to reduce the
> amount of changes.
>

Looks possible, let me post a version for net.

Thanks

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jason Wang @ 2018-03-02  4:17 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization,
	Alexei Starovoitov
In-Reply-To: <20180301151601.611d6b83@redhat.com>



On 2018年03月01日 22:16, Jesper Dangaard Brouer wrote:
> On Thu, 1 Mar 2018 21:15:36 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
>> On 2018年03月01日 18:35, Jesper Dangaard Brouer wrote:
>>> On Thu, 1 Mar 2018 17:23:37 +0800
>>> Jason Wang <jasowang@redhat.com> wrote:
>>>   
>>>> On 2018年03月01日 17:10, Jesper Dangaard Brouer wrote:
>>>>> On Thu,  1 Mar 2018 11:19:03 +0800
>>>>> Jason Wang <jasowang@redhat.com> wrote:
>>>>>      
>>>>>> This series tries to re-enable XDP_REDIRECT for mergeable buffer which
>>>>>> was removed since commit 7324f5399b06 ("virtio_net: disable
>>>>>> XDP_REDIRECT in receive_mergeable() case"). Main concerns are:
>>>>>>
>>>>>> - not enough tailroom was reserved which breaks cpumap
>>>>> To address this at a more fundamental level, I would suggest that we/you
>>>>> instead extend XDP to know it's buffers "frame" size/end.  (The
>>>>> assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but
>>>>> ixgbe+virtio_net broke that assumption).
>>>>>
>>>>> It should actually be fairly easy to implement:
>>>>>     * Simply extend xdp_buff with a "data_hard_end" pointer.
>>>> Right, and then cpumap can warn and drop packets with insufficient
>>>> tailroom.
>>>>
>>>> But it should be a patch on top of this I think.
>>> Hmmm, not really.  If we/you instead fix the issue of XDP doesn't know
>>> the end/size of the frame, then we don't need this mixed XDP
>>> generic/native code path mixing.
>> I know this but I'm still a little bit confused. According to the commit
>> log of 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in
>> receive_mergeable() case"), you said:
>>
>> """
>>       The longer explaination is that receive_mergeable() tries to
>>       work-around and satisfy these XDP requiresments e.g. by having a
>>       function xdp_linearize_page() that allocates and memcpy RX buffers
>>       around (in case packet is scattered across multiple rx buffers).  This
>>       does currently satisfy XDP_PASS, XDP_DROP and XDP_TX (but only because
>>       we have not implemented bpf_xdp_adjust_tail yet).
>> """
>>
>> So I consider the tailroom is a must for the (future) tail adjustment.
> That is true, BUT implementing the "data_hard_end" extension is a
> pre-requisite.  It will also be to catch the issue of too little
> tail-room if/when implementing bpf_xdp_adjust_tail().
>
> It is of-cause a "nice-to-have", to fix this virtio_net driver's
> receive_mergeable() call to have enough tail-room, but I don't see it
> as a solution to the fundamental problem.
>
>
>>> You could re-enable native redirect, and push the responsibility to
>>> cpumap for detecting this too-small frame "missing tailroom" (and avoid
>>> crashing...). (If we really want to support this, cpumap could fallback
>>> to dev_alloc_skb, and handle it gracefully).
>>>   
>> Right but it will be slower than build_skb().
> True, but bad argument in this context, as you are already using a
> similar function call napi_alloc_skb().  And it will be even slower to
> call generic-XDP code path.
>

Well, there's no generic skb implementation for cpumap redirection so I 
think we're talking about native XDP for cpumap, In this case, we won't 
even use napi_alloc_skb().

Thanks

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH v3 4/6] x86: Consolidate PCI_MMCONFIG configs
From: Andy Shevchenko @ 2018-03-01 16:35 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: jailhouse-dev, Jan Kiszka, x86, linux-pci,
	Linux Kernel Mailing List, virtualization, Ingo Molnar,
	H . Peter Anvin, Bjorn Helgaas, Thomas Gleixner
In-Reply-To: <20180301151359.GC13722@bhelgaas-glaptop.roam.corp.google.com>

On Thu, Mar 1, 2018 at 5:13 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> On Thu, Mar 01, 2018 at 06:40:47AM +0100, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>
>> Since e279b6c1d329 ("x86: start unification of arch/x86/Kconfig.*"), we
>> have two PCI_MMCONFIG entries, one from the original i386 and another
>> from x86_64. This consolidates both entries into a single one.
>>
>> The logic for x86_32, where this option was not under user control,
>> remains identical. On x86_64, PCI_MMCONFIG becomes additionally
>> configurable for SFI systems even if ACPI was disabled. This just
>> simplifies the logic without restricting the configurability in any way.
>
> Thanks for mentioning this difference.  It's probably trivial, but if
> you have any other reason to respin this series, I would split this
> into two patches:
>
>   - allow PCI_MMCONFIG on x86_64 with SFI
>   - consolidate PCI_MMCONFIG with no logical change at all
>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>
> But either way,
>
> Acked-by: Bjorn Helgaas <bhelgaas@google.com>
>

If you going to respin I would suggest one more trivia

Split long depends on to two lines, like
-      depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY ||
PCI_GOOLPC || PCI_GOMMCONFIG))
+      depends on PCI
+      depends on X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC
|| PCI_GOMMCONFIG)

...

      depends on PCI && (ACPI || SFI) && (X86_64 || (PCI_GOANY ||
PCI_GOMMCONFIG))
+     depends on PCI && (ACPI || SFI)
+     depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG)

(perhaps in a separate change)

>> ---
>>  arch/x86/Kconfig | 9 +++------
>>  1 file changed, 3 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index eb7f43f23521..aef9d67ac186 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -2641,8 +2641,9 @@ config PCI_DIRECT
>>       depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG))
>>
>>  config PCI_MMCONFIG
>> -     def_bool y
>> -     depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
>> +     bool "Support mmconfig PCI config space access" if X86_64
>> +     default y
>> +     depends on PCI && (ACPI || SFI) && (X86_64 || (PCI_GOANY || PCI_GOMMCONFIG))
>>
>>  config PCI_OLPC
>>       def_bool y
>> @@ -2657,10 +2658,6 @@ config PCI_DOMAINS
>>       def_bool y
>>       depends on PCI
>>
>> -config PCI_MMCONFIG
>> -     bool "Support mmconfig PCI config space access"
>> -     depends on X86_64 && PCI && ACPI
>> -
>>  config PCI_CNB20LE_QUIRK
>>       bool "Read CNB20LE Host Bridge Windows" if EXPERT
>>       depends on PCI
>> --
>> 2.13.6
>>



-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jesper Dangaard Brouer @ 2018-03-01 14:16 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization, brouer,
	Alexei Starovoitov
In-Reply-To: <872bf3ea-7631-4e5f-9a6a-d632213cd15c@redhat.com>

On Thu, 1 Mar 2018 21:15:36 +0800
Jason Wang <jasowang@redhat.com> wrote:

> On 2018年03月01日 18:35, Jesper Dangaard Brouer wrote:
> > On Thu, 1 Mar 2018 17:23:37 +0800
> > Jason Wang <jasowang@redhat.com> wrote:
> >  
> >> On 2018年03月01日 17:10, Jesper Dangaard Brouer wrote:  
> >>> On Thu,  1 Mar 2018 11:19:03 +0800
> >>> Jason Wang <jasowang@redhat.com> wrote:
> >>>     
> >>>> This series tries to re-enable XDP_REDIRECT for mergeable buffer which
> >>>> was removed since commit 7324f5399b06 ("virtio_net: disable
> >>>> XDP_REDIRECT in receive_mergeable() case"). Main concerns are:
> >>>>
> >>>> - not enough tailroom was reserved which breaks cpumap  
> >>> To address this at a more fundamental level, I would suggest that we/you
> >>> instead extend XDP to know it's buffers "frame" size/end.  (The
> >>> assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but
> >>> ixgbe+virtio_net broke that assumption).
> >>>
> >>> It should actually be fairly easy to implement:
> >>>    * Simply extend xdp_buff with a "data_hard_end" pointer.  
> >> Right, and then cpumap can warn and drop packets with insufficient
> >> tailroom.
> >>
> >> But it should be a patch on top of this I think.  
> > Hmmm, not really.  If we/you instead fix the issue of XDP doesn't know
> > the end/size of the frame, then we don't need this mixed XDP
> > generic/native code path mixing.  
> 
> I know this but I'm still a little bit confused. According to the commit 
> log of 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in 
> receive_mergeable() case"), you said:
> 
> """
>      The longer explaination is that receive_mergeable() tries to
>      work-around and satisfy these XDP requiresments e.g. by having a
>      function xdp_linearize_page() that allocates and memcpy RX buffers
>      around (in case packet is scattered across multiple rx buffers).  This
>      does currently satisfy XDP_PASS, XDP_DROP and XDP_TX (but only because
>      we have not implemented bpf_xdp_adjust_tail yet).
> """
> 
> So I consider the tailroom is a must for the (future) tail adjustment.

That is true, BUT implementing the "data_hard_end" extension is a
pre-requisite.  It will also be to catch the issue of too little
tail-room if/when implementing bpf_xdp_adjust_tail().

It is of-cause a "nice-to-have", to fix this virtio_net driver's
receive_mergeable() call to have enough tail-room, but I don't see it
as a solution to the fundamental problem.


> > You could re-enable native redirect, and push the responsibility to
> > cpumap for detecting this too-small frame "missing tailroom" (and avoid
> > crashing...). (If we really want to support this, cpumap could fallback
> > to dev_alloc_skb, and handle it gracefully).
> >  
> 
> Right but it will be slower than build_skb().

True, but bad argument in this context, as you are already using a
similar function call napi_alloc_skb().  And it will be even slower to
call generic-XDP code path.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Michael S. Tsirkin @ 2018-03-01 13:40 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, john.fastabend, linux-kernel, virtualization,
	Alexei Starovoitov
In-Reply-To: <20180301113531.7b25e2df@redhat.com>

On Thu, Mar 01, 2018 at 11:35:31AM +0100, Jesper Dangaard Brouer wrote:
> On Thu, 1 Mar 2018 17:23:37 +0800
> Jason Wang <jasowang@redhat.com> wrote:
> 
> > On 2018年03月01日 17:10, Jesper Dangaard Brouer wrote:
> > > On Thu,  1 Mar 2018 11:19:03 +0800
> > > Jason Wang <jasowang@redhat.com> wrote:
> > >  
> > >> This series tries to re-enable XDP_REDIRECT for mergeable buffer which
> > >> was removed since commit 7324f5399b06 ("virtio_net: disable
> > >> XDP_REDIRECT in receive_mergeable() case"). Main concerns are:
> > >>
> > >> - not enough tailroom was reserved which breaks cpumap  
> >
> > > To address this at a more fundamental level, I would suggest that we/you
> > > instead extend XDP to know it's buffers "frame" size/end.  (The
> > > assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but
> > > ixgbe+virtio_net broke that assumption).
> > >
> > > It should actually be fairly easy to implement:
> > >   * Simply extend xdp_buff with a "data_hard_end" pointer.  
> > 
> > Right, and then cpumap can warn and drop packets with insufficient 
> > tailroom.
> >
> > But it should be a patch on top of this I think.
> 
> Hmmm, not really.  If we/you instead fix the issue of XDP doesn't know
> the end/size of the frame, then we don't need this mixed XDP
> generic/native code path mixing.
> 
> You could re-enable native redirect, and push the responsibility to
> cpumap for detecting this too-small frame "missing tailroom" (and avoid
> crashing...). (If we really want to support this, cpumap could fallback
> to dev_alloc_skb, and handle it gracefully).

Yea, we probably should.

However it's not nice that redirect is now gone in net.
IMHO a smaller version of patch 1/2 (without using generic code)
should go into net. tailroom tracking and fallback to dev_alloc_skb
can go into net-next.

> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Michael S. Tsirkin @ 2018-03-01 13:36 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, brouer, john.fastabend, linux-kernel, virtualization
In-Reply-To: <1519874345-10235-2-git-send-email-jasowang@redhat.com>

On Thu, Mar 01, 2018 at 11:19:04AM +0800, Jason Wang wrote:
> XDP_REDIRECT support for mergeable buffer was removed since commit
> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
> case"). This is because we don't reserve enough tailroom for struct
> skb_shared_info which breaks XDP assumption. Other complaints are, the
> complex linearize logic and EWMA estimation may increase the
> possibility of linearizing.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

How about reposting just this patch for net? It's pretty small
and this way we don't have broken redirect there.
Probably keeping the linearize in here to reduce the
amount of changes.

> ---
>  drivers/net/virtio_net.c | 107 +++++++++++++++++++++++++++++------------------
>  1 file changed, 67 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9bb9e56..81190ba 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -537,6 +537,26 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
>  	return NULL;
>  }
>  
> +static struct sk_buff *virtnet_skb_xdp(struct receive_queue *rq,
> +				       struct sk_buff *skb)
> +{
> +	struct bpf_prog *xdp_prog;
> +	int ret;
> +
> +	rcu_read_lock();
> +	xdp_prog = rcu_dereference(rq->xdp_prog);
> +	if (xdp_prog) {
> +		ret = do_xdp_generic(xdp_prog, skb);
> +		if (ret != XDP_PASS) {
> +			rcu_read_unlock();
> +			return NULL;
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	return skb;
> +}
> +
>  static struct sk_buff *receive_small(struct net_device *dev,
>  				     struct virtnet_info *vi,
>  				     struct receive_queue *rq,
> @@ -689,31 +709,30 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	struct bpf_prog *xdp_prog;
>  	unsigned int truesize;
>  	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> -	bool sent;
> +	bool sent, skb_xdp = false;
> +	int err;
>  
>  	head_skb = NULL;
>  
>  	rcu_read_lock();
>  	xdp_prog = rcu_dereference(rq->xdp_prog);
>  	if (xdp_prog) {
> -		struct page *xdp_page;
>  		struct xdp_buff xdp;
>  		void *data;
>  		u32 act;
>  
> -		/* This happens when rx buffer size is underestimated */
> +		/* This happens when rx buffer size is underestimated
> +		 * or headroom is not enough because of the buffer
> +		 * was refilled before XDP is set. In both cases,
> +		 * for simplicity, we will offload them to generic
> +		 * XDP routine. This should only happen for the first
> +		 * several packets, so we don't care much about its
> +		 * performance.
> +		 */
>  		if (unlikely(num_buf > 1 ||
>  			     headroom < virtnet_get_headroom(vi))) {
> -			/* linearize data for XDP */
> -			xdp_page = xdp_linearize_page(rq, &num_buf,
> -						      page, offset,
> -						      VIRTIO_XDP_HEADROOM,
> -						      &len);
> -			if (!xdp_page)
> -				goto err_xdp;
> -			offset = VIRTIO_XDP_HEADROOM;
> -		} else {
> -			xdp_page = page;
> +			skb_xdp = true;
> +			goto skb_xdp;
>  		}
>  
>  		/* Transient failure which in theory could occur if
> @@ -727,7 +746,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		/* Allow consuming headroom but reserve enough space to push
>  		 * the descriptor on if we get an XDP_TX return code.
>  		 */
> -		data = page_address(xdp_page) + offset;
> +		data = page_address(page) + offset;
>  		xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
>  		xdp.data = data + vi->hdr_len;
>  		xdp_set_data_meta_invalid(&xdp);
> @@ -736,9 +755,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  
>  		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>  
> -		if (act != XDP_PASS)
> -			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
> -
>  		switch (act) {
>  		case XDP_PASS:
>  			/* recalculate offset to account for any header
> @@ -746,28 +762,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  			 * skb and avoid using offset
>  			 */
>  			offset = xdp.data -
> -					page_address(xdp_page) - vi->hdr_len;
> -
> -			/* We can only create skb based on xdp_page. */
> -			if (unlikely(xdp_page != page)) {
> -				rcu_read_unlock();
> -				put_page(page);
> -				head_skb = page_to_skb(vi, rq, xdp_page,
> -						       offset, len, PAGE_SIZE);
> -				return head_skb;
> -			}
> +					page_address(page) - vi->hdr_len;
>  			break;
>  		case XDP_TX:
>  			sent = __virtnet_xdp_xmit(vi, &xdp);
>  			if (unlikely(!sent)) {
>  				trace_xdp_exception(vi->dev, xdp_prog, act);
> -				if (unlikely(xdp_page != page))
> -					put_page(xdp_page);
>  				goto err_xdp;
>  			}
>  			*xdp_xmit = true;
> -			if (unlikely(xdp_page != page))
> +			rcu_read_unlock();
> +			goto xdp_xmit;
> +		case XDP_REDIRECT:
> +			err = xdp_do_redirect(dev, &xdp, xdp_prog);
> +			if (err)
>  				goto err_xdp;
> +			*xdp_xmit = true;
>  			rcu_read_unlock();
>  			goto xdp_xmit;
>  		default:
> @@ -775,13 +785,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		case XDP_ABORTED:
>  			trace_xdp_exception(vi->dev, xdp_prog, act);
>  		case XDP_DROP:
> -			if (unlikely(xdp_page != page))
> -				__free_pages(xdp_page, 0);
>  			goto err_xdp;
>  		}
>  	}
>  	rcu_read_unlock();
>  
> +skb_xdp:
>  	truesize = mergeable_ctx_to_truesize(ctx);
>  	if (unlikely(len > truesize)) {
>  		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
> @@ -848,7 +857,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		}
>  	}
>  
> -	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
> +	if (skb_xdp)
> +		head_skb = virtnet_skb_xdp(rq, head_skb);
> +	else
> +		ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
> +
>  	return head_skb;
>  
>  err_xdp:
> @@ -1013,13 +1026,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
>  }
>  
>  static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
> -					  struct ewma_pkt_len *avg_pkt_len)
> +					  struct ewma_pkt_len *avg_pkt_len,
> +					  unsigned int room)
>  {
>  	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>  	unsigned int len;
>  
> -	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
> +	if (room)
> +		return PAGE_SIZE - room;
> +
> +	len = hdr_len +	clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
>  				rq->min_buf_len, PAGE_SIZE - hdr_len);
> +
>  	return ALIGN(len, L1_CACHE_BYTES);
>  }
>  
> @@ -1028,21 +1046,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
>  {
>  	struct page_frag *alloc_frag = &rq->alloc_frag;
>  	unsigned int headroom = virtnet_get_headroom(vi);
> +	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
> +	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
>  	char *buf;
>  	void *ctx;
>  	int err;
>  	unsigned int len, hole;
>  
> -	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
> -	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
> +	/* Extra tailroom is needed to satisfy XDP's assumption. This
> +	 * means rx frags coalescing won't work, but consider we've
> +	 * disabled GSO for XDP, it won't be a big issue.
> +	 */
> +	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
> +	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
>  		return -ENOMEM;
>  
>  	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
>  	buf += headroom; /* advance address leaving hole at front of pkt */
>  	get_page(alloc_frag->page);
> -	alloc_frag->offset += len + headroom;
> +	alloc_frag->offset += len + room;
>  	hole = alloc_frag->size - alloc_frag->offset;
> -	if (hole < len + headroom) {
> +	if (hole < len + room) {
>  		/* To avoid internal fragmentation, if there is very likely not
>  		 * enough space for another buffer, add the remaining space to
>  		 * the current buffer.
> @@ -2576,12 +2600,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
>  {
>  	struct virtnet_info *vi = netdev_priv(queue->dev);
>  	unsigned int queue_index = get_netdev_rx_queue_index(queue);
> +	unsigned int headroom = virtnet_get_headroom(vi);
> +	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
>  	struct ewma_pkt_len *avg;
>  
>  	BUG_ON(queue_index >= vi->max_queue_pairs);
>  	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
>  	return sprintf(buf, "%u\n",
> -		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
> +		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
> +				       SKB_DATA_ALIGN(headroom + tailroom)));
>  }
>  
>  static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
> -- 
> 2.7.4

^ permalink raw reply

* Re: [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jason Wang @ 2018-03-01 13:15 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization,
	Alexei Starovoitov
In-Reply-To: <20180301113531.7b25e2df@redhat.com>



On 2018年03月01日 18:35, Jesper Dangaard Brouer wrote:
> On Thu, 1 Mar 2018 17:23:37 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
>> On 2018年03月01日 17:10, Jesper Dangaard Brouer wrote:
>>> On Thu,  1 Mar 2018 11:19:03 +0800
>>> Jason Wang <jasowang@redhat.com> wrote:
>>>   
>>>> This series tries to re-enable XDP_REDIRECT for mergeable buffer which
>>>> was removed since commit 7324f5399b06 ("virtio_net: disable
>>>> XDP_REDIRECT in receive_mergeable() case"). Main concerns are:
>>>>
>>>> - not enough tailroom was reserved which breaks cpumap
>>> To address this at a more fundamental level, I would suggest that we/you
>>> instead extend XDP to know it's buffers "frame" size/end.  (The
>>> assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but
>>> ixgbe+virtio_net broke that assumption).
>>>
>>> It should actually be fairly easy to implement:
>>>    * Simply extend xdp_buff with a "data_hard_end" pointer.
>> Right, and then cpumap can warn and drop packets with insufficient
>> tailroom.
>>
>> But it should be a patch on top of this I think.
> Hmmm, not really.  If we/you instead fix the issue of XDP doesn't know
> the end/size of the frame, then we don't need this mixed XDP
> generic/native code path mixing.

I know this but I'm still a little bit confused. According to the commit 
log of 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in 
receive_mergeable() case"), you said:

"""
     The longer explaination is that receive_mergeable() tries to
     work-around and satisfy these XDP requiresments e.g. by having a
     function xdp_linearize_page() that allocates and memcpy RX buffers
     around (in case packet is scattered across multiple rx buffers).  This
     does currently satisfy XDP_PASS, XDP_DROP and XDP_TX (but only because
     we have not implemented bpf_xdp_adjust_tail yet).
"""

So I consider the tailroom is a must for the (future) tail adjustment.

>
> You could re-enable native redirect, and push the responsibility to
> cpumap for detecting this too-small frame "missing tailroom" (and avoid
> crashing...). (If we really want to support this, cpumap could fallback
> to dev_alloc_skb, and handle it gracefully).
>

Right but it will be slower than build_skb().

Thanks

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jesper Dangaard Brouer @ 2018-03-01 10:35 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization, brouer,
	Alexei Starovoitov
In-Reply-To: <b0f55100-183d-a507-8510-3c67f6106031@redhat.com>

On Thu, 1 Mar 2018 17:23:37 +0800
Jason Wang <jasowang@redhat.com> wrote:

> On 2018年03月01日 17:10, Jesper Dangaard Brouer wrote:
> > On Thu,  1 Mar 2018 11:19:03 +0800
> > Jason Wang <jasowang@redhat.com> wrote:
> >  
> >> This series tries to re-enable XDP_REDIRECT for mergeable buffer which
> >> was removed since commit 7324f5399b06 ("virtio_net: disable
> >> XDP_REDIRECT in receive_mergeable() case"). Main concerns are:
> >>
> >> - not enough tailroom was reserved which breaks cpumap  
>
> > To address this at a more fundamental level, I would suggest that we/you
> > instead extend XDP to know it's buffers "frame" size/end.  (The
> > assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but
> > ixgbe+virtio_net broke that assumption).
> >
> > It should actually be fairly easy to implement:
> >   * Simply extend xdp_buff with a "data_hard_end" pointer.  
> 
> Right, and then cpumap can warn and drop packets with insufficient 
> tailroom.
>
> But it should be a patch on top of this I think.

Hmmm, not really.  If we/you instead fix the issue of XDP doesn't know
the end/size of the frame, then we don't need this mixed XDP
generic/native code path mixing.

You could re-enable native redirect, and push the responsibility to
cpumap for detecting this too-small frame "missing tailroom" (and avoid
crashing...). (If we really want to support this, cpumap could fallback
to dev_alloc_skb, and handle it gracefully).

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH v3 4/6] x86: Consolidate PCI_MMCONFIG configs
From: Andy Shevchenko @ 2018-03-01 10:31 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: jailhouse-dev, linux-pci, x86, Linux Kernel Mailing List,
	virtualization, Ingo Molnar, H . Peter Anvin, Bjorn Helgaas,
	Thomas Gleixner
In-Reply-To: <8527c92b2e2920b8cd9096ddcbbb6d0f4004eb69.1519882849.git.jan.kiszka@siemens.com>

On Thu, Mar 1, 2018 at 7:40 AM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
>
> Since e279b6c1d329 ("x86: start unification of arch/x86/Kconfig.*"), we
> have two PCI_MMCONFIG entries, one from the original i386 and another
> from x86_64. This consolidates both entries into a single one.
>
> The logic for x86_32, where this option was not under user control,
> remains identical. On x86_64, PCI_MMCONFIG becomes additionally
> configurable for SFI systems even if ACPI was disabled. This just
> simplifies the logic without restricting the configurability in any way.
>

FWIW,
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>

> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  arch/x86/Kconfig | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index eb7f43f23521..aef9d67ac186 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -2641,8 +2641,9 @@ config PCI_DIRECT
>         depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG))
>
>  config PCI_MMCONFIG
> -       def_bool y
> -       depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
> +       bool "Support mmconfig PCI config space access" if X86_64
> +       default y
> +       depends on PCI && (ACPI || SFI) && (X86_64 || (PCI_GOANY || PCI_GOMMCONFIG))
>
>  config PCI_OLPC
>         def_bool y
> @@ -2657,10 +2658,6 @@ config PCI_DOMAINS
>         def_bool y
>         depends on PCI
>
> -config PCI_MMCONFIG
> -       bool "Support mmconfig PCI config space access"
> -       depends on X86_64 && PCI && ACPI
> -
>  config PCI_CNB20LE_QUIRK
>         bool "Read CNB20LE Host Bridge Windows" if EXPERT
>         depends on PCI
> --
> 2.13.6
>



-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH v3 3/6] x86/jailhouse: Enable PCI mmconfig access in inmates
From: Andy Shevchenko @ 2018-03-01 10:31 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: jailhouse-dev, linux-pci, x86, Linux Kernel Mailing List,
	virtualization, Ingo Molnar, H . Peter Anvin, Bjorn Helgaas,
	Thomas Gleixner
In-Reply-To: <188edc11aa5fa11d4915097831a2a4a153f6ed83.1519882849.git.jan.kiszka@siemens.com>

On Thu, Mar 1, 2018 at 7:40 AM, Jan Kiszka <jan.kiszka@siemens.com> wrote:

> Use the PCI mmconfig base address exported by jailhouse in boot
> parameters in order to access the memory mapped PCI configuration space.


> --- a/arch/x86/kernel/jailhouse.c
> +++ b/arch/x86/kernel/jailhouse.c
> @@ -124,6 +124,13 @@ static int __init jailhouse_pci_arch_init(void)
>         if (pcibios_last_bus < 0)
>                 pcibios_last_bus = 0xff;
>
> +#ifdef CONFIG_PCI_MMCONFIG
> +       if (setup_data.pci_mmconfig_base) {

> +               pci_mmconfig_add(0, 0, 0xff, setup_data.pci_mmconfig_base);

Hmm... Shouldn't be pcibios_last_bus instead of 0xff?

> +               pci_mmcfg_arch_init();
> +       }
> +#endif

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH v3 2/6] PCI: Scan all functions when running over Jailhouse
From: Andy Shevchenko @ 2018-03-01 10:28 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: jailhouse-dev, Benedikt Spranger, linux-pci, x86,
	Linux Kernel Mailing List, virtualization, Ingo Molnar,
	H . Peter Anvin, Bjorn Helgaas, Thomas Gleixner
In-Reply-To: <2b4c650d27e59c81903fa4c2526bf18a2cf2d243.1519882849.git.jan.kiszka@siemens.com>

On Thu, Mar 1, 2018 at 7:40 AM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
>
> Per PCIe r4.0, sec 7.5.1.1.9, multi-function devices are required to
> have a function 0.  Therefore, Linux scans for devices at function 0
> (devfn 0/8/16/...) and only scans for other functions if function 0
> has its Multi-Function Device bit set or ARI or SR-IOV indicate
> there are more functions.
>
> The Jailhouse hypervisor may pass individual functions of a
> multi-function device to a guest without passing function 0, which
> means a Linux guest won't find them.
>
> Change Linux PCI probing so it scans all function numbers when
> running as a guest over Jailhouse.
>
> This is technically prohibited by the spec, so it is possible that
> PCI devices without the Multi-Function Device bit set may have
> unexpected behavior in response to this probe.
>
> Derived from original patch by Benedikt Spranger.
>

FWIW,
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>

One nit below.

> CC: Benedikt Spranger <b.spranger@linutronix.de>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> ---
>  arch/x86/pci/legacy.c |  4 +++-
>  drivers/pci/probe.c   | 22 +++++++++++++++++++---
>  2 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
> index 1cb01abcb1be..dfbe6ac38830 100644
> --- a/arch/x86/pci/legacy.c
> +++ b/arch/x86/pci/legacy.c
> @@ -4,6 +4,7 @@
>  #include <linux/init.h>
>  #include <linux/export.h>
>  #include <linux/pci.h>
> +#include <asm/jailhouse_para.h>
>  #include <asm/pci_x86.h>
>
>  /*
> @@ -34,13 +35,14 @@ int __init pci_legacy_init(void)
>
>  void pcibios_scan_specific_bus(int busn)
>  {
> +       int stride = jailhouse_paravirt() ? 1 : 8;
>         int devfn;
>         u32 l;
>
>         if (pci_find_bus(0, busn))
>                 return;
>
> -       for (devfn = 0; devfn < 256; devfn += 8) {
> +       for (devfn = 0; devfn < 256; devfn += stride) {
>                 if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
>                     l != 0x0000 && l != 0xffff) {
>                         DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index ef5377438a1e..da22d6d216f8 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -16,6 +16,7 @@
>  #include <linux/pci-aspm.h>
>  #include <linux/aer.h>
>  #include <linux/acpi.h>
> +#include <linux/hypervisor.h>
>  #include <linux/irqdomain.h>
>  #include <linux/pm_runtime.h>
>  #include "pci.h"
> @@ -2518,14 +2519,29 @@ static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
>  {
>         unsigned int used_buses, normal_bridges = 0, hotplug_bridges = 0;
>         unsigned int start = bus->busn_res.start;
> -       unsigned int devfn, cmax, max = start;
> +       unsigned int devfn, fn, cmax, max = start;
>         struct pci_dev *dev;
> +       int nr_devs;
>
>         dev_dbg(&bus->dev, "scanning bus\n");
>
>         /* Go find them, Rover! */
> -       for (devfn = 0; devfn < 0x100; devfn += 8)
> -               pci_scan_slot(bus, devfn);

> +       for (devfn = 0; devfn < 0x100; devfn += 8) {

Since you touch this line perhaps make sense to unify with above, i.e.
0x100 -> 256 ?

> +               nr_devs = pci_scan_slot(bus, devfn);
> +
> +               /*
> +                * The Jailhouse hypervisor may pass individual functions of a
> +                * multi-function device to a guest without passing function 0.
> +                * Look for them as well.
> +                */
> +               if (jailhouse_paravirt() && nr_devs == 0) {
> +                       for (fn = 1; fn < 8; fn++) {
> +                               dev = pci_scan_single_device(bus, devfn + fn);
> +                               if (dev)
> +                                       dev->multifunction = 1;
> +                       }
> +               }
> +       }
>
>         /* Reserve buses for SR-IOV capability */
>         used_buses = pci_iov_bus_range(bus);
> --
> 2.13.6
>



-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
From: Jason Wang @ 2018-03-01  9:24 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, virtualization, john.fastabend, linux-kernel, mst
In-Reply-To: <20180301101553.55c4c6bd@redhat.com>



On 2018年03月01日 17:15, Jesper Dangaard Brouer wrote:
> On Thu, 1 Mar 2018 16:49:24 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
>>> 2. This can easily cause out-of-order packets.
>> I may miss something, but it looks to me packets were still delivered
>> in order? Or you mean the packets that was dropped by cpumap?
> No. Packets can now travel two code paths to the egress device. (1) XDP
> native via ndp_xdp_xmit via direct delivery into a lockfree/dedicated
> TX queue, (2) via normal network stack which can involve being queue in
> a qdisc.  Do you see the possibility of the reorder now?
>

I see, thanks. But consider this could only happen for first few 
packets, not sure it was worth to worry about it.

Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jason Wang @ 2018-03-01  9:23 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, virtualization, john.fastabend, linux-kernel, mst
In-Reply-To: <20180301101038.4253424d@redhat.com>



On 2018年03月01日 17:10, Jesper Dangaard Brouer wrote:
> On Thu,  1 Mar 2018 11:19:03 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
>> This series tries to re-enable XDP_REDIRECT for mergeable buffer which
>> was removed since commit 7324f5399b06 ("virtio_net: disable
>> XDP_REDIRECT in receive_mergeable() case"). Main concerns are:
>>
>> - not enough tailroom was reserved which breaks cpumap
> To address this at a more fundamental level, I would suggest that we/you
> instead extend XDP to know it's buffers "frame" size/end.  (The
> assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but
> ixgbe+virtio_net broke that assumption).
>
> It should actually be fairly easy to implement:
>   * Simply extend xdp_buff with a "data_hard_end" pointer.

Right, and then cpumap can warn and drop packets with insufficient 
tailroom. But it should be a patch on top of this I think.

Thanks

>
> Now cpumap is more safe... instead of crashing, it can now know/see when
> it is safe to create an SKB using build_skb (could fallback to
> dev_alloc_skb).
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
From: Jesper Dangaard Brouer @ 2018-03-01  9:15 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization, brouer
In-Reply-To: <dbbfd899-fb7e-c039-5c9a-0f3b6ce34b78@redhat.com>

On Thu, 1 Mar 2018 16:49:24 +0800
Jason Wang <jasowang@redhat.com> wrote:

> > 2. This can easily cause out-of-order packets.  
> 
> I may miss something, but it looks to me packets were still delivered
> in order? Or you mean the packets that was dropped by cpumap?

No. Packets can now travel two code paths to the egress device. (1) XDP
native via ndp_xdp_xmit via direct delivery into a lockfree/dedicated
TX queue, (2) via normal network stack which can involve being queue in
a qdisc.  Do you see the possibility of the reorder now?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jason Wang @ 2018-03-01  9:11 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, virtualization, john.fastabend, linux-kernel, mst
In-Reply-To: <20180301094129.29fb6ce6@redhat.com>



On 2018年03月01日 16:41, Jesper Dangaard Brouer wrote:
> On Thu,  1 Mar 2018 11:19:04 +0800 Jason Wang <jasowang@redhat.com> wrote:
>
>> XDP_REDIRECT support for mergeable buffer was removed since commit
>> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
>> case"). This is because we don't reserve enough tailroom for struct
>> skb_shared_info which breaks XDP assumption. Other complaints are, the
>> complex linearize logic and EWMA estimation may increase the
>> possibility of linearizing.
> This patch also have the intermixing issues, I mentioned for patch 2/2.
>
> On Thu, 1 Mar 2018 09:02:06 +0100
> Jesper Dangaard Brouer <brouer@redhat.com> wrote:
>
>> 1. XDP generic is not feature complete, e.g. cpumap will drop these
>>     packets. It might not be possible to implement some features, think
>>     of (AF_XDP) zero-copy.
>>
>> 2. This can easily cause out-of-order packets.
>>
>> 3. It makes it harder to troubleshoot, when diagnosing issues
>>     around #1, we have a hard time determining what path an XDP packet
>>     took (the xdp tracepoints doesn't know).
>
> It is slightly better, as it is consistent in calling XDP-generic in
> the XDP_REDIRECT action, which an action under heavy development, here
> we want the freedom to develop in different code tempi.

Looks not, this patch did:

+               case XDP_REDIRECT:
+                       err = xdp_do_redirect(dev, &xdp, xdp_prog);
+                       if (err)
                                 goto err_xdp;
+                       *xdp_xmit = true;
                         rcu_read_unlock();
                         goto xdp_xmit;

So native version is used for most cases. XDP-generic will be only used 
for the packets of insufficient headroom which should be rare (just 
first few packets).

>    And some
> features might never be available in XDP-generic. Thus, when a feature
> is missing/broken it will be consistent for the user.

This sounds bad.

>
> The remaining question is how will a user know that XDP "mode" she is
> using?

So for this patch, skb mode will only happen for the first several 
packets. So mostly native mode. And it's even not clear to me if user 
need to know about this since the performance should be the same.

>    The user clearly loaded an XDP-native program, and expect the
> associated performance, but XDP_REDIRECT will be using the slow
> XDP-generic code path...

As I replied above, XDP_REDIRECT use native mode except for the first 
few packets.

>
>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   drivers/net/virtio_net.c | 107 +++++++++++++++++++++++++++++------------------
>>   1 file changed, 67 insertions(+), 40 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 9bb9e56..81190ba 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
> [...]
>> @@ -689,31 +709,30 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>   	struct bpf_prog *xdp_prog;
>>   	unsigned int truesize;
>>   	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>> -	bool sent;
>> +	bool sent, skb_xdp = false;
>> +	int err;
>>   
>>   	head_skb = NULL;
>>   
>>   	rcu_read_lock();
>>   	xdp_prog = rcu_dereference(rq->xdp_prog);
>>   	if (xdp_prog) {
>> -		struct page *xdp_page;
>>   		struct xdp_buff xdp;
>>   		void *data;
>>   		u32 act;
>>   
>> -		/* This happens when rx buffer size is underestimated */
>> +		/* This happens when rx buffer size is underestimated
>> +		 * or headroom is not enough because of the buffer
>> +		 * was refilled before XDP is set. In both cases,
>> +		 * for simplicity, we will offload them to generic
>> +		 * XDP routine. This should only happen for the first
>> +		 * several packets, so we don't care much about its
>> +		 * performance.
>> +		 */
>>   		if (unlikely(num_buf > 1 ||
>>   			     headroom < virtnet_get_headroom(vi))) {
> I think you also need to check the tailroom here? (AFAIK this is hidden
> in the len_to_ctx as the "truesize").

add_recvbuf_mergeable() did this:

     unsigned int headroom = virtnet_get_headroom(vi);
     unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;

So if headroom is not zero, it implies tailroom here.

Thanks

>
>> -			/* linearize data for XDP */
>> -			xdp_page = xdp_linearize_page(rq, &num_buf,
>> -						      page, offset,
>> -						      VIRTIO_XDP_HEADROOM,
>> -						      &len);
>> -			if (!xdp_page)
>> -				goto err_xdp;
>> -			offset = VIRTIO_XDP_HEADROOM;
>> -		} else {
>> -			xdp_page = page;
>> +			skb_xdp = true;
>> +			goto skb_xdp;
>>   		}
>>   
>>   		/* Transient failure which in theory could occur if
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 0/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jesper Dangaard Brouer @ 2018-03-01  9:10 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization, brouer
In-Reply-To: <1519874345-10235-1-git-send-email-jasowang@redhat.com>

On Thu,  1 Mar 2018 11:19:03 +0800
Jason Wang <jasowang@redhat.com> wrote:

> This series tries to re-enable XDP_REDIRECT for mergeable buffer which
> was removed since commit 7324f5399b06 ("virtio_net: disable
> XDP_REDIRECT in receive_mergeable() case"). Main concerns are:
> 
> - not enough tailroom was reserved which breaks cpumap

To address this at a more fundamental level, I would suggest that we/you
instead extend XDP to know it's buffers "frame" size/end.  (The
assumption use to be, xdp_buff->data_hard_start + PAGE_SIZE, but
ixgbe+virtio_net broke that assumption).

It should actually be fairly easy to implement:
 * Simply extend xdp_buff with a "data_hard_end" pointer.

Now cpumap is more safe... instead of crashing, it can now know/see when
it is safe to create an SKB using build_skb (could fallback to
dev_alloc_skb).

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
From: Jason Wang @ 2018-03-01  8:49 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, virtualization, john.fastabend, linux-kernel, mst
In-Reply-To: <20180301090206.04e13a71@redhat.com>



On 2018年03月01日 16:02, Jesper Dangaard Brouer wrote:
> On Thu,  1 Mar 2018 11:19:05 +0800 Jason Wang <jasowang@redhat.com> wrote:
>
>> We used to do data copy through xdp_linearize_page() for the buffer
>> without sufficient headroom, it brings extra complexity without
>> helping for the performance. So this patch remove it and switch to use
>> generic XDP routine to handle this case.
> I don't like where this is going.  I don't like intermixing the native
> XDP and generic XDP in this way, for several reasons:
>
> 1. XDP generic is not feature complete, e.g. cpumap will drop these
>     packets.

Well, then we'd better fix it, otherwise it would be even worse than not 
having it. For cpumap, it can be done through queuing skb in the pointer 
ring with some encoding/decoding.

>   It might not be possible to implement some features, think
>     of (AF_XDP) zero-copy.

Yes, but can AF_XDP support header adjustment? Consider the assumption 
of zerocopy, I was considering maybe we need a way to tell AF_XDP of 
whether or not zerocopy is supported by the driver.

>
> 2. This can easily cause out-of-order packets.

I may miss something, but it looks to me packets were still delivered in 
order? Or you mean the packets that was dropped by cpumap?

>
> 3. It makes it harder to troubleshoot, when diagnosing issues
>     around #1, we have a hard time determining what path an XDP packet
>     took (the xdp tracepoints doesn't know).

Looks like we can address this by adding tracepoints in generic code path.

>
>
> [...]
>> @@ -590,25 +526,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>   		if (unlikely(hdr->hdr.gso_type))
>>   			goto err_xdp;
>>   
>> +		/* This happnes when headroom is not enough because
>> +		 * the buffer was refilled before XDP is set. This
>> +		 * only happen for several packets, for simplicity,
>> +		 * offload them to generic XDP routine.
> In my practical tests, I also saw that sometime my ping packets were
> traveling this code-path, even after a long time when XDP were attached.

I don't see this, probably a bug somewhere. I would like to reproduce 
and see, how do you do the test? If there's just very few packets were 
received after XDP, we may still use the buffer that was refilled before 
XDP, that's by design.

>
> This worries me a bit, for troubleshooting purposes... as this can give
> a strange user experience given point #1.



I can stick to the xdp_linearize_page() logic if you don't like generic 
XDP stuffs, but it may not work for AF_XDP neither. Consider AF_XDP has 
still some distance of being merged, I'm not sure we could even care 
about it.

Thanks

>
>
>> +		 */
>>   		if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
>> -			int offset = buf - page_address(page) + header_offset;
>> -			unsigned int tlen = len + vi->hdr_len;
>> -			u16 num_buf = 1;
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* Re: [PATCH net-next 1/2] virtio-net: re enable XDP_REDIRECT for mergeable buffer
From: Jesper Dangaard Brouer @ 2018-03-01  8:41 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization, brouer
In-Reply-To: <1519874345-10235-2-git-send-email-jasowang@redhat.com>


On Thu,  1 Mar 2018 11:19:04 +0800 Jason Wang <jasowang@redhat.com> wrote:

> XDP_REDIRECT support for mergeable buffer was removed since commit
> 7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
> case"). This is because we don't reserve enough tailroom for struct
> skb_shared_info which breaks XDP assumption. Other complaints are, the
> complex linearize logic and EWMA estimation may increase the
> possibility of linearizing.

This patch also have the intermixing issues, I mentioned for patch 2/2.

On Thu, 1 Mar 2018 09:02:06 +0100
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> 1. XDP generic is not feature complete, e.g. cpumap will drop these
>    packets. It might not be possible to implement some features, think
>    of (AF_XDP) zero-copy.
> 
> 2. This can easily cause out-of-order packets.
> 
> 3. It makes it harder to troubleshoot, when diagnosing issues
>    around #1, we have a hard time determining what path an XDP packet
>    took (the xdp tracepoints doesn't know).


It is slightly better, as it is consistent in calling XDP-generic in
the XDP_REDIRECT action, which an action under heavy development, here
we want the freedom to develop in different code tempi.  And some
features might never be available in XDP-generic. Thus, when a feature
is missing/broken it will be consistent for the user.

The remaining question is how will a user know that XDP "mode" she is
using?  The user clearly loaded an XDP-native program, and expect the
associated performance, but XDP_REDIRECT will be using the slow
XDP-generic code path...



> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/net/virtio_net.c | 107 +++++++++++++++++++++++++++++------------------
>  1 file changed, 67 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9bb9e56..81190ba 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
[...]
> @@ -689,31 +709,30 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	struct bpf_prog *xdp_prog;
>  	unsigned int truesize;
>  	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> -	bool sent;
> +	bool sent, skb_xdp = false;
> +	int err;
>  
>  	head_skb = NULL;
>  
>  	rcu_read_lock();
>  	xdp_prog = rcu_dereference(rq->xdp_prog);
>  	if (xdp_prog) {
> -		struct page *xdp_page;
>  		struct xdp_buff xdp;
>  		void *data;
>  		u32 act;
>  
> -		/* This happens when rx buffer size is underestimated */
> +		/* This happens when rx buffer size is underestimated
> +		 * or headroom is not enough because of the buffer
> +		 * was refilled before XDP is set. In both cases,
> +		 * for simplicity, we will offload them to generic
> +		 * XDP routine. This should only happen for the first
> +		 * several packets, so we don't care much about its
> +		 * performance.
> +		 */
>  		if (unlikely(num_buf > 1 ||
>  			     headroom < virtnet_get_headroom(vi))) {

I think you also need to check the tailroom here? (AFAIK this is hidden
in the len_to_ctx as the "truesize").

> -			/* linearize data for XDP */
> -			xdp_page = xdp_linearize_page(rq, &num_buf,
> -						      page, offset,
> -						      VIRTIO_XDP_HEADROOM,
> -						      &len);
> -			if (!xdp_page)
> -				goto err_xdp;
> -			offset = VIRTIO_XDP_HEADROOM;
> -		} else {
> -			xdp_page = page;
> +			skb_xdp = true;
> +			goto skb_xdp;
>  		}
>  
>  		/* Transient failure which in theory could occur if


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH net-next 2/2] virtio-net: simplify XDP handling in small buffer
From: Jesper Dangaard Brouer @ 2018-03-01  8:02 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, netdev, john.fastabend, linux-kernel, virtualization, brouer
In-Reply-To: <1519874345-10235-3-git-send-email-jasowang@redhat.com>


On Thu,  1 Mar 2018 11:19:05 +0800 Jason Wang <jasowang@redhat.com> wrote:

> We used to do data copy through xdp_linearize_page() for the buffer
> without sufficient headroom, it brings extra complexity without
> helping for the performance. So this patch remove it and switch to use
> generic XDP routine to handle this case.

I don't like where this is going.  I don't like intermixing the native
XDP and generic XDP in this way, for several reasons:

1. XDP generic is not feature complete, e.g. cpumap will drop these
   packets. It might not be possible to implement some features, think
   of (AF_XDP) zero-copy.

2. This can easily cause out-of-order packets.

3. It makes it harder to troubleshoot, when diagnosing issues
   around #1, we have a hard time determining what path an XDP packet
   took (the xdp tracepoints doesn't know).


[...]
> @@ -590,25 +526,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
>  		if (unlikely(hdr->hdr.gso_type))
>  			goto err_xdp;
>  
> +		/* This happnes when headroom is not enough because
> +		 * the buffer was refilled before XDP is set. This
> +		 * only happen for several packets, for simplicity,
> +		 * offload them to generic XDP routine.

In my practical tests, I also saw that sometime my ping packets were
traveling this code-path, even after a long time when XDP were attached.

This worries me a bit, for troubleshooting purposes... as this can give
a strange user experience given point #1.


> +		 */
>  		if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
> -			int offset = buf - page_address(page) + header_offset;
> -			unsigned int tlen = len + vi->hdr_len;
> -			u16 num_buf = 1;


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH v3 1/6] jailhouse: Provide detection for non-x86 systems
From: Juergen Gross @ 2018-03-01  6:17 UTC (permalink / raw)
  To: Jan Kiszka, Thomas Gleixner, Ingo Molnar, H . Peter Anvin,
	Bjorn Helgaas
  Cc: jailhouse-dev, Mark Rutland, linux-pci, x86,
	Linux Kernel Mailing List, virtualization, Andy Shevchenko,
	Rob Herring
In-Reply-To: <558f5cbae432ae029e2d8ee9b0ef44d837318625.1519882849.git.jan.kiszka@siemens.com>

On 01/03/18 06:40, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Implement jailhouse_paravirt() via device tree probing on architectures
> != x86. Will be used by the PCI core.
> 
> CC: Rob Herring <robh+dt@kernel.org>
> CC: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  Documentation/devicetree/bindings/jailhouse.txt |  8 ++++++++
>  arch/x86/include/asm/jailhouse_para.h           |  2 +-
>  include/linux/hypervisor.h                      | 17 +++++++++++++++--

I'd appreciate if you'd Cc: the maintainers of the touched files...

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

^ permalink raw reply

* [PATCH v3 6/6] MAINTAINERS: Add entry for Jailhouse
From: Jan Kiszka @ 2018-03-01  5:40 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Bjorn Helgaas
  Cc: jailhouse-dev, linux-pci, x86, Linux Kernel Mailing List,
	virtualization, Andy Shevchenko
In-Reply-To: <cover.1519882849.git.jan.kiszka@siemens.com>

From: Jan Kiszka <jan.kiszka@siemens.com>

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 93a12af4f180..4b889f282c77 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7521,6 +7521,13 @@ Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 S:	Maintained
 F:	drivers/media/dvb-frontends/ix2505v*
 
+JAILHOUSE HYPERVISOR INTERFACE
+M:	Jan Kiszka <jan.kiszka@siemens.com>
+L:	jailhouse-dev@googlegroups.com
+S:	Maintained
+F:	arch/x86/kernel/jailhouse.c
+F:	arch/x86/include/asm/jailhouse_para.h
+
 JC42.4 TEMPERATURE SENSOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
 L:	linux-hwmon@vger.kernel.org
-- 
2.13.6

^ permalink raw reply related

* [PATCH v3 5/6] x86/jailhouse: Allow to use PCI_MMCONFIG without ACPI
From: Jan Kiszka @ 2018-03-01  5:40 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Bjorn Helgaas
  Cc: jailhouse-dev, linux-pci, x86, Linux Kernel Mailing List,
	virtualization, Andy Shevchenko
In-Reply-To: <cover.1519882849.git.jan.kiszka@siemens.com>

From: Jan Kiszka <jan.kiszka@siemens.com>

Jailhouse does not use ACPI, but it does support MMCONFIG. Make sure the
latter can be built without having to enable ACPI as well. Primarily, we
need to make the AMD mmconf-fam10h_64 depend upon MMCONFIG and ACPI,
instead of just the former.

Saves some bytes in the Jailhouse non-root kernel.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/Kconfig          | 6 +++++-
 arch/x86/kernel/Makefile  | 2 +-
 arch/x86/kernel/cpu/amd.c | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index aef9d67ac186..b8e73e748acc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2643,7 +2643,7 @@ config PCI_DIRECT
 config PCI_MMCONFIG
 	bool "Support mmconfig PCI config space access" if X86_64
 	default y
-	depends on PCI && (ACPI || SFI) && (X86_64 || (PCI_GOANY || PCI_GOMMCONFIG))
+	depends on PCI && (ACPI || SFI || JAILHOUSE_GUEST) && (X86_64 || (PCI_GOANY || PCI_GOMMCONFIG))
 
 config PCI_OLPC
 	def_bool y
@@ -2658,6 +2658,10 @@ config PCI_DOMAINS
 	def_bool y
 	depends on PCI
 
+config MMCONF_FAM10H
+	def_bool y
+	depends on PCI_MMCONFIG && ACPI
+
 config PCI_CNB20LE_QUIRK
 	bool "Read CNB20LE Host Bridge Windows" if EXPERT
 	depends on PCI
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 29786c87e864..73ccf80c09a2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -146,6 +146,6 @@ ifeq ($(CONFIG_X86_64),y)
 	obj-$(CONFIG_GART_IOMMU)	+= amd_gart_64.o aperture_64.o
 	obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
 
-	obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
+	obj-$(CONFIG_MMCONF_FAM10H)	+= mmconf-fam10h_64.o
 	obj-y				+= vsmp_64.o
 endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f0e6456ca7d3..12bc0a1139da 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -716,7 +716,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
 
 static void init_amd_gh(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_MMCONF_FAM10H
 	/* do this for boot cpu */
 	if (c == &boot_cpu_data)
 		check_enable_amd_mmconf_dmi();
-- 
2.13.6

^ permalink raw reply related

* [PATCH v3 4/6] x86: Consolidate PCI_MMCONFIG configs
From: Jan Kiszka @ 2018-03-01  5:40 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Bjorn Helgaas
  Cc: jailhouse-dev, linux-pci, x86, Linux Kernel Mailing List,
	virtualization, Andy Shevchenko
In-Reply-To: <cover.1519882849.git.jan.kiszka@siemens.com>

From: Jan Kiszka <jan.kiszka@siemens.com>

Since e279b6c1d329 ("x86: start unification of arch/x86/Kconfig.*"), we
have two PCI_MMCONFIG entries, one from the original i386 and another
from x86_64. This consolidates both entries into a single one.

The logic for x86_32, where this option was not under user control,
remains identical. On x86_64, PCI_MMCONFIG becomes additionally
configurable for SFI systems even if ACPI was disabled. This just
simplifies the logic without restricting the configurability in any way.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/Kconfig | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb7f43f23521..aef9d67ac186 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2641,8 +2641,9 @@ config PCI_DIRECT
 	depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG))
 
 config PCI_MMCONFIG
-	def_bool y
-	depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
+	bool "Support mmconfig PCI config space access" if X86_64
+	default y
+	depends on PCI && (ACPI || SFI) && (X86_64 || (PCI_GOANY || PCI_GOMMCONFIG))
 
 config PCI_OLPC
 	def_bool y
@@ -2657,10 +2658,6 @@ config PCI_DOMAINS
 	def_bool y
 	depends on PCI
 
-config PCI_MMCONFIG
-	bool "Support mmconfig PCI config space access"
-	depends on X86_64 && PCI && ACPI
-
 config PCI_CNB20LE_QUIRK
 	bool "Read CNB20LE Host Bridge Windows" if EXPERT
 	depends on PCI
-- 
2.13.6

^ permalink raw reply related

* [PATCH v3 3/6] x86/jailhouse: Enable PCI mmconfig access in inmates
From: Jan Kiszka @ 2018-03-01  5:40 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, H . Peter Anvin, Bjorn Helgaas
  Cc: jailhouse-dev, linux-pci, x86, Linux Kernel Mailing List,
	virtualization, Andy Shevchenko
In-Reply-To: <cover.1519882849.git.jan.kiszka@siemens.com>

From: Otavio Pontes <otavio.pontes@intel.com>

Use the PCI mmconfig base address exported by jailhouse in boot
parameters in order to access the memory mapped PCI configuration space.

Signed-off-by: Otavio Pontes <otavio.pontes@intel.com>
[Jan: rebased, fixed !CONFIG_PCI_MMCONFIG]
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/include/asm/pci_x86.h | 2 ++
 arch/x86/kernel/jailhouse.c    | 7 +++++++
 arch/x86/pci/mmconfig-shared.c | 4 ++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index eb66fa9cd0fc..959d618dbb17 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -151,6 +151,8 @@ extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
 			       phys_addr_t addr);
 extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
 extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
+extern struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
+							int end, u64 addr);
 
 extern struct list_head pci_mmcfg_list;
 
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index b68fd895235a..7fe2a73da0b3 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -124,6 +124,13 @@ static int __init jailhouse_pci_arch_init(void)
 	if (pcibios_last_bus < 0)
 		pcibios_last_bus = 0xff;
 
+#ifdef CONFIG_PCI_MMCONFIG
+	if (setup_data.pci_mmconfig_base) {
+		pci_mmconfig_add(0, 0, 0xff, setup_data.pci_mmconfig_base);
+		pci_mmcfg_arch_init();
+	}
+#endif
+
 	return 0;
 }
 
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 96684d0adcf9..0e590272366b 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -94,8 +94,8 @@ static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
 	return new;
 }
 
-static struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
-							int end, u64 addr)
+struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
+						 int end, u64 addr)
 {
 	struct pci_mmcfg_region *new;
 
-- 
2.13.6

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox