* [PATCH 2/2] qxl: keep separate release_bo pointer
From: Gerd Hoffmann @ 2018-04-18 5:42 UTC (permalink / raw)
To: dri-devel
Cc: David Airlie, Dave Airlie, open list,
open list:DRM DRIVER FOR QXL VIRTUAL GPU
In-Reply-To: <20180418054257.15388-1-kraxel@redhat.com>
qxl expects that list_first_entry(release->bos) returns the first
element qxl added to the list. ttm_eu_reserve_buffers() may reorder
the list though.
Add a release_bo field to struct qxl_release and use that instead.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
drivers/gpu/drm/qxl/qxl_drv.h | 1 +
drivers/gpu/drm/qxl/qxl_cmd.c | 6 ++----
drivers/gpu/drm/qxl/qxl_release.c | 12 ++++++------
3 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 00a1a66b05..864b456080 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -167,6 +167,7 @@ struct qxl_release {
int id;
int type;
+ struct qxl_bo *release_bo;
uint32_t release_offset;
uint32_t surface_release_id;
struct ww_acquire_ctx ticket;
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index c0fb52c6d4..01665b98c5 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -179,10 +179,9 @@ qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *relea
uint32_t type, bool interruptible)
{
struct qxl_command cmd;
- struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
cmd.type = type;
- cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
+ cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset);
return qxl_ring_push(qdev->command_ring, &cmd, interruptible);
}
@@ -192,10 +191,9 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas
uint32_t type, bool interruptible)
{
struct qxl_command cmd;
- struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
cmd.type = type;
- cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
+ cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset);
return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible);
}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index a0b4244d28..7cb2145772 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -173,6 +173,7 @@ qxl_release_free_list(struct qxl_release *release)
list_del(&entry->tv.head);
kfree(entry);
}
+ release->release_bo = NULL;
}
void
@@ -296,7 +297,6 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
{
if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) {
int idr_ret;
- struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head);
struct qxl_bo *bo;
union qxl_release_info *info;
@@ -304,8 +304,9 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release);
if (idr_ret < 0)
return idr_ret;
- bo = to_qxl_bo(entry->tv.bo);
+ bo = create_rel->release_bo;
+ (*release)->release_bo = bo;
(*release)->release_offset = create_rel->release_offset + 64;
qxl_release_list_add(*release, bo);
@@ -365,6 +366,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]);
+ (*release)->release_bo = bo;
(*release)->release_offset = qdev->current_release_bo_offset[cur_idx] * release_size_per_bo[cur_idx];
qdev->current_release_bo_offset[cur_idx]++;
@@ -408,8 +410,7 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev,
{
void *ptr;
union qxl_release_info *info;
- struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
- struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
+ struct qxl_bo *bo = release->release_bo;
ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_MASK);
if (!ptr)
@@ -422,8 +423,7 @@ void qxl_release_unmap(struct qxl_device *qdev,
struct qxl_release *release,
union qxl_release_info *info)
{
- struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
- struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
+ struct qxl_bo *bo = release->release_bo;
void *ptr;
ptr = ((void *)info) - (release->release_offset & ~PAGE_MASK);
--
2.9.3
^ permalink raw reply related
* [PATCH 1/2] qxl: fix qxl_release_{map,unmap}
From: Gerd Hoffmann @ 2018-04-18 5:42 UTC (permalink / raw)
To: dri-devel
Cc: David Airlie, Dave Airlie, open list,
open list:DRM DRIVER FOR QXL VIRTUAL GPU
In-Reply-To: <20180418054257.15388-1-kraxel@redhat.com>
s/PAGE_SIZE/PAGE_MASK/
Luckily release_offset is never larger than PAGE_SIZE, so the bug has no
bad side effects and managed to stay unnoticed for years that way ...
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
drivers/gpu/drm/qxl/qxl_ioctl.c | 4 ++--
drivers/gpu/drm/qxl/qxl_release.c | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index e238a1a2ec..6cc9f3367f 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -182,9 +182,9 @@ static int qxl_process_single_command(struct qxl_device *qdev,
goto out_free_reloc;
/* TODO copy slow path code from i915 */
- fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE));
+ fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_MASK));
unwritten = __copy_from_user_inatomic_nocache
- (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE),
+ (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_MASK),
u64_to_user_ptr(cmd->command), cmd->command_size);
{
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 5d84a66fed..a0b4244d28 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -411,10 +411,10 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev,
struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
- ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE);
+ ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_MASK);
if (!ptr)
return NULL;
- info = ptr + (release->release_offset & ~PAGE_SIZE);
+ info = ptr + (release->release_offset & ~PAGE_MASK);
return info;
}
@@ -426,7 +426,7 @@ void qxl_release_unmap(struct qxl_device *qdev,
struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
void *ptr;
- ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE);
+ ptr = ((void *)info) - (release->release_offset & ~PAGE_MASK);
qxl_bo_kunmap_atomic_page(qdev, bo, ptr);
}
--
2.9.3
^ permalink raw reply related
* Re: [RFC] virtio: Use DMA MAP API for devices without an IOMMU
From: Anshuman Khandual @ 2018-04-18 3:17 UTC (permalink / raw)
To: Christoph Hellwig, Benjamin Herrenschmidt
Cc: robh, Michael S. Tsirkin, linux-kernel, virtualization, joe,
linuxppc-dev, elfring, david
In-Reply-To: <20180415121157.GA17726@infradead.org>
On 04/15/2018 05:41 PM, Christoph Hellwig wrote:
> On Fri, Apr 06, 2018 at 06:37:18PM +1000, Benjamin Herrenschmidt wrote:
>>>> implemented as DMA API which the virtio core understands. There is no
>>>> need for an IOMMU to be involved for the device representation in this
>>>> case IMHO.
>>>
>>> This whole virtio translation issue is a mess. I think we need to
>>> switch it to the dma API, and then quirk the legacy case to always
>>> use the direct mapping inside the dma API.
>>
>> Fine with using a dma API always on the Linux side, but we do want to
>> special case virtio still at the arch and qemu side to have a "direct
>> mapping" mode. Not sure how (special flags on PCI devices) to avoid
>> actually going through an emulated IOMMU on the qemu side, because that
>> slows things down, esp. with vhost.
>>
>> IE, we can't I think just treat it the same as a physical device.
>
> We should have treated it like a physical device from the start, but
> that device has unfortunately sailed.
>
> But yes, we'll need a per-device quirk that says 'don't attach an
> iommu'.
How about doing it per platform basis as suggested in this RFC through
an arch specific callback. Because all the virtio devices in the given
platform would require and exercise this option (to avail bounce buffer
mechanism for secure guests as an example). So the flag basically is a
platform specific one not a device specific one.
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Tiwei Bie @ 2018-04-18 1:17 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417184810-mutt-send-email-mst@kernel.org>
On Tue, Apr 17, 2018 at 06:54:51PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 10:56:26PM +0800, Tiwei Bie wrote:
> > On Tue, Apr 17, 2018 at 05:04:59PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> > > > On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > > > > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > > > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > [...]
> > > > > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > > > > > + void **ctx)
> > > > > > > > > > +{
> > > > > > > > > > + struct vring_packed_desc *desc;
> > > > > > > > > > + unsigned int i, j;
> > > > > > > > > > +
> > > > > > > > > > + /* Clear data ptr. */
> > > > > > > > > > + vq->desc_state[head].data = NULL;
> > > > > > > > > > +
> > > > > > > > > > + i = head;
> > > > > > > > > > +
> > > > > > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > > > > + desc = &vq->vring_packed.desc[i];
> > > > > > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > > > > > + desc->flags = 0x0;
> > > > > > > > > Looks like this is unnecessary.
> > > > > > > > It's safer to zero it. If we don't zero it, after we
> > > > > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > > > > this function, the desc is still available to the
> > > > > > > > device.
> > > > > > >
> > > > > > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > > > > > otherwise even if you try to clear, there will still be a window that device
> > > > > > > may use it.
> > > > > >
> > > > > > This is not about whether the device has been stopped or
> > > > > > not. We don't have other places to re-initialize the ring
> > > > > > descriptors and wrap_counter. So they need to be set to
> > > > > > the correct values when doing detach_unused_buf.
> > > > > >
> > > > > > Best regards,
> > > > > > Tiwei Bie
> > > > >
> > > > > find vqs is the time to do it.
> > > >
> > > > The .find_vqs() will call .setup_vq() which will eventually
> > > > call vring_create_virtqueue(). It's a different case. Here
> > > > we're talking about re-initializing the descs and updating
> > > > the wrap counter when detaching the unused descs (In this
> > > > case, split ring just needs to decrease vring.avail->idx).
> > > >
> > > > Best regards,
> > > > Tiwei Bie
> > >
> > > There's no requirement that virtqueue_detach_unused_buf re-initializes
> > > the descs. It happens on cleanup path just before drivers delete the
> > > vqs.
> >
> > Cool, I wasn't aware of it. I saw split ring decrease
> > vring.avail->idx after detaching an unused desc, so I
> > thought detaching unused desc also needs to make sure
> > that the ring state will be updated correspondingly.
>
>
> Hmm. You are right. Seems to be out console driver being out of spec.
> Will have to look at how to fix that :(
>
> It was done here:
>
> Commit b3258ff1d6086bd2b9eeb556844a868ad7d49bc8
> Author: Amit Shah <amit.shah@redhat.com>
> Date: Wed Mar 16 19:12:10 2011 +0530
>
> virtio: Decrement avail idx on buffer detach
>
> When detaching a buffer from a vq, the avail.idx value should be
> decremented as well.
>
> This was noticed by hot-unplugging a virtio console port and then
> plugging in a new one on the same number (re-using the vqs which were
> just 'disowned'). qemu reported
>
> 'Guest moved used index from 0 to 256'
>
> when any IO was attempted on the new port.
>
> CC: stable@kernel.org
> Reported-by: juzhang <juzhang@redhat.com>
> Signed-off-by: Amit Shah <amit.shah@redhat.com>
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>
> The spec is quite explicit though:
> A driver MUST NOT decrement the available idx on a live virtqueue (ie. there is no way to “unexpose”
> buffers).
>
Hmm.. Got it. Thanks!
Best regards,
Tiwei Bie
>
>
>
>
> > If there is no such requirement, do you think it's OK
> > to remove below two lines:
> >
> > vq->avail_idx_shadow--;
> > vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
> >
> > from virtqueue_detach_unused_buf(), and we could have
> > one generic function to handle both rings:
> >
> > void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > unsigned int num, i;
> > void *buf;
> >
> > START_USE(vq);
> >
> > num = vq->packed ? vq->vring_packed.num : vq->vring.num;
> >
> > for (i = 0; i < num; i++) {
> > if (!vq->desc_state[i].data)
> > continue;
> > /* detach_buf clears data, so grab it now. */
> > buf = vq->desc_state[i].data;
> > detach_buf(vq, i, NULL);
> > END_USE(vq);
> > return buf;
> > }
> > /* That should have freed everything. */
> > BUG_ON(vq->vq.num_free != num);
> >
> > END_USE(vq);
> > return NULL;
> > }
> >
> > Best regards,
> > Tiwei Bie
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC PATCH 2/3] netdev: kernel-only IFF_HIDDEN netdevice
From: Siwei Liu @ 2018-04-18 0:26 UTC (permalink / raw)
To: David Miller
Cc: Alexander Duyck, virtio-dev, Jiri Pirko, Michael S. Tsirkin,
Jakub Kicinski, Samudrala, Sridhar, virtualization, Netdev,
David Ahern, si-wei liu
In-Reply-To: <CADGSJ233AmbBrTP-A_u3-5fkr=HUJka_xcSNWxO1HWGua1J92Q@mail.gmail.com>
I ran this with a few folks offline and gathered some good feedbacks
that I'd like to share thus revive the discussion.
First of all, as illustrated in the reply below, cloud service
providers require transparent live migration. Specifically, the main
target of our case is to support SR-IOV live migration via kernel
upgrade while keeping the userspace of old distros unmodified. If it's
because this use case is not appealing enough for the mainline to
adopt, I will shut up and not continue discussing, although
technically it's entirely possible (and there's precedent in other
implementation) to do so to benefit any cloud service providers.
If it's just the implementation of hiding netdev itself needs to be
improved, such as implementing it as attribute flag or adding linkdump
API, that's completely fine and we can look into that. However, the
specific issue needs to be undestood beforehand is to make transparent
SR-IOV to be able to take over the name (so inherit all the configs)
from the lower netdev, which needs some games with uevents and name
space reservation. So far I don't think it's been well discussed.
One thing in particular I'd like to point out is that the 3-netdev
model currently missed to address the core problem of live migration:
migration of hardware specific feature/state, for e.g. ethtool configs
and hardware offloading states. Only general network state (IP
address, gateway, for eg.) associated with the bypass interface can be
migrated. As a follow-up work, bypass driver can/should be enhanced to
save and apply those hardware specific configs before or after
migration as needed. The transparent 1-netdev model being proposed as
part of this patch series will be able to solve that problem naturally
by making all hardware specific configurations go through the central
bypass driver, such that hardware configurations can be replayed when
new VF or passthrough gets plugged back in. Although that
corresponding function hasn't been implemented today, I'd like to
refresh everyone's mind that is the core problem any live migration
proposal should have addressed.
If it would make things more clear to defer netdev hiding until all
functionalities regarding centralizing and replay are implemented,
we'd take advices like that and move on to implementing those features
as follow-up patches. Once all needed features get done, we'd resume
the work for hiding lower netdev at that point. Think it would be the
best to make everyone understand the big picture in advance before
going too far.
Thanks, comments welcome.
-Siwei
On Mon, Apr 9, 2018 at 11:48 PM, Siwei Liu <loseweigh@gmail.com> wrote:
> On Sun, Apr 8, 2018 at 9:32 AM, David Miller <davem@davemloft.net> wrote:
>> From: Siwei Liu <loseweigh@gmail.com>
>> Date: Fri, 6 Apr 2018 19:32:05 -0700
>>
>>> And I assume everyone here understands the use case for live
>>> migration (in the context of providing cloud service) is very
>>> different, and we have to hide the netdevs. If not, I'm more than
>>> happy to clarify.
>>
>> I think you still need to clarify.
>
> OK. The short answer is cloud users really want *transparent* live migration.
>
> By being transparent it means they don't and shouldn't care about the
> existence and the occurence of live migration, but they do if
> userspace toolstack and libraries have to be updated or modified,
> which means potential dependency brokeness of their applications. They
> don't like any change to the userspace envinroment (existing apps
> lift-and-shift, no recompilation, no re-packaging, no re-certification
> needed), while no one barely cares about ABI or API compatibility in
> the kernel level, as long as their applications don't break.
>
> I agree the current bypass solution for SR-IOV live migration requires
> guest cooperation. Though it doesn't mean guest *userspace*
> cooperation. As a matter of fact, techinically it shouldn't invovle
> userspace at all to get SR-IOV migration working. It's the kernel that
> does the real work. If I understand the goal of this in-kernel
> approach correctly, it was meant to save userspace from modification
> or corresponding toolstack support, as those additional 2 interfaces
> is more a side product of this approach, rather than being neccessary
> for users to be aware of. All what the user needs to deal with is one
> single interface, and that's what they care about. It's more a trouble
> than help when they see 2 extra interfaces are present. Management
> tools in the old distros don't recoginze them and try to bring up
> those extra interfaces for its own. Various odd warnings start to spew
> out, and there's a lot of caveats for the users to get around...
>
> On the other hand, if we "teach" those cloud users to update the
> userspace toolstack just for trading a feature they don't need, no one
> is likely going to embrace the change. As such there's just no real
> value of adopting this in-kernel bypass facility for any cloud service
> provider. It does not look more appealing than just configure generic
> bonding using its own set of daemons or scripts. But again, cloud
> users don't welcome that facility. And basically it would get to
> nearly the same set of problems if leaving userspace alone.
>
> IMHO we're not hiding the devices, think it the way we're adding a
> feature transparent to user. Those auto-managed slaves are ones users
> don't care about much. And user is still able to see and configure the
> lower netdevs if they really desires to do so. But generally the
> target user for this feature won't need to know that. Why they care
> how many interfaces a VM virtually has rather than how many interfaces
> are actually _useable_ to them??
>
> Thanks,
> -Siwei
>
>
>>
>> netdevs are netdevs. If they have special attributes, mark them as
>> such and the tools base their actions upon that.
>>
>> "Hiding", or changing classes, doesn't make any sense to me still.
^ permalink raw reply
* Re: [PATCH net-next 0/5] virtio-net: Add SCTP checksum offload support
From: Vlad Yasevich @ 2018-04-17 20:35 UTC (permalink / raw)
To: Marcelo Ricardo Leitner, Vladislav Yasevich
Cc: nhorman, mst, netdev, virtualization, linux-sctp
In-Reply-To: <20180402144730.GA6001@localhost.localdomain>
On 04/02/2018 10:47 AM, Marcelo Ricardo Leitner wrote:
> On Mon, Apr 02, 2018 at 09:40:01AM -0400, Vladislav Yasevich wrote:
>> Now that we have SCTP offload capabilities in the kernel, we can add
>> them to virtio as well. First step is SCTP checksum.
>
> Thanks.
>
>> As for GSO, the way sctp GSO is currently implemented buys us nothing
>> in added support to virtio. To add true GSO, would require a lot of
>> re-work inside of SCTP and would require extensions to the virtio
>> net header to carry extra sctp data.
>
> Can you please elaborate more on this? Is this because SCTP GSO relies
> on the gso skb format for knowing how to segment it instead of having
> a list of sizes?
>
it's mainly because all the true segmentation, placing data into chunks,
has already happened. All that GSO does is allow for higher bundling
rate between VMs. If that is all SCTP GSO ever going to do, that fine,
but the goal is to do real GSO eventually and potentially reduce the
amount of memory copying we are doing.
If we do that, any current attempt at GSO in virtio would have to be
depricated and we'd need GSO2 or something like that.
This is why, after doing the GSO support, I decided not to include it.
-vlad
> Marcelo
>
^ permalink raw reply
* Re: [PATCH net-next 1/5] virtio: Add support for SCTP checksum offloading
From: Vlad Yasevich @ 2018-04-17 19:06 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: nhorman, netdev, virtualization, linux-sctp, Vladislav Yasevich
In-Reply-To: <20180416200743-mutt-send-email-mst@kernel.org>
On 04/16/2018 01:09 PM, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2018 at 09:45:48AM -0400, Vlad Yasevich wrote:
>> On 04/11/2018 06:49 PM, Michael S. Tsirkin wrote:
>>> On Mon, Apr 02, 2018 at 09:40:02AM -0400, Vladislav Yasevich wrote:
>>>> To support SCTP checksum offloading, we need to add a new feature
>>>> to virtio_net, so we can negotiate support between the hypervisor
>>>> and the guest.
>>>>
>>>> The signalling to the guest that an alternate checksum needs to
>>>> be used is done via a new flag in the virtio_net_hdr. If the
>>>> flag is set, the host will know to perform an alternate checksum
>>>> calculation, which right now is only CRC32c.
>>>>
>>>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
>>>> ---
>>>> drivers/net/virtio_net.c | 11 ++++++++---
>>>> include/linux/virtio_net.h | 6 ++++++
>>>> include/uapi/linux/virtio_net.h | 2 ++
>>>> 3 files changed, 16 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>> index 7b187ec..b601294 100644
>>>> --- a/drivers/net/virtio_net.c
>>>> +++ b/drivers/net/virtio_net.c
>>>> @@ -2724,9 +2724,14 @@ static int virtnet_probe(struct virtio_device *vdev)
>>>> /* Do we support "hardware" checksums? */
>>>> if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
>>>> /* This opens up the world of extra features. */
>>>> - dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
>>>> + netdev_features_t sctp = 0;
>>>> +
>>>> + if (virtio_has_feature(vdev, VIRTIO_NET_F_SCTP_CSUM))
>>>> + sctp |= NETIF_F_SCTP_CRC;
>>>> +
>>>> + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
>>>> if (csum)
>>>> - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
>>>> + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
>>>>
>>>> if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
>>>> dev->hw_features |= NETIF_F_TSO
>>>> @@ -2952,7 +2957,7 @@ static struct virtio_device_id id_table[] = {
>>>> };
>>>>
>>>> #define VIRTNET_FEATURES \
>>>> - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
>>>> + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_SCTP_CSUM, \
>>>> VIRTIO_NET_F_MAC, \
>>>> VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
>>>> VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
>>>> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
>>>> index f144216..2e7a64a 100644
>>>> --- a/include/linux/virtio_net.h
>>>> +++ b/include/linux/virtio_net.h
>>>> @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
>>>>
>>>> if (!skb_partial_csum_set(skb, start, off))
>>>> return -EINVAL;
>>>> +
>>>> + if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET)
>>>> + skb->csum_not_inet = 1;
>>>> }
>>>>
>>>> if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
>>>> @@ -96,6 +99,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
>>>> hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
>>>> } /* else everything is zero */
>>>>
>>>> + if (skb->csum_not_inet)
>>>> + hdr->flags &= VIRTIO_NET_HDR_F_CSUM_NOT_INET;
>>>> +
>>>> return 0;
>>>> }
>>>>
>>>> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
>>>> index 5de6ed3..3f279c8 100644
>>>> --- a/include/uapi/linux/virtio_net.h
>>>> +++ b/include/uapi/linux/virtio_net.h
>>>> @@ -36,6 +36,7 @@
>>>> #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
>>>> #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
>>>> #define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */
>>>> +#define VIRTIO_NET_F_SCTP_CSUM 4 /* SCTP checksum offload support */
>>>> #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
>>>> #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
>>>> #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
>>>
>>> Is this a guest or a host checksum? We should differenciate between the
>>> two.
>>
>> I suppose this is HOST checksum, since it behaves like VIRTIO_NET_F_CSUM only for
>> SCTP. I couldn't find the use for the GUEST side flag, since there technically
>> isn't any validations and there is no additional mappings from VIRTIO flag to a
>> NETIF flag.
>>
>> If the feature is negotiated, the guest ends up generating partially check-summed
>> packets, and the host turns on appropriate flags on it's side. The host will
>> also make sure the checksum if fixed up if the guest doesn't support it.
>> So 1 flag is currently all that is needed.
>>
>> -vlad
>
> I see code handling VIRTIO_NET_HDR_F_CSUM_NOT_INET on RX side. Host
> needs to know whether it's ok/worth it to set this flag, too.
I think I understand. I have to re-consider outside of the context of
Linux behavior.
-vlad
>
>>>
>>>
>>>> @@ -101,6 +102,7 @@ struct virtio_net_config {
>>>> struct virtio_net_hdr_v1 {
>>>> #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
>>>> #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
>>>> +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET 4 /* Checksum is not inet */
>>>> __u8 flags;
>>>> #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
>>>> #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
>>>> --
>>>> 2.9.5
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Michael S. Tsirkin @ 2018-04-17 15:54 UTC (permalink / raw)
To: Tiwei Bie; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417145626.y5vei4y6irrdw7ky@debian>
On Tue, Apr 17, 2018 at 10:56:26PM +0800, Tiwei Bie wrote:
> On Tue, Apr 17, 2018 at 05:04:59PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> > > On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > > > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > [...]
> > > > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > > > > + void **ctx)
> > > > > > > > > +{
> > > > > > > > > + struct vring_packed_desc *desc;
> > > > > > > > > + unsigned int i, j;
> > > > > > > > > +
> > > > > > > > > + /* Clear data ptr. */
> > > > > > > > > + vq->desc_state[head].data = NULL;
> > > > > > > > > +
> > > > > > > > > + i = head;
> > > > > > > > > +
> > > > > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > > > + desc = &vq->vring_packed.desc[i];
> > > > > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > > > > + desc->flags = 0x0;
> > > > > > > > Looks like this is unnecessary.
> > > > > > > It's safer to zero it. If we don't zero it, after we
> > > > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > > > this function, the desc is still available to the
> > > > > > > device.
> > > > > >
> > > > > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > > > > otherwise even if you try to clear, there will still be a window that device
> > > > > > may use it.
> > > > >
> > > > > This is not about whether the device has been stopped or
> > > > > not. We don't have other places to re-initialize the ring
> > > > > descriptors and wrap_counter. So they need to be set to
> > > > > the correct values when doing detach_unused_buf.
> > > > >
> > > > > Best regards,
> > > > > Tiwei Bie
> > > >
> > > > find vqs is the time to do it.
> > >
> > > The .find_vqs() will call .setup_vq() which will eventually
> > > call vring_create_virtqueue(). It's a different case. Here
> > > we're talking about re-initializing the descs and updating
> > > the wrap counter when detaching the unused descs (In this
> > > case, split ring just needs to decrease vring.avail->idx).
> > >
> > > Best regards,
> > > Tiwei Bie
> >
> > There's no requirement that virtqueue_detach_unused_buf re-initializes
> > the descs. It happens on cleanup path just before drivers delete the
> > vqs.
>
> Cool, I wasn't aware of it. I saw split ring decrease
> vring.avail->idx after detaching an unused desc, so I
> thought detaching unused desc also needs to make sure
> that the ring state will be updated correspondingly.
Hmm. You are right. Seems to be out console driver being out of spec.
Will have to look at how to fix that :(
It was done here:
Commit b3258ff1d6086bd2b9eeb556844a868ad7d49bc8
Author: Amit Shah <amit.shah@redhat.com>
Date: Wed Mar 16 19:12:10 2011 +0530
virtio: Decrement avail idx on buffer detach
When detaching a buffer from a vq, the avail.idx value should be
decremented as well.
This was noticed by hot-unplugging a virtio console port and then
plugging in a new one on the same number (re-using the vqs which were
just 'disowned'). qemu reported
'Guest moved used index from 0 to 256'
when any IO was attempted on the new port.
CC: stable@kernel.org
Reported-by: juzhang <juzhang@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
The spec is quite explicit though:
A driver MUST NOT decrement the available idx on a live virtqueue (ie. there is no way to “unexpose”
buffers).
> If there is no such requirement, do you think it's OK
> to remove below two lines:
>
> vq->avail_idx_shadow--;
> vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
>
> from virtqueue_detach_unused_buf(), and we could have
> one generic function to handle both rings:
>
> void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> unsigned int num, i;
> void *buf;
>
> START_USE(vq);
>
> num = vq->packed ? vq->vring_packed.num : vq->vring.num;
>
> for (i = 0; i < num; i++) {
> if (!vq->desc_state[i].data)
> continue;
> /* detach_buf clears data, so grab it now. */
> buf = vq->desc_state[i].data;
> detach_buf(vq, i, NULL);
> END_USE(vq);
> return buf;
> }
> /* That should have freed everything. */
> BUG_ON(vq->vq.num_free != num);
>
> END_USE(vq);
> return NULL;
> }
>
> Best regards,
> Tiwei Bie
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Tiwei Bie @ 2018-04-17 14:56 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417170354-mutt-send-email-mst@kernel.org>
On Tue, Apr 17, 2018 at 05:04:59PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> > On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > [...]
> > > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > > > + void **ctx)
> > > > > > > > +{
> > > > > > > > + struct vring_packed_desc *desc;
> > > > > > > > + unsigned int i, j;
> > > > > > > > +
> > > > > > > > + /* Clear data ptr. */
> > > > > > > > + vq->desc_state[head].data = NULL;
> > > > > > > > +
> > > > > > > > + i = head;
> > > > > > > > +
> > > > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > > + desc = &vq->vring_packed.desc[i];
> > > > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > > > + desc->flags = 0x0;
> > > > > > > Looks like this is unnecessary.
> > > > > > It's safer to zero it. If we don't zero it, after we
> > > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > > this function, the desc is still available to the
> > > > > > device.
> > > > >
> > > > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > > > otherwise even if you try to clear, there will still be a window that device
> > > > > may use it.
> > > >
> > > > This is not about whether the device has been stopped or
> > > > not. We don't have other places to re-initialize the ring
> > > > descriptors and wrap_counter. So they need to be set to
> > > > the correct values when doing detach_unused_buf.
> > > >
> > > > Best regards,
> > > > Tiwei Bie
> > >
> > > find vqs is the time to do it.
> >
> > The .find_vqs() will call .setup_vq() which will eventually
> > call vring_create_virtqueue(). It's a different case. Here
> > we're talking about re-initializing the descs and updating
> > the wrap counter when detaching the unused descs (In this
> > case, split ring just needs to decrease vring.avail->idx).
> >
> > Best regards,
> > Tiwei Bie
>
> There's no requirement that virtqueue_detach_unused_buf re-initializes
> the descs. It happens on cleanup path just before drivers delete the
> vqs.
Cool, I wasn't aware of it. I saw split ring decrease
vring.avail->idx after detaching an unused desc, so I
thought detaching unused desc also needs to make sure
that the ring state will be updated correspondingly.
If there is no such requirement, do you think it's OK
to remove below two lines:
vq->avail_idx_shadow--;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
from virtqueue_detach_unused_buf(), and we could have
one generic function to handle both rings:
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
unsigned int num, i;
void *buf;
START_USE(vq);
num = vq->packed ? vq->vring_packed.num : vq->vring.num;
for (i = 0; i < num; i++) {
if (!vq->desc_state[i].data)
continue;
/* detach_buf clears data, so grab it now. */
buf = vq->desc_state[i].data;
detach_buf(vq, i, NULL);
END_USE(vq);
return buf;
}
/* That should have freed everything. */
BUG_ON(vq->vq.num_free != num);
END_USE(vq);
return NULL;
}
Best regards,
Tiwei Bie
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Michael S. Tsirkin @ 2018-04-17 14:04 UTC (permalink / raw)
To: Tiwei Bie; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417124716.wsypd5zl4n4galrz@debian>
On Tue, Apr 17, 2018 at 08:47:16PM +0800, Tiwei Bie wrote:
> On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > [...]
> > > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > > + void **ctx)
> > > > > > > +{
> > > > > > > + struct vring_packed_desc *desc;
> > > > > > > + unsigned int i, j;
> > > > > > > +
> > > > > > > + /* Clear data ptr. */
> > > > > > > + vq->desc_state[head].data = NULL;
> > > > > > > +
> > > > > > > + i = head;
> > > > > > > +
> > > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > > + desc = &vq->vring_packed.desc[i];
> > > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > > + desc->flags = 0x0;
> > > > > > Looks like this is unnecessary.
> > > > > It's safer to zero it. If we don't zero it, after we
> > > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > > this function, the desc is still available to the
> > > > > device.
> > > >
> > > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > > otherwise even if you try to clear, there will still be a window that device
> > > > may use it.
> > >
> > > This is not about whether the device has been stopped or
> > > not. We don't have other places to re-initialize the ring
> > > descriptors and wrap_counter. So they need to be set to
> > > the correct values when doing detach_unused_buf.
> > >
> > > Best regards,
> > > Tiwei Bie
> >
> > find vqs is the time to do it.
>
> The .find_vqs() will call .setup_vq() which will eventually
> call vring_create_virtqueue(). It's a different case. Here
> we're talking about re-initializing the descs and updating
> the wrap counter when detaching the unused descs (In this
> case, split ring just needs to decrease vring.avail->idx).
>
> Best regards,
> Tiwei Bie
There's no requirement that virtqueue_detach_unused_buf re-initializes
the descs. It happens on cleanup path just before drivers delete the
vqs.
--
MST
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Tiwei Bie @ 2018-04-17 12:47 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417151654-mutt-send-email-mst@kernel.org>
On Tue, Apr 17, 2018 at 03:17:41PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > [...]
> > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > + void **ctx)
> > > > > > +{
> > > > > > + struct vring_packed_desc *desc;
> > > > > > + unsigned int i, j;
> > > > > > +
> > > > > > + /* Clear data ptr. */
> > > > > > + vq->desc_state[head].data = NULL;
> > > > > > +
> > > > > > + i = head;
> > > > > > +
> > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > + desc = &vq->vring_packed.desc[i];
> > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > + desc->flags = 0x0;
> > > > > Looks like this is unnecessary.
> > > > It's safer to zero it. If we don't zero it, after we
> > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > this function, the desc is still available to the
> > > > device.
> > >
> > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > otherwise even if you try to clear, there will still be a window that device
> > > may use it.
> >
> > This is not about whether the device has been stopped or
> > not. We don't have other places to re-initialize the ring
> > descriptors and wrap_counter. So they need to be set to
> > the correct values when doing detach_unused_buf.
> >
> > Best regards,
> > Tiwei Bie
>
> find vqs is the time to do it.
The .find_vqs() will call .setup_vq() which will eventually
call vring_create_virtqueue(). It's a different case. Here
we're talking about re-initializing the descs and updating
the wrap counter when detaching the unused descs (In this
case, split ring just needs to decrease vring.avail->idx).
Best regards,
Tiwei Bie
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Michael S. Tsirkin @ 2018-04-17 12:17 UTC (permalink / raw)
To: Tiwei Bie; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417025133.7t7exmizgolr565z@debian>
On Tue, Apr 17, 2018 at 10:51:33AM +0800, Tiwei Bie wrote:
> On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> [...]
> > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > + void **ctx)
> > > > > +{
> > > > > + struct vring_packed_desc *desc;
> > > > > + unsigned int i, j;
> > > > > +
> > > > > + /* Clear data ptr. */
> > > > > + vq->desc_state[head].data = NULL;
> > > > > +
> > > > > + i = head;
> > > > > +
> > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > + desc = &vq->vring_packed.desc[i];
> > > > > + vring_unmap_one_packed(vq, desc);
> > > > > + desc->flags = 0x0;
> > > > Looks like this is unnecessary.
> > > It's safer to zero it. If we don't zero it, after we
> > > call virtqueue_detach_unused_buf_packed() which calls
> > > this function, the desc is still available to the
> > > device.
> >
> > Well detach_unused_buf_packed() should be called after device is stopped,
> > otherwise even if you try to clear, there will still be a window that device
> > may use it.
>
> This is not about whether the device has been stopped or
> not. We don't have other places to re-initialize the ring
> descriptors and wrap_counter. So they need to be set to
> the correct values when doing detach_unused_buf.
>
> Best regards,
> Tiwei Bie
find vqs is the time to do it.
--
MST
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Tiwei Bie @ 2018-04-17 2:51 UTC (permalink / raw)
To: Jason Wang; +Cc: mst, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <8dee7d62-ac0b-54ba-6bec-4bc4a6fb34e9@redhat.com>
On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> On 2018年04月13日 15:15, Tiwei Bie wrote:
> > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > On 2018年04月01日 22:12, Tiwei Bie wrote:
[...]
> > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > + void **ctx)
> > > > +{
> > > > + struct vring_packed_desc *desc;
> > > > + unsigned int i, j;
> > > > +
> > > > + /* Clear data ptr. */
> > > > + vq->desc_state[head].data = NULL;
> > > > +
> > > > + i = head;
> > > > +
> > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > + desc = &vq->vring_packed.desc[i];
> > > > + vring_unmap_one_packed(vq, desc);
> > > > + desc->flags = 0x0;
> > > Looks like this is unnecessary.
> > It's safer to zero it. If we don't zero it, after we
> > call virtqueue_detach_unused_buf_packed() which calls
> > this function, the desc is still available to the
> > device.
>
> Well detach_unused_buf_packed() should be called after device is stopped,
> otherwise even if you try to clear, there will still be a window that device
> may use it.
This is not about whether the device has been stopped or
not. We don't have other places to re-initialize the ring
descriptors and wrap_counter. So they need to be set to
the correct values when doing detach_unused_buf.
Best regards,
Tiwei Bie
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Michael S. Tsirkin @ 2018-04-17 2:37 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <665c828e-6699-7688-cfea-b23b2b0f5fe3@redhat.com>
On Tue, Apr 17, 2018 at 10:24:32AM +0800, Jason Wang wrote:
>
>
> On 2018年04月17日 10:17, Michael S. Tsirkin wrote:
> > On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
> > >
> > > On 2018年04月13日 15:15, Tiwei Bie wrote:
> > > > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > > > Hello everyone,
> > > > > >
> > > > > > This RFC implements packed ring support for virtio driver.
> > > > > >
> > > > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > > > > > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > > > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > > > > >
> > > > > > TODO:
> > > > > > - Refinements and bug fixes;
> > > > > > - Split into small patches;
> > > > > > - Test indirect descriptor support;
> > > > > > - Test/fix event suppression support;
> > > > > > - Test devices other than net;
> > > > > >
> > > > > > RFC v1 -> RFC v2:
> > > > > > - Add indirect descriptor support - compile test only;
> > > > > > - Add event suppression supprt - compile test only;
> > > > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > > > - Avoid using '%' operator (Jason);
> > > > > > - Rename free_head -> next_avail_idx (Jason);
> > > > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > > > - Some other refinements and bug fixes;
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > > > > > ---
> > > > > > drivers/virtio/virtio_ring.c | 1094 +++++++++++++++++++++++++++++-------
> > > > > > include/linux/virtio_ring.h | 8 +-
> > > > > > include/uapi/linux/virtio_config.h | 12 +-
> > > > > > include/uapi/linux/virtio_ring.h | 61 ++
> > > > > > 4 files changed, 980 insertions(+), 195 deletions(-)
> > > > [...]
>
> [...]
>
> > > > > It looks to me we should examine RING_EVENT_FLAGS_DESC in desc_event_flags
> > > > > instead of vq->event here. Spec does not forces to use evenf_off and
> > > > > event_wrap if event index is enabled.
> > > > >
> > > > > > + // FIXME: fix this!
> > > > > > + needs_kick = ((off_wrap >> 15) == vq->wrap_counter) &&
> > > > > > + vring_need_event(off_wrap & ~(1<<15), new, old);
> > > > > Why need a & here?
> > > > Because wrap_counter (the most significant bit in off_wrap)
> > > > isn't part of the index.
> > > >
> > > > > > + } else {
> > > > > Need a smp_rmb() to make sure desc_event_flags was checked before flags.
> > > > I don't get your point, if my understanding is correct,
> > > > desc_event_flags is vq->vring_packed.device->flags. So
> > > > what's the "flags"?
> > > Sorry, I mean we need check device.flags before off_warp. So it needs an
> > > smp_rmb() in the middle.
> > It's best to just read all flags atomically as u32.
>
> Yes it is.
>
> >
> > > It looks to me there's no guarantee that
> > > VRING_EVENT_F_DESC is set if event index is supported.
> > >
> > > > > > + needs_kick = (vq->vring_packed.device->flags !=
> > > > > > + cpu_to_virtio16(_vq->vdev, VRING_EVENT_F_DISABLE));
> > > > > > + }
> > > > > > + END_USE(vq);
> > > > > > + return needs_kick;
> > > > > > +}
> > > > [...]
> > > > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > + void **ctx)
> > > > > > +{
> > > > > > + struct vring_packed_desc *desc;
> > > > > > + unsigned int i, j;
> > > > > > +
> > > > > > + /* Clear data ptr. */
> > > > > > + vq->desc_state[head].data = NULL;
> > > > > > +
> > > > > > + i = head;
> > > > > > +
> > > > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > > > + desc = &vq->vring_packed.desc[i];
> > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > + desc->flags = 0x0;
> > > > > Looks like this is unnecessary.
> > > > It's safer to zero it. If we don't zero it, after we
> > > > call virtqueue_detach_unused_buf_packed() which calls
> > > > this function, the desc is still available to the
> > > > device.
> > > Well detach_unused_buf_packed() should be called after device is stopped,
> > > otherwise even if you try to clear, there will still be a window that device
> > > may use it.
> > >
> > > > > > + i++;
> > > > > > + if (i >= vq->vring_packed.num)
> > > > > > + i = 0;
> > > > > > + }
> > > > [...]
> > > > > > +static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > > > > +{
> > > > > > + struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > + u16 last_used_idx, wrap_counter, off_wrap;
> > > > > > +
> > > > > > + START_USE(vq);
> > > > > > +
> > > > > > + last_used_idx = vq->last_used_idx;
> > > > > > + wrap_counter = vq->wrap_counter;
> > > > > > +
> > > > > > + if (last_used_idx > vq->next_avail_idx)
> > > > > > + wrap_counter ^= 1;
> > > > > > +
> > > > > > + off_wrap = last_used_idx | (wrap_counter << 15);
> > > > > > +
> > > > > > + /* We optimistically turn back on interrupts, then check if there was
> > > > > > + * more to do. */
> > > > > > + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
> > > > > > + * either clear the flags bit or point the event index at the next
> > > > > > + * entry. Always do both to keep code simple. */
> > > > > > + if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC:
> > > > > > + VRING_EVENT_F_ENABLE;
> > > > > > + vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > > > + vq->event_flags_shadow);
> > > > > > + }
> > > > > A smp_wmb() is missed here?
> > > > >
> > > > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev, off_wrap);
> > > > > And according to the spec, it looks to me write a VRING_EVENT_F_ENABLE is
> > > > > sufficient here.
> > > > I didn't think much when implementing the event suppression
> > > > for packed ring previously. After I saw your comments, I found
> > > > something new. Indeed, unlike the split ring, for the packed
> > > > ring, spec doesn't say we must use VRING_EVENT_F_DESC when
> > > > EVENT_IDX is negotiated. So do you think below thought is
> > > > right or makes sense?
> > > >
> > > > - For virtqueue_enable_cb_prepare(), we just need to enable
> > > > the ring by setting flags to VRING_EVENT_F_ENABLE in any
> > > > case.
> > > >
> > > > - We will try to use VRING_EVENT_F_DESC (if EVENT_IDX is
> > > > negotiated) only when we want to delay the interrupts
> > > > virtqueue_enable_cb_delayed().
> > > This looks good to me.
> > I suspect this will lead to extra interrupts if host is fast.
> > So I think for now we should always use VRING_EVENT_F_DESC
> > if EVENT_IDX is negotiated.
>
> Right, so if this is true, maybe we'd better force this in the spec?
>
> Thanks
I did consider this.
Why it is the way it is:
- if we allow DISABLE it seems nicer to allow ENABLE as well
for symmetry.
- ENABLE is handy for hardware offload devices
where kicks do not trigger an exit so not worth bother
with, but interrupts still slow the system down so
event index might be worth it.
> >
> > VRING_EVENT_F_DISABLE makes more sense to me.
> >
>
> [...]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Jason Wang @ 2018-04-17 2:24 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180417051343-mutt-send-email-mst@kernel.org>
On 2018年04月17日 10:17, Michael S. Tsirkin wrote:
> On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
>>
>> On 2018年04月13日 15:15, Tiwei Bie wrote:
>>> On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
>>>> On 2018年04月01日 22:12, Tiwei Bie wrote:
>>>>> Hello everyone,
>>>>>
>>>>> This RFC implements packed ring support for virtio driver.
>>>>>
>>>>> The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
>>>>> by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
>>>>> Minor changes are needed for the vhost code, e.g. to kick the guest.
>>>>>
>>>>> TODO:
>>>>> - Refinements and bug fixes;
>>>>> - Split into small patches;
>>>>> - Test indirect descriptor support;
>>>>> - Test/fix event suppression support;
>>>>> - Test devices other than net;
>>>>>
>>>>> RFC v1 -> RFC v2:
>>>>> - Add indirect descriptor support - compile test only;
>>>>> - Add event suppression supprt - compile test only;
>>>>> - Move vring_packed_init() out of uapi (Jason, MST);
>>>>> - Merge two loops into one in virtqueue_add_packed() (Jason);
>>>>> - Split vring_unmap_one() for packed ring and split ring (Jason);
>>>>> - Avoid using '%' operator (Jason);
>>>>> - Rename free_head -> next_avail_idx (Jason);
>>>>> - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
>>>>> - Some other refinements and bug fixes;
>>>>>
>>>>> Thanks!
>>>>>
>>>>> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>>>>> ---
>>>>> drivers/virtio/virtio_ring.c | 1094 +++++++++++++++++++++++++++++-------
>>>>> include/linux/virtio_ring.h | 8 +-
>>>>> include/uapi/linux/virtio_config.h | 12 +-
>>>>> include/uapi/linux/virtio_ring.h | 61 ++
>>>>> 4 files changed, 980 insertions(+), 195 deletions(-)
>>> [...]
[...]
>>>> It looks to me we should examine RING_EVENT_FLAGS_DESC in desc_event_flags
>>>> instead of vq->event here. Spec does not forces to use evenf_off and
>>>> event_wrap if event index is enabled.
>>>>
>>>>> + // FIXME: fix this!
>>>>> + needs_kick = ((off_wrap >> 15) == vq->wrap_counter) &&
>>>>> + vring_need_event(off_wrap & ~(1<<15), new, old);
>>>> Why need a & here?
>>> Because wrap_counter (the most significant bit in off_wrap)
>>> isn't part of the index.
>>>
>>>>> + } else {
>>>> Need a smp_rmb() to make sure desc_event_flags was checked before flags.
>>> I don't get your point, if my understanding is correct,
>>> desc_event_flags is vq->vring_packed.device->flags. So
>>> what's the "flags"?
>> Sorry, I mean we need check device.flags before off_warp. So it needs an
>> smp_rmb() in the middle.
> It's best to just read all flags atomically as u32.
Yes it is.
>
>> It looks to me there's no guarantee that
>> VRING_EVENT_F_DESC is set if event index is supported.
>>
>>>>> + needs_kick = (vq->vring_packed.device->flags !=
>>>>> + cpu_to_virtio16(_vq->vdev, VRING_EVENT_F_DISABLE));
>>>>> + }
>>>>> + END_USE(vq);
>>>>> + return needs_kick;
>>>>> +}
>>> [...]
>>>>> +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
>>>>> + void **ctx)
>>>>> +{
>>>>> + struct vring_packed_desc *desc;
>>>>> + unsigned int i, j;
>>>>> +
>>>>> + /* Clear data ptr. */
>>>>> + vq->desc_state[head].data = NULL;
>>>>> +
>>>>> + i = head;
>>>>> +
>>>>> + for (j = 0; j < vq->desc_state[head].num; j++) {
>>>>> + desc = &vq->vring_packed.desc[i];
>>>>> + vring_unmap_one_packed(vq, desc);
>>>>> + desc->flags = 0x0;
>>>> Looks like this is unnecessary.
>>> It's safer to zero it. If we don't zero it, after we
>>> call virtqueue_detach_unused_buf_packed() which calls
>>> this function, the desc is still available to the
>>> device.
>> Well detach_unused_buf_packed() should be called after device is stopped,
>> otherwise even if you try to clear, there will still be a window that device
>> may use it.
>>
>>>>> + i++;
>>>>> + if (i >= vq->vring_packed.num)
>>>>> + i = 0;
>>>>> + }
>>> [...]
>>>>> +static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
>>>>> +{
>>>>> + struct vring_virtqueue *vq = to_vvq(_vq);
>>>>> + u16 last_used_idx, wrap_counter, off_wrap;
>>>>> +
>>>>> + START_USE(vq);
>>>>> +
>>>>> + last_used_idx = vq->last_used_idx;
>>>>> + wrap_counter = vq->wrap_counter;
>>>>> +
>>>>> + if (last_used_idx > vq->next_avail_idx)
>>>>> + wrap_counter ^= 1;
>>>>> +
>>>>> + off_wrap = last_used_idx | (wrap_counter << 15);
>>>>> +
>>>>> + /* We optimistically turn back on interrupts, then check if there was
>>>>> + * more to do. */
>>>>> + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
>>>>> + * either clear the flags bit or point the event index at the next
>>>>> + * entry. Always do both to keep code simple. */
>>>>> + if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
>>>>> + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC:
>>>>> + VRING_EVENT_F_ENABLE;
>>>>> + vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
>>>>> + vq->event_flags_shadow);
>>>>> + }
>>>> A smp_wmb() is missed here?
>>>>
>>>>> + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev, off_wrap);
>>>> And according to the spec, it looks to me write a VRING_EVENT_F_ENABLE is
>>>> sufficient here.
>>> I didn't think much when implementing the event suppression
>>> for packed ring previously. After I saw your comments, I found
>>> something new. Indeed, unlike the split ring, for the packed
>>> ring, spec doesn't say we must use VRING_EVENT_F_DESC when
>>> EVENT_IDX is negotiated. So do you think below thought is
>>> right or makes sense?
>>>
>>> - For virtqueue_enable_cb_prepare(), we just need to enable
>>> the ring by setting flags to VRING_EVENT_F_ENABLE in any
>>> case.
>>>
>>> - We will try to use VRING_EVENT_F_DESC (if EVENT_IDX is
>>> negotiated) only when we want to delay the interrupts
>>> virtqueue_enable_cb_delayed().
>> This looks good to me.
> I suspect this will lead to extra interrupts if host is fast.
> So I think for now we should always use VRING_EVENT_F_DESC
> if EVENT_IDX is negotiated.
Right, so if this is true, maybe we'd better force this in the spec?
Thanks
>
> VRING_EVENT_F_DISABLE makes more sense to me.
>
[...]
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Michael S. Tsirkin @ 2018-04-17 2:17 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, linux-kernel, virtualization, wexu
In-Reply-To: <8dee7d62-ac0b-54ba-6bec-4bc4a6fb34e9@redhat.com>
On Tue, Apr 17, 2018 at 10:11:58AM +0800, Jason Wang wrote:
>
>
> On 2018年04月13日 15:15, Tiwei Bie wrote:
> > On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
> > > On 2018年04月01日 22:12, Tiwei Bie wrote:
> > > > Hello everyone,
> > > >
> > > > This RFC implements packed ring support for virtio driver.
> > > >
> > > > The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
> > > > by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
> > > > Minor changes are needed for the vhost code, e.g. to kick the guest.
> > > >
> > > > TODO:
> > > > - Refinements and bug fixes;
> > > > - Split into small patches;
> > > > - Test indirect descriptor support;
> > > > - Test/fix event suppression support;
> > > > - Test devices other than net;
> > > >
> > > > RFC v1 -> RFC v2:
> > > > - Add indirect descriptor support - compile test only;
> > > > - Add event suppression supprt - compile test only;
> > > > - Move vring_packed_init() out of uapi (Jason, MST);
> > > > - Merge two loops into one in virtqueue_add_packed() (Jason);
> > > > - Split vring_unmap_one() for packed ring and split ring (Jason);
> > > > - Avoid using '%' operator (Jason);
> > > > - Rename free_head -> next_avail_idx (Jason);
> > > > - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
> > > > - Some other refinements and bug fixes;
> > > >
> > > > Thanks!
> > > >
> > > > Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
> > > > ---
> > > > drivers/virtio/virtio_ring.c | 1094 +++++++++++++++++++++++++++++-------
> > > > include/linux/virtio_ring.h | 8 +-
> > > > include/uapi/linux/virtio_config.h | 12 +-
> > > > include/uapi/linux/virtio_ring.h | 61 ++
> > > > 4 files changed, 980 insertions(+), 195 deletions(-)
> > [...]
> > > > +static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
> > > > + unsigned int total_sg,
> > > > + gfp_t gfp)
> > > > +{
> > > > + struct vring_packed_desc *desc;
> > > > +
> > > > + /*
> > > > + * We require lowmem mappings for the descriptors because
> > > > + * otherwise virt_to_phys will give us bogus addresses in the
> > > > + * virtqueue.
> > > > + */
> > > > + gfp &= ~__GFP_HIGHMEM;
> > > > +
> > > > + desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
> > > Can we simply check vq->packed here to avoid duplicating helpers?
> > Then it would be something like this:
> >
> > static void *alloc_indirect(struct virtqueue *_vq, unsigned int total_sg,
> > gfp_t gfp)
> > {
> > struct vring_virtqueue *vq = to_vvq(_vq);
> > void *data;
> >
> > /*
> > * We require lowmem mappings for the descriptors because
> > * otherwise virt_to_phys will give us bogus addresses in the
> > * virtqueue.
> > */
> > gfp &= ~__GFP_HIGHMEM;
> >
> > if (vq->packed) {
> > data = kmalloc(total_sg * sizeof(struct vring_packed_desc),
> > gfp);
> > if (!data)
> > return NULL;
> > } else {
> > struct vring_desc *desc;
> > unsigned int i;
> >
> > desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
> > if (!desc)
> > return NULL;
> >
> > for (i = 0; i < total_sg; i++)
> > desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
> >
> > data = desc;
> > }
> >
> > return data;
> > }
> >
> > I would prefer to have two simpler helpers (and to the callers,
> > it's already very clear about which one they should call), i.e.
> > the current implementation:
> >
> > static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
> > unsigned int total_sg,
> > gfp_t gfp)
> > {
> > struct vring_packed_desc *desc;
> >
> > /*
> > * We require lowmem mappings for the descriptors because
> > * otherwise virt_to_phys will give us bogus addresses in the
> > * virtqueue.
> > */
> > gfp &= ~__GFP_HIGHMEM;
> >
> > desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
> >
> > return desc;
> > }
> >
> > static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
> > unsigned int total_sg,
> > gfp_t gfp)
> > {
> > struct vring_desc *desc;
> > unsigned int i;
> >
> > /*
> > * We require lowmem mappings for the descriptors because
> > * otherwise virt_to_phys will give us bogus addresses in the
> > * virtqueue.
> > */
> > gfp &= ~__GFP_HIGHMEM;
> >
> > desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
> > if (!desc)
> > return NULL;
> >
> > for (i = 0; i < total_sg; i++)
> > desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
> > return desc;
> > }
>
> Yeah, I miss that split version needs a desc list.
>
> >
> > > > +
> > > > + return desc;
> > > > +}
> > [...]
> > > > +static inline int virtqueue_add_packed(struct virtqueue *_vq,
> > > > + struct scatterlist *sgs[],
> > > > + unsigned int total_sg,
> > > > + unsigned int out_sgs,
> > > > + unsigned int in_sgs,
> > > > + void *data,
> > > > + void *ctx,
> > > > + gfp_t gfp)
> > > > +{
> > > > + struct vring_virtqueue *vq = to_vvq(_vq);
> > > > + struct vring_packed_desc *desc;
> > > > + struct scatterlist *sg;
> > > > + unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
> > > > + __virtio16 uninitialized_var(head_flags), flags;
> > > > + int head, wrap_counter;
> > > > + bool indirect;
> > > > +
> > > > + START_USE(vq);
> > > > +
> > > > + BUG_ON(data == NULL);
> > > > + BUG_ON(ctx && vq->indirect);
> > > > +
> > > > + if (unlikely(vq->broken)) {
> > > > + END_USE(vq);
> > > > + return -EIO;
> > > > + }
> > > > +
> > > > +#ifdef DEBUG
> > > > + {
> > > > + ktime_t now = ktime_get();
> > > > +
> > > > + /* No kick or get, with .1 second between? Warn. */
> > > > + if (vq->last_add_time_valid)
> > > > + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> > > > + > 100);
> > > > + vq->last_add_time = now;
> > > > + vq->last_add_time_valid = true;
> > > > + }
> > > > +#endif
> > > > +
> > > > + BUG_ON(total_sg == 0);
> > > > +
> > > > + head = vq->next_avail_idx;
> > > > + wrap_counter = vq->wrap_counter;
> > > > +
> > > > + /* If the host supports indirect descriptor tables, and we have multiple
> > > > + * buffers, then go indirect. FIXME: tune this threshold */
> > > > + if (vq->indirect && total_sg > 1 && vq->vq.num_free)
> > > Let's introduce a helper like virtqueue_need_indirect() to avoid duplicating
> > > codes and FIXME.
> > Okay.
> >
> > > > + desc = alloc_indirect_packed(_vq, total_sg, gfp);
> > > > + else {
> > > > + desc = NULL;
> > > > + WARN_ON_ONCE(total_sg > vq->vring_packed.num && !vq->indirect);
> > > > + }
> > > > +
> > > > + if (desc) {
> > > > + /* Use a single buffer which doesn't continue */
> > > > + indirect = true;
> > > > + /* Set up rest to use this indirect table. */
> > > > + i = 0;
> > > > + descs_used = 1;
> > > > + } else {
> > > > + indirect = false;
> > > > + desc = vq->vring_packed.desc;
> > > > + i = head;
> > > > + descs_used = total_sg;
> > > > + }
> > > > +
> > > > + if (vq->vq.num_free < descs_used) {
> > > > + pr_debug("Can't add buf len %i - avail = %i\n",
> > > > + descs_used, vq->vq.num_free);
> > > > + /* FIXME: for historical reasons, we force a notify here if
> > > > + * there are outgoing parts to the buffer. Presumably the
> > > > + * host should service the ring ASAP. */
> > > > + if (out_sgs)
> > > > + vq->notify(&vq->vq);
> > > > + if (indirect)
> > > > + kfree(desc);
> > > > + END_USE(vq);
> > > > + return -ENOSPC;
> > > > + }
> > > > +
> > > > + for (n = 0; n < out_sgs + in_sgs; n++) {
> > > > + for (sg = sgs[n]; sg; sg = sg_next(sg)) {
> > > > + dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
> > > > + DMA_TO_DEVICE : DMA_FROM_DEVICE);
> > > > + if (vring_mapping_error(vq, addr))
> > > > + goto unmap_release;
> > > > +
> > > > + flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
> > > > + (n < out_sgs ? 0 : VRING_DESC_F_WRITE) |
> > > > + VRING_DESC_F_AVAIL(vq->wrap_counter) |
> > > > + VRING_DESC_F_USED(!vq->wrap_counter));
> > > > + if (!indirect && i == head)
> > > > + head_flags = flags;
> > > > + else
> > > > + desc[i].flags = flags;
> > > > +
> > > > + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
> > > > + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
> > > > + desc[i].id = cpu_to_virtio32(_vq->vdev, head);
> > > Similar to V1, we only need this for the last descriptor.
> > Okay, will just set it for the last desc.
> >
> > > > + prev = i;
> > > It looks to me there's no need to track prev inside the loop here.
> > >
> > > > + i++;
> > > > + if (!indirect && i >= vq->vring_packed.num) {
> > > > + i = 0;
> > > > + vq->wrap_counter ^= 1;
> > > > + }
> > > > + }
> > > > + }
> > > > + /* Last one doesn't continue. */
> > > > + if (total_sg == 1)
> > > > + head_flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
> > > > + else
> > > > + desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
> > > The only case when prev != i - 1 is i == 0, we can add a if here.
> > It's just a mirror of the existing implementation in split ring.
> > It seems that split ring implementation needs this just because
> > it's much harder for it to find the prev, which is not true for
> > packed ring. So I'll take your suggestion. Thanks!
> >
> > [...]
> > > > +static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
> > > > +{
> > > > + struct vring_virtqueue *vq = to_vvq(_vq);
> > > > + u16 new, old, off_wrap;
> > > > + bool needs_kick;
> > > > +
> > > > + START_USE(vq);
> > > > + /* We need to expose the new flags value before checking notification
> > > > + * suppressions. */
> > > > + virtio_mb(vq->weak_barriers);
> > > > +
> > > > + old = vq->next_avail_idx - vq->num_added;
> > > > + new = vq->next_avail_idx;
> > > > + vq->num_added = 0;
> > > > +
> > > > +#ifdef DEBUG
> > > > + if (vq->last_add_time_valid) {
> > > > + WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
> > > > + vq->last_add_time)) > 100);
> > > > + }
> > > > + vq->last_add_time_valid = false;
> > > > +#endif
> > > > +
> > > > + off_wrap = virtio16_to_cpu(_vq->vdev, vq->vring_packed.device->off_wrap);
> > > > +
> > > > + if (vq->event) {
> > > It looks to me we should examine RING_EVENT_FLAGS_DESC in desc_event_flags
> > > instead of vq->event here. Spec does not forces to use evenf_off and
> > > event_wrap if event index is enabled.
> > >
> > > > + // FIXME: fix this!
> > > > + needs_kick = ((off_wrap >> 15) == vq->wrap_counter) &&
> > > > + vring_need_event(off_wrap & ~(1<<15), new, old);
> > > Why need a & here?
> > Because wrap_counter (the most significant bit in off_wrap)
> > isn't part of the index.
> >
> > > > + } else {
> > > Need a smp_rmb() to make sure desc_event_flags was checked before flags.
> > I don't get your point, if my understanding is correct,
> > desc_event_flags is vq->vring_packed.device->flags. So
> > what's the "flags"?
>
> Sorry, I mean we need check device.flags before off_warp. So it needs an
> smp_rmb() in the middle.
It's best to just read all flags atomically as u32.
> It looks to me there's no guarantee that
> VRING_EVENT_F_DESC is set if event index is supported.
>
> >
> > > > + needs_kick = (vq->vring_packed.device->flags !=
> > > > + cpu_to_virtio16(_vq->vdev, VRING_EVENT_F_DISABLE));
> > > > + }
> > > > + END_USE(vq);
> > > > + return needs_kick;
> > > > +}
> > [...]
> > > > +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > + void **ctx)
> > > > +{
> > > > + struct vring_packed_desc *desc;
> > > > + unsigned int i, j;
> > > > +
> > > > + /* Clear data ptr. */
> > > > + vq->desc_state[head].data = NULL;
> > > > +
> > > > + i = head;
> > > > +
> > > > + for (j = 0; j < vq->desc_state[head].num; j++) {
> > > > + desc = &vq->vring_packed.desc[i];
> > > > + vring_unmap_one_packed(vq, desc);
> > > > + desc->flags = 0x0;
> > > Looks like this is unnecessary.
> > It's safer to zero it. If we don't zero it, after we
> > call virtqueue_detach_unused_buf_packed() which calls
> > this function, the desc is still available to the
> > device.
>
> Well detach_unused_buf_packed() should be called after device is stopped,
> otherwise even if you try to clear, there will still be a window that device
> may use it.
>
> >
> > > > + i++;
> > > > + if (i >= vq->vring_packed.num)
> > > > + i = 0;
> > > > + }
> > [...]
> > > > +static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
> > > > +{
> > > > + struct vring_virtqueue *vq = to_vvq(_vq);
> > > > + u16 last_used_idx, wrap_counter, off_wrap;
> > > > +
> > > > + START_USE(vq);
> > > > +
> > > > + last_used_idx = vq->last_used_idx;
> > > > + wrap_counter = vq->wrap_counter;
> > > > +
> > > > + if (last_used_idx > vq->next_avail_idx)
> > > > + wrap_counter ^= 1;
> > > > +
> > > > + off_wrap = last_used_idx | (wrap_counter << 15);
> > > > +
> > > > + /* We optimistically turn back on interrupts, then check if there was
> > > > + * more to do. */
> > > > + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
> > > > + * either clear the flags bit or point the event index at the next
> > > > + * entry. Always do both to keep code simple. */
> > > > + if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
> > > > + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC:
> > > > + VRING_EVENT_F_ENABLE;
> > > > + vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
> > > > + vq->event_flags_shadow);
> > > > + }
> > > A smp_wmb() is missed here?
> > >
> > > > + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev, off_wrap);
> > > And according to the spec, it looks to me write a VRING_EVENT_F_ENABLE is
> > > sufficient here.
> > I didn't think much when implementing the event suppression
> > for packed ring previously. After I saw your comments, I found
> > something new. Indeed, unlike the split ring, for the packed
> > ring, spec doesn't say we must use VRING_EVENT_F_DESC when
> > EVENT_IDX is negotiated. So do you think below thought is
> > right or makes sense?
> >
> > - For virtqueue_enable_cb_prepare(), we just need to enable
> > the ring by setting flags to VRING_EVENT_F_ENABLE in any
> > case.
> >
> > - We will try to use VRING_EVENT_F_DESC (if EVENT_IDX is
> > negotiated) only when we want to delay the interrupts
> > virtqueue_enable_cb_delayed().
>
> This looks good to me.
I suspect this will lead to extra interrupts if host is fast.
So I think for now we should always use VRING_EVENT_F_DESC
if EVENT_IDX is negotiated.
VRING_EVENT_F_DISABLE makes more sense to me.
> >
> > > > + END_USE(vq);
> > > > + return last_used_idx;
> > > > +}
> > > > +
> > [...]
> > > > @@ -1157,14 +1852,18 @@ void vring_transport_features(struct virtio_device *vdev)
> > > > for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
> > > > switch (i) {
> > > > - case VIRTIO_RING_F_INDIRECT_DESC:
> > > > +#if 0
> > > > + case VIRTIO_RING_F_INDIRECT_DESC: // FIXME not tested yet.
> > > > break;
> > > > - case VIRTIO_RING_F_EVENT_IDX:
> > > > + case VIRTIO_RING_F_EVENT_IDX: // FIXME probably not work.
> > > > break;
> > > > +#endif
> > > It would be better if you can split EVENT_IDX and INDIRECT_DESC into
> > > separate patches too.
> > Sure. Will do it in the next version.
> >
> > Thanks for the review!
>
> Thanks.
>
> > > Thanks
> > >
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [RFC v2] virtio: support packed ring
From: Jason Wang @ 2018-04-17 2:11 UTC (permalink / raw)
To: Tiwei Bie; +Cc: mst, netdev, linux-kernel, virtualization, wexu
In-Reply-To: <20180413071529.f4esh654dakodf4f@debian>
On 2018年04月13日 15:15, Tiwei Bie wrote:
> On Fri, Apr 13, 2018 at 12:30:24PM +0800, Jason Wang wrote:
>> On 2018年04月01日 22:12, Tiwei Bie wrote:
>>> Hello everyone,
>>>
>>> This RFC implements packed ring support for virtio driver.
>>>
>>> The code was tested with DPDK vhost (testpmd/vhost-PMD) implemented
>>> by Jens at http://dpdk.org/ml/archives/dev/2018-January/089417.html
>>> Minor changes are needed for the vhost code, e.g. to kick the guest.
>>>
>>> TODO:
>>> - Refinements and bug fixes;
>>> - Split into small patches;
>>> - Test indirect descriptor support;
>>> - Test/fix event suppression support;
>>> - Test devices other than net;
>>>
>>> RFC v1 -> RFC v2:
>>> - Add indirect descriptor support - compile test only;
>>> - Add event suppression supprt - compile test only;
>>> - Move vring_packed_init() out of uapi (Jason, MST);
>>> - Merge two loops into one in virtqueue_add_packed() (Jason);
>>> - Split vring_unmap_one() for packed ring and split ring (Jason);
>>> - Avoid using '%' operator (Jason);
>>> - Rename free_head -> next_avail_idx (Jason);
>>> - Add comments for virtio_wmb() in virtqueue_add_packed() (Jason);
>>> - Some other refinements and bug fixes;
>>>
>>> Thanks!
>>>
>>> Signed-off-by: Tiwei Bie <tiwei.bie@intel.com>
>>> ---
>>> drivers/virtio/virtio_ring.c | 1094 +++++++++++++++++++++++++++++-------
>>> include/linux/virtio_ring.h | 8 +-
>>> include/uapi/linux/virtio_config.h | 12 +-
>>> include/uapi/linux/virtio_ring.h | 61 ++
>>> 4 files changed, 980 insertions(+), 195 deletions(-)
> [...]
>>> +static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
>>> + unsigned int total_sg,
>>> + gfp_t gfp)
>>> +{
>>> + struct vring_packed_desc *desc;
>>> +
>>> + /*
>>> + * We require lowmem mappings for the descriptors because
>>> + * otherwise virt_to_phys will give us bogus addresses in the
>>> + * virtqueue.
>>> + */
>>> + gfp &= ~__GFP_HIGHMEM;
>>> +
>>> + desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
>> Can we simply check vq->packed here to avoid duplicating helpers?
> Then it would be something like this:
>
> static void *alloc_indirect(struct virtqueue *_vq, unsigned int total_sg,
> gfp_t gfp)
> {
> struct vring_virtqueue *vq = to_vvq(_vq);
> void *data;
>
> /*
> * We require lowmem mappings for the descriptors because
> * otherwise virt_to_phys will give us bogus addresses in the
> * virtqueue.
> */
> gfp &= ~__GFP_HIGHMEM;
>
> if (vq->packed) {
> data = kmalloc(total_sg * sizeof(struct vring_packed_desc),
> gfp);
> if (!data)
> return NULL;
> } else {
> struct vring_desc *desc;
> unsigned int i;
>
> desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
> if (!desc)
> return NULL;
>
> for (i = 0; i < total_sg; i++)
> desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
>
> data = desc;
> }
>
> return data;
> }
>
> I would prefer to have two simpler helpers (and to the callers,
> it's already very clear about which one they should call), i.e.
> the current implementation:
>
> static struct vring_packed_desc *alloc_indirect_packed(struct virtqueue *_vq,
> unsigned int total_sg,
> gfp_t gfp)
> {
> struct vring_packed_desc *desc;
>
> /*
> * We require lowmem mappings for the descriptors because
> * otherwise virt_to_phys will give us bogus addresses in the
> * virtqueue.
> */
> gfp &= ~__GFP_HIGHMEM;
>
> desc = kmalloc(total_sg * sizeof(struct vring_packed_desc), gfp);
>
> return desc;
> }
>
> static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
> unsigned int total_sg,
> gfp_t gfp)
> {
> struct vring_desc *desc;
> unsigned int i;
>
> /*
> * We require lowmem mappings for the descriptors because
> * otherwise virt_to_phys will give us bogus addresses in the
> * virtqueue.
> */
> gfp &= ~__GFP_HIGHMEM;
>
> desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
> if (!desc)
> return NULL;
>
> for (i = 0; i < total_sg; i++)
> desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
> return desc;
> }
Yeah, I miss that split version needs a desc list.
>
>>> +
>>> + return desc;
>>> +}
> [...]
>>> +static inline int virtqueue_add_packed(struct virtqueue *_vq,
>>> + struct scatterlist *sgs[],
>>> + unsigned int total_sg,
>>> + unsigned int out_sgs,
>>> + unsigned int in_sgs,
>>> + void *data,
>>> + void *ctx,
>>> + gfp_t gfp)
>>> +{
>>> + struct vring_virtqueue *vq = to_vvq(_vq);
>>> + struct vring_packed_desc *desc;
>>> + struct scatterlist *sg;
>>> + unsigned int i, n, descs_used, uninitialized_var(prev), err_idx;
>>> + __virtio16 uninitialized_var(head_flags), flags;
>>> + int head, wrap_counter;
>>> + bool indirect;
>>> +
>>> + START_USE(vq);
>>> +
>>> + BUG_ON(data == NULL);
>>> + BUG_ON(ctx && vq->indirect);
>>> +
>>> + if (unlikely(vq->broken)) {
>>> + END_USE(vq);
>>> + return -EIO;
>>> + }
>>> +
>>> +#ifdef DEBUG
>>> + {
>>> + ktime_t now = ktime_get();
>>> +
>>> + /* No kick or get, with .1 second between? Warn. */
>>> + if (vq->last_add_time_valid)
>>> + WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
>>> + > 100);
>>> + vq->last_add_time = now;
>>> + vq->last_add_time_valid = true;
>>> + }
>>> +#endif
>>> +
>>> + BUG_ON(total_sg == 0);
>>> +
>>> + head = vq->next_avail_idx;
>>> + wrap_counter = vq->wrap_counter;
>>> +
>>> + /* If the host supports indirect descriptor tables, and we have multiple
>>> + * buffers, then go indirect. FIXME: tune this threshold */
>>> + if (vq->indirect && total_sg > 1 && vq->vq.num_free)
>> Let's introduce a helper like virtqueue_need_indirect() to avoid duplicating
>> codes and FIXME.
> Okay.
>
>>> + desc = alloc_indirect_packed(_vq, total_sg, gfp);
>>> + else {
>>> + desc = NULL;
>>> + WARN_ON_ONCE(total_sg > vq->vring_packed.num && !vq->indirect);
>>> + }
>>> +
>>> + if (desc) {
>>> + /* Use a single buffer which doesn't continue */
>>> + indirect = true;
>>> + /* Set up rest to use this indirect table. */
>>> + i = 0;
>>> + descs_used = 1;
>>> + } else {
>>> + indirect = false;
>>> + desc = vq->vring_packed.desc;
>>> + i = head;
>>> + descs_used = total_sg;
>>> + }
>>> +
>>> + if (vq->vq.num_free < descs_used) {
>>> + pr_debug("Can't add buf len %i - avail = %i\n",
>>> + descs_used, vq->vq.num_free);
>>> + /* FIXME: for historical reasons, we force a notify here if
>>> + * there are outgoing parts to the buffer. Presumably the
>>> + * host should service the ring ASAP. */
>>> + if (out_sgs)
>>> + vq->notify(&vq->vq);
>>> + if (indirect)
>>> + kfree(desc);
>>> + END_USE(vq);
>>> + return -ENOSPC;
>>> + }
>>> +
>>> + for (n = 0; n < out_sgs + in_sgs; n++) {
>>> + for (sg = sgs[n]; sg; sg = sg_next(sg)) {
>>> + dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
>>> + DMA_TO_DEVICE : DMA_FROM_DEVICE);
>>> + if (vring_mapping_error(vq, addr))
>>> + goto unmap_release;
>>> +
>>> + flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT |
>>> + (n < out_sgs ? 0 : VRING_DESC_F_WRITE) |
>>> + VRING_DESC_F_AVAIL(vq->wrap_counter) |
>>> + VRING_DESC_F_USED(!vq->wrap_counter));
>>> + if (!indirect && i == head)
>>> + head_flags = flags;
>>> + else
>>> + desc[i].flags = flags;
>>> +
>>> + desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
>>> + desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
>>> + desc[i].id = cpu_to_virtio32(_vq->vdev, head);
>> Similar to V1, we only need this for the last descriptor.
> Okay, will just set it for the last desc.
>
>>> + prev = i;
>> It looks to me there's no need to track prev inside the loop here.
>>
>>> + i++;
>>> + if (!indirect && i >= vq->vring_packed.num) {
>>> + i = 0;
>>> + vq->wrap_counter ^= 1;
>>> + }
>>> + }
>>> + }
>>> + /* Last one doesn't continue. */
>>> + if (total_sg == 1)
>>> + head_flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
>>> + else
>>> + desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
>> The only case when prev != i - 1 is i == 0, we can add a if here.
> It's just a mirror of the existing implementation in split ring.
> It seems that split ring implementation needs this just because
> it's much harder for it to find the prev, which is not true for
> packed ring. So I'll take your suggestion. Thanks!
>
> [...]
>>> +static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
>>> +{
>>> + struct vring_virtqueue *vq = to_vvq(_vq);
>>> + u16 new, old, off_wrap;
>>> + bool needs_kick;
>>> +
>>> + START_USE(vq);
>>> + /* We need to expose the new flags value before checking notification
>>> + * suppressions. */
>>> + virtio_mb(vq->weak_barriers);
>>> +
>>> + old = vq->next_avail_idx - vq->num_added;
>>> + new = vq->next_avail_idx;
>>> + vq->num_added = 0;
>>> +
>>> +#ifdef DEBUG
>>> + if (vq->last_add_time_valid) {
>>> + WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
>>> + vq->last_add_time)) > 100);
>>> + }
>>> + vq->last_add_time_valid = false;
>>> +#endif
>>> +
>>> + off_wrap = virtio16_to_cpu(_vq->vdev, vq->vring_packed.device->off_wrap);
>>> +
>>> + if (vq->event) {
>> It looks to me we should examine RING_EVENT_FLAGS_DESC in desc_event_flags
>> instead of vq->event here. Spec does not forces to use evenf_off and
>> event_wrap if event index is enabled.
>>
>>> + // FIXME: fix this!
>>> + needs_kick = ((off_wrap >> 15) == vq->wrap_counter) &&
>>> + vring_need_event(off_wrap & ~(1<<15), new, old);
>> Why need a & here?
> Because wrap_counter (the most significant bit in off_wrap)
> isn't part of the index.
>
>>> + } else {
>> Need a smp_rmb() to make sure desc_event_flags was checked before flags.
> I don't get your point, if my understanding is correct,
> desc_event_flags is vq->vring_packed.device->flags. So
> what's the "flags"?
Sorry, I mean we need check device.flags before off_warp. So it needs an
smp_rmb() in the middle. It looks to me there's no guarantee that
VRING_EVENT_F_DESC is set if event index is supported.
>
>>> + needs_kick = (vq->vring_packed.device->flags !=
>>> + cpu_to_virtio16(_vq->vdev, VRING_EVENT_F_DISABLE));
>>> + }
>>> + END_USE(vq);
>>> + return needs_kick;
>>> +}
> [...]
>>> +static int detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
>>> + void **ctx)
>>> +{
>>> + struct vring_packed_desc *desc;
>>> + unsigned int i, j;
>>> +
>>> + /* Clear data ptr. */
>>> + vq->desc_state[head].data = NULL;
>>> +
>>> + i = head;
>>> +
>>> + for (j = 0; j < vq->desc_state[head].num; j++) {
>>> + desc = &vq->vring_packed.desc[i];
>>> + vring_unmap_one_packed(vq, desc);
>>> + desc->flags = 0x0;
>> Looks like this is unnecessary.
> It's safer to zero it. If we don't zero it, after we
> call virtqueue_detach_unused_buf_packed() which calls
> this function, the desc is still available to the
> device.
Well detach_unused_buf_packed() should be called after device is
stopped, otherwise even if you try to clear, there will still be a
window that device may use it.
>
>>> + i++;
>>> + if (i >= vq->vring_packed.num)
>>> + i = 0;
>>> + }
> [...]
>>> +static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
>>> +{
>>> + struct vring_virtqueue *vq = to_vvq(_vq);
>>> + u16 last_used_idx, wrap_counter, off_wrap;
>>> +
>>> + START_USE(vq);
>>> +
>>> + last_used_idx = vq->last_used_idx;
>>> + wrap_counter = vq->wrap_counter;
>>> +
>>> + if (last_used_idx > vq->next_avail_idx)
>>> + wrap_counter ^= 1;
>>> +
>>> + off_wrap = last_used_idx | (wrap_counter << 15);
>>> +
>>> + /* We optimistically turn back on interrupts, then check if there was
>>> + * more to do. */
>>> + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
>>> + * either clear the flags bit or point the event index at the next
>>> + * entry. Always do both to keep code simple. */
>>> + if (vq->event_flags_shadow == VRING_EVENT_F_DISABLE) {
>>> + vq->event_flags_shadow = vq->event ? VRING_EVENT_F_DESC:
>>> + VRING_EVENT_F_ENABLE;
>>> + vq->vring_packed.driver->flags = cpu_to_virtio16(_vq->vdev,
>>> + vq->event_flags_shadow);
>>> + }
>> A smp_wmb() is missed here?
>>
>>> + vq->vring_packed.driver->off_wrap = cpu_to_virtio16(_vq->vdev, off_wrap);
>> And according to the spec, it looks to me write a VRING_EVENT_F_ENABLE is
>> sufficient here.
> I didn't think much when implementing the event suppression
> for packed ring previously. After I saw your comments, I found
> something new. Indeed, unlike the split ring, for the packed
> ring, spec doesn't say we must use VRING_EVENT_F_DESC when
> EVENT_IDX is negotiated. So do you think below thought is
> right or makes sense?
>
> - For virtqueue_enable_cb_prepare(), we just need to enable
> the ring by setting flags to VRING_EVENT_F_ENABLE in any
> case.
>
> - We will try to use VRING_EVENT_F_DESC (if EVENT_IDX is
> negotiated) only when we want to delay the interrupts
> virtqueue_enable_cb_delayed().
This looks good to me.
>
>>> + END_USE(vq);
>>> + return last_used_idx;
>>> +}
>>> +
> [...]
>>> @@ -1157,14 +1852,18 @@ void vring_transport_features(struct virtio_device *vdev)
>>> for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
>>> switch (i) {
>>> - case VIRTIO_RING_F_INDIRECT_DESC:
>>> +#if 0
>>> + case VIRTIO_RING_F_INDIRECT_DESC: // FIXME not tested yet.
>>> break;
>>> - case VIRTIO_RING_F_EVENT_IDX:
>>> + case VIRTIO_RING_F_EVENT_IDX: // FIXME probably not work.
>>> break;
>>> +#endif
>> It would be better if you can split EVENT_IDX and INDIRECT_DESC into
>> separate patches too.
> Sure. Will do it in the next version.
>
> Thanks for the review!
Thanks.
>> Thanks
>>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH net-next 5/5] macvlan/macvtap: Add support for SCTP checksum offload.
From: Michael S. Tsirkin @ 2018-04-16 17:14 UTC (permalink / raw)
To: Vladislav Yasevich; +Cc: nhorman, netdev, virtualization, linux-sctp
In-Reply-To: <20180402134006.10111-6-vyasevic@redhat.com>
On Mon, Apr 02, 2018 at 09:40:06AM -0400, Vladislav Yasevich wrote:
> Since we now have support for software CRC32c offload, turn it on
> for macvlan and macvtap devices so that guests can take advantage
> of offload SCTP checksums to the host or host hardware.
>
> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> ---
> drivers/net/macvlan.c | 5 +++--
> drivers/net/tap.c | 8 +++++---
> include/uapi/linux/if_tun.h | 1 +
> 3 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> index 725f4b4..646b730 100644
> --- a/drivers/net/macvlan.c
> +++ b/drivers/net/macvlan.c
> @@ -834,7 +834,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
>
> #define ALWAYS_ON_OFFLOADS \
> (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
> - NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
> + NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL | NETIF_F_SCTP_CRC)
>
> #define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
>
> @@ -842,7 +842,8 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
> (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
> NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \
> NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
> - NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
> + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER | \
> + NETIF_F_SCTP_CRC)
>
> #define MACVLAN_STATE_MASK \
> ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index 9b6cb78..2c8512b 100644
> --- a/drivers/net/tap.c
> +++ b/drivers/net/tap.c
> @@ -369,8 +369,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
> * check, we either support them all or none.
> */
> if (skb->ip_summed == CHECKSUM_PARTIAL &&
> - !(features & NETIF_F_CSUM_MASK) &&
> - skb_checksum_help(skb))
> + skb_csum_hwoffload_help(skb, features))
> goto drop;
> if (ptr_ring_produce(&q->ring, skb))
> goto drop;
> @@ -945,6 +944,9 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
> }
> }
>
> + if (arg & TUN_F_SCTP_CSUM)
> + feature_mask |= NETIF_F_SCTP_CRC;
> +
> /* tun/tap driver inverts the usage for TSO offloads, where
> * setting the TSO bit means that the userspace wants to
> * accept TSO frames and turning it off means that user space
Does not the above comment apply to the new flag too? It seems that
it's value should be inverted for macvtap, and isn't, here.
> @@ -1077,7 +1079,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
> case TUNSETOFFLOAD:
> /* let the user check for future flags */
> if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
> - TUN_F_TSO_ECN | TUN_F_UFO))
> + TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_SCTP_CSUM))
> return -EINVAL;
>
> rtnl_lock();
> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
> index ee432cd..c3bb282 100644
> --- a/include/uapi/linux/if_tun.h
> +++ b/include/uapi/linux/if_tun.h
> @@ -86,6 +86,7 @@
> #define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */
> #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */
> #define TUN_F_UFO 0x10 /* I can handle UFO packets */
> +#define TUN_F_SCTP_CSUM 0x20 /* I can handle SCTP checksum offload */
>
> /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
> #define TUN_PKT_STRIP 0x0001
Doesn't this belong in the previous patch?
> 2.9.5
^ permalink raw reply
* Re: [PATCH net-next 4/5] tun: Add support for SCTP checksum offload
From: Michael S. Tsirkin @ 2018-04-16 17:12 UTC (permalink / raw)
To: Vladislav Yasevich; +Cc: nhorman, netdev, virtualization, linux-sctp
In-Reply-To: <20180402134006.10111-5-vyasevic@redhat.com>
On Mon, Apr 02, 2018 at 09:40:05AM -0400, Vladislav Yasevich wrote:
> Adds a new tun offload flag to allow for SCTP checksum offload.
> The flag has to be set by the user and defaults to "no offload".
>
> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
When would user set this flag? Wouldn't that be when
userspace is ready to get SCTP packets without a checksum?
Seems to be this is an indication that when userspace
is qemu running a guest, said guest needs to communicate
the new ability to qemu.
> ---
> drivers/net/tun.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index a1ba262..263bcbe 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -2719,6 +2719,11 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
> arg &= ~TUN_F_UFO;
> }
>
> + if (arg & TUN_F_SCTP_CSUM) {
> + features |= NETIF_F_SCTP_CRC;
> + arg &= ~TUN_F_SCTP_CSUM;
> + }
> +
> /* This gives the user a way to test for new features in future by
> * trying to set them. */
> if (arg)
> --
> 2.9.5
^ permalink raw reply
* Re: [PATCH net-next 1/5] virtio: Add support for SCTP checksum offloading
From: Michael S. Tsirkin @ 2018-04-16 17:09 UTC (permalink / raw)
To: Vlad Yasevich
Cc: nhorman, netdev, virtualization, linux-sctp, Vladislav Yasevich
In-Reply-To: <f5655ab8-f24f-a5a5-d000-2bbb90ecc552@redhat.com>
On Mon, Apr 16, 2018 at 09:45:48AM -0400, Vlad Yasevich wrote:
> On 04/11/2018 06:49 PM, Michael S. Tsirkin wrote:
> > On Mon, Apr 02, 2018 at 09:40:02AM -0400, Vladislav Yasevich wrote:
> >> To support SCTP checksum offloading, we need to add a new feature
> >> to virtio_net, so we can negotiate support between the hypervisor
> >> and the guest.
> >>
> >> The signalling to the guest that an alternate checksum needs to
> >> be used is done via a new flag in the virtio_net_hdr. If the
> >> flag is set, the host will know to perform an alternate checksum
> >> calculation, which right now is only CRC32c.
> >>
> >> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> >> ---
> >> drivers/net/virtio_net.c | 11 ++++++++---
> >> include/linux/virtio_net.h | 6 ++++++
> >> include/uapi/linux/virtio_net.h | 2 ++
> >> 3 files changed, 16 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> index 7b187ec..b601294 100644
> >> --- a/drivers/net/virtio_net.c
> >> +++ b/drivers/net/virtio_net.c
> >> @@ -2724,9 +2724,14 @@ static int virtnet_probe(struct virtio_device *vdev)
> >> /* Do we support "hardware" checksums? */
> >> if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> >> /* This opens up the world of extra features. */
> >> - dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> >> + netdev_features_t sctp = 0;
> >> +
> >> + if (virtio_has_feature(vdev, VIRTIO_NET_F_SCTP_CSUM))
> >> + sctp |= NETIF_F_SCTP_CRC;
> >> +
> >> + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
> >> if (csum)
> >> - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> >> + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
> >>
> >> if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
> >> dev->hw_features |= NETIF_F_TSO
> >> @@ -2952,7 +2957,7 @@ static struct virtio_device_id id_table[] = {
> >> };
> >>
> >> #define VIRTNET_FEATURES \
> >> - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
> >> + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_SCTP_CSUM, \
> >> VIRTIO_NET_F_MAC, \
> >> VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
> >> VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
> >> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> >> index f144216..2e7a64a 100644
> >> --- a/include/linux/virtio_net.h
> >> +++ b/include/linux/virtio_net.h
> >> @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> >>
> >> if (!skb_partial_csum_set(skb, start, off))
> >> return -EINVAL;
> >> +
> >> + if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET)
> >> + skb->csum_not_inet = 1;
> >> }
> >>
> >> if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> >> @@ -96,6 +99,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
> >> hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
> >> } /* else everything is zero */
> >>
> >> + if (skb->csum_not_inet)
> >> + hdr->flags &= VIRTIO_NET_HDR_F_CSUM_NOT_INET;
> >> +
> >> return 0;
> >> }
> >>
> >> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> >> index 5de6ed3..3f279c8 100644
> >> --- a/include/uapi/linux/virtio_net.h
> >> +++ b/include/uapi/linux/virtio_net.h
> >> @@ -36,6 +36,7 @@
> >> #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
> >> #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
> >> #define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */
> >> +#define VIRTIO_NET_F_SCTP_CSUM 4 /* SCTP checksum offload support */
> >> #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
> >> #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
> >> #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
> >
> > Is this a guest or a host checksum? We should differenciate between the
> > two.
>
> I suppose this is HOST checksum, since it behaves like VIRTIO_NET_F_CSUM only for
> SCTP. I couldn't find the use for the GUEST side flag, since there technically
> isn't any validations and there is no additional mappings from VIRTIO flag to a
> NETIF flag.
>
> If the feature is negotiated, the guest ends up generating partially check-summed
> packets, and the host turns on appropriate flags on it's side. The host will
> also make sure the checksum if fixed up if the guest doesn't support it.
> So 1 flag is currently all that is needed.
>
> -vlad
I see code handling VIRTIO_NET_HDR_F_CSUM_NOT_INET on RX side. Host
needs to know whether it's ok/worth it to set this flag, too.
> >
> >
> >> @@ -101,6 +102,7 @@ struct virtio_net_config {
> >> struct virtio_net_hdr_v1 {
> >> #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
> >> #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
> >> +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET 4 /* Checksum is not inet */
> >> __u8 flags;
> >> #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
> >> #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
> >> --
> >> 2.9.5
^ permalink raw reply
* Re: [PATCH net-next 1/5] virtio: Add support for SCTP checksum offloading
From: Michael S. Tsirkin @ 2018-04-16 17:07 UTC (permalink / raw)
To: Vladislav Yasevich; +Cc: nhorman, netdev, virtualization, linux-sctp
In-Reply-To: <20180402134006.10111-2-vyasevic@redhat.com>
On Mon, Apr 02, 2018 at 09:40:02AM -0400, Vladislav Yasevich wrote:
> To support SCTP checksum offloading, we need to add a new feature
> to virtio_net, so we can negotiate support between the hypervisor
> and the guest.
>
> The signalling to the guest that an alternate checksum needs to
> be used is done via a new flag in the virtio_net_hdr. If the
> flag is set, the host will know to perform an alternate checksum
> calculation, which right now is only CRC32c.
>
> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> ---
> drivers/net/virtio_net.c | 11 ++++++++---
> include/linux/virtio_net.h | 6 ++++++
> include/uapi/linux/virtio_net.h | 2 ++
> 3 files changed, 16 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7b187ec..b601294 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2724,9 +2724,14 @@ static int virtnet_probe(struct virtio_device *vdev)
> /* Do we support "hardware" checksums? */
> if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> /* This opens up the world of extra features. */
> - dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> + netdev_features_t sctp = 0;
> +
> + if (virtio_has_feature(vdev, VIRTIO_NET_F_SCTP_CSUM))
> + sctp |= NETIF_F_SCTP_CRC;
> +
> + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
> if (csum)
> - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
>
> if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
> dev->hw_features |= NETIF_F_TSO
> @@ -2952,7 +2957,7 @@ static struct virtio_device_id id_table[] = {
> };
>
> #define VIRTNET_FEATURES \
> - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
> + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_SCTP_CSUM, \
> VIRTIO_NET_F_MAC, \
> VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
> VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> index f144216..2e7a64a 100644
> --- a/include/linux/virtio_net.h
> +++ b/include/linux/virtio_net.h
> @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
>
> if (!skb_partial_csum_set(skb, start, off))
> return -EINVAL;
> +
> + if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET)
> + skb->csum_not_inet = 1;
> }
>
> if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
Looks like we need a feature flag to tell host guest is able to handle this flag
in incoming packets ...
> @@ -96,6 +99,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
> hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
> } /* else everything is zero */
Is comment above still correct?
>
> + if (skb->csum_not_inet)
> + hdr->flags &= VIRTIO_NET_HDR_F_CSUM_NOT_INET;
> +
... and a separate flag to tell guest it's ok to set this flag in
outgoing packets.
Also - this chunk looks like a hack fixing up a value we
have set incorrectly.
Specifically this clears all flags except VIRTIO_NET_HDR_F_CSUM_NOT_INET.
Why do this at all? Do we really want to clear e.g. NEEDS_CSUM? I think we do not
since csum_start, csum_offset are set.
Also - what will ever set this flag in the header?
> return 0;
> }
>
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index 5de6ed3..3f279c8 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -36,6 +36,7 @@
> #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
> #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
> #define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */
> +#define VIRTIO_NET_F_SCTP_CSUM 4 /* SCTP checksum offload support */
> #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
> #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
> #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
> @@ -101,6 +102,7 @@ struct virtio_net_config {
> struct virtio_net_hdr_v1 {
> #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
> #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
> +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET 4 /* Checksum is not inet */
Not very informative. Instead, please specify what is it actually.
> __u8 flags;
> #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
> #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
> --
> 2.9.5
^ permalink raw reply
* Re: [PATCH net-next 1/5] virtio: Add support for SCTP checksum offloading
From: Michael S. Tsirkin @ 2018-04-16 15:52 UTC (permalink / raw)
To: Vlad Yasevich
Cc: nhorman, netdev, virtualization, linux-sctp, Vladislav Yasevich
In-Reply-To: <f5655ab8-f24f-a5a5-d000-2bbb90ecc552@redhat.com>
On Mon, Apr 16, 2018 at 09:45:48AM -0400, Vlad Yasevich wrote:
> On 04/11/2018 06:49 PM, Michael S. Tsirkin wrote:
> > On Mon, Apr 02, 2018 at 09:40:02AM -0400, Vladislav Yasevich wrote:
> >> To support SCTP checksum offloading, we need to add a new feature
> >> to virtio_net, so we can negotiate support between the hypervisor
> >> and the guest.
> >>
> >> The signalling to the guest that an alternate checksum needs to
> >> be used is done via a new flag in the virtio_net_hdr. If the
> >> flag is set, the host will know to perform an alternate checksum
> >> calculation, which right now is only CRC32c.
> >>
> >> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
> >> ---
> >> drivers/net/virtio_net.c | 11 ++++++++---
> >> include/linux/virtio_net.h | 6 ++++++
> >> include/uapi/linux/virtio_net.h | 2 ++
Could you pls include a virtio TC mailing list on uapi changes?
> >> 3 files changed, 16 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> index 7b187ec..b601294 100644
> >> --- a/drivers/net/virtio_net.c
> >> +++ b/drivers/net/virtio_net.c
> >> @@ -2724,9 +2724,14 @@ static int virtnet_probe(struct virtio_device *vdev)
> >> /* Do we support "hardware" checksums? */
> >> if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
> >> /* This opens up the world of extra features. */
> >> - dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> >> + netdev_features_t sctp = 0;
> >> +
> >> + if (virtio_has_feature(vdev, VIRTIO_NET_F_SCTP_CSUM))
> >> + sctp |= NETIF_F_SCTP_CRC;
> >> +
> >> + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
> >> if (csum)
> >> - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
> >> + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
> >>
> >> if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
> >> dev->hw_features |= NETIF_F_TSO
> >> @@ -2952,7 +2957,7 @@ static struct virtio_device_id id_table[] = {
> >> };
> >>
> >> #define VIRTNET_FEATURES \
> >> - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
> >> + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_SCTP_CSUM, \
> >> VIRTIO_NET_F_MAC, \
> >> VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
> >> VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
> >> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> >> index f144216..2e7a64a 100644
> >> --- a/include/linux/virtio_net.h
> >> +++ b/include/linux/virtio_net.h
> >> @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> >>
> >> if (!skb_partial_csum_set(skb, start, off))
> >> return -EINVAL;
> >> +
> >> + if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET)
> >> + skb->csum_not_inet = 1;
> >> }
> >>
> >> if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> >> @@ -96,6 +99,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
> >> hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
> >> } /* else everything is zero */
> >>
> >> + if (skb->csum_not_inet)
> >> + hdr->flags &= VIRTIO_NET_HDR_F_CSUM_NOT_INET;
> >> +
> >> return 0;
> >> }
> >>
> >> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> >> index 5de6ed3..3f279c8 100644
> >> --- a/include/uapi/linux/virtio_net.h
> >> +++ b/include/uapi/linux/virtio_net.h
> >> @@ -36,6 +36,7 @@
> >> #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
> >> #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
> >> #define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */
> >> +#define VIRTIO_NET_F_SCTP_CSUM 4 /* SCTP checksum offload support */
> >> #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
> >> #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
> >> #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
> >
> > Is this a guest or a host checksum? We should differenciate between the
> > two.
>
> I suppose this is HOST checksum, since it behaves like VIRTIO_NET_F_CSUM only for
> SCTP. I couldn't find the use for the GUEST side flag, since there technically
> isn't any validations and there is no additional mappings from VIRTIO flag to a
> NETIF flag.
>
> If the feature is negotiated, the guest ends up generating partially check-summed
> packets, and the host turns on appropriate flags on it's side. The host will
> also make sure the checksum if fixed up if the guest doesn't support it.
> So 1 flag is currently all that is needed.
>
> -vlad
Fine, but let's include HOST in there to make things clear.
Also I think we should use a high flag, like 62.
> >
> >
> >> @@ -101,6 +102,7 @@ struct virtio_net_config {
> >> struct virtio_net_hdr_v1 {
> >> #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
> >> #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
> >> +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET 4 /* Checksum is not inet */
> >> __u8 flags;
> >> #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
> >> #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
> >> --
> >> 2.9.5
^ permalink raw reply
* Re: [PATCH net-next 1/5] virtio: Add support for SCTP checksum offloading
From: Vlad Yasevich @ 2018-04-16 13:45 UTC (permalink / raw)
To: Michael S. Tsirkin, Vladislav Yasevich
Cc: netdev, linux-sctp, nhorman, virtualization
In-Reply-To: <20180412014548-mutt-send-email-mst@kernel.org>
On 04/11/2018 06:49 PM, Michael S. Tsirkin wrote:
> On Mon, Apr 02, 2018 at 09:40:02AM -0400, Vladislav Yasevich wrote:
>> To support SCTP checksum offloading, we need to add a new feature
>> to virtio_net, so we can negotiate support between the hypervisor
>> and the guest.
>>
>> The signalling to the guest that an alternate checksum needs to
>> be used is done via a new flag in the virtio_net_hdr. If the
>> flag is set, the host will know to perform an alternate checksum
>> calculation, which right now is only CRC32c.
>>
>> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
>> ---
>> drivers/net/virtio_net.c | 11 ++++++++---
>> include/linux/virtio_net.h | 6 ++++++
>> include/uapi/linux/virtio_net.h | 2 ++
>> 3 files changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 7b187ec..b601294 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -2724,9 +2724,14 @@ static int virtnet_probe(struct virtio_device *vdev)
>> /* Do we support "hardware" checksums? */
>> if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
>> /* This opens up the world of extra features. */
>> - dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
>> + netdev_features_t sctp = 0;
>> +
>> + if (virtio_has_feature(vdev, VIRTIO_NET_F_SCTP_CSUM))
>> + sctp |= NETIF_F_SCTP_CRC;
>> +
>> + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
>> if (csum)
>> - dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
>> + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG | sctp;
>>
>> if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
>> dev->hw_features |= NETIF_F_TSO
>> @@ -2952,7 +2957,7 @@ static struct virtio_device_id id_table[] = {
>> };
>>
>> #define VIRTNET_FEATURES \
>> - VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
>> + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_SCTP_CSUM, \
>> VIRTIO_NET_F_MAC, \
>> VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
>> VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
>> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
>> index f144216..2e7a64a 100644
>> --- a/include/linux/virtio_net.h
>> +++ b/include/linux/virtio_net.h
>> @@ -39,6 +39,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
>>
>> if (!skb_partial_csum_set(skb, start, off))
>> return -EINVAL;
>> +
>> + if (hdr->flags & VIRTIO_NET_HDR_F_CSUM_NOT_INET)
>> + skb->csum_not_inet = 1;
>> }
>>
>> if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
>> @@ -96,6 +99,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
>> hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
>> } /* else everything is zero */
>>
>> + if (skb->csum_not_inet)
>> + hdr->flags &= VIRTIO_NET_HDR_F_CSUM_NOT_INET;
>> +
>> return 0;
>> }
>>
>> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
>> index 5de6ed3..3f279c8 100644
>> --- a/include/uapi/linux/virtio_net.h
>> +++ b/include/uapi/linux/virtio_net.h
>> @@ -36,6 +36,7 @@
>> #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
>> #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
>> #define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */
>> +#define VIRTIO_NET_F_SCTP_CSUM 4 /* SCTP checksum offload support */
>> #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
>> #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
>> #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
>
> Is this a guest or a host checksum? We should differenciate between the
> two.
I suppose this is HOST checksum, since it behaves like VIRTIO_NET_F_CSUM only for
SCTP. I couldn't find the use for the GUEST side flag, since there technically
isn't any validations and there is no additional mappings from VIRTIO flag to a
NETIF flag.
If the feature is negotiated, the guest ends up generating partially check-summed
packets, and the host turns on appropriate flags on it's side. The host will
also make sure the checksum if fixed up if the guest doesn't support it.
So 1 flag is currently all that is needed.
-vlad
>
>
>> @@ -101,6 +102,7 @@ struct virtio_net_config {
>> struct virtio_net_hdr_v1 {
>> #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
>> #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
>> +#define VIRTIO_NET_HDR_F_CSUM_NOT_INET 4 /* Checksum is not inet */
>> __u8 flags;
>> #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
>> #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
>> --
>> 2.9.5
^ permalink raw reply
* Re: [RFC] virtio: Use DMA MAP API for devices without an IOMMU
From: Christoph Hellwig @ 2018-04-15 12:11 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: robh, Michael S. Tsirkin, linux-kernel, virtualization,
Christoph Hellwig, joe, david, linuxppc-dev, elfring,
Anshuman Khandual
In-Reply-To: <1523003838.21446.238.camel@kernel.crashing.org>
On Fri, Apr 06, 2018 at 06:37:18PM +1000, Benjamin Herrenschmidt wrote:
> > > implemented as DMA API which the virtio core understands. There is no
> > > need for an IOMMU to be involved for the device representation in this
> > > case IMHO.
> >
> > This whole virtio translation issue is a mess. I think we need to
> > switch it to the dma API, and then quirk the legacy case to always
> > use the direct mapping inside the dma API.
>
> Fine with using a dma API always on the Linux side, but we do want to
> special case virtio still at the arch and qemu side to have a "direct
> mapping" mode. Not sure how (special flags on PCI devices) to avoid
> actually going through an emulated IOMMU on the qemu side, because that
> slows things down, esp. with vhost.
>
> IE, we can't I think just treat it the same as a physical device.
We should have treated it like a physical device from the start, but
that device has unfortunately sailed.
But yes, we'll need a per-device quirk that says 'don't attach an
iommu'.
^ permalink raw reply
* [RFC] Rewrite page fault interception in TTM drivers
From: Matthew Wilcox @ 2018-04-15 3:31 UTC (permalink / raw)
To: dri-devel, amd-gfx, virtualization
Three^W Two of the TTM drivers intercept the pagefault handler in a rather
un-Linuxy and racy way. If they really must intercept the fault call
(and it's not clear to me that they need to), I'd rather see them do it
like this.
The QXL driver seems least likely to need it; as the virtio driver has
exactly the same code in it, only commented out. The radeon driver takes
its own lock ... maybe there's a way to avoid that since no other driver
needs to take its own lock at this point?
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index ee2340e31f06..d2c76a3d6816 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -102,21 +102,23 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev)
}
}
-static struct vm_operations_struct qxl_ttm_vm_ops;
-static const struct vm_operations_struct *ttm_vm_ops;
-
static int qxl_ttm_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo;
- int r;
bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
if (bo == NULL)
return VM_FAULT_NOPAGE;
- r = ttm_vm_ops->fault(vmf);
- return r;
+ return ttm_bo_vm_fault(vmf);
}
+static const struct vm_operations_struct qxl_ttm_vm_ops = {
+ .fault = qxl_ttm_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+ .access = ttm_bo_vm_access,
+};
+
int qxl_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct drm_file *file_priv;
@@ -139,11 +141,6 @@ int qxl_mmap(struct file *filp, struct vm_area_struct *vma)
r = ttm_bo_mmap(filp, vma, &qdev->mman.bdev);
if (unlikely(r != 0))
return r;
- if (unlikely(ttm_vm_ops == NULL)) {
- ttm_vm_ops = vma->vm_ops;
- qxl_ttm_vm_ops = *ttm_vm_ops;
- qxl_ttm_vm_ops.fault = &qxl_ttm_fault;
- }
vma->vm_ops = &qxl_ttm_vm_ops;
return 0;
}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 8689fcca051c..08cc0c5b9e94 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -944,9 +944,6 @@ void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
man->size = size >> PAGE_SHIFT;
}
-static struct vm_operations_struct radeon_ttm_vm_ops;
-static const struct vm_operations_struct *ttm_vm_ops = NULL;
-
static int radeon_ttm_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo;
@@ -959,11 +956,18 @@ static int radeon_ttm_fault(struct vm_fault *vmf)
}
rdev = radeon_get_rdev(bo->bdev);
down_read(&rdev->pm.mclk_lock);
- r = ttm_vm_ops->fault(vmf);
+ r = ttm_bo_vm_fault(vmf);
up_read(&rdev->pm.mclk_lock);
return r;
}
+static const struct vm_operations_struct radeon_ttm_vm_ops = {
+ .fault = radeon_ttm_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+ .access = ttm_bo_vm_access,
+};
+
int radeon_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct drm_file *file_priv;
@@ -983,11 +987,6 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma)
if (unlikely(r != 0)) {
return r;
}
- if (unlikely(ttm_vm_ops == NULL)) {
- ttm_vm_ops = vma->vm_ops;
- radeon_ttm_vm_ops = *ttm_vm_ops;
- radeon_ttm_vm_ops.fault = &radeon_ttm_fault;
- }
vma->vm_ops = &radeon_ttm_vm_ops;
return 0;
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 8eba95b3c737..f59a8f41aae0 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -104,7 +104,7 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
}
-static int ttm_bo_vm_fault(struct vm_fault *vmf)
+int ttm_bo_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
@@ -294,8 +294,9 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf)
ttm_bo_unreserve(bo);
return ret;
}
+EXPORT_SYMBOL_GPL(ttm_bo_vm_fault);
-static void ttm_bo_vm_open(struct vm_area_struct *vma)
+void ttm_bo_vm_open(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo =
(struct ttm_buffer_object *)vma->vm_private_data;
@@ -304,14 +305,16 @@ static void ttm_bo_vm_open(struct vm_area_struct *vma)
(void)ttm_bo_reference(bo);
}
+EXPORT_SYMBOL_GPL(ttm_bo_vm_open);
-static void ttm_bo_vm_close(struct vm_area_struct *vma)
+void ttm_bo_vm_close(struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data;
ttm_bo_unref(&bo);
vma->vm_private_data = NULL;
}
+EXPORT_SYMBOL_GPL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset,
@@ -352,7 +355,7 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
return len;
}
-static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write)
{
unsigned long offset = (addr) - vma->vm_start;
@@ -389,6 +392,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
+EXPORT_SYMBOL_GPL(ttm_bo_vm_access);
static const struct vm_operations_struct ttm_bo_vm_ops = {
.fault = ttm_bo_vm_fault,
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index c67977aa1a0e..5935fbfc07f5 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -713,6 +713,12 @@ void *ttm_kmap_atomic_prot(struct page *page, pgprot_t prot);
void ttm_kunmap_atomic_prot(void *addr, pgprot_t prot);
+int ttm_bo_vm_fault(struct vm_fault *vmf);
+void ttm_bo_vm_open(struct vm_area_struct *vma);
+void ttm_bo_vm_close(struct vm_area_struct *vma);
+int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write);
+
/**
* ttm_bo_io
*
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox