* Re: [PATCH v3 0/2] vhost: fix vhost_vq_access_ok() log check
From: Jason Wang @ 2018-04-11 3:33 UTC (permalink / raw)
To: Stefan Hajnoczi, virtualization
Cc: Linus Torvalds, mst, netdev, syzkaller-bugs, kvm, linux-kernel
In-Reply-To: <20180411023541.15776-1-stefanha@redhat.com>
On 2018年04月11日 10:35, Stefan Hajnoczi wrote:
> v3:
> * Rebased onto net/master and resolved conflict [DaveM]
>
> v2:
> * Rewrote the conditional to make the vq access check clearer [Linus]
> * Added Patch 2 to make the return type consistent and harder to misuse [Linus]
>
> The first patch fixes the vhost virtqueue access check which was recently
> broken. The second patch replaces the int return type with bool to prevent
> future bugs.
>
> Stefan Hajnoczi (2):
> vhost: fix vhost_vq_access_ok() log check
> vhost: return bool from *_access_ok() functions
>
> drivers/vhost/vhost.h | 4 +--
> drivers/vhost/vhost.c | 70 ++++++++++++++++++++++++++-------------------------
> 2 files changed, 38 insertions(+), 36 deletions(-)
>
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
^ permalink raw reply
* [PATCH v3 2/2] vhost: return bool from *_access_ok() functions
From: Stefan Hajnoczi @ 2018-04-11 2:35 UTC (permalink / raw)
To: virtualization
Cc: Linus Torvalds, jasowang, mst, netdev, syzkaller-bugs, kvm,
linux-kernel, Stefan Hajnoczi
In-Reply-To: <20180411023541.15776-1-stefanha@redhat.com>
Currently vhost *_access_ok() functions return int. This is error-prone
because there are two popular conventions:
1. 0 means failure, 1 means success
2. -errno means failure, 0 means success
Although vhost mostly uses #1, it does not do so consistently.
umem_access_ok() uses #2.
This patch changes the return type from int to bool so that false means
failure and true means success. This eliminates a potential source of
errors.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
drivers/vhost/vhost.h | 4 ++--
drivers/vhost/vhost.c | 66 +++++++++++++++++++++++++--------------------------
2 files changed, 35 insertions(+), 35 deletions(-)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d8ee85ae8fdc..6c844b90a168 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -178,8 +178,8 @@ void vhost_dev_cleanup(struct vhost_dev *);
void vhost_dev_stop(struct vhost_dev *);
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
-int vhost_vq_access_ok(struct vhost_virtqueue *vq);
-int vhost_log_access_ok(struct vhost_dev *);
+bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
+bool vhost_log_access_ok(struct vhost_dev *);
int vhost_get_vq_desc(struct vhost_virtqueue *,
struct iovec iov[], unsigned int iov_count,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index fc805b7fad9d..0fcb51a9940c 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -641,14 +641,14 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
-static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
+static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
u64 a = addr / VHOST_PAGE_SIZE / 8;
/* Make sure 64 bit math will not overflow. */
if (a > ULONG_MAX - (unsigned long)log_base ||
a + (unsigned long)log_base > ULONG_MAX)
- return 0;
+ return false;
return access_ok(VERIFY_WRITE, log_base + a,
(sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
@@ -661,30 +661,30 @@ static bool vhost_overflow(u64 uaddr, u64 size)
}
/* Caller should have vq mutex and device mutex. */
-static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
- int log_all)
+static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
+ int log_all)
{
struct vhost_umem_node *node;
if (!umem)
- return 0;
+ return false;
list_for_each_entry(node, &umem->umem_list, link) {
unsigned long a = node->userspace_addr;
if (vhost_overflow(node->userspace_addr, node->size))
- return 0;
+ return false;
if (!access_ok(VERIFY_WRITE, (void __user *)a,
node->size))
- return 0;
+ return false;
else if (log_all && !log_access_ok(log_base,
node->start,
node->size))
- return 0;
+ return false;
}
- return 1;
+ return true;
}
static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
@@ -701,13 +701,13 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
/* Can we switch to this memory table? */
/* Caller should have device mutex but not vq mutex */
-static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
- int log_all)
+static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
+ int log_all)
{
int i;
for (i = 0; i < d->nvqs; ++i) {
- int ok;
+ bool ok;
bool log;
mutex_lock(&d->vqs[i]->mutex);
@@ -717,12 +717,12 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
ok = vq_memory_access_ok(d->vqs[i]->log_base,
umem, log);
else
- ok = 1;
+ ok = true;
mutex_unlock(&d->vqs[i]->mutex);
if (!ok)
- return 0;
+ return false;
}
- return 1;
+ return true;
}
static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
@@ -959,21 +959,21 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
spin_unlock(&d->iotlb_lock);
}
-static int umem_access_ok(u64 uaddr, u64 size, int access)
+static bool umem_access_ok(u64 uaddr, u64 size, int access)
{
unsigned long a = uaddr;
/* Make sure 64 bit math will not overflow. */
if (vhost_overflow(uaddr, size))
- return -EFAULT;
+ return false;
if ((access & VHOST_ACCESS_RO) &&
!access_ok(VERIFY_READ, (void __user *)a, size))
- return -EFAULT;
+ return false;
if ((access & VHOST_ACCESS_WO) &&
!access_ok(VERIFY_WRITE, (void __user *)a, size))
- return -EFAULT;
- return 0;
+ return false;
+ return true;
}
static int vhost_process_iotlb_msg(struct vhost_dev *dev,
@@ -988,7 +988,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
ret = -EFAULT;
break;
}
- if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
+ if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
ret = -EFAULT;
break;
}
@@ -1135,10 +1135,10 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
return 0;
}
-static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
- struct vring_desc __user *desc,
- struct vring_avail __user *avail,
- struct vring_used __user *used)
+static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
+ struct vring_desc __user *desc,
+ struct vring_avail __user *avail,
+ struct vring_used __user *used)
{
size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
@@ -1161,8 +1161,8 @@ static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
vq->meta_iotlb[type] = node;
}
-static int iotlb_access_ok(struct vhost_virtqueue *vq,
- int access, u64 addr, u64 len, int type)
+static bool iotlb_access_ok(struct vhost_virtqueue *vq,
+ int access, u64 addr, u64 len, int type)
{
const struct vhost_umem_node *node;
struct vhost_umem *umem = vq->iotlb;
@@ -1220,7 +1220,7 @@ EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
/* Can we log writes? */
/* Caller should have device mutex but not vq mutex */
-int vhost_log_access_ok(struct vhost_dev *dev)
+bool vhost_log_access_ok(struct vhost_dev *dev)
{
return memory_access_ok(dev, dev->umem, 1);
}
@@ -1228,8 +1228,8 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok);
/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_virtqueue *vq,
- void __user *log_base)
+static bool vq_log_access_ok(struct vhost_virtqueue *vq,
+ void __user *log_base)
{
size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
@@ -1242,14 +1242,14 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
/* Can we start vq? */
/* Caller should have vq mutex and device mutex */
-int vhost_vq_access_ok(struct vhost_virtqueue *vq)
+bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
if (!vq_log_access_ok(vq, vq->log_base))
- return 0;
+ return false;
/* Access validation occurs at prefetch time with IOTLB */
if (vq->iotlb)
- return 1;
+ return true;
return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
}
--
2.14.3
^ permalink raw reply related
* [PATCH v3 1/2] vhost: fix vhost_vq_access_ok() log check
From: Stefan Hajnoczi @ 2018-04-11 2:35 UTC (permalink / raw)
To: virtualization
Cc: Linus Torvalds, jasowang, mst, netdev, syzkaller-bugs, kvm,
linux-kernel, Stefan Hajnoczi
In-Reply-To: <20180411023541.15776-1-stefanha@redhat.com>
Commit d65026c6c62e7d9616c8ceb5a53b68bcdc050525 ("vhost: validate log
when IOTLB is enabled") introduced a regression. The logic was
originally:
if (vq->iotlb)
return 1;
return A && B;
After the patch the short-circuit logic for A was inverted:
if (A || vq->iotlb)
return A;
return B;
This patch fixes the regression by rewriting the checks in the obvious
way, no longer returning A when vq->iotlb is non-NULL (which is hard to
understand).
Reported-by: syzbot+65a84dde0214b0387ccd@syzkaller.appspotmail.com
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
drivers/vhost/vhost.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index bec722e41f58..fc805b7fad9d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1244,10 +1244,12 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
/* Caller should have vq mutex and device mutex */
int vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
- int ret = vq_log_access_ok(vq, vq->log_base);
+ if (!vq_log_access_ok(vq, vq->log_base))
+ return 0;
- if (ret || vq->iotlb)
- return ret;
+ /* Access validation occurs at prefetch time with IOTLB */
+ if (vq->iotlb)
+ return 1;
return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
}
--
2.14.3
^ permalink raw reply related
* [PATCH v3 0/2] vhost: fix vhost_vq_access_ok() log check
From: Stefan Hajnoczi @ 2018-04-11 2:35 UTC (permalink / raw)
To: virtualization
Cc: Linus Torvalds, jasowang, mst, netdev, syzkaller-bugs, kvm,
linux-kernel, Stefan Hajnoczi
v3:
* Rebased onto net/master and resolved conflict [DaveM]
v2:
* Rewrote the conditional to make the vq access check clearer [Linus]
* Added Patch 2 to make the return type consistent and harder to misuse [Linus]
The first patch fixes the vhost virtqueue access check which was recently
broken. The second patch replaces the int return type with bool to prevent
future bugs.
Stefan Hajnoczi (2):
vhost: fix vhost_vq_access_ok() log check
vhost: return bool from *_access_ok() functions
drivers/vhost/vhost.h | 4 +--
drivers/vhost/vhost.c | 70 ++++++++++++++++++++++++++-------------------------
2 files changed, 38 insertions(+), 36 deletions(-)
--
2.14.3
^ permalink raw reply
* [PATCH] net: sun: cassini: Replace GFP_ATOMIC with GFP_KERNEL in cas_check_invariants
From: Jia-Ju Bai @ 2018-04-11 2:34 UTC (permalink / raw)
To: asun; +Cc: netdev, linux-kernel, Jia-Ju Bai
cas_check_invariants() is never called in atomic context.
cas_check_invariants() is only called by cas_init_one(), which is
only set as ".probe" in struct pci_driver.
Despite never getting called from atomic context,
cas_check_invariants() calls alloc_pages() with GFP_ATOMIC,
which does not sleep for allocation.
GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
which can sleep and improve the possibility of sucessful allocation.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/ethernet/sun/cassini.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 382993c..4dd38e3 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -3412,7 +3412,7 @@ static int cas_check_invariants(struct cas *cp)
#ifdef USE_PAGE_ORDER
if (PAGE_SHIFT < CAS_JUMBO_PAGE_SHIFT) {
/* see if we can allocate larger pages */
- struct page *page = alloc_pages(GFP_ATOMIC,
+ struct page *page = alloc_pages(GFP_KERNEL,
CAS_JUMBO_PAGE_SHIFT -
PAGE_SHIFT);
if (page) {
--
1.9.1
^ permalink raw reply related
* [PATCH] net: samsung: sxgbe: Replace mdelay with usleep_range in sxgbe_sw_reset
From: Jia-Ju Bai @ 2018-04-11 2:21 UTC (permalink / raw)
To: bh74.an, ks.giri, vipul.pandya; +Cc: netdev, linux-kernel, Jia-Ju Bai
sxgbe_sw_reset() is never called in atomic context.
sxgbe_sw_reset() is only called by sxgbe_drv_probe(), which is
only called by sxgbe_platform_probe().
sxgbe_platform_probe() is set as ".probe" in struct platform_driver.
This function is not called in atomic context.
Despite never getting called from atomic context, sxgbe_sw_reset()
calls mdelay() to busily wait.
This is not necessary and can be replaced with usleep_range() to
avoid busy waiting.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 89831ad..99cd586 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -2038,7 +2038,7 @@ static int sxgbe_sw_reset(void __iomem *addr)
if (!(readl(addr + SXGBE_DMA_MODE_REG) &
SXGBE_DMA_SOFT_RESET))
break;
- mdelay(10);
+ usleep(10000, 11000);
}
if (retry_count < 0)
--
1.9.1
^ permalink raw reply related
* Re: Re: [RFC] vhost: introduce mdev based hardware vhost backend
From: Jason Wang @ 2018-04-11 2:18 UTC (permalink / raw)
To: Liang, Cunming, Michael S. Tsirkin
Cc: Paolo Bonzini, Bie, Tiwei, alex.williamson@redhat.com,
ddutile@redhat.com, Duyck, Alexander H,
virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
netdev@vger.kernel.org, Daly, Dan, Wang, Zhihong, Tan, Jianfeng,
Wang, Xiao W
In-Reply-To: <D0158A423229094DA7ABF71CF2FA0DA34E9226AF@SHSMSX104.ccr.corp.intel.com>
On 2018年04月10日 22:23, Liang, Cunming wrote:
>> -----Original Message-----
>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>> Sent: Tuesday, April 10, 2018 9:36 PM
>> To: Liang, Cunming<cunming.liang@intel.com>
>> Cc: Paolo Bonzini<pbonzini@redhat.com>; Bie, Tiwei<tiwei.bie@intel.com>;
>> Jason Wang<jasowang@redhat.com>;alex.williamson@redhat.com;
>> ddutile@redhat.com; Duyck, Alexander H<alexander.h.duyck@intel.com>;
>> virtio-dev@lists.oasis-open.org;linux-kernel@vger.kernel.org;
>> kvm@vger.kernel.org;virtualization@lists.linux-foundation.org;
>> netdev@vger.kernel.org; Daly, Dan<dan.daly@intel.com>; Wang, Zhihong
>> <zhihong.wang@intel.com>; Tan, Jianfeng<jianfeng.tan@intel.com>; Wang, Xiao
>> W<xiao.w.wang@intel.com>
>> Subject: Re: [virtio-dev] Re: [RFC] vhost: introduce mdev based hardware vhost
>> backend
>>
>> On Tue, Apr 10, 2018 at 09:23:53AM +0000, Liang, Cunming wrote:
>>>> -----Original Message-----
>>>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>>>> Sent: Tuesday, April 10, 2018 3:52 PM
>>>> To: Bie, Tiwei<tiwei.bie@intel.com>; Jason Wang
>>>> <jasowang@redhat.com>
>>>> Cc:mst@redhat.com;alex.williamson@redhat.com;ddutile@redhat.com;
>>>> Duyck, Alexander H<alexander.h.duyck@intel.com>;
>>>> virtio-dev@lists.oasis- open.org;linux-kernel@vger.kernel.org;
>>>> kvm@vger.kernel.org;virtualization@lists.linux-foundation.org;
>>>> netdev@vger.kernel.org; Daly, Dan<dan.daly@intel.com>; Liang,
>>>> Cunming<cunming.liang@intel.com>; Wang, Zhihong
>>>> <zhihong.wang@intel.com>; Tan, Jianfeng<jianfeng.tan@intel.com>;
>>>> Wang, Xiao W<xiao.w.wang@intel.com>
>>>> Subject: Re: [virtio-dev] Re: [RFC] vhost: introduce mdev based
>>>> hardware vhost backend
>>>>
>>>> On 10/04/2018 06:57, Tiwei Bie wrote:
>>>>>> So you just move the abstraction layer from qemu to kernel, and
>>>>>> you still need different drivers in kernel for different device
>>>>>> interfaces of accelerators. This looks even more complex than
>>>>>> leaving it in qemu. As you said, another idea is to implement
>>>>>> userspace vhost backend for accelerators which seems easier and
>>>>>> could co-work with other parts of qemu without inventing new type of
>> messages.
>>>>> I'm not quite sure. Do you think it's acceptable to add various
>>>>> vendor specific hardware drivers in QEMU?
>>>> I think so. We have vendor-specific quirks, and at some point there
>>>> was an idea of using quirks to implement (vendor-specific) live
>>>> migration support for assigned devices.
>>> Vendor-specific quirks of accessing VGA is a small portion. Other major portions
>> are still handled by guest driver.
>>> While in this case, when saying various vendor specific drivers in QEMU, it says
>> QEMU takes over and provides the entire user space device drivers. Some parts
>> are even not relevant to vhost, they're basic device function enabling. Moreover,
>> it could be different kinds of devices(network/block/...) under vhost. No matter
>> # of vendors or # of types, total LOC is not small.
>>> The idea is to avoid introducing these extra complexity out of QEMU, keeping
>> vhost adapter simple. As vhost protocol is de factor standard, it leverages kernel
>> device driver to provide the diversity. Changing once in QEMU, then it supports
>> multi-vendor devices whose drivers naturally providing kernel driver there.
>>> If QEMU is going to build a user space driver framework there, we're open mind
>> on that, even leveraging DPDK as the underlay library. Looking forward to more
>> others' comments from community.
>>> Steve
>> Dependency on a kernel driver is fine IMHO. It's the dependency on a DPDK
>> backend that makes people unhappy, since the functionality in question is setup-
>> time only.
> Agreed, we don't see dependency on kernel driver is a problem.
At engineering level, kernel driver is harder than userspace driver.
>
> mdev based vhost backend (this patch set) is independent with vhost-user extension patch set. In fact, there're a few vhost-user providers, DPDK librte_vhost is one of them. FD.IO/VPP and snabbswitch have their own vhost-user providers. So I can't agree on vhost-user extension patch depends on DPDK backend. But anyway, that's the topic of another mail thread.
>
Well we can treat mdev as another kind of transport of vhost-user. And
technically we can even implement a relay mdev than forward vhost-user
messages to dpdk.
Thanks
^ permalink raw reply
* [PATCH] net: ieee802154: atusb: Replace GFP_ATOMIC with GFP_KERNEL in atusb_probe
From: Jia-Ju Bai @ 2018-04-11 2:14 UTC (permalink / raw)
To: stefan, alex.aring; +Cc: linux-wpan, netdev, linux-kernel, Jia-Ju Bai
atusb_probe() is never called in atomic context.
This function is only set as ".probe" in struct usb_driver.
Despite never getting called from atomic context,
atusb_probe() calls usb_alloc_urb() with GFP_ATOMIC,
which does not sleep for allocation.
GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
which can sleep and improve the possibility of sucessful allocation.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/ieee802154/atusb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index ef68851..ab6a505 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -789,7 +789,7 @@ static int atusb_probe(struct usb_interface *interface,
atusb->tx_dr.bRequest = ATUSB_TX;
atusb->tx_dr.wValue = cpu_to_le16(0);
- atusb->tx_urb = usb_alloc_urb(0, GFP_ATOMIC);
+ atusb->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
if (!atusb->tx_urb)
goto fail;
--
1.9.1
^ permalink raw reply related
* Re: [virtio-dev] Re: [RFC] vhost: introduce mdev based hardware vhost backend
From: Jason Wang @ 2018-04-11 2:08 UTC (permalink / raw)
To: Liang, Cunming, Paolo Bonzini, Bie, Tiwei
Cc: mst@redhat.com, alex.williamson@redhat.com, ddutile@redhat.com,
Duyck, Alexander H, virtio-dev@lists.oasis-open.org,
linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
Daly, Dan, Wang, Zhihong, Tan, Jianfeng, Wang, Xiao W
In-Reply-To: <D0158A423229094DA7ABF71CF2FA0DA34E9222F4@SHSMSX104.ccr.corp.intel.com>
On 2018年04月10日 17:23, Liang, Cunming wrote:
>
>> -----Original Message-----
>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>> Sent: Tuesday, April 10, 2018 3:52 PM
>> To: Bie, Tiwei <tiwei.bie@intel.com>; Jason Wang <jasowang@redhat.com>
>> Cc: mst@redhat.com; alex.williamson@redhat.com; ddutile@redhat.com;
>> Duyck, Alexander H <alexander.h.duyck@intel.com>; virtio-dev@lists.oasis-
>> open.org; linux-kernel@vger.kernel.org; kvm@vger.kernel.org;
>> virtualization@lists.linux-foundation.org; netdev@vger.kernel.org; Daly, Dan
>> <dan.daly@intel.com>; Liang, Cunming <cunming.liang@intel.com>; Wang,
>> Zhihong <zhihong.wang@intel.com>; Tan, Jianfeng <jianfeng.tan@intel.com>;
>> Wang, Xiao W <xiao.w.wang@intel.com>
>> Subject: Re: [virtio-dev] Re: [RFC] vhost: introduce mdev based hardware
>> vhost backend
>>
>> On 10/04/2018 06:57, Tiwei Bie wrote:
>>>> So you just move the abstraction layer from qemu to kernel, and you
>>>> still need different drivers in kernel for different device
>>>> interfaces of accelerators. This looks even more complex than leaving
>>>> it in qemu. As you said, another idea is to implement userspace vhost
>>>> backend for accelerators which seems easier and could co-work with
>>>> other parts of qemu without inventing new type of messages.
>>> I'm not quite sure. Do you think it's acceptable to add various vendor
>>> specific hardware drivers in QEMU?
>> I think so. We have vendor-specific quirks, and at some point there was an
>> idea of using quirks to implement (vendor-specific) live migration support for
>> assigned devices.
> Vendor-specific quirks of accessing VGA is a small portion. Other major portions are still handled by guest driver.
>
> While in this case, when saying various vendor specific drivers in QEMU, it says QEMU takes over and provides the entire user space device drivers. Some parts are even not relevant to vhost, they're basic device function enabling. Moreover, it could be different kinds of devices(network/block/...) under vhost. No matter # of vendors or # of types, total LOC is not small.
>
> The idea is to avoid introducing these extra complexity out of QEMU, keeping vhost adapter simple. As vhost protocol is de factor standard, it leverages kernel device driver to provide the diversity. Changing once in QEMU, then it supports multi-vendor devices whose drivers naturally providing kernel driver there.
Let me clarify my question, it's not qemu vs kenrel but userspace vs
kernel. It could be a library which could be linked to qemu. Doing it in
userspace have the following obvious advantages:
- attack surface was limited to userspace
- easier to be maintained (compared to kernel driver)
- easier to be extended without introducing new userspace/kernel interfaces
- not tied to a specific operating system
If we want to do it in the kernel, need to consider to unify code
between mdev device driver and generic driver. For net driver, maybe we
can even consider to do it on top of exist drivers.
>
> If QEMU is going to build a user space driver framework there, we're open mind on that, even leveraging DPDK as the underlay library. Looking forward to more others' comments from community.
I'm doing this now by implementing vhost inside qemu IOThreads. Hope I
can post RFC in few months.
Thanks
> Steve
>
>> Paolo
^ permalink raw reply
* [PATCH] intel: i40evf: Replace GFP_ATOMIC with GFP_KERNEL in i40evf_add_vlan
From: Jia-Ju Bai @ 2018-04-11 2:08 UTC (permalink / raw)
To: jeffrey.t.kirsher; +Cc: intel-wired-lan, netdev, linux-kernel, Jia-Ju Bai
i40evf_add_vlan() is never called in atomic context.
i40evf_add_vlan() is only called by i40evf_vlan_rx_add_vid(),
which is only set as ".ndo_vlan_rx_add_vid" in struct net_device_ops.
".ndo_vlan_rx_add_vid" is not called in atomic context.
Despite never getting called from atomic context,
i40evf_add_vlan() calls kzalloc() with GFP_ATOMIC,
which does not sleep for allocation.
GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
which can sleep and improve the possibility of sucessful allocation.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 1825d95..04b2b9c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -770,7 +770,7 @@ i40evf_vlan_filter *i40evf_add_vlan(struct i40evf_adapter *adapter, u16 vlan)
f = i40evf_find_vlan(adapter, vlan);
if (!f) {
- f = kzalloc(sizeof(*f), GFP_ATOMIC);
+ f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
goto clearout;
--
1.9.1
^ permalink raw reply related
* Re: [virtio-dev] Re: [RFC] vhost: introduce mdev based hardware vhost backend
From: Stefan Hajnoczi @ 2018-04-11 2:01 UTC (permalink / raw)
To: Liang, Cunming
Cc: Paolo Bonzini, Bie, Tiwei, Jason Wang, mst@redhat.com,
alex.williamson@redhat.com, ddutile@redhat.com,
Duyck, Alexander H, virtio-dev@lists.oasis-open.org,
linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
virtualization@lists.linux-foundation.org, netdev@vger.kernel.org,
Daly, Dan, Wang, Zhihong, Tan, Jianfeng, Wang, Xiao W
In-Reply-To: <D0158A423229094DA7ABF71CF2FA0DA34E9222F4@SHSMSX104.ccr.corp.intel.com>
[-- Attachment #1: Type: text/plain, Size: 401 bytes --]
On Tue, Apr 10, 2018 at 09:23:53AM +0000, Liang, Cunming wrote:
> If QEMU is going to build a user space driver framework there, we're open mind on that, even leveraging DPDK as the underlay library. Looking forward to more others' comments from community.
There is already an NVMe VFIO driver in QEMU (see block/nvme.c). So in
principle there's no reason against userspace drivers in QEMU.
Stefan
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]
^ permalink raw reply
* [PATCH] dec: tulip: de4x5: Replace mdelay with usleep_range in de4x5_hw_init
From: Jia-Ju Bai @ 2018-04-11 2:00 UTC (permalink / raw)
To: davem, dhowells, stephen, johannes.berg, arvind.yadav.cs
Cc: netdev, linux-parisc, linux-kernel, Jia-Ju Bai
de4x5_hw_init() is never called in atomic context.
de4x5_hw_init() is only called by de4x5_pci_probe(), which is only
set as ".probe" in struct pci_driver.
Despite never getting called from atomic context, de4x5_hw_init()
calls mdelay() to busily wait.
This is not necessary and can be replaced with usleep_range() to
avoid busy waiting.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/ethernet/dec/tulip/de4x5.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 0affee9..3fb0119 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -1107,7 +1107,7 @@ static int (*dc_infoblock[])(struct net_device *dev, u_char, u_char *) = {
pdev = to_pci_dev (gendev);
pci_write_config_byte(pdev, PCI_CFDA_PSM, WAKEUP);
}
- mdelay(10);
+ usleep(10000, 11000);
RESET_DE4X5;
--
1.9.1
^ permalink raw reply related
* [PATCH] net: dsa: b53: Replace mdelay with msleep in b53_switch_reset_gpio
From: Jia-Ju Bai @ 2018-04-11 1:51 UTC (permalink / raw)
To: f.fainelli, andrew, vivien.didelot; +Cc: netdev, linux-kernel, Jia-Ju Bai
b53_switch_reset_gpio() is never called in atomic context.
The call chain ending up at b53_switch_reset_gpio() is:
[1] b53_switch_reset_gpio() <- b53_switch_reset() <-
b53_reset_switch() <- b53_setup()
b53_switch_reset_gpio() is set as ".setup" in struct dsa_switch_ops.
This function is not called in atomic context.
Despite never getting called from atomic context, b53_switch_reset_gpio()
calls mdelay() to busily wait.
This is not necessary and can be replaced with msleep() to
avoid busy waiting.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/dsa/b53/b53_common.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 274f367..e070ff6 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -597,10 +597,10 @@ static void b53_switch_reset_gpio(struct b53_device *dev)
/* Reset sequence: RESET low(50ms)->high(20ms)
*/
gpio_set_value(gpio, 0);
- mdelay(50);
+ msleep(50);
gpio_set_value(gpio, 1);
- mdelay(20);
+ msleep(20);
dev->current_page = 0xff;
}
--
1.9.1
^ permalink raw reply related
* Re: XDP_TX for virtio_net not working in recent kernel?
From: Jason Wang @ 2018-04-11 1:50 UTC (permalink / raw)
To: Daniel Borkmann, Kimitoshi Takahashi; +Cc: xdp-newbies, netdev
In-Reply-To: <1d5849ae-a9ec-93af-e1b5-e7a368193c7a@iogearbox.net>
[-- Attachment #1: Type: text/plain, Size: 2680 bytes --]
On 2018年04月10日 18:17, Daniel Borkmann wrote:
> [ +jason +netdev ]
>
> On 04/10/2018 12:49 AM, Kimitoshi Takahashi wrote:
>> Hello,
>>
>> I'm totally newbie.
>>
>> I want to try some examples under the samples/bpf of the kernel tree using the virtio_net driver in kvm, in order to get familiar with the XDP. However, it seems to me that for kernels newer than linux-4.15, the xdp2 and xdp_tx_tunnel are not working in the case of virtio_net.
>>
>> Can somebody help me out to make them working?
>>
>>
>> 1) For linux-4.11.12, linux-4.12.14, and linux-4.14.31, xdp2 and xdp_tx_tunnel are working.
>>
>> [xdp2]
>> 10.0.0.27# ./xdp2 2 (or ./xdp -N 2)
>>
>> 10.0.0.254# hping3 10.0.0.27 -p 5000 -2
>>
>> 10.0.0.254# tcpdump -nei kbr0 udp port 5000
>> 06:18:37.677070 2e:c9:83:fb:5b:f3 > 52:54:00:11:00:1b, ethertype IPv4 (0x0800), length 42: 10.0.0.254.2982 > 10.0.0.27.5000: UDP, length 0
>> 06:18:37.677265 52:54:00:11:00:1b > 2e:c9:83:fb:5b:f3, ethertype IPv4 (0x0800), length 42: 10.0.0.254.2982 > 10.0.0.27.5000: UDP, length 0
>>
>> I can see the UDP packets with MAC addresses swapped.
>>
>> [xdp_tx_tunnel]
>>
>> 10.0.0.254# telnet 10.1.2.1 80
>>
>> 10.0.0.27# ./xdp_tx_iptunnel -i 2 -a 10.1.2.1 -p 80 -s 10.0.0.27 -d 10.0.0.24 -m 52:54:00:11:00:18
>>
>> 10.0.0.24# tcpdump -nei any proto 4
>> 22:08:20.510354 In 52:54:00:11:00:1b ethertype IPv4 (0x0800), length 96: 10.0.0.27 > 10.0.0.24: 10.0.0.254.60826 > 10.1.2.1.80: Flags [S], seq 3224377720, win 29200, options [mss 1460,sackOK,TS val 217801953 ecr 0,nop,wscale 7], length 0 (ipip-proto-4)
>>
>> I can see the encapsulated SYN packet that is forwarded by the 10.0.0.27.
>>
>> 2) For linux-4.15, linux-4.15.5, linux-4.15.13 and linux-4.16.1, xdp2 and xdp_tx_tunnel are NOT working.
>>
>> [xdp2]
>> 10.0.0.27# ./xdp2 -N 2
>>
>> 10.0.0.254# hping3 10.0.0.27 -p 5000 -2
>>
>> 10.0.0.254# tcpdump -nei br0 udp port 5000
>> 06:45:34.810046 2e:c9:83:fb:5b:f3 > 52:54:00:11:00:1b, ethertype IPv4 (0x0800), length 42: 10.0.0.254.2346 > 10.0.0.27.5000: UDP, length 0
>> 06:45:35.810176 2e:c9:83:fb:5b:f3 > 52:54:00:11:00:1b, ethertype IPv4 (0x0800), length 42: 10.0.0.254.2347 > 10.0.0.27.5000: UDP, length 0
>>
>> Only the outgoing packets can be seen.
>>
>> [xdp_tx_tunnel]
>>
>> 10.0.0.254# telnet 10.1.2.1 80
>>
>> 10.0.0.27# ./xdp_tx_iptunnel -i 2 -a 10.1.2.1 -p 80 -s 10.0.0.27 -d 10.0.0.24 -m 52:54:00:11:00:18
>>
>> 10.0.0.24# tcpdump -nei any proto 4
>>
>> I can NOT see the encapsulated SYN packet that is forwarded by the 10.0.0.27.
>>
>>
>> --
>> Sincerely,
>> Takahashi Kimitoshi
I suspect a kick is missed in the XDP_TX case.
Could you please try the attached patch to see if it fixes the issue?
Thanks
[-- Attachment #2: 0001-virtio-net-add-missing-virtqueue-kick-when-flushing-.patch --]
[-- Type: text/x-patch, Size: 1506 bytes --]
>From f37d74d65671e1a1f1a0c4dd59bf1df69b47f30a Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 22 Jan 2018 17:07:03 +0800
Subject: [PATCH] virtio-net: add missing virtqueue kick when flushing packets
We tends to batch submitting packets during XDP_TX. This requires to
kick virtqueue after a batch, we tried to do it through
xdp_do_flush_map() which only makes sense for devmap not XDP_TX. So
explicitly kick the virtqueue in this case.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/virtio_net.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 2337460..d8e1aea 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1269,7 +1269,9 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
{
struct receive_queue *rq =
container_of(napi, struct receive_queue, napi);
- unsigned int received;
+ struct virtnet_info *vi = rq->vq->vdev->priv;
+ struct send_queue *sq;
+ unsigned int received, qp;
bool xdp_xmit = false;
virtnet_poll_cleantx(rq);
@@ -1280,8 +1282,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
if (received < budget)
virtqueue_napi_complete(napi, rq->vq, received);
- if (xdp_xmit)
+ if (xdp_xmit) {
+ qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
+ smp_processor_id();
+ sq = &vi->sq[qp];
+ virtqueue_kick(sq->vq);
xdp_do_flush_map();
+ }
return received;
}
--
2.7.4
^ permalink raw reply related
* [PATCH 2/2] net: can: sja1000: Replace mdelay with usleep_range in pcan_add_channels
From: Jia-Ju Bai @ 2018-04-11 1:43 UTC (permalink / raw)
To: wg, mkl; +Cc: linux-can, netdev, linux-kernel, Jia-Ju Bai
pcan_add_channels() is never called in atomic context.
pcan_add_channels() is only called by pcan_probe(), which is only set as
".probe" in struct pcmcia_driver.
Despite never getting called from atomic context, pcan_add_channels()
calls mdelay() to busily wait.
This is not necessary and can be replaced with usleep_range() to
avoid busy waiting.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/can/sja1000/peak_pcmcia.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c
index dd56133..1a1fbf3 100644
--- a/drivers/net/can/sja1000/peak_pcmcia.c
+++ b/drivers/net/can/sja1000/peak_pcmcia.c
@@ -530,7 +530,7 @@ static int pcan_add_channels(struct pcan_pccard *card)
pcan_write_reg(card, PCC_CCR, ccr);
/* wait 2ms before unresetting channels */
- mdelay(2);
+ usleep_range(2000, 3000);
ccr &= ~PCC_CCR_RST_ALL;
pcan_write_reg(card, PCC_CCR, ccr);
--
1.9.1
^ permalink raw reply related
* [PATCH 1/2] net: can: sja1000: Replace mdelay with usleep_range in peak_pci_probe
From: Jia-Ju Bai @ 2018-04-11 1:42 UTC (permalink / raw)
To: wg, mkl; +Cc: linux-can, netdev, linux-kernel, Jia-Ju Bai
peak_pci_probe() is never called in atomic context.
peak_pci_probe() is set as ".probe" in struct pci_driver.
Despite never getting called from atomic context, peak_pci_probe()
calls mdelay() to busily wait.
This is not necessary and can be replaced with usleep_range() to
avoid busy waiting.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/can/sja1000/peak_pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c
index 131026f..48cf821 100644
--- a/drivers/net/can/sja1000/peak_pci.c
+++ b/drivers/net/can/sja1000/peak_pci.c
@@ -608,7 +608,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
writeb(0x00, cfg_base + PITA_GPIOICR);
/* Toggle reset */
writeb(0x05, cfg_base + PITA_MISC + 3);
- mdelay(5);
+ usleep_range(5000, 6000);
/* Leave parport mux mode */
writeb(0x04, cfg_base + PITA_MISC + 3);
--
1.9.1
^ permalink raw reply related
* RE: [virtio-dev] Re: [RFC] vhost: introduce mdev based hardware vhost backend
From: Tian, Kevin @ 2018-04-11 1:38 UTC (permalink / raw)
To: Liang, Cunming, Michael S. Tsirkin
Cc: Duyck, Alexander H, virtio-dev@lists.oasis-open.org,
kvm@vger.kernel.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org,
virtualization@lists.linux-foundation.org, Wang, Xiao W,
ddutile@redhat.com, Tan, Jianfeng, Wang, Zhihong, Paolo Bonzini
In-Reply-To: <D0158A423229094DA7ABF71CF2FA0DA34E9226AF@SHSMSX104.ccr.corp.intel.com>
> From: Liang, Cunming
> Sent: Tuesday, April 10, 2018 10:24 PM
>
[...]
> >
> > As others said, we do not need to go overeboard. A couple of small
> vendor-
> > specific quirks in qemu isn't a big deal.
>
> It's quite challenge to identify it's small or not, there's no uniform metric.
>
> It's only dependent on QEMU itself, that's the obvious benefit. Tradeoff is
> to build the entire device driver. We don't object to do that in QEMU, but
> wanna make sure to understand the boundary size clearly.
>
It might be helpful if you can post some sample code using proposed
framework - then people have a clear feeling about what size is talked
about here and whether it falls into the concept of 'small quirks'.
Thanks
Kevin
^ permalink raw reply
* [PATCH 2/2] staging: irda: Replace mdelay with usleep_range in irda_usb_probe
From: Jia-Ju Bai @ 2018-04-11 1:33 UTC (permalink / raw)
To: samuel, gregkh, davem, johan, arvind.yadav.cs
Cc: netdev, devel, linux-kernel, Jia-Ju Bai
irda_usb_probe() is never called in atomic context.
irda_usb_probe() is only set as ".probe" in struct usb_driver.
Despite never getting called from atomic context, irda_usb_probe()
calls mdelay() to busily wait.
This is not necessary and can be replaced with usleep_range() to
avoid busy waiting.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/staging/irda/drivers/irda-usb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/irda/drivers/irda-usb.c b/drivers/staging/irda/drivers/irda-usb.c
index 723e49b..6ff5b08 100644
--- a/drivers/staging/irda/drivers/irda-usb.c
+++ b/drivers/staging/irda/drivers/irda-usb.c
@@ -1710,7 +1710,7 @@ static int irda_usb_probe(struct usb_interface *intf,
pr_debug("usb_control_msg failed %d\n", ret);
goto err_out_3;
} else {
- mdelay(10);
+ usleep_range(10000, 11000);
}
}
--
1.9.1
^ permalink raw reply related
* [PATCH 1/2] staging: irda: Replace mdelay with usleep_range in stir421x_fw_upload
From: Jia-Ju Bai @ 2018-04-11 1:29 UTC (permalink / raw)
To: samuel, gregkh, davem, johan, arvind.yadav.cs
Cc: netdev, devel, linux-kernel, Jia-Ju Bai
stir421x_fw_upload() is never called in atomic context.
The call chain ending up at stir421x_fw_upload() is:
[1] stir421x_fw_upload() <- stir421x_patch_device() <- irda_usb_probe()
irda_usb_probe() is set as ".probe" in struct usb_driver.
This function is not called in atomic context.
Despite never getting called from atomic context, stir421x_fw_upload()
calls mdelay() to busily wait.
This is not necessary and can be replaced with usleep_range() to
avoid busy waiting.
This is found by a static analysis tool named DCNS written by myself.
And I also manually check it.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/staging/irda/drivers/irda-usb.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/irda/drivers/irda-usb.c b/drivers/staging/irda/drivers/irda-usb.c
index 723e49b..c6c8c2c 100644
--- a/drivers/staging/irda/drivers/irda-usb.c
+++ b/drivers/staging/irda/drivers/irda-usb.c
@@ -1050,7 +1050,7 @@ static int stir421x_fw_upload(struct irda_usb_cb *self,
if (ret < 0)
break;
- mdelay(10);
+ usleep_range(10000, 11000);
}
kfree(patch_block);
--
1.9.1
^ permalink raw reply related
* Re: [PATCH v2 0/2] vhost: fix vhost_vq_access_ok() log check
From: Stefan Hajnoczi @ 2018-04-11 1:21 UTC (permalink / raw)
To: David Miller
Cc: jasowang, virtualization, syzkaller-bugs, mst, torvalds, kvm,
linux-kernel, netdev
In-Reply-To: <20180410.105043.1798364865868187298.davem@davemloft.net>
[-- Attachment #1: Type: text/plain, Size: 1182 bytes --]
On Tue, Apr 10, 2018 at 10:50:43AM -0400, David Miller wrote:
> From: Jason Wang <jasowang@redhat.com>
> Date: Tue, 10 Apr 2018 14:40:10 +0800
>
> > On 2018年04月10日 13:26, Stefan Hajnoczi wrote:
> >> v2:
> >> * Rewrote the conditional to make the vq access check clearer [Linus]
> >> * Added Patch 2 to make the return type consistent and harder to misuse
> >> * [Linus]
> >>
> >> The first patch fixes the vhost virtqueue access check which was
> >> recently
> >> broken. The second patch replaces the int return type with bool to
> >> prevent
> >> future bugs.
> >>
> >> Stefan Hajnoczi (2):
> >> vhost: fix vhost_vq_access_ok() log check
> >> vhost: return bool from *_access_ok() functions
> >>
> >> drivers/vhost/vhost.h | 4 +--
> >> drivers/vhost/vhost.c | 70
> >> ++++++++++++++++++++++++++-------------------------
> >> 2 files changed, 38 insertions(+), 36 deletions(-)
> >>
> >
> > Acked-by: Jason Wang <jasowang@redhat.com>
>
> This sereis doesn't apply cleanly to the net tree, please respin
> and add the appropriate Acks and Fixes: tags that Michael and Jason
> have provided.
Sorry, will fix.
Stefan
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 455 bytes --]
^ permalink raw reply
* Re: [RFC PATCH net-next v6 4/4] netvsc: refactor notifier/event handling code to use the bypass framework
From: Michael S. Tsirkin @ 2018-04-11 1:21 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Sridhar Samudrala, davem, netdev, virtualization, virtio-dev,
jesse.brandeburg, alexander.h.duyck, kubakici, jasowang,
loseweigh, jiri
In-Reply-To: <20180410142608.50f15b45@xeon-e3>
On Tue, Apr 10, 2018 at 02:26:08PM -0700, Stephen Hemminger wrote:
> On Tue, 10 Apr 2018 11:59:50 -0700
> Sridhar Samudrala <sridhar.samudrala@intel.com> wrote:
>
> > Use the registration/notification framework supported by the generic
> > bypass infrastructure.
> >
> > Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
> > ---
>
> Thanks for doing this. Your current version has couple show stopper
> issues.
>
> First, the slave device is instantly taking over the slave.
> This doesn't allow udev/systemd to do its device rename of the slave
> device. Netvsc uses a delayed work to workaround this.
>
> Secondly, the select queue needs to call queue selection in VF.
> The bonding/teaming logic doesn't work well for UDP flows.
> Commit b3bf5666a510 ("hv_netvsc: defer queue selection to VF")
> fixed this performance problem.
>
> Lastly, more indirection is bad in current climate.
Well right now netvsc does an indirect call to the PT device,
does it not? If you really want max performance when PT
is in use you need to do the reverse and have PT forward to netvsc.
> I am not completely adverse to this but it needs to be fast, simple
> and completely transparent.
^ permalink raw reply
* [PATCHv2] mISDN: Remove VLAs
From: Laura Abbott @ 2018-04-11 1:04 UTC (permalink / raw)
To: Karsten Keil, David S. Miller
Cc: Laura Abbott, Kees Cook, netdev, linux-kernel, kernel-hardening
There's an ongoing effort to remove VLAs[1] from the kernel to eventually
turn on -Wvla. Remove the VLAs from the mISDN code by switching to using
kstrdup in one place and using an upper bound in another.
Signed-off-by: Laura Abbott <labbott@redhat.com>
---
v2: Switch to a tighter upper bound so we are allocating a more
reasonable amount on the stack (300). This is based on previous checks
against this value.
---
drivers/isdn/mISDN/dsp_hwec.c | 8 +++++---
drivers/isdn/mISDN/l1oip_core.c | 14 +++++++++++---
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/isdn/mISDN/dsp_hwec.c b/drivers/isdn/mISDN/dsp_hwec.c
index a6e87076acc2..5336bbdbfdc5 100644
--- a/drivers/isdn/mISDN/dsp_hwec.c
+++ b/drivers/isdn/mISDN/dsp_hwec.c
@@ -68,12 +68,12 @@ void dsp_hwec_enable(struct dsp *dsp, const char *arg)
goto _do;
{
- char _dup[len + 1];
char *dup, *tok, *name, *val;
int tmp;
- strcpy(_dup, arg);
- dup = _dup;
+ dup = kstrdup(arg, GFP_ATOMIC);
+ if (!dup)
+ return;
while ((tok = strsep(&dup, ","))) {
if (!strlen(tok))
@@ -89,6 +89,8 @@ void dsp_hwec_enable(struct dsp *dsp, const char *arg)
deftaps = tmp;
}
}
+
+ kfree(dup);
}
_do:
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index 21d50e4cc5e1..b05022f94f18 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -279,7 +279,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask,
u16 timebase, u8 *buf, int len)
{
u8 *p;
- u8 frame[len + 32];
+ u8 frame[MAX_DFRAME_LEN_L1 + 32];
struct socket *socket = NULL;
if (debug & DEBUG_L1OIP_MSG)
@@ -902,7 +902,11 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
p = skb->data;
l = skb->len;
while (l) {
- ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME;
+ /*
+ * This is technically bounded by L1OIP_MAX_PERFRAME but
+ * MAX_DFRAME_LEN_L1 < L1OIP_MAX_PERFRAME
+ */
+ ll = (l < MAX_DFRAME_LEN_L1) ? l : MAX_DFRAME_LEN_L1;
l1oip_socket_send(hc, 0, dch->slot, 0,
hc->chan[dch->slot].tx_counter++, p, ll);
p += ll;
@@ -1140,7 +1144,11 @@ handle_bmsg(struct mISDNchannel *ch, struct sk_buff *skb)
p = skb->data;
l = skb->len;
while (l) {
- ll = (l < L1OIP_MAX_PERFRAME) ? l : L1OIP_MAX_PERFRAME;
+ /*
+ * This is technically bounded by L1OIP_MAX_PERFRAME but
+ * MAX_DFRAME_LEN_L1 < L1OIP_MAX_PERFRAME
+ */
+ ll = (l < MAX_DFRAME_LEN_L1) ? l : MAX_DFRAME_LEN_L1;
l1oip_socket_send(hc, hc->codec, bch->slot, 0,
hc->chan[bch->slot].tx_counter, p, ll);
hc->chan[bch->slot].tx_counter += ll;
--
2.14.3
^ permalink raw reply related
* [PATCH] net/tls: Remove VLA usage
From: Kees Cook @ 2018-04-11 0:52 UTC (permalink / raw)
To: Dave Watson
Cc: linux-kernel, netdev, Ilya Lesokhin, Aviad Yehezkel,
David S. Miller
In the quest to remove VLAs from the kernel[1], this replaces the VLA
size with the only possible size used in the code, and adds a mechanism
to double-check future IV sizes.
[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
net/tls/tls_sw.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 4dc766b03f00..71e79597f940 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -41,6 +41,8 @@
#include <net/strparser.h>
#include <net/tls.h>
+#define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE
+
static int tls_do_decryption(struct sock *sk,
struct scatterlist *sgin,
struct scatterlist *sgout,
@@ -673,7 +675,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
- char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size];
+ char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE];
struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
struct scatterlist *sgin = &sgin_arr[0];
struct strp_msg *rxm = strp_msg(skb);
@@ -1094,6 +1096,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
goto free_priv;
}
+ /* Sanity-check the IV size for stack allocations. */
+ if (iv_size > MAX_IV_SIZE) {
+ rc = -EINVAL;
+ goto free_priv;
+ }
+
cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
cctx->tag_size = tag_size;
cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
--
2.7.4
--
Kees Cook
Pixel Security
^ permalink raw reply related
* Re: [PATCH] make net_gso_ok return false when gso_type is zero(invalid)
From: Wenhua Shi @ 2018-04-11 0:51 UTC (permalink / raw)
To: Marcelo Ricardo Leitner; +Cc: David Miller, netdev, linux-kernel
In-Reply-To: <20180410163216.GB3661@localhost.localdomain>
2018-04-10 18:32 GMT+02:00 Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>:
> On Sun, Apr 08, 2018 at 08:41:21PM +0200, Wenhua Shi wrote:
>> 2018-04-08 18:51 GMT+02:00 David Miller <davem@davemloft.net>:
>> >
>> > From: Wenhua Shi <march511@gmail.com>
>> > Date: Fri, 6 Apr 2018 03:43:39 +0200
>> >
>> > > Signed-off-by: Wenhua Shi <march511@gmail.com>
>> >
>> > This precondition should be made impossible instead of having to do
>> > an extra check everywhere that this helper is invoked, many of which
>> > are in fast paths.
>>
>> I believe the precondition you said is quite true. In my situation, I
>> have to disable GSO for some packet and I notice that it leads to a
>> worse performance (slower than 1Mbps, was almost 800Mbps).
>>
>> Here's the hook I use on debian 9.4, kernel version 4.9:
>
> There is quite a distance between 4.9 and net/net-next. Did you test
> on a more recent kernel too?
>
> Note that TCP stack now works with GSO being always on.
> 0a6b2a1dc2a2 ("tcp: switch to GSO being always on")
>
I've tried testing on the Fedora rawhide channel. The kernel version
is 4.17.0. Detail information is attached.
Without the hook
[root@fedora-s-1vcpu-1gb-sfo1-01 testing]# iperf -c
myanothernormalmachine -d
------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 85.3 KByte (default)
------------------------------------------------------------
------------------------------------------------------------
Client connecting to myanothernormalmachine, TCP port 5001
TCP window size: 85.0 KByte (default)
------------------------------------------------------------
[ 3] local 107.170.240.XXX port 44692 connected with
104.131.148.XXX port 5001
[ 5] local 107.170.240.XXX port 5001 connected with
104.131.148.XXX port 53978
[ ID] Interval Transfer Bandwidth
[ 3] 0.0-10.0 sec 1.04 GBytes 892 Mbits/sec
[ 5] 0.0-10.0 sec 757 MBytes 638 Mbits/sec
With the hook
[root@fedora-s-1vcpu-1gb-sfo1-01 testing]# iperf -c
myanothernormalmachine -d
------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 85.3 KByte (default)
------------------------------------------------------------
------------------------------------------------------------
Client connecting to myanothernormalmachine, TCP port 5001
TCP window size: 85.0 KByte (default)
------------------------------------------------------------
[ 3] local 107.170.240.XXX port 44694 connected with
104.131.148.XXX port 5001
[ 5] local 107.170.240.XXX port 5001 connected with
104.131.148.XXX port 53980
[ ID] Interval Transfer Bandwidth
[ 5] 0.0-10.0 sec 1.04 GBytes 894 Mbits/sec
[ 3] 0.0-13.5 sec 170 KBytes 103 Kbits/sec
Kernel
[root@fedora-s-1vcpu-1gb-sfo1-01 testing]# uname -a
Linux fedora-s-1vcpu-1gb-sfo1-01.localdomain
4.17.0-0.rc0.git5.2.fc29.x86_64 #1 SMP Mon Apr 9 17:16:30 UTC 2018
x86_64 x86_64 x86_64 GNU/Linux
Hook Source Code
[root@fedora-s-1vcpu-1gb-sfo1-01 testing]# cat testing.c
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <linux/ip.h>
unsigned int hook_outgoing(
void * priv,
struct sk_buff * skb,
const struct nf_hook_state * state)
{
printk(KERN_INFO "Hook working...\n");
/* for some reason I have to disable GSO */
skb_gso_reset(skb);
/* The following won't work any more. */
// skb->sk->sk_gso_type = ~0;
return NF_ACCEPT;
}
static struct nf_hook_ops hook =
{
.hook = hook_outgoing,
.pf = PF_INET,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_LAST,
};
static int __init init_testing(void)
{
nf_register_net_hook(&init_net, &hook);
return 0;
}
static void __exit exit_testing(void)
{
nf_unregister_net_hook(&init_net, &hook);
}
MODULE_LICENSE("GPL");
module_init(init_testing);
module_exit(exit_testing);
It turns out the problem exists and my previous bypassing trick is not
working any more. I'm now testing whether the patch is working for the
latest net-next branch.
^ permalink raw reply
* [PATCH] netfilter: ftp helper: Support \n and \r terminators for PORT
From: Scott Parlane @ 2018-04-11 0:11 UTC (permalink / raw)
To: pablo, kadlec, fw, netfilter-devel, coreteam, netdev; +Cc: Scott Parlane
The current code requires \r as the terminator directly
after the last digit, and RFC959 indicates CRLF as the
the terminator.
However, we have seen in the wild a client sending
packets with only \n
Signed-off-by: Scott Parlane <scott.parlane@alliedtelesis.co.nz>
---
net/netfilter/nf_conntrack_ftp.c | 40 ++++++++++++++++++++++++----------------
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index b666959f17..e15bf351f6 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -56,21 +56,23 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_ftp_hook);
static int try_rfc959(const char *, size_t, struct nf_conntrack_man *,
- char, unsigned int *);
+ char, char, unsigned int *);
static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *,
- char, unsigned int *);
+ char, char, unsigned int *);
static int try_eprt(const char *, size_t, struct nf_conntrack_man *,
- char, unsigned int *);
+ char, char, unsigned int *);
static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *,
- char, unsigned int *);
+ char, char, unsigned int *);
static struct ftp_search {
const char *pattern;
size_t plen;
char skip;
char term;
+ char alt_term;
enum nf_ct_ftp_type ftptype;
- int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *);
+ int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char,
+ char, unsigned int *);
} search[IP_CT_DIR_MAX][2] = {
[IP_CT_DIR_ORIGINAL] = {
{
@@ -78,6 +80,7 @@ static struct ftp_search {
.plen = sizeof("PORT") - 1,
.skip = ' ',
.term = '\r',
+ .alt_term = '\n',
.ftptype = NF_CT_FTP_PORT,
.getnum = try_rfc959,
},
@@ -119,7 +122,7 @@ get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term)
}
static int try_number(const char *data, size_t dlen, u_int32_t array[],
- int array_size, char sep, char term)
+ int array_size, char sep, char term, char alt_term)
{
u_int32_t i, len;
@@ -136,7 +139,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
/* Unexpected character; true if it's the
terminator (or we don't care about one)
and we're finished. */
- if ((*data == term || !term) && i == array_size - 1)
+ if ((*data == term || (alt_term &&
+ *data == alt_term) || !term) &&
+ i == array_size - 1)
return len;
pr_debug("Char %u (got %u nums) `%u' unexpected\n",
@@ -152,12 +157,12 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[],
/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
static int try_rfc959(const char *data, size_t dlen,
struct nf_conntrack_man *cmd, char term,
- unsigned int *offset)
+ char alt_term, unsigned int *offset)
{
int length;
u_int32_t array[6];
- length = try_number(data, dlen, array, 6, ',', term);
+ length = try_number(data, dlen, array, 6, ',', term, alt_term);
if (length == 0)
return 0;
@@ -179,7 +184,7 @@ static int try_rfc959(const char *data, size_t dlen,
*/
static int try_rfc1123(const char *data, size_t dlen,
struct nf_conntrack_man *cmd, char term,
- unsigned int *offset)
+ char alt_term, unsigned int *offset)
{
int i;
for (i = 0; i < dlen; i++)
@@ -191,7 +196,7 @@ static int try_rfc1123(const char *data, size_t dlen,
*offset += i;
- return try_rfc959(data + i, dlen - i, cmd, 0, offset);
+ return try_rfc959(data + i, dlen - i, cmd, 0, 0, offset);
}
/* Grab port: number up to delimiter */
@@ -222,7 +227,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim,
/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */
static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
- char term, unsigned int *offset)
+ char term, char alt_term, unsigned int *offset)
{
char delim;
int length;
@@ -251,7 +256,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
u_int32_t array[4];
/* Now we have IP address. */
- length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
+ length = try_number(data + 3, dlen - 3, array, 4, '.', delim,
+ 0);
if (length != 0)
cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16)
| (array[2] << 8) | array[3]);
@@ -271,7 +277,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
/* Returns 0, or length of numbers: |||6446| */
static int try_epsv_response(const char *data, size_t dlen,
struct nf_conntrack_man *cmd, char term,
- unsigned int *offset)
+ char alt_term, unsigned int *offset)
{
char delim;
@@ -289,12 +295,13 @@ static int try_epsv_response(const char *data, size_t dlen,
static int find_pattern(const char *data, size_t dlen,
const char *pattern, size_t plen,
char skip, char term,
+ char alt_term,
unsigned int *numoff,
unsigned int *numlen,
struct nf_conntrack_man *cmd,
int (*getnum)(const char *, size_t,
struct nf_conntrack_man *, char,
- unsigned int *))
+ char, unsigned int *))
{
size_t i = plen;
@@ -337,7 +344,7 @@ static int find_pattern(const char *data, size_t dlen,
pr_debug("Skipped up to `%c'!\n", skip);
*numoff = i;
- *numlen = getnum(data + i, dlen - i, cmd, term, numoff);
+ *numlen = getnum(data + i, dlen - i, cmd, term, alt_term, numoff);
if (!*numlen)
return -1;
@@ -461,6 +468,7 @@ skip_nl_seq:
search[dir][i].plen,
search[dir][i].skip,
search[dir][i].term,
+ search[dir][i].alt_term,
&matchoff, &matchlen,
&cmd,
search[dir][i].getnum);
--
2.16.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox