Netdev List
 help / color / mirror / Atom feed
* [PULL] vhost, virtio: last minute fixes
From: Michael S. Tsirkin @ 2019-09-06 13:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: kvm, virtualization, netdev, linux-kernel, jasowang, jiangkidd,
	linyunsheng, mst, namit, tiwei.bie

Hope this can still make it.
I was not sure about virtio-net change but it seems that it prevents
livelocks for some people.

The following changes since commit 089cf7f6ecb266b6a4164919a2e69bd2f938374a:

  Linux 5.3-rc7 (2019-09-02 09:57:40 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

for you to fetch changes up to 02fa5d7b17a761f53ef1eedfc254e1f33bd226b0:

  mm/balloon_compaction: suppress allocation warnings (2019-09-04 07:42:01 -0400)

----------------------------------------------------------------
virtio, vhost, balloon: bugfixes

A couple of last minute bugfixes. And a revert of a failed attempt at
metadata access optimization - we'll try again in the next cycle.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

----------------------------------------------------------------
? jiang (1):
      virtio-net: lower min ring num_free for efficiency

Michael S. Tsirkin (1):
      Revert "vhost: access vq metadata through kernel virtual address"

Nadav Amit (1):
      mm/balloon_compaction: suppress allocation warnings

Tiwei Bie (2):
      vhost/test: fix build for vhost test
      vhost/test: fix build for vhost test

Yunsheng Lin (1):
      vhost: Remove unnecessary variable

 drivers/net/virtio_net.c |   2 +-
 drivers/vhost/test.c     |  13 +-
 drivers/vhost/vhost.c    | 520 +----------------------------------------------
 drivers/vhost/vhost.h    |  41 ----
 mm/balloon_compaction.c  |   3 +-
 5 files changed, 17 insertions(+), 562 deletions(-)

^ permalink raw reply

* Re: [PATCH 1/2] Revert "vhost: access vq metadata through kernel virtual address"
From: Michael S. Tsirkin @ 2019-09-06 13:46 UTC (permalink / raw)
  To: Jason Wang
  Cc: kvm, virtualization, netdev, linux-kernel, jgg, aarcange, jglisse,
	linux-mm
In-Reply-To: <20190905122736.19768-2-jasowang@redhat.com>

On Thu, Sep 05, 2019 at 08:27:35PM +0800, Jason Wang wrote:
> It was reported that metadata acceleration introduces several issues,
> so this patch reverts commit ff466032dc9e5a61217f22ea34b2df932786bbfc,
> 73f628ec9e6bcc45b77c53fe6d0c0ec55eaf82af and
> 0b4a7092ffe568a55bf8f3cefdf79ff666586d91.
> 
> We will rework it on the next version.
> 
> Cc: Jason Gunthorpe <jgg@mellanox.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>


I am confused by the above.
What I see upstream is 7f466032dc.

commit 7f466032dc9e5a61217f22ea34b2df932786bbfc
Author: Jason Wang <jasowang@redhat.com>
Date:   Fri May 24 04:12:18 2019 -0400

    vhost: access vq metadata through kernel virtual address

so this is what I reverted.

Pls take a look, and let me know if you see issues.

Thanks!

> ---
>  drivers/vhost/vhost.c | 515 +-----------------------------------------
>  drivers/vhost/vhost.h |  41 ----
>  2 files changed, 3 insertions(+), 553 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 0536f8526359..791562e03fe0 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -298,160 +298,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
>  		__vhost_vq_meta_reset(d->vqs[i]);
>  }
>  
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -static void vhost_map_unprefetch(struct vhost_map *map)
> -{
> -	kfree(map->pages);
> -	map->pages = NULL;
> -	map->npages = 0;
> -	map->addr = NULL;
> -}
> -
> -static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
> -{
> -	struct vhost_map *map[VHOST_NUM_ADDRS];
> -	int i;
> -
> -	spin_lock(&vq->mmu_lock);
> -	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
> -		map[i] = rcu_dereference_protected(vq->maps[i],
> -				  lockdep_is_held(&vq->mmu_lock));
> -		if (map[i])
> -			rcu_assign_pointer(vq->maps[i], NULL);
> -	}
> -	spin_unlock(&vq->mmu_lock);
> -
> -	synchronize_rcu();
> -
> -	for (i = 0; i < VHOST_NUM_ADDRS; i++)
> -		if (map[i])
> -			vhost_map_unprefetch(map[i]);
> -
> -}
> -
> -static void vhost_reset_vq_maps(struct vhost_virtqueue *vq)
> -{
> -	int i;
> -
> -	vhost_uninit_vq_maps(vq);
> -	for (i = 0; i < VHOST_NUM_ADDRS; i++)
> -		vq->uaddrs[i].size = 0;
> -}
> -
> -static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr,
> -				     unsigned long start,
> -				     unsigned long end)
> -{
> -	if (unlikely(!uaddr->size))
> -		return false;
> -
> -	return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
> -}
> -
> -static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
> -				      int index,
> -				      unsigned long start,
> -				      unsigned long end)
> -{
> -	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
> -	struct vhost_map *map;
> -	int i;
> -
> -	if (!vhost_map_range_overlap(uaddr, start, end))
> -		return;
> -
> -	spin_lock(&vq->mmu_lock);
> -	++vq->invalidate_count;
> -
> -	map = rcu_dereference_protected(vq->maps[index],
> -					lockdep_is_held(&vq->mmu_lock));
> -	if (map) {
> -		if (uaddr->write) {
> -			for (i = 0; i < map->npages; i++)
> -				set_page_dirty(map->pages[i]);
> -		}
> -		rcu_assign_pointer(vq->maps[index], NULL);
> -	}
> -	spin_unlock(&vq->mmu_lock);
> -
> -	if (map) {
> -		synchronize_rcu();
> -		vhost_map_unprefetch(map);
> -	}
> -}
> -
> -static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
> -				    int index,
> -				    unsigned long start,
> -				    unsigned long end)
> -{
> -	if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end))
> -		return;
> -
> -	spin_lock(&vq->mmu_lock);
> -	--vq->invalidate_count;
> -	spin_unlock(&vq->mmu_lock);
> -}
> -
> -static int vhost_invalidate_range_start(struct mmu_notifier *mn,
> -					const struct mmu_notifier_range *range)
> -{
> -	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
> -					     mmu_notifier);
> -	int i, j;
> -
> -	if (!mmu_notifier_range_blockable(range))
> -		return -EAGAIN;
> -
> -	for (i = 0; i < dev->nvqs; i++) {
> -		struct vhost_virtqueue *vq = dev->vqs[i];
> -
> -		for (j = 0; j < VHOST_NUM_ADDRS; j++)
> -			vhost_invalidate_vq_start(vq, j,
> -						  range->start,
> -						  range->end);
> -	}
> -
> -	return 0;
> -}
> -
> -static void vhost_invalidate_range_end(struct mmu_notifier *mn,
> -				       const struct mmu_notifier_range *range)
> -{
> -	struct vhost_dev *dev = container_of(mn, struct vhost_dev,
> -					     mmu_notifier);
> -	int i, j;
> -
> -	for (i = 0; i < dev->nvqs; i++) {
> -		struct vhost_virtqueue *vq = dev->vqs[i];
> -
> -		for (j = 0; j < VHOST_NUM_ADDRS; j++)
> -			vhost_invalidate_vq_end(vq, j,
> -						range->start,
> -						range->end);
> -	}
> -}
> -
> -static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
> -	.invalidate_range_start = vhost_invalidate_range_start,
> -	.invalidate_range_end = vhost_invalidate_range_end,
> -};
> -
> -static void vhost_init_maps(struct vhost_dev *dev)
> -{
> -	struct vhost_virtqueue *vq;
> -	int i, j;
> -
> -	dev->mmu_notifier.ops = &vhost_mmu_notifier_ops;
> -
> -	for (i = 0; i < dev->nvqs; ++i) {
> -		vq = dev->vqs[i];
> -		for (j = 0; j < VHOST_NUM_ADDRS; j++)
> -			RCU_INIT_POINTER(vq->maps[j], NULL);
> -	}
> -}
> -#endif
> -
>  static void vhost_vq_reset(struct vhost_dev *dev,
>  			   struct vhost_virtqueue *vq)
>  {
> @@ -480,11 +326,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>  	vq->busyloop_timeout = 0;
>  	vq->umem = NULL;
>  	vq->iotlb = NULL;
> -	vq->invalidate_count = 0;
>  	__vhost_vq_meta_reset(vq);
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	vhost_reset_vq_maps(vq);
> -#endif
>  }
>  
>  static int vhost_worker(void *data)
> @@ -634,9 +476,7 @@ void vhost_dev_init(struct vhost_dev *dev,
>  	INIT_LIST_HEAD(&dev->read_list);
>  	INIT_LIST_HEAD(&dev->pending_list);
>  	spin_lock_init(&dev->iotlb_lock);
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	vhost_init_maps(dev);
> -#endif
> +
>  
>  	for (i = 0; i < dev->nvqs; ++i) {
>  		vq = dev->vqs[i];
> @@ -645,7 +485,6 @@ void vhost_dev_init(struct vhost_dev *dev,
>  		vq->heads = NULL;
>  		vq->dev = dev;
>  		mutex_init(&vq->mutex);
> -		spin_lock_init(&vq->mmu_lock);
>  		vhost_vq_reset(dev, vq);
>  		if (vq->handle_kick)
>  			vhost_poll_init(&vq->poll, vq->handle_kick,
> @@ -725,18 +564,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>  	if (err)
>  		goto err_cgroup;
>  
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	err = mmu_notifier_register(&dev->mmu_notifier, dev->mm);
> -	if (err)
> -		goto err_mmu_notifier;
> -#endif
> -
>  	return 0;
> -
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -err_mmu_notifier:
> -	vhost_dev_free_iovecs(dev);
> -#endif
>  err_cgroup:
>  	kthread_stop(worker);
>  	dev->worker = NULL;
> @@ -827,107 +655,6 @@ static void vhost_clear_msg(struct vhost_dev *dev)
>  	spin_unlock(&dev->iotlb_lock);
>  }
>  
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -static void vhost_setup_uaddr(struct vhost_virtqueue *vq,
> -			      int index, unsigned long uaddr,
> -			      size_t size, bool write)
> -{
> -	struct vhost_uaddr *addr = &vq->uaddrs[index];
> -
> -	addr->uaddr = uaddr;
> -	addr->size = size;
> -	addr->write = write;
> -}
> -
> -static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq)
> -{
> -	vhost_setup_uaddr(vq, VHOST_ADDR_DESC,
> -			  (unsigned long)vq->desc,
> -			  vhost_get_desc_size(vq, vq->num),
> -			  false);
> -	vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL,
> -			  (unsigned long)vq->avail,
> -			  vhost_get_avail_size(vq, vq->num),
> -			  false);
> -	vhost_setup_uaddr(vq, VHOST_ADDR_USED,
> -			  (unsigned long)vq->used,
> -			  vhost_get_used_size(vq, vq->num),
> -			  true);
> -}
> -
> -static int vhost_map_prefetch(struct vhost_virtqueue *vq,
> -			       int index)
> -{
> -	struct vhost_map *map;
> -	struct vhost_uaddr *uaddr = &vq->uaddrs[index];
> -	struct page **pages;
> -	int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE);
> -	int npinned;
> -	void *vaddr, *v;
> -	int err;
> -	int i;
> -
> -	spin_lock(&vq->mmu_lock);
> -
> -	err = -EFAULT;
> -	if (vq->invalidate_count)
> -		goto err;
> -
> -	err = -ENOMEM;
> -	map = kmalloc(sizeof(*map), GFP_ATOMIC);
> -	if (!map)
> -		goto err;
> -
> -	pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC);
> -	if (!pages)
> -		goto err_pages;
> -
> -	err = EFAULT;
> -	npinned = __get_user_pages_fast(uaddr->uaddr, npages,
> -					uaddr->write, pages);
> -	if (npinned > 0)
> -		release_pages(pages, npinned);
> -	if (npinned != npages)
> -		goto err_gup;
> -
> -	for (i = 0; i < npinned; i++)
> -		if (PageHighMem(pages[i]))
> -			goto err_gup;
> -
> -	vaddr = v = page_address(pages[0]);
> -
> -	/* For simplicity, fallback to userspace address if VA is not
> -	 * contigious.
> -	 */
> -	for (i = 1; i < npinned; i++) {
> -		v += PAGE_SIZE;
> -		if (v != page_address(pages[i]))
> -			goto err_gup;
> -	}
> -
> -	map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1));
> -	map->npages = npages;
> -	map->pages = pages;
> -
> -	rcu_assign_pointer(vq->maps[index], map);
> -	/* No need for a synchronize_rcu(). This function should be
> -	 * called by dev->worker so we are serialized with all
> -	 * readers.
> -	 */
> -	spin_unlock(&vq->mmu_lock);
> -
> -	return 0;
> -
> -err_gup:
> -	kfree(pages);
> -err_pages:
> -	kfree(map);
> -err:
> -	spin_unlock(&vq->mmu_lock);
> -	return err;
> -}
> -#endif
> -
>  void vhost_dev_cleanup(struct vhost_dev *dev)
>  {
>  	int i;
> @@ -957,16 +684,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
>  		kthread_stop(dev->worker);
>  		dev->worker = NULL;
>  	}
> -	if (dev->mm) {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -		mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
> -#endif
> +	if (dev->mm)
>  		mmput(dev->mm);
> -	}
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	for (i = 0; i < dev->nvqs; i++)
> -		vhost_uninit_vq_maps(dev->vqs[i]);
> -#endif
>  	dev->mm = NULL;
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
> @@ -1195,26 +914,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
>  
>  static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_used *used;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> -		if (likely(map)) {
> -			used = map->addr;
> -			*((__virtio16 *)&used->ring[vq->num]) =
> -				cpu_to_vhost16(vq, vq->avail_idx);
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
>  			      vhost_avail_event(vq));
>  }
> @@ -1223,27 +922,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
>  				 struct vring_used_elem *head, int idx,
>  				 int count)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_used *used;
> -	size_t size;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> -		if (likely(map)) {
> -			used = map->addr;
> -			size = count * sizeof(*head);
> -			memcpy(used->ring + idx, head, size);
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_copy_to_user(vq, vq->used->ring + idx, head,
>  				  count * sizeof(*head));
>  }
> @@ -1251,25 +929,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
>  static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
>  
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_used *used;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> -		if (likely(map)) {
> -			used = map->addr;
> -			used->flags = cpu_to_vhost16(vq, vq->used_flags);
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
>  			      &vq->used->flags);
>  }
> @@ -1277,25 +936,6 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
>  static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
>  
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_used *used;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> -		if (likely(map)) {
> -			used = map->addr;
> -			used->idx = cpu_to_vhost16(vq, vq->last_used_idx);
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
>  			      &vq->used->idx);
>  }
> @@ -1341,50 +981,12 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
>  static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
>  				      __virtio16 *idx)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_avail *avail;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> -		if (likely(map)) {
> -			avail = map->addr;
> -			*idx = avail->idx;
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_get_avail(vq, *idx, &vq->avail->idx);
>  }
>  
>  static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
>  				       __virtio16 *head, int idx)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_avail *avail;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> -		if (likely(map)) {
> -			avail = map->addr;
> -			*head = avail->ring[idx & (vq->num - 1)];
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_get_avail(vq, *head,
>  			       &vq->avail->ring[idx & (vq->num - 1)]);
>  }
> @@ -1392,98 +994,24 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
>  static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
>  					__virtio16 *flags)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_avail *avail;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> -		if (likely(map)) {
> -			avail = map->addr;
> -			*flags = avail->flags;
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_get_avail(vq, *flags, &vq->avail->flags);
>  }
>  
>  static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
>  				       __virtio16 *event)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_avail *avail;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
> -		if (likely(map)) {
> -			avail = map->addr;
> -			*event = (__virtio16)avail->ring[vq->num];
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_get_avail(vq, *event, vhost_used_event(vq));
>  }
>  
>  static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
>  				     __virtio16 *idx)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_used *used;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_USED]);
> -		if (likely(map)) {
> -			used = map->addr;
> -			*idx = used->idx;
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_get_used(vq, *idx, &vq->used->idx);
>  }
>  
>  static inline int vhost_get_desc(struct vhost_virtqueue *vq,
>  				 struct vring_desc *desc, int idx)
>  {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	struct vhost_map *map;
> -	struct vring_desc *d;
> -
> -	if (!vq->iotlb) {
> -		rcu_read_lock();
> -
> -		map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]);
> -		if (likely(map)) {
> -			d = map->addr;
> -			*desc = *(d + idx);
> -			rcu_read_unlock();
> -			return 0;
> -		}
> -
> -		rcu_read_unlock();
> -	}
> -#endif
> -
>  	return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
>  }
>  
> @@ -1824,32 +1352,12 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq,
>  	return true;
>  }
>  
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq)
> -{
> -	struct vhost_map __rcu *map;
> -	int i;
> -
> -	for (i = 0; i < VHOST_NUM_ADDRS; i++) {
> -		rcu_read_lock();
> -		map = rcu_dereference(vq->maps[i]);
> -		rcu_read_unlock();
> -		if (unlikely(!map))
> -			vhost_map_prefetch(vq, i);
> -	}
> -}
> -#endif
> -
>  int vq_meta_prefetch(struct vhost_virtqueue *vq)
>  {
>  	unsigned int num = vq->num;
>  
> -	if (!vq->iotlb) {
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -		vhost_vq_map_prefetch(vq);
> -#endif
> +	if (!vq->iotlb)
>  		return 1;
> -	}
>  
>  	return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
>  			       vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
> @@ -2060,16 +1568,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
>  
>  	mutex_lock(&vq->mutex);
>  
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	/* Unregister MMU notifer to allow invalidation callback
> -	 * can access vq->uaddrs[] without holding a lock.
> -	 */
> -	if (d->mm)
> -		mmu_notifier_unregister(&d->mmu_notifier, d->mm);
> -
> -	vhost_uninit_vq_maps(vq);
> -#endif
> -
>  	switch (ioctl) {
>  	case VHOST_SET_VRING_NUM:
>  		r = vhost_vring_set_num(d, vq, argp);
> @@ -2081,13 +1579,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
>  		BUG();
>  	}
>  
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	vhost_setup_vq_uaddr(vq);
> -
> -	if (d->mm)
> -		mmu_notifier_register(&d->mmu_notifier, d->mm);
> -#endif
> -
>  	mutex_unlock(&vq->mutex);
>  
>  	return r;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 42a8c2a13ab1..e9ed2722b633 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -12,9 +12,6 @@
>  #include <linux/virtio_config.h>
>  #include <linux/virtio_ring.h>
>  #include <linux/atomic.h>
> -#include <linux/pagemap.h>
> -#include <linux/mmu_notifier.h>
> -#include <asm/cacheflush.h>
>  
>  struct vhost_work;
>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
> @@ -83,24 +80,6 @@ enum vhost_uaddr_type {
>  	VHOST_NUM_ADDRS = 3,
>  };
>  
> -struct vhost_map {
> -	int npages;
> -	void *addr;
> -	struct page **pages;
> -};
> -
> -struct vhost_uaddr {
> -	unsigned long uaddr;
> -	size_t size;
> -	bool write;
> -};
> -
> -#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
> -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
> -#else
> -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
> -#endif
> -
>  /* The virtqueue structure describes a queue attached to a device. */
>  struct vhost_virtqueue {
>  	struct vhost_dev *dev;
> @@ -111,22 +90,7 @@ struct vhost_virtqueue {
>  	struct vring_desc __user *desc;
>  	struct vring_avail __user *avail;
>  	struct vring_used __user *used;
> -
> -#if VHOST_ARCH_CAN_ACCEL_UACCESS
> -	/* Read by memory accessors, modified by meta data
> -	 * prefetching, MMU notifier and vring ioctl().
> -	 * Synchonrized through mmu_lock (writers) and RCU (writers
> -	 * and readers).
> -	 */
> -	struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
> -	/* Read by MMU notifier, modified by vring ioctl(),
> -	 * synchronized through MMU notifier
> -	 * registering/unregistering.
> -	 */
> -	struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
> -#endif
>  	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
> -
>  	struct file *kick;
>  	struct eventfd_ctx *call_ctx;
>  	struct eventfd_ctx *error_ctx;
> @@ -181,8 +145,6 @@ struct vhost_virtqueue {
>  	bool user_be;
>  #endif
>  	u32 busyloop_timeout;
> -	spinlock_t mmu_lock;
> -	int invalidate_count;
>  };
>  
>  struct vhost_msg_node {
> @@ -196,9 +158,6 @@ struct vhost_msg_node {
>  
>  struct vhost_dev {
>  	struct mm_struct *mm;
> -#ifdef CONFIG_MMU_NOTIFIER
> -	struct mmu_notifier mmu_notifier;
> -#endif
>  	struct mutex mutex;
>  	struct vhost_virtqueue **vqs;
>  	int nvqs;
> -- 
> 2.19.1

^ permalink raw reply

* Re: [PATCH] Bluetooth: hidp: Fix error checks in hidp_get/set_raw_report
From: Marcel Holtmann @ 2019-09-06 13:56 UTC (permalink / raw)
  To: Dan Elkouby
  Cc: Johan Hedberg, David S. Miller, Dan Carpenter, Fabian Henneke,
	Brian Norris, Al Viro, Andrea Parri, linux-bluetooth, netdev,
	linux-kernel
In-Reply-To: <20190906094158.8854-1-streetwalkermc@gmail.com>

Hi Dan,

> Commit 48d9cc9d85dd ("Bluetooth: hidp: Let hidp_send_message return
> number of queued bytes") changed hidp_send_message to return non-zero
> values on success, which some other bits did not expect. This caused
> spurious errors to be propagated through the stack, breaking some (all?)
> drivers, such as hid-sony for the Dualshock 4 in Bluetooth mode.
> 
> Signed-off-by: Dan Elkouby <streetwalkermc@gmail.com>
> ---
> net/bluetooth/hidp/core.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)

patch has been applied to bluetooth-next tree.

Regards

Marcel


^ permalink raw reply

* Re: [PATCH] Bluetooth: hidp: Fix assumptions on the return value of hidp_send_message
From: Marcel Holtmann @ 2019-09-06 13:57 UTC (permalink / raw)
  To: Jiri Kosina
  Cc: Dan Elkouby, Dan Carpenter, Benjamin Tissoires, Johan Hedberg,
	David S. Miller, Brian Norris, Fabian Henneke, Al Viro,
	Andrea Parri, linux-input, linux-kernel, linux-bluetooth, netdev
In-Reply-To: <nycvar.YFH.7.76.1909061330390.31470@cbobk.fhfr.pm>

Hi Jiri,

>> hidp_send_message was changed to return non-zero values on success,
>> which some other bits did not expect. This caused spurious errors to be
>> propagated through the stack, breaking some drivers, such as hid-sony
>> for the Dualshock 4 in Bluetooth mode.
>> 
>> As pointed out by Dan Carpenter, hid-microsoft directly relied on that
>> assumption as well.
>> 
>> Fixes: 48d9cc9d85dd ("Bluetooth: hidp: Let hidp_send_message return number of queued bytes")
>> 
>> Signed-off-by: Dan Elkouby <streetwalkermc@gmail.com>
> 
> Reviewed-by: Jiri Kosina <jkosina@suse.cz>
> 
> Marcel, are you taking this through your tree?

I am taking this through my tree. And yes, I applied the updated patch, but answered the other ;)

Regards

Marcel


^ permalink raw reply

* Re: [PATCH] Bluetooth: hidp: Fix error checks in hidp_get/set_raw_report
From: Dan Elkouby @ 2019-09-06 14:05 UTC (permalink / raw)
  To: Marcel Holtmann
  Cc: Johan Hedberg, David S. Miller, Dan Carpenter, Fabian Henneke,
	Brian Norris, Al Viro, Andrea Parri, linux-bluetooth, netdev,
	linux-kernel
In-Reply-To: <440C3662-1870-44D8-B4E3-C290CE154F1E@holtmann.org>

On Fri, Sep 6, 2019 at 4:56 PM Marcel Holtmann <marcel@holtmann.org> wrote:
> patch has been applied to bluetooth-next tree.
>

Thanks a lot!

^ permalink raw reply

* Re: [PATCH] Bluetooth: hidp: Fix error checks in hidp_get/set_raw_report
From: Dan Carpenter @ 2019-09-06 14:08 UTC (permalink / raw)
  To: Marcel Holtmann
  Cc: Dan Elkouby, Johan Hedberg, David S. Miller, Fabian Henneke,
	Brian Norris, Al Viro, Andrea Parri, linux-bluetooth, netdev,
	linux-kernel
In-Reply-To: <440C3662-1870-44D8-B4E3-C290CE154F1E@holtmann.org>

On Fri, Sep 06, 2019 at 03:56:29PM +0200, Marcel Holtmann wrote:
> Hi Dan,
> 
> > Commit 48d9cc9d85dd ("Bluetooth: hidp: Let hidp_send_message return
> > number of queued bytes") changed hidp_send_message to return non-zero
> > values on success, which some other bits did not expect. This caused
> > spurious errors to be propagated through the stack, breaking some (all?)
> > drivers, such as hid-sony for the Dualshock 4 in Bluetooth mode.
> > 
> > Signed-off-by: Dan Elkouby <streetwalkermc@gmail.com>
> > ---
> > net/bluetooth/hidp/core.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> 
> patch has been applied to bluetooth-next tree.
> 

The v2 added an additional fix and used the Fixes tag.  Could you apply
that instead?

regards,
dan carpenter


^ permalink raw reply

* Re: [PATCH] hostap: remove set but not used variable 'copied' in prism2_io_debug_proc_read
From: Kalle Valo @ 2019-09-06 14:10 UTC (permalink / raw)
  To: zhong jiang; +Cc: davem, zhongjiang, linux-wireless, netdev, linux-kernel
In-Reply-To: <1567497430-22539-1-git-send-email-zhongjiang@huawei.com>

zhong jiang <zhongjiang@huawei.com> wrote:

> Obviously, variable 'copied' is initialized to zero. But it is not used.
> hence just remove it.
> 
> Signed-off-by: zhong jiang <zhongjiang@huawei.com>

Patch applied to wireless-drivers-next.git, thanks.

64827a6ac049 hostap: remove set but not used variable 'copied' in prism2_io_debug_proc_read

-- 
https://patchwork.kernel.org/patch/11127357/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches


^ permalink raw reply

* Re: [PATCH] Bluetooth: hidp: Fix error checks in hidp_get/set_raw_report
From: Marcel Holtmann @ 2019-09-06 14:12 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Dan Elkouby, Johan Hedberg, David S. Miller, Fabian Henneke,
	Brian Norris, Al Viro, Andrea Parri, linux-bluetooth, netdev,
	linux-kernel
In-Reply-To: <20190906140744.GC14147@kadam>

Hi Dan,

>>> Commit 48d9cc9d85dd ("Bluetooth: hidp: Let hidp_send_message return
>>> number of queued bytes") changed hidp_send_message to return non-zero
>>> values on success, which some other bits did not expect. This caused
>>> spurious errors to be propagated through the stack, breaking some (all?)
>>> drivers, such as hid-sony for the Dualshock 4 in Bluetooth mode.
>>> 
>>> Signed-off-by: Dan Elkouby <streetwalkermc@gmail.com>
>>> ---
>>> net/bluetooth/hidp/core.c | 4 ++--
>>> 1 file changed, 2 insertions(+), 2 deletions(-)
>> 
>> patch has been applied to bluetooth-next tree.
>> 
> 
> The v2 added an additional fix and used the Fixes tag.  Could you apply
> that instead?

see my reply to Jiri. I replied to the wrong patch, but actually applied to the updated one.

Regards

Marcel


^ permalink raw reply

* [PATCH net-next 1/5] enetc: Fix if_mode extraction
From: Claudiu Manoil @ 2019-09-06 14:15 UTC (permalink / raw)
  To: David S . Miller; +Cc: alexandru.marginean, netdev
In-Reply-To: <1567779344-30965-1-git-send-email-claudiu.manoil@nxp.com>

Fix handling of error return code. Before this fix,
the error code was handled as unsigned type.
Also, on this path if if_mode not found then just handle
it as fixed link (i.e mac2mac connection).

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 7d6513ff8507..3a556646a2fb 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -751,6 +751,7 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
 	struct enetc_pf *pf = enetc_si_priv(priv->si);
 	struct device_node *np = priv->dev->of_node;
 	struct device_node *mdio_np;
+	int phy_mode;
 	int err;
 
 	if (!np) {
@@ -784,17 +785,11 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
 		}
 	}
 
-	priv->if_mode = of_get_phy_mode(np);
-	if (priv->if_mode < 0) {
-		dev_err(priv->dev, "missing phy type\n");
-		of_node_put(priv->phy_node);
-		if (of_phy_is_fixed_link(np))
-			of_phy_deregister_fixed_link(np);
-		else
-			enetc_mdio_remove(pf);
-
-		return -EINVAL;
-	}
+	phy_mode = of_get_phy_mode(np);
+	if (phy_mode < 0)
+		priv->if_mode = PHY_INTERFACE_MODE_NA; /* fixed link */
+	else
+		priv->if_mode = phy_mode;
 
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 2/5] enetc: Make mdio accessors more generic
From: Claudiu Manoil @ 2019-09-06 14:15 UTC (permalink / raw)
  To: David S . Miller; +Cc: alexandru.marginean, netdev
In-Reply-To: <1567779344-30965-1-git-send-email-claudiu.manoil@nxp.com>

Refactoring needed to support multiple MDIO buses.
'mdio_base' - MDIO registers base address - is being parameterized.
The MDIO accessors are made more generic to be able to work with
different MDIO register bases.
Some includes get cleaned up in the process.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
---
 .../net/ethernet/freescale/enetc/enetc_hw.h   |  1 +
 .../net/ethernet/freescale/enetc/enetc_mdio.c | 60 +++++++++++--------
 .../net/ethernet/freescale/enetc/enetc_mdio.h |  2 +-
 .../ethernet/freescale/enetc/enetc_pci_mdio.c |  2 +
 4 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 88276299f447..534de211b243 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -192,6 +192,7 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PFPMR		0x1900
 #define ENETC_PFPMR_PMACE	BIT(1)
 #define ENETC_PFPMR_MWLM	BIT(0)
+#define ENETC_EMDIO_BASE	0x1c00
 #define ENETC_PSIUMHFR0(n, err)	(((err) ? 0x1d08 : 0x1d00) + (n) * 0x10)
 #define ENETC_PSIUMHFR1(n)	(0x1d04 + (n) * 0x10)
 #define ENETC_PSIMMHFR0(n, err)	(((err) ? 0x1d00 : 0x1d08) + (n) * 0x10)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
index 149883c8f0b8..c9a27e7fe5a7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
@@ -6,19 +6,30 @@
 #include <linux/iopoll.h>
 #include <linux/of.h>
 
+#include "enetc_pf.h"
 #include "enetc_mdio.h"
 
-#define	ENETC_MDIO_REG_OFFSET	0x1c00
 #define	ENETC_MDIO_CFG	0x0	/* MDIO configuration and status */
 #define	ENETC_MDIO_CTL	0x4	/* MDIO control */
 #define	ENETC_MDIO_DATA	0x8	/* MDIO data */
 #define	ENETC_MDIO_ADDR	0xc	/* MDIO address */
 
-#define enetc_mdio_rd(hw, off) \
-	enetc_port_rd(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET)
-#define enetc_mdio_wr(hw, off, val) \
-	enetc_port_wr(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET, val)
-#define enetc_mdio_rd_reg(off)	enetc_mdio_rd(hw, off)
+static inline u32 _enetc_mdio_rd(struct enetc_mdio_priv *mdio_priv, int off)
+{
+	return enetc_port_rd(mdio_priv->hw, mdio_priv->mdio_base + off);
+}
+
+static inline void _enetc_mdio_wr(struct enetc_mdio_priv *mdio_priv, int off,
+				  u32 val)
+{
+	enetc_port_wr(mdio_priv->hw, mdio_priv->mdio_base + off, val);
+}
+
+#define enetc_mdio_rd(mdio_priv, off) \
+	_enetc_mdio_rd(mdio_priv, ENETC_##off)
+#define enetc_mdio_wr(mdio_priv, off, val) \
+	_enetc_mdio_wr(mdio_priv, ENETC_##off, val)
+#define enetc_mdio_rd_reg(off)	enetc_mdio_rd(mdio_priv, off)
 
 #define ENETC_MDC_DIV		258
 
@@ -35,7 +46,7 @@
 #define MDIO_DATA(x)		((x) & 0xffff)
 
 #define TIMEOUT	1000
-static int enetc_mdio_wait_complete(struct enetc_hw *hw)
+static int enetc_mdio_wait_complete(struct enetc_mdio_priv *mdio_priv)
 {
 	u32 val;
 
@@ -46,7 +57,6 @@ static int enetc_mdio_wait_complete(struct enetc_hw *hw)
 int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
 {
 	struct enetc_mdio_priv *mdio_priv = bus->priv;
-	struct enetc_hw *hw = mdio_priv->hw;
 	u32 mdio_ctl, mdio_cfg;
 	u16 dev_addr;
 	int ret;
@@ -61,29 +71,29 @@ int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
 		mdio_cfg &= ~MDIO_CFG_ENC45;
 	}
 
-	enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
+	enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg);
 
-	ret = enetc_mdio_wait_complete(hw);
+	ret = enetc_mdio_wait_complete(mdio_priv);
 	if (ret)
 		return ret;
 
 	/* set port and dev addr */
 	mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
-	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
+	enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl);
 
 	/* set the register address */
 	if (regnum & MII_ADDR_C45) {
-		enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
+		enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff);
 
-		ret = enetc_mdio_wait_complete(hw);
+		ret = enetc_mdio_wait_complete(mdio_priv);
 		if (ret)
 			return ret;
 	}
 
 	/* write the value */
-	enetc_mdio_wr(hw, MDIO_DATA, MDIO_DATA(value));
+	enetc_mdio_wr(mdio_priv, MDIO_DATA, MDIO_DATA(value));
 
-	ret = enetc_mdio_wait_complete(hw);
+	ret = enetc_mdio_wait_complete(mdio_priv);
 	if (ret)
 		return ret;
 
@@ -93,7 +103,6 @@ int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
 int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 {
 	struct enetc_mdio_priv *mdio_priv = bus->priv;
-	struct enetc_hw *hw = mdio_priv->hw;
 	u32 mdio_ctl, mdio_cfg;
 	u16 dev_addr, value;
 	int ret;
@@ -107,41 +116,41 @@ int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 		mdio_cfg &= ~MDIO_CFG_ENC45;
 	}
 
-	enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
+	enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg);
 
-	ret = enetc_mdio_wait_complete(hw);
+	ret = enetc_mdio_wait_complete(mdio_priv);
 	if (ret)
 		return ret;
 
 	/* set port and device addr */
 	mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
-	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
+	enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl);
 
 	/* set the register address */
 	if (regnum & MII_ADDR_C45) {
-		enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
+		enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff);
 
-		ret = enetc_mdio_wait_complete(hw);
+		ret = enetc_mdio_wait_complete(mdio_priv);
 		if (ret)
 			return ret;
 	}
 
 	/* initiate the read */
-	enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl | MDIO_CTL_READ);
+	enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl | MDIO_CTL_READ);
 
-	ret = enetc_mdio_wait_complete(hw);
+	ret = enetc_mdio_wait_complete(mdio_priv);
 	if (ret)
 		return ret;
 
 	/* return all Fs if nothing was there */
-	if (enetc_mdio_rd(hw, MDIO_CFG) & MDIO_CFG_RD_ER) {
+	if (enetc_mdio_rd(mdio_priv, MDIO_CFG) & MDIO_CFG_RD_ER) {
 		dev_dbg(&bus->dev,
 			"Error while reading PHY%d reg at %d.%hhu\n",
 			phy_id, dev_addr, regnum);
 		return 0xffff;
 	}
 
-	value = enetc_mdio_rd(hw, MDIO_DATA) & 0xffff;
+	value = enetc_mdio_rd(mdio_priv, MDIO_DATA) & 0xffff;
 
 	return value;
 }
@@ -164,6 +173,7 @@ int enetc_mdio_probe(struct enetc_pf *pf)
 	bus->parent = dev;
 	mdio_priv = bus->priv;
 	mdio_priv->hw = &pf->si->hw;
+	mdio_priv->mdio_base = ENETC_EMDIO_BASE;
 	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
 
 	np = of_get_child_by_name(dev->of_node, "mdio");
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
index 60c9a3889824..a8ea3607c7bf 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
@@ -2,10 +2,10 @@
 /* Copyright 2019 NXP */
 
 #include <linux/phy.h>
-#include "enetc_pf.h"
 
 struct enetc_mdio_priv {
 	struct enetc_hw *hw;
+	int mdio_base;
 };
 
 int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
index fbd41ce01f06..e12159ac1fa6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /* Copyright 2019 NXP */
 #include <linux/of_mdio.h>
+#include "enetc_pf.h"
 #include "enetc_mdio.h"
 
 #define ENETC_MDIO_DEV_ID	0xee01
@@ -31,6 +32,7 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev,
 	bus->parent = dev;
 	mdio_priv = bus->priv;
 	mdio_priv->hw = hw;
+	mdio_priv->mdio_base = ENETC_EMDIO_BASE;
 	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
 
 	pcie_flr(pdev);
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 3/5] enetc: Initialize SerDes for SGMII and SXGMII protocols
From: Claudiu Manoil @ 2019-09-06 14:15 UTC (permalink / raw)
  To: David S . Miller; +Cc: alexandru.marginean, netdev
In-Reply-To: <1567779344-30965-1-git-send-email-claudiu.manoil@nxp.com>

ENETC has ethernet MACs capable of SGMII and SXGMII but
in order to use these protocols some serdes configurations
need to be performed.
The serdes is configurable via an internal MDIO bus
connected to an internal PCS device, all reads/writes are
performed at address 0.
This patch basically removes the dependency on bootloader
regarding serdes initialization.

Signed-off-by: Alex Marginean <alexandru.marginean@nxp.com>
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
---
 .../net/ethernet/freescale/enetc/enetc_hw.h   | 17 ++++++
 .../net/ethernet/freescale/enetc/enetc_mdio.c | 31 ++++++++++
 .../net/ethernet/freescale/enetc/enetc_pf.c   | 58 +++++++++++++++++++
 .../net/ethernet/freescale/enetc/enetc_pf.h   |  2 +
 4 files changed, 108 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 534de211b243..ced3693dabdb 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -215,6 +215,23 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM0_MAXFRM	0x8014
 #define ENETC_SET_TX_MTU(val)	((val) << 16)
 #define ENETC_SET_MAXFRM(val)	((val) & 0xffff)
+
+#define ENETC_PM_IMDIO_BASE	0x8030
+/* PCS registers */
+#define ENETC_PCS_CR			0x0
+#define ENETC_PCS_CR_RESET_AN		0x1200
+#define ENETC_PCS_CR_DEF_VAL		0x0140
+#define ENETC_PCS_CR_LANE_RESET		0x8000
+#define ENETC_PCS_DEV_ABILITY		0x04
+#define ENETC_PCS_DEV_ABILITY_SGMII	0x4001
+#define ENETC_PCS_DEV_ABILITY_SXGMII	0x5001
+#define ENETC_PCS_LINK_TIMER1		0x12
+#define ENETC_PCS_LINK_TIMER1_VAL	0x06a0
+#define ENETC_PCS_LINK_TIMER2		0x13
+#define ENETC_PCS_LINK_TIMER2_VAL	0x0003
+#define ENETC_PCS_IF_MODE		0x14
+#define ENETC_PCS_IF_MODE_SGMII_AN	0x0003
+
 #define ENETC_PM0_IF_MODE	0x8300
 #define ENETC_PMO_IFM_RG	BIT(2)
 #define ENETC_PM0_IFM_RLP	(BIT(5) | BIT(11))
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
index c9a27e7fe5a7..0190fcb0c371 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
@@ -200,3 +200,34 @@ void enetc_mdio_remove(struct enetc_pf *pf)
 	if (pf->mdio)
 		mdiobus_unregister(pf->mdio);
 }
+
+int enetc_imdio_init(struct enetc_pf *pf)
+{
+	struct device *dev = &pf->si->pdev->dev;
+	struct enetc_mdio_priv *mdio_priv;
+	struct mii_bus *bus;
+	int err;
+
+	bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = "FSL ENETC internal MDIO Bus";
+	bus->read = enetc_mdio_read;
+	bus->write = enetc_mdio_write;
+	bus->parent = dev;
+	mdio_priv = bus->priv;
+	mdio_priv->hw = &pf->si->hw;
+	mdio_priv->mdio_base = ENETC_PM_IMDIO_BASE;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+
+	err = mdiobus_register(bus);
+	if (err) {
+		dev_err(dev, "cannot register internal MDIO bus\n");
+		return err;
+	}
+
+	pf->imdio = bus;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 3a556646a2fb..9e9bb6b97c41 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -804,6 +804,60 @@ static void enetc_of_put_phy(struct enetc_ndev_priv *priv)
 		of_node_put(priv->phy_node);
 }
 
+static void enetc_configure_sgmii(struct mii_bus *imdio)
+{
+	/* Set to SGMII mode, use AN */
+	imdio->write(imdio, 0, ENETC_PCS_IF_MODE,
+		     ENETC_PCS_IF_MODE_SGMII_AN);
+
+	/* Dev ability - SGMII */
+	imdio->write(imdio, 0, ENETC_PCS_DEV_ABILITY,
+		     ENETC_PCS_DEV_ABILITY_SGMII);
+
+	/* Adjust link timer for SGMII */
+	imdio->write(imdio, 0, ENETC_PCS_LINK_TIMER1,
+		     ENETC_PCS_LINK_TIMER1_VAL);
+	imdio->write(imdio, 0, ENETC_PCS_LINK_TIMER2,
+		     ENETC_PCS_LINK_TIMER2_VAL);
+
+	/* restart PCS AN */
+	imdio->write(imdio, 0, ENETC_PCS_CR,
+		     ENETC_PCS_CR_RESET_AN | ENETC_PCS_CR_DEF_VAL);
+}
+
+static void enetc_configure_sxgmii(struct mii_bus *imdio)
+{
+	/* Dev ability - SXGMII */
+	imdio->write(imdio, 0, ENETC_PCS_DEV_ABILITY | MII_ADDR_C45,
+		     ENETC_PCS_DEV_ABILITY_SXGMII);
+
+	/* Restart PCS AN */
+	imdio->write(imdio, 0, ENETC_PCS_CR | MII_ADDR_C45,
+		     ENETC_PCS_CR_LANE_RESET | ENETC_PCS_CR_RESET_AN);
+}
+
+static int enetc_configure_serdes(struct enetc_ndev_priv *priv)
+{
+	struct enetc_pf *pf = enetc_si_priv(priv->si);
+	int err;
+
+	if (priv->if_mode != PHY_INTERFACE_MODE_SGMII &&
+	    priv->if_mode != PHY_INTERFACE_MODE_XGMII)
+		return 0;
+
+	err = enetc_imdio_init(pf);
+	if (err)
+		return err;
+
+	if (priv->if_mode == PHY_INTERFACE_MODE_SGMII)
+		enetc_configure_sgmii(pf->imdio);
+
+	if (priv->if_mode == PHY_INTERFACE_MODE_XGMII)
+		enetc_configure_sxgmii(pf->imdio);
+
+	return 0;
+}
+
 static int enetc_pf_probe(struct pci_dev *pdev,
 			  const struct pci_device_id *ent)
 {
@@ -868,6 +922,10 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	if (err)
 		dev_warn(&pdev->dev, "Fallback to PHY-less operation\n");
 
+	err = enetc_configure_serdes(priv);
+	if (err)
+		dev_warn(&pdev->dev, "Attempted serdes config but failed\n");
+
 	err = register_netdev(ndev);
 	if (err)
 		goto err_reg_netdev;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
index 10dd1b53bb08..1357cdb71d64 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
@@ -44,6 +44,7 @@ struct enetc_pf {
 	DECLARE_BITMAP(active_vlans, VLAN_N_VID);
 
 	struct mii_bus *mdio; /* saved for cleanup */
+	struct mii_bus *imdio;
 };
 
 int enetc_msg_psi_init(struct enetc_pf *pf);
@@ -53,3 +54,4 @@ void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int mbox_id, u16 *status);
 /* MDIO */
 int enetc_mdio_probe(struct enetc_pf *pf);
 void enetc_mdio_remove(struct enetc_pf *pf);
+int enetc_imdio_init(struct enetc_pf *pf);
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 4/5] enetc: Drop redundant device node check
From: Claudiu Manoil @ 2019-09-06 14:15 UTC (permalink / raw)
  To: David S . Miller; +Cc: alexandru.marginean, netdev
In-Reply-To: <1567779344-30965-1-git-send-email-claudiu.manoil@nxp.com>

The existence of the DT port node is the first thing checked
at probe time, and probing won't continue if the node is missing.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 9e9bb6b97c41..ebf2996ebe69 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -754,11 +754,6 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
 	int phy_mode;
 	int err;
 
-	if (!np) {
-		dev_err(priv->dev, "missing ENETC port node\n");
-		return -ENODEV;
-	}
-
 	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
 	if (!priv->phy_node) {
 		if (!of_phy_is_fixed_link(np)) {
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 5/5] enetc: Use DT protocol information to set up the ports
From: Claudiu Manoil @ 2019-09-06 14:15 UTC (permalink / raw)
  To: David S . Miller; +Cc: alexandru.marginean, netdev
In-Reply-To: <1567779344-30965-1-git-send-email-claudiu.manoil@nxp.com>

From: Alex Marginean <alexandru.marginean@nxp.com>

Use DT information rather than in-band information from bootloader to
set up MAC for XGMII. For RGMII use the DT indication in addition to
RGMII defaults in hardware.
However, this implies that PHY connection information needs to be
extracted before netdevice creation, when the ENETC Port MAC is
being configured.

Signed-off-by: Alex Marginean <alexandru.marginean@nxp.com>
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
---
 .../net/ethernet/freescale/enetc/enetc_pf.c   | 55 ++++++++++---------
 .../net/ethernet/freescale/enetc/enetc_pf.h   |  3 +
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index ebf2996ebe69..dbe9515a89b1 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -507,7 +507,8 @@ static void enetc_port_si_configure(struct enetc_si *si)
 	enetc_port_wr(hw, ENETC_PSIVLANFMR, ENETC_PSIVLANFMR_VS);
 }
 
-static void enetc_configure_port_mac(struct enetc_hw *hw)
+static void enetc_configure_port_mac(struct enetc_hw *hw,
+				     phy_interface_t phy_mode)
 {
 	enetc_port_wr(hw, ENETC_PM0_MAXFRM,
 		      ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE));
@@ -523,9 +524,11 @@ static void enetc_configure_port_mac(struct enetc_hw *hw)
 		      ENETC_PM0_CMD_TXP	| ENETC_PM0_PROMISC |
 		      ENETC_PM0_TX_EN | ENETC_PM0_RX_EN);
 	/* set auto-speed for RGMII */
-	if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG)
+	if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG ||
+	    phy_mode == PHY_INTERFACE_MODE_RGMII)
 		enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_RGAUTO);
-	if (enetc_global_rd(hw, ENETC_G_EPFBLPR(1)) == ENETC_G_EPFBLPR1_XGMII)
+
+	if (phy_mode == PHY_INTERFACE_MODE_XGMII)
 		enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII);
 }
 
@@ -549,7 +552,7 @@ static void enetc_configure_port(struct enetc_pf *pf)
 
 	enetc_configure_port_pmac(hw);
 
-	enetc_configure_port_mac(hw);
+	enetc_configure_port_mac(hw, pf->if_mode);
 
 	enetc_port_si_configure(pf->si);
 
@@ -746,28 +749,28 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
 	enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr);
 }
 
-static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
+static int enetc_of_get_phy(struct enetc_pf *pf)
 {
-	struct enetc_pf *pf = enetc_si_priv(priv->si);
-	struct device_node *np = priv->dev->of_node;
+	struct device *dev = &pf->si->pdev->dev;
+	struct device_node *np = dev->of_node;
 	struct device_node *mdio_np;
 	int phy_mode;
 	int err;
 
-	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
-	if (!priv->phy_node) {
+	pf->phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (!pf->phy_node) {
 		if (!of_phy_is_fixed_link(np)) {
-			dev_err(priv->dev, "PHY not specified\n");
+			dev_err(dev, "PHY not specified\n");
 			return -ENODEV;
 		}
 
 		err = of_phy_register_fixed_link(np);
 		if (err < 0) {
-			dev_err(priv->dev, "fixed link registration failed\n");
+			dev_err(dev, "fixed link registration failed\n");
 			return err;
 		}
 
-		priv->phy_node = of_node_get(np);
+		pf->phy_node = of_node_get(np);
 	}
 
 	mdio_np = of_get_child_by_name(np, "mdio");
@@ -775,28 +778,28 @@ static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
 		of_node_put(mdio_np);
 		err = enetc_mdio_probe(pf);
 		if (err) {
-			of_node_put(priv->phy_node);
+			of_node_put(pf->phy_node);
 			return err;
 		}
 	}
 
 	phy_mode = of_get_phy_mode(np);
 	if (phy_mode < 0)
-		priv->if_mode = PHY_INTERFACE_MODE_NA; /* fixed link */
+		pf->if_mode = PHY_INTERFACE_MODE_NA; /* fixed link */
 	else
-		priv->if_mode = phy_mode;
+		pf->if_mode = phy_mode;
 
 	return 0;
 }
 
-static void enetc_of_put_phy(struct enetc_ndev_priv *priv)
+static void enetc_of_put_phy(struct enetc_pf *pf)
 {
-	struct device_node *np = priv->dev->of_node;
+	struct device_node *np = pf->si->pdev->dev.of_node;
 
 	if (np && of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
-	if (priv->phy_node)
-		of_node_put(priv->phy_node);
+	if (pf->phy_node)
+		of_node_put(pf->phy_node);
 }
 
 static void enetc_configure_sgmii(struct mii_bus *imdio)
@@ -884,6 +887,10 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	pf->si = si;
 	pf->total_vfs = pci_sriov_get_totalvfs(pdev);
 
+	err = enetc_of_get_phy(pf);
+	if (err)
+		dev_warn(&pdev->dev, "Fallback to PHY-less operation\n");
+
 	enetc_configure_port(pf);
 
 	enetc_get_si_caps(si);
@@ -898,6 +905,8 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	enetc_pf_netdev_setup(si, ndev, &enetc_ndev_ops);
 
 	priv = netdev_priv(ndev);
+	priv->phy_node = pf->phy_node;
+	priv->if_mode = pf->if_mode;
 
 	enetc_init_si_rings_params(priv);
 
@@ -913,10 +922,6 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 		goto err_alloc_msix;
 	}
 
-	err = enetc_of_get_phy(priv);
-	if (err)
-		dev_warn(&pdev->dev, "Fallback to PHY-less operation\n");
-
 	err = enetc_configure_serdes(priv);
 	if (err)
 		dev_warn(&pdev->dev, "Attempted serdes config but failed\n");
@@ -933,7 +938,6 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	return 0;
 
 err_reg_netdev:
-	enetc_of_put_phy(priv);
 	enetc_free_msix(priv);
 err_alloc_msix:
 	enetc_free_si_resources(priv);
@@ -941,6 +945,7 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	si->ndev = NULL;
 	free_netdev(ndev);
 err_alloc_netdev:
+	enetc_of_put_phy(pf);
 err_map_pf_space:
 	enetc_pci_remove(pdev);
 
@@ -963,7 +968,7 @@ static void enetc_pf_remove(struct pci_dev *pdev)
 	unregister_netdev(si->ndev);
 
 	enetc_mdio_remove(pf);
-	enetc_of_put_phy(priv);
+	enetc_of_put_phy(pf);
 
 	enetc_free_msix(priv);
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
index 1357cdb71d64..e0346dcf9ed7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
@@ -45,6 +45,9 @@ struct enetc_pf {
 
 	struct mii_bus *mdio; /* saved for cleanup */
 	struct mii_bus *imdio;
+
+	struct device_node *phy_node;
+	phy_interface_t if_mode;
 };
 
 int enetc_msg_psi_init(struct enetc_pf *pf);
-- 
2.17.1


^ permalink raw reply related

* [PATCH net-next 0/5] enetc: Link mode init w/o bootloader
From: Claudiu Manoil @ 2019-09-06 14:15 UTC (permalink / raw)
  To: David S . Miller; +Cc: alexandru.marginean, netdev

The theme of this set is to clear the dependency on bootloader
for PHY link mode protocol init (i.e. SGMII, SXGMII) and MAC
configuration for the ENETC controller.

First patch fixes the DT extracted PHY mode handling.
The second one is a refactoring that prepares the introduction
of the internal MDIO bus.
Internal MDIO bus support is added along with SerDes protocol
configuration routines (3rd patch).
Then after a minor cleanup (patch 4), DT link mode information
is being used to configure the MAC instead of relying on
bootloader configurations.

Alex Marginean (1):
  enetc: Use DT protocol information to set up the ports

Claudiu Manoil (4):
  enetc: Fix if_mode extraction
  enetc: Make mdio accessors more generic
  enetc: Initialize SerDes for SGMII and SXGMII protocols
  enetc: Drop redundant device node check

 .../net/ethernet/freescale/enetc/enetc_hw.h   |  18 +++
 .../net/ethernet/freescale/enetc/enetc_mdio.c |  91 +++++++++----
 .../net/ethernet/freescale/enetc/enetc_mdio.h |   2 +-
 .../ethernet/freescale/enetc/enetc_pci_mdio.c |   2 +
 .../net/ethernet/freescale/enetc/enetc_pf.c   | 127 +++++++++++++-----
 .../net/ethernet/freescale/enetc/enetc_pf.h   |   5 +
 6 files changed, 182 insertions(+), 63 deletions(-)

-- 
2.17.1


^ permalink raw reply

* Re: [PATCH v3 net-next] net: stmmac: Add support for MDIO interrupts
From: Andrew Lunn @ 2019-09-06 14:24 UTC (permalink / raw)
  To: Jose Abreu
  Cc: Voon Weifeng, David S. Miller, Maxime Coquelin,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Giuseppe Cavallaro, Alexandre Torgue, Ong Boon Leong
In-Reply-To: <BN8PR12MB3266D427D1AB8E41B13441B6D3BA0@BN8PR12MB3266.namprd12.prod.outlook.com>

On Fri, Sep 06, 2019 at 01:31:14PM +0000, Jose Abreu wrote:
> From: Voon Weifeng <weifeng.voon@intel.com>
> Date: Sep/05/2019, 13:05:30 (UTC+00:00)
> 
> > DW EQoS v5.xx controllers added capability for interrupt generation
> > when MDIO interface is done (GMII Busy bit is cleared).
> > This patch adds support for this interrupt on supported HW to avoid
> > polling on GMII Busy bit.
> 
> Better leave the enabling of this optional because the support for it is 
> also optional depending on the IP HW configuration.

Hi Jose

If there a register which indicates if this feature is part of the IP?

   Andrew

^ permalink raw reply

* RE: [PATCH v3 net-next] net: stmmac: Add support for MDIO interrupts
From: Jose Abreu @ 2019-09-06 14:33 UTC (permalink / raw)
  To: Andrew Lunn, Jose Abreu
  Cc: Voon Weifeng, David S. Miller, Maxime Coquelin,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Giuseppe Cavallaro, Alexandre Torgue, Ong Boon Leong
In-Reply-To: <20190906142446.GA29611@lunn.ch>

From: Andrew Lunn <andrew@lunn.ch>
Date: Sep/06/2019, 15:24:46 (UTC+00:00)

> On Fri, Sep 06, 2019 at 01:31:14PM +0000, Jose Abreu wrote:
> > From: Voon Weifeng <weifeng.voon@intel.com>
> > Date: Sep/05/2019, 13:05:30 (UTC+00:00)
> > 
> > > DW EQoS v5.xx controllers added capability for interrupt generation
> > > when MDIO interface is done (GMII Busy bit is cleared).
> > > This patch adds support for this interrupt on supported HW to avoid
> > > polling on GMII Busy bit.
> > 
> > Better leave the enabling of this optional because the support for it is 
> > also optional depending on the IP HW configuration.
> 
> Hi Jose
> 
> If there a register which indicates if this feature is part of the IP?

Yes. That would be SMASEL which is Bit 5 of register MAC_HW_Feature0.

---
Thanks,
Jose Miguel Abreu

^ permalink raw reply

* Re: [PATCH net-next v2 0/4] ravb: remove use of undocumented registers
From: David Miller @ 2019-09-06 14:47 UTC (permalink / raw)
  To: horms+renesas; +Cc: sergei.shtylyov, magnus.damm, netdev, linux-renesas-soc
In-Reply-To: <20190905151059.26794-1-horms+renesas@verge.net.au>

From: Simon Horman <horms+renesas@verge.net.au>
Date: Thu,  5 Sep 2019 17:10:55 +0200

> this short series cleans up the RAVB driver a little.
 ...

Series applied, thanks Simon.

^ permalink raw reply

* Re: [PATCH] lan743x: remove redundant assignment to variable rx_process_result
From: David Miller @ 2019-09-06 14:47 UTC (permalink / raw)
  To: colin.king
  Cc: bryan.whitehead, UNGLinuxDriver, netdev, kernel-janitors,
	linux-kernel
In-Reply-To: <20190905140135.26951-1-colin.king@canonical.com>

From: Colin King <colin.king@canonical.com>
Date: Thu,  5 Sep 2019 15:01:35 +0100

> From: Colin Ian King <colin.king@canonical.com>
> 
> The variable rx_process_result is being initialized with a value that
> is never read and is being re-assigned immediately afterwards. The
> assignment is redundant, so replace it with the return from function
> lan743x_rx_process_packet.
> 
> Addresses-Coverity: ("Unused value")
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Applied to net-next, thanks.

^ permalink raw reply

* Re: [PATCH net-next,v3 0/4] flow_offload: update mangle action representation
From: Pablo Neira Ayuso @ 2019-09-06 14:50 UTC (permalink / raw)
  To: Edward Cree
  Cc: netfilter-devel, davem, netdev, jakub.kicinski, jiri, saeedm,
	vishal, vladbu
In-Reply-To: <35ac21be-ff2f-a9cd-dd71-28bc37e8a51b@solarflare.com>

On Fri, Sep 06, 2019 at 02:37:16PM +0100, Edward Cree wrote:
> On 06/09/2019 14:14, Pablo Neira Ayuso wrote:
> > OK, I can document this semantics, I need just _time_ to write that
> > documentation. I was expecting this patch description is enough by now
> > until I can get to finish that documentation.
>
> I think for two structs with apparently the same contents but different
>  semantics (one has the mask bitwise complemented) it's best to hold up
>  the code change until the comment is ready to come with it, because
>  until then it's a dangerously confusing situation.

The idea is that flow rule API != tc front-end anymore. So the flow
rule API can evolve regardless the front-end requirements. Before this
update there was no other choice than using the tc pedit layout since
it was exposed to the drivers, this is not the case anymore.

> >> And you can't just coalesce all consecutive mangles, because if you
> >>  mangle two consecutive fields (e.g. UDP sport and dport) the driver
> >>  still needs to disentangle that if it works on a 'fields' (rather
> >>  than 'u32s') level.
> >
> > This infrastructure is _not_ coalescing two consecutive field, e.g.
> > UDP sport and dport is _not_ coalesced. The coalesce routine does
> > _not_ handle multiple tc pedit ex actions.
>
> So an IPv6 address mangle only comes as a single action if it's from
>  netfilter, not if it's coming from TC pedit.

Driver gets one single action from tc/netfilter (and potentially
ethtool if it would support for this), it comes as a single action
from both subsystems:

        front-end -----> flow_rule API -----> drivers

Front-end need to translate their representation to this
FLOW_ACTION_MANGLE layout.

By front-end, I refer to ethtool/netfilter/tc.

>  Therefore drivers still  need to handle an IPv6 or MAC address
>  mangle coming in multiple  actions, therefore your driver
>  simplifications are invalid.  No?

No. IPv6 and MAC come as a single action. All subsystems use the same
flow_rule API, they use the same layout.

> > The model you propose would still need this code for tc pedit to
> > adjust offset/length and coalesce u32 fields.
>
>  Yes, but we don't add code/features to the kernel based on what we
>  *could* use it for later

This is already useful: Look at the cxgb driver in particular, it's
way easier to read.

Other existing drivers do not need to do things like:

        case round_down(offsetof(struct iphdr, tos), 4):

and the play with masks to infer if the user wants to mangle the TOS
field, they can just do:

        case offsetof(struct iphdr, tos):

which is way more natural representation.

^ permalink raw reply

* Re: [PATCH net] net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list
From: Willem de Bruijn @ 2019-09-06 14:49 UTC (permalink / raw)
  To: Shmulik Ladkani
  Cc: Daniel Borkmann, Eric Dumazet, eyal, netdev, Shmulik Ladkani,
	Alexander Duyck
In-Reply-To: <20190906094744.345d9442@pixies>

On Fri, Sep 6, 2019 at 2:47 AM Shmulik Ladkani <shmulik@metanetworks.com> wrote:
>
> On Thu, 5 Sep 2019 17:51:20 -0400
> Willem de Bruijn <willemdebruijn.kernel@gmail.com> wrote:
>
> > On Thu, Sep 5, 2019 at 2:36 PM Shmulik Ladkani <shmulik@metanetworks.com> wrote:
> > >
> > > +       if (mss != GSO_BY_FRAGS &&
> > > +           (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
> > > +               /* gso_size is untrusted.
> > > +                *
> > > +                * If head_skb has a frag_list with a linear non head_frag
> > > +                * item, and head_skb's headlen does not fit requested
> > > +                * gso_size, fall back to copying the skbs - by disabling sg.
> > > +                *
> > > +                * We assume checking the first frag suffices, i.e if either of
> > > +                * the frags have non head_frag data, then the first frag is
> > > +                * too.
> > > +                */
> > > +               if (list_skb && skb_headlen(list_skb) && !list_skb->head_frag &&
> > > +                   (mss != skb_headlen(head_skb) - doffset)) {
> >
> > I thought the idea was to check skb_headlen(list_skb), as that is the
> > cause of the problem. Is skb_headlen(head_skb) a good predictor of
> > that? I can certainly imagine that it is, just not sure.
>
> Yes, 'mss != skb_headlen(HEAD_SKB)' seems to be a very good predictor,
> both for the test reproducer, and what's observered on a live system.
>
> We *CANNOT* use 'mss != skb_headlen(LIST_SKB)' as the test condition.
> The packet could have just a SINGLE frag_list member, and that member could
> be a "small remainder" not reaching the full mss size - so we could hit
> the test condition EVEN FOR NON gso_size mangled frag_list skbs -
> which is not desired.

The last segment. Yes, good point.

> Also, is we test 'mss != skb_headlen(list_skb)' and execute 'sg=false'
> ONLY IF 'list_skb' is *NOT* the last item, this is still bogus.
> Imagine a gso_size mangled packet having just head_skb and a single
> "small remainder" frag. This packet will hit the BUG_ON, as the
> 'sg=false' solution is now skipped according to the revised condition.

Right, I wouldn't suggest that.

But I wonder whether it is a given that head_skb has headlen.

Btw, it seems slightly odd to me tot test head_frag before testing
headlen in the v2 patch.

^ permalink raw reply

* Re: [PATCH] net/skbuff: silence warnings under memory pressure
From: Petr Mladek @ 2019-09-06 14:55 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Steven Rostedt, Qian Cai, davem, Eric Dumazet, Sergey Senozhatsky,
	Michal Hocko, linux-mm, linux-kernel, netdev
In-Reply-To: <20190905113208.GA521@jagdpanzerIV>

On Thu 2019-09-05 20:32:08, Sergey Senozhatsky wrote:
> On (09/04/19 16:42), Qian Cai wrote:
> > > Let me think more.
> > 
> > To summary, those look to me are all good long-term improvement that would
> > reduce the likelihood of this kind of livelock in general especially for other
> > unknown allocations that happen while processing softirqs, but it is still up to
> > the air if it fixes it 100% in all situations as printk() is going to take more
> > time
> 
> Well. So. I guess that we don't need irq_work most of the time.
> 
> We need to queue irq_work for "safe" wake_up_interruptible(), when we
> know that we can deadlock in scheduler. IOW, only when we are invoked
> from the scheduler. Scheduler has printk_deferred(), which tells printk()
> that it cannot do wake_up_interruptible(). Otherwise we can just use
> normal wake_up_process() and don't need that irq_work->wake_up_interruptible()
> indirection. The parts of the scheduler, which by mistake call plain printk()
> from under pi_lock or rq_lock have chances to deadlock anyway and should
> be switched to printk_deferred().
> 
> I think we can queue significantly much less irq_work-s from printk().
> 
> Petr, Steven, what do you think?
> 
> Something like this. Call wake_up_interruptible(), switch to
> wake_up_klogd() only when called from sched code.

Replacing irq_work_queue() with wake_up_interruptible() looks
dangerous to me.

As a result, all "normal" printk() calls from the scheduler
code will deadlock. There is almost always a userspace
logger registered.

By "normal" I mean anything that is not printk_deferred(). For
example, any WARN() from sheduler will cause a deadlock.
We will not even have chance to catch these problems in
advance by lockdep.

The difference is that console_unlock() calls wake_up_process()
only when there is a waiter. And the hard console_lock() is not
called that often.


Honestly, scheduling IRQ looks like the most lightweight and reliable
solution for offloading. We are in big troubles if we could not use
it in printk() code.

IMHO, the best solution is to ratelimit the warnings about the
allocation failures. It does not make sense to repeat the same
warning again and again. We might need a better ratelimiting API
if the current one is not reliable.

Best Regards,
Petr

^ permalink raw reply

* Re: Need more information on "ifi_change" in "struct ifinfomsg"
From: Nicolas Dichtel @ 2019-09-06 14:55 UTC (permalink / raw)
  To: dhan lin, netdev
In-Reply-To: <CAMvS6vbeo5tBADNmLvkXUuSSHmxVpt3UW+jZtxY2Le9nXRbNDw@mail.gmail.com>

Le 06/09/2019 à 07:08, dhan lin a écrit :
> Hi All,
> 
> There is a field called ifi_change in "struct ifinfomsg". man page for
> rtnetlink says its for future use and should be always set to
> 0xFFFFFFFF.
> 
> But ive run some sample tests, to confirm the value is not as per man
> pages explanation.
> Its 0 most of the times and non-zero sometimes.
> 
> I've the following question,
> 
> Is ifi_change set only when there is a state change in interface values?
ifi_change indicates which flags (ifi_flags) have changed.
It does not cover other changes.


Regards,
Nicolas

^ permalink raw reply

* [PATCH bpf-next v10 0/4] BPF: New helper to obtain namespace data  from current task
From: Carlos Neira @ 2019-09-06 15:09 UTC (permalink / raw)
  To: netdev; +Cc: yhs, ebiederm, brouer, cneirabustos, bpf

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.
Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

        u32 pid = bpf_get_current_pid_tgid() >> 32;
        if (pid != <pid_arg_passed_in>)
                return 0;
Could be modified to use bpf_get_current_pidns_info() as follows:

        struct bpf_pidns pidns;
        bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
        u32 pid = pidns.tgid;
        u32 nsid = pidns.nsid;
        if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
                return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Changes from v9 :
Removed samples/bpf in favor of tools/testing/selftests/bpf
Fixed bug when bpf helper is called in an interrupt context.
Code style fixes.
Added more comments on bpf helper struct member.

Signed-off-by: Carlos Neira <cneirabustos@gmail.com>

Carlos Neira (4):
  fs/namei.c: make available filename_lookup() for bpf helpers.
  bpf: new helper to obtain namespace data from  current task New bpf
    helper bpf_get_current_pidns_info.
  tools: Added bpf_get_current_pidns_info  helper.
  tools/testing/selftests/bpf: Add self-tests  for helper
    bpf_get_pidns_info.

 fs/internal.h                                      |   2 -
 fs/namei.c                                         |   1 -
 include/linux/bpf.h                                |   1 +
 include/linux/namei.h                              |   4 +
 include/uapi/linux/bpf.h                           |  35 ++++-
 kernel/bpf/core.c                                  |   1 +
 kernel/bpf/helpers.c                               |  86 ++++++++++++
 kernel/trace/bpf_trace.c                           |   2 +
 tools/include/uapi/linux/bpf.h                     |  35 ++++-
 tools/testing/selftests/bpf/Makefile               |   2 +-
 tools/testing/selftests/bpf/bpf_helpers.h          |   3 +
 .../testing/selftests/bpf/progs/test_pidns_kern.c  |  52 ++++++++
 .../selftests/bpf/progs/test_pidns_nmi_kern.c      |  52 ++++++++
 tools/testing/selftests/bpf/test_pidns.c           | 146 +++++++++++++++++++++
 tools/testing/selftests/bpf/test_pidns_nmi.c       | 139 ++++++++++++++++++++
 15 files changed, 555 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_pidns_kern.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_pidns_nmi_kern.c
 create mode 100644 tools/testing/selftests/bpf/test_pidns.c
 create mode 100644 tools/testing/selftests/bpf/test_pidns_nmi.c

-- 
2.11.0


^ permalink raw reply

* [PATCH bpf-next v10 1/4] fs/namei.c: make available filename_lookup() for bpf helpers.
From: Carlos Neira @ 2019-09-06 15:09 UTC (permalink / raw)
  To: netdev; +Cc: yhs, ebiederm, brouer, cneirabustos, bpf
In-Reply-To: <20190906150952.23066-1-cneirabustos@gmail.com>

Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
---
 fs/internal.h         | 2 --
 fs/namei.c            | 1 -
 include/linux/namei.h | 4 ++++
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 315fcd8d237c..6647e15dd419 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -59,8 +59,6 @@ extern int finish_clean_context(struct fs_context *fc);
 /*
  * namei.c
  */
-extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
-			   struct path *path, struct path *root);
 extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct path *);
diff --git a/fs/namei.c b/fs/namei.c
index 209c51a5226c..a89fc72a4a10 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,7 +19,6 @@
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/fsnotify.h>
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 9138b4471dbf..b45c8b6f7cb4 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -6,6 +6,7 @@
 #include <linux/path.h>
 #include <linux/fcntl.h>
 #include <linux/errno.h>
+#include <linux/fs.h>
 
 enum { MAX_NESTED_LINKS = 8 };
 
@@ -97,6 +98,9 @@ extern void unlock_rename(struct dentry *, struct dentry *);
 
 extern void nd_jump_link(struct path *path);
 
+extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
+			   struct path *path, struct path *root);
+
 static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 {
 	((char *) name)[min(len, maxlen)] = '\0';
-- 
2.11.0


^ permalink raw reply related

* [PATCH bpf-next v10 2/4] bpf: new helper to obtain namespace data from  current task New bpf helper bpf_get_current_pidns_info.
From: Carlos Neira @ 2019-09-06 15:09 UTC (permalink / raw)
  To: netdev; +Cc: yhs, ebiederm, brouer, cneirabustos, bpf
In-Reply-To: <20190906150952.23066-1-cneirabustos@gmail.com>

This helper(bpf_get_current_pidns_info) obtains the active namespace from
current and returns pid, tgid, device and namespace id as seen from that
namespace, allowing to instrument a process inside a container.

Signed-off-by: Carlos Neira <cneirabustos@gmail.com>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h | 35 +++++++++++++++++++-
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 86 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c |  2 ++
 5 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b9d22338606..819cb1c84be0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1055,6 +1055,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
 extern const struct bpf_func_proto bpf_strtol_proto;
 extern const struct bpf_func_proto bpf_strtoul_proto;
 extern const struct bpf_func_proto bpf_tcp_sock_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b5889257cc33..3ec9aa1438b7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2747,6 +2747,32 @@ union bpf_attr {
  *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
  *
  *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ *	Description
+ *		Get tgid, pid and namespace id as seen by the current namespace,
+ *		and device major/minor numbers from /proc/self/ns/pid. Such
+ *		information is stored in *pidns* of size *size*.
+ *
+ *		This helper is used when pid filtering is needed inside a
+ *		container as bpf_get_current_tgid() helper always returns the
+ *		pid id as seen by the root namespace.
+ *	Return
+ *		0 on success
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** if *size_of_pidns* is not valid or unable to get ns, pid
+ *		or tgid of the current task.
+ *
+ *		**-ENOENT** if /proc/self/ns/pid does not exists.
+ *
+ *		**-ENOENT** if /proc/self/ns does not exists.
+ *
+ *		**-ENOMEM** if helper internal allocation fails.
+ *
+ *		**-EPERM** if not able to call helper.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2859,7 +2885,8 @@ union bpf_attr {
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
-	FN(tcp_gen_syncookie),
+	FN(tcp_gen_syncookie),		\
+	FN(get_current_pidns_info),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3610,4 +3637,10 @@ struct bpf_sockopt {
 	__s32	retval;
 };
 
+struct bpf_pidns_info {
+	__u32 dev;	/* dev_t from /proc/self/ns/pid inode */
+	__u32 nsid;
+	__u32 tgid;
+	__u32 pid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8191a7db2777..3159f2a0188c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2038,6 +2038,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5e28718928ca..8dbe6347893c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -11,6 +11,11 @@
 #include <linux/uidgid.h>
 #include <linux/filter.h>
 #include <linux/ctype.h>
+#include <linux/pid_namespace.h>
+#include <linux/kdev_t.h>
+#include <linux/stat.h>
+#include <linux/namei.h>
+#include <linux/version.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -312,6 +317,87 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 	preempt_enable();
 }
 
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+	 size)
+{
+	const char *pidns_path = "/proc/self/ns/pid";
+	struct pid_namespace *pidns = NULL;
+	struct filename *fname = NULL;
+	struct inode *inode;
+	struct path kp;
+	pid_t tgid = 0;
+	pid_t pid = 0;
+	int ret = -EINVAL;
+	int len;
+
+	if (unlikely(in_interrupt() ||
+			current->flags & (PF_KTHREAD | PF_EXITING)))
+		return -EPERM;
+
+	if (unlikely(size != sizeof(struct bpf_pidns_info)))
+		return -EINVAL;
+
+	pidns = task_active_pid_ns(current);
+	if (unlikely(!pidns))
+		return -ENOENT;
+
+	pidns_info->nsid =  pidns->ns.inum;
+	pid = task_pid_nr_ns(current, pidns);
+	if (unlikely(!pid))
+		goto clear;
+
+	tgid = task_tgid_nr_ns(current, pidns);
+	if (unlikely(!tgid))
+		goto clear;
+
+	pidns_info->tgid = (u32) tgid;
+	pidns_info->pid = (u32) pid;
+
+	fname = kmem_cache_alloc(names_cachep, GFP_ATOMIC);
+	if (unlikely(!fname)) {
+		ret = -ENOMEM;
+		goto clear;
+	}
+	const size_t fnamesize = offsetof(struct filename, iname[1]);
+	struct filename *tmp;
+
+	tmp = kmalloc(fnamesize, GFP_ATOMIC);
+	if (unlikely(!tmp)) {
+		__putname(fname);
+		ret = -ENOMEM;
+		goto clear;
+	}
+
+	tmp->name = (char *)fname;
+	fname = tmp;
+	len = strlen(pidns_path) + 1;
+	memcpy((char *)fname->name, pidns_path, len);
+	fname->uptr = NULL;
+	fname->aname = NULL;
+	fname->refcnt = 1;
+
+	ret = filename_lookup(AT_FDCWD, fname, 0, &kp, NULL);
+	if (ret)
+		goto clear;
+
+	inode = d_backing_inode(kp.dentry);
+	pidns_info->dev = (u32)inode->i_rdev;
+
+	return 0;
+
+clear:
+	memset((void *)pidns_info, 0, (size_t) size);
+	return ret;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+	.func		= bpf_get_current_pidns_info,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+};
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1255d14576..5e1dc22765a5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -709,6 +709,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #endif
 	case BPF_FUNC_send_signal:
 		return &bpf_send_signal_proto;
+	case BPF_FUNC_get_current_pidns_info:
+		return &bpf_get_current_pidns_info_proto;
 	default:
 		return NULL;
 	}
-- 
2.11.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox