Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: DSA switch
From: Andrew Lunn @ 2018-05-04 12:56 UTC (permalink / raw)
  To: Ran Shalit; +Cc: netdev
In-Reply-To: <CAJ2oMhLvs2+SxzLoDYN9Y6YjcKAN1VwtY6a3b4XYuujLFiOgkw@mail.gmail.com>

> I also see that there is no bridge function in /drivers/net/dsa files
> in our kernel (2.6.37)

Sorry, i cannot really help you with an ancient kernel like that.

       Andrew

^ permalink raw reply

* Re: [PATCH bpf-next v3 03/15] xsk: add umem fill queue support and mmap
From: Daniel Borkmann @ 2018-05-04 12:49 UTC (permalink / raw)
  To: Björn Töpel, magnus.karlsson, alexander.h.duyck,
	alexander.duyck, john.fastabend, ast, brouer,
	willemdebruijn.kernel, mst, netdev
  Cc: michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang
In-Reply-To: <20180502110136.3738-4-bjorn.topel@gmail.com>

On 05/02/2018 01:01 PM, Björn Töpel wrote:
> From: Magnus Karlsson <magnus.karlsson@intel.com>
> 
> Here, we add another setsockopt for registered user memory (umem)
> called XDP_UMEM_FILL_QUEUE. Using this socket option, the process can
> ask the kernel to allocate a queue (ring buffer) and also mmap it
> (XDP_UMEM_PGOFF_FILL_QUEUE) into the process.
> 
> The queue is used to explicitly pass ownership of umem frames from the
> user process to the kernel. These frames will in a later patch be
> filled in with Rx packet data by the kernel.
> 
> v2: Fixed potential crash in xsk_mmap.
> 
> Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
> ---
>  include/uapi/linux/if_xdp.h | 15 +++++++++++
>  net/xdp/Makefile            |  2 +-
>  net/xdp/xdp_umem.c          |  5 ++++
>  net/xdp/xdp_umem.h          |  2 ++
>  net/xdp/xsk.c               | 65 ++++++++++++++++++++++++++++++++++++++++++++-
>  net/xdp/xsk_queue.c         | 58 ++++++++++++++++++++++++++++++++++++++++
>  net/xdp/xsk_queue.h         | 38 ++++++++++++++++++++++++++
>  7 files changed, 183 insertions(+), 2 deletions(-)
>  create mode 100644 net/xdp/xsk_queue.c
>  create mode 100644 net/xdp/xsk_queue.h
> 
> diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> index 41252135a0fe..975661e1baca 100644
> --- a/include/uapi/linux/if_xdp.h
> +++ b/include/uapi/linux/if_xdp.h
> @@ -23,6 +23,7 @@
>  
>  /* XDP socket options */
>  #define XDP_UMEM_REG			3
> +#define XDP_UMEM_FILL_RING		4
>  
>  struct xdp_umem_reg {
>  	__u64 addr; /* Start of packet data area */
> @@ -31,4 +32,18 @@ struct xdp_umem_reg {
>  	__u32 frame_headroom; /* Frame head room */
>  };
>  
> +/* Pgoff for mmaping the rings */
> +#define XDP_UMEM_PGOFF_FILL_RING	0x100000000
> +
> +struct xdp_ring {
> +	__u32 producer __attribute__((aligned(64)));
> +	__u32 consumer __attribute__((aligned(64)));
> +};
> +
> +/* Used for the fill and completion queues for buffers */
> +struct xdp_umem_ring {
> +	struct xdp_ring ptrs;
> +	__u32 desc[0] __attribute__((aligned(64)));
> +};
> +
>  #endif /* _LINUX_IF_XDP_H */
> diff --git a/net/xdp/Makefile b/net/xdp/Makefile
> index a5d736640a0f..074fb2b2d51c 100644
> --- a/net/xdp/Makefile
> +++ b/net/xdp/Makefile
> @@ -1,2 +1,2 @@
> -obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o
> +obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
>  
> diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
> index ec8b3552be44..e1f627d0cc1c 100644
> --- a/net/xdp/xdp_umem.c
> +++ b/net/xdp/xdp_umem.c
> @@ -65,6 +65,11 @@ static void xdp_umem_release(struct xdp_umem *umem)
>  	struct task_struct *task;
>  	struct mm_struct *mm;
>  
> +	if (umem->fq) {
> +		xskq_destroy(umem->fq);
> +		umem->fq = NULL;
> +	}
> +
>  	if (umem->pgs) {
>  		xdp_umem_unpin_pages(umem);
>  
> diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
> index 4597ae81a221..25634b8a5c6f 100644
> --- a/net/xdp/xdp_umem.h
> +++ b/net/xdp/xdp_umem.h
> @@ -19,9 +19,11 @@
>  #include <linux/if_xdp.h>
>  #include <linux/workqueue.h>
>  
> +#include "xsk_queue.h"
>  #include "xdp_umem_props.h"
>  
>  struct xdp_umem {
> +	struct xsk_queue *fq;
>  	struct page **pgs;
>  	struct xdp_umem_props props;
>  	u32 npgs;
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 84e0e867febb..da67a3c5c1c9 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -32,6 +32,7 @@
>  #include <linux/netdevice.h>
>  #include <net/xdp_sock.h>
>  
> +#include "xsk_queue.h"
>  #include "xdp_umem.h"
>  
>  static struct xdp_sock *xdp_sk(struct sock *sk)
> @@ -39,6 +40,21 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
>  	return (struct xdp_sock *)sk;
>  }
>  
> +static int xsk_init_queue(u32 entries, struct xsk_queue **queue)
> +{
> +	struct xsk_queue *q;
> +
> +	if (entries == 0 || *queue || !is_power_of_2(entries))
> +		return -EINVAL;
> +
> +	q = xskq_create(entries);
> +	if (!q)
> +		return -ENOMEM;
> +
> +	*queue = q;
> +	return 0;
> +}
> +
>  static int xsk_release(struct socket *sock)
>  {
>  	struct sock *sk = sock->sk;
> @@ -101,6 +117,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
>  		mutex_unlock(&xs->mutex);
>  		return 0;
>  	}
> +	case XDP_UMEM_FILL_RING:
> +	{
> +		struct xsk_queue **q;
> +		int entries;
> +
> +		if (!xs->umem)
> +			return -EINVAL;

(Same here as previously mentioned.)

> +		if (copy_from_user(&entries, optval, sizeof(entries)))
> +			return -EFAULT;
> +
> +		mutex_lock(&xs->mutex);
> +		q = &xs->umem->fq;
> +		err = xsk_init_queue(entries, q);
> +		mutex_unlock(&xs->mutex);
> +		return err;
> +	}
>  	default:
>  		break;
>  	}
> @@ -108,6 +141,36 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
>  	return -ENOPROTOOPT;
>  }
>  
> +static int xsk_mmap(struct file *file, struct socket *sock,
> +		    struct vm_area_struct *vma)
> +{
> +	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
> +	unsigned long size = vma->vm_end - vma->vm_start;
> +	struct xdp_sock *xs = xdp_sk(sock->sk);
> +	struct xsk_queue *q = NULL;
> +	unsigned long pfn;
> +	struct page *qpg;
> +
> +	if (!xs->umem)
> +		return -EINVAL;
> +
> +	if (offset == XDP_UMEM_PGOFF_FILL_RING)
> +		q = xs->umem->fq;
> +	else
> +		return -EINVAL;
> +
> +	if (!q)
> +		return -EINVAL;

Nit: since q is NULL above, could be simplified as:

	if (offset == XDP_UMEM_PGOFF_FILL_RING)
		q = xs->umem->fq;
	if (!q)
		return -EINVAL;

> +
> +	qpg = virt_to_head_page(q->ring);
> +	if (size > (PAGE_SIZE << compound_order(qpg)))
> +		return -EINVAL;
> +
> +	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
> +	return remap_pfn_range(vma, vma->vm_start, pfn,
> +			       size, vma->vm_page_prot);
> +}
> +
>  static struct proto xsk_proto = {
>  	.name =		"XDP",
>  	.owner =	THIS_MODULE,
> @@ -131,7 +194,7 @@ static const struct proto_ops xsk_proto_ops = {
>  	.getsockopt =	sock_no_getsockopt,
>  	.sendmsg =	sock_no_sendmsg,
>  	.recvmsg =	sock_no_recvmsg,
> -	.mmap =		sock_no_mmap,
> +	.mmap =		xsk_mmap,
>  	.sendpage =	sock_no_sendpage,
>  };
>  

^ permalink raw reply

* Re: ipw2200: fix spelling mistake: "functionalitis" -> "functionalities"
From: Kalle Valo @ 2018-05-04 12:37 UTC (permalink / raw)
  To: Colin Ian King
  Cc: Stanislav Yakovlev, David S . Miller,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	kernel-janitors-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20180501093643.23318-1-colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Colin Ian King <colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org> wrote:

> From: Colin Ian King <colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
> 
> Trivial fix to spelling mistake in module parameter description text
> 
> Signed-off-by: Colin Ian King <colin.king-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Patch applied to wireless-drivers-next.git, thanks.

7e7939e80e3c ipw2200: fix spelling mistake: "functionalitis" -> "functionalities"

-- 
https://patchwork.kernel.org/patch/10373469/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: rtlwifi: fix spelling mistake: "dismatch" -> "mismatch"
From: Kalle Valo @ 2018-05-04 12:36 UTC (permalink / raw)
  To: Colin Ian King
  Cc: Ping-Ke Shih, David S . Miller, Larry Finger, linux-wireless,
	netdev, kernel-janitors, linux-kernel
In-Reply-To: <20180430134102.11171-1-colin.king@canonical.com>

Colin Ian King <colin.king@canonical.com> wrote:

> From: Colin Ian King <colin.king@canonical.com>
> 
> Trivial fix to spelling mistake in RT_TRACE message text.
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Patch applied to wireless-drivers-next.git, thanks.

070fc356e21a rtlwifi: fix spelling mistake: "dismatch" -> "mismatch"

-- 
https://patchwork.kernel.org/patch/10371799/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: [PATCH bpf-next v3 02/15] xsk: add user memory registration support sockopt
From: Daniel Borkmann @ 2018-05-04 12:34 UTC (permalink / raw)
  To: Björn Töpel, magnus.karlsson, alexander.h.duyck,
	alexander.duyck, john.fastabend, ast, brouer,
	willemdebruijn.kernel, mst, netdev
  Cc: Björn Töpel, michael.lundkvist, jesse.brandeburg,
	anjali.singhai, qi.z.zhang
In-Reply-To: <20180502110136.3738-3-bjorn.topel@gmail.com>

On 05/02/2018 01:01 PM, Björn Töpel wrote:
[...]

(Few nits for follow-ups I haven't sent out yesterday night yet; they are on
 top of Alexei's remarks.)

> ---
>  include/net/xdp_sock.h      |  31 ++++++
>  include/uapi/linux/if_xdp.h |  34 ++++++
>  net/Makefile                |   1 +
>  net/xdp/Makefile            |   2 +
>  net/xdp/xdp_umem.c          | 245 ++++++++++++++++++++++++++++++++++++++++++++
>  net/xdp/xdp_umem.h          |  45 ++++++++
>  net/xdp/xdp_umem_props.h    |  23 +++++
>  net/xdp/xsk.c               | 215 ++++++++++++++++++++++++++++++++++++++
>  8 files changed, 596 insertions(+)
>  create mode 100644 include/net/xdp_sock.h
>  create mode 100644 include/uapi/linux/if_xdp.h
>  create mode 100644 net/xdp/Makefile
>  create mode 100644 net/xdp/xdp_umem.c
>  create mode 100644 net/xdp/xdp_umem.h
>  create mode 100644 net/xdp/xdp_umem_props.h
>  create mode 100644 net/xdp/xsk.c
> 
> diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
> new file mode 100644
> index 000000000000..94785f5db13e
> --- /dev/null
> +++ b/include/net/xdp_sock.h
> @@ -0,0 +1,31 @@
> +/* SPDX-License-Identifier: GPL-2.0

I think this should just be a single line comment, at least that's the case
for majority of files in tree, so probably good to stick to it as well ...

$ git grep -n "SPDX-License-Identifier" | grep "\/\*" | grep -v "\*\/" | wc -l
20
$ git grep -n "SPDX-License-Identifier" | grep "\/\*" | grep "\*\/" | wc -l
7742

... and probably would also make sense to get rid of the below boiler plate
text then. (Applies to other added files as well in this series, only mention
it here once.)

> + * AF_XDP internal functions
> + * Copyright(c) 2018 Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef _LINUX_XDP_SOCK_H
> +#define _LINUX_XDP_SOCK_H
> +
> +#include <linux/mutex.h>
> +#include <net/sock.h>
> +
> +struct xdp_umem;
> +
> +struct xdp_sock {
> +	/* struct sock must be the first member of struct xdp_sock */
> +	struct sock sk;
> +	struct xdp_umem *umem;
> +	/* Protects multiple processes in the control path */
> +	struct mutex mutex;
> +};
> +
> +#endif /* _LINUX_XDP_SOCK_H */
[...]
> diff --git a/net/Makefile b/net/Makefile
> index a6147c61b174..77aaddedbd29 100644
> --- a/net/Makefile
> +++ b/net/Makefile
> @@ -85,3 +85,4 @@ obj-y				+= l3mdev/
>  endif
>  obj-$(CONFIG_QRTR)		+= qrtr/
>  obj-$(CONFIG_NET_NCSI)		+= ncsi/
> +obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
> diff --git a/net/xdp/Makefile b/net/xdp/Makefile
> new file mode 100644
> index 000000000000..a5d736640a0f
> --- /dev/null
> +++ b/net/xdp/Makefile
> @@ -0,0 +1,2 @@
> +obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o
> +

Nit: newline at end of file.

> diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
> new file mode 100644
> index 000000000000..ec8b3552be44
> --- /dev/null
> +++ b/net/xdp/xdp_umem.c
[...]
> +
> +#include "xdp_umem.h"
> +
> +#define XDP_UMEM_MIN_FRAME_SIZE 2048
> +
> +int xdp_umem_create(struct xdp_umem **umem)
> +{
> +	*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
> +
> +	if (!(*umem))

Nit: extra () not needed. You also have the extra brackets in couple of
other places/conditionals.

> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void xdp_umem_unpin_pages(struct xdp_umem *umem)
> +{
> +	unsigned int i;
> +
> +	if (umem->pgs) {

All call-sites of this have umem->pgs never as NULL.

> +		for (i = 0; i < umem->npgs; i++) {
> +			struct page *page = umem->pgs[i];
> +
> +			set_page_dirty_lock(page);
> +			put_page(page);
> +		}
> +
> +		kfree(umem->pgs);
> +		umem->pgs = NULL;
> +	}
> +}
> +
> +static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
> +{
> +	if (umem->user) {
> +		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
> +		free_uid(umem->user);
> +	}
> +}
> +
> +static void xdp_umem_release(struct xdp_umem *umem)
> +{
> +	struct task_struct *task;
> +	struct mm_struct *mm;
> +
> +	if (umem->pgs) {
> +		xdp_umem_unpin_pages(umem);
> +
> +		task = get_pid_task(umem->pid, PIDTYPE_PID);
> +		put_pid(umem->pid);
> +		if (!task)
> +			goto out;
> +		mm = get_task_mm(task);
> +		put_task_struct(task);
> +		if (!mm)
> +			goto out;
> +
> +		mmput(mm);
> +		umem->pgs = NULL;
> +	}
> +
> +	xdp_umem_unaccount_pages(umem);
> +out:
> +	kfree(umem);
> +}
> +
> +static void xdp_umem_release_deferred(struct work_struct *work)
> +{
> +	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
> +
> +	xdp_umem_release(umem);
> +}
> +
> +void xdp_get_umem(struct xdp_umem *umem)
> +{
> +	atomic_inc(&umem->users);
> +}
> +
> +void xdp_put_umem(struct xdp_umem *umem)
> +{
> +	if (!umem)
> +		return;
> +
> +	if (atomic_dec_and_test(&umem->users)) {
> +		INIT_WORK(&umem->work, xdp_umem_release_deferred);
> +		schedule_work(&umem->work);
> +	}
> +}
> +
> +static int xdp_umem_pin_pages(struct xdp_umem *umem)
> +{
> +	unsigned int gup_flags = FOLL_WRITE;
> +	long npgs;
> +	int err;
> +
> +	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
> +	if (!umem->pgs)
> +		return -ENOMEM;
> +
> +	down_write(&current->mm->mmap_sem);
> +	npgs = get_user_pages(umem->address, umem->npgs,
> +			      gup_flags, &umem->pgs[0], NULL);
> +	up_write(&current->mm->mmap_sem);
> +
> +	if (npgs != umem->npgs) {
> +		if (npgs >= 0) {
> +			umem->npgs = npgs;
> +			err = -ENOMEM;
> +			goto out_pin;
> +		}
> +		err = npgs;
> +		goto out_pgs;
> +	}
> +	return 0;
> +
> +out_pin:
> +	xdp_umem_unpin_pages(umem);
> +out_pgs:
> +	kfree(umem->pgs);
> +	umem->pgs = NULL;
> +	return err;
> +}
> +
> +static int xdp_umem_account_pages(struct xdp_umem *umem)
> +{
> +	unsigned long lock_limit, new_npgs, old_npgs;
> +
> +	if (capable(CAP_IPC_LOCK))
> +		return 0;
> +
> +	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> +	umem->user = get_uid(current_user());
> +
> +	do {
> +		old_npgs = atomic_long_read(&umem->user->locked_vm);
> +		new_npgs = old_npgs + umem->npgs;
> +		if (new_npgs > lock_limit) {
> +			free_uid(umem->user);
> +			umem->user = NULL;
> +			return -ENOBUFS;
> +		}
> +	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
> +				     new_npgs) != old_npgs);
> +	return 0;
> +}
> +
> +int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
> +{
> +	u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
> +	u64 addr = mr->addr, size = mr->len;
> +	unsigned int nframes, nfpp;
> +	int size_chk, err;
> +
> +	if (!umem)
> +		return -EINVAL;

Wouldn't it be better to remove these sort of defensive checks (here and in
other places)? Eventually they might only end up hiding potential bugs rather
than having them noticed. Only call-site is in xsk_setsockopt() where you do:

[...]
                mutex_lock(&xs->mutex);
                err = xdp_umem_create(&umem);

                err = xdp_umem_reg(umem, &mr);
                if (err) {
                        kfree(umem);
                        mutex_unlock(&xs->mutex);
                        return err;
                }
[...]

Seems more intuitive and easier to audit when you bail out initially when
xdp_umem_create() fails rather than in xdp_umem_reg(), and then the test
for !umem can be removed.

> +	if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
> +		/* Strictly speaking we could support this, if:
> +		 * - huge pages, or*
> +		 * - using an IOMMU, or
> +		 * - making sure the memory area is consecutive
> +		 * but for now, we simply say "computer says no".
> +		 */
> +		return -EINVAL;
> +	}
> +
> +	if (!is_power_of_2(frame_size))
> +		return -EINVAL;
> +
> +	if (!PAGE_ALIGNED(addr)) {
> +		/* Memory area has to be page size aligned. For
> +		 * simplicity, this might change.
> +		 */
> +		return -EINVAL;
> +	}
> +
> +	if ((addr + size) < addr)
> +		return -EINVAL;
> +
> +	nframes = size / frame_size;
> +	if (nframes == 0 || nframes > UINT_MAX)
> +		return -EINVAL;
> +
> +	nfpp = PAGE_SIZE / frame_size;
> +	if (nframes < nfpp || nframes % nfpp)
> +		return -EINVAL;
> +
> +	frame_headroom = ALIGN(frame_headroom, 64);
> +
> +	size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM;
> +	if (size_chk < 0)
> +		return -EINVAL;
> +
> +	umem->pid = get_task_pid(current, PIDTYPE_PID);
> +	umem->size = (size_t)size;
> +	umem->address = (unsigned long)addr;
> +	umem->props.frame_size = frame_size;
> +	umem->props.nframes = nframes;
> +	umem->frame_headroom = frame_headroom;
> +	umem->npgs = size / PAGE_SIZE;
> +	umem->pgs = NULL;
> +	umem->user = NULL;
> +
> +	umem->frame_size_log2 = ilog2(frame_size);
> +	umem->nfpp_mask = nfpp - 1;
> +	umem->nfpplog2 = ilog2(nfpp);
> +	atomic_set(&umem->users, 1);
> +
> +	err = xdp_umem_account_pages(umem);
> +	if (err)
> +		goto out;
> +
> +	err = xdp_umem_pin_pages(umem);
> +	if (err)
> +		goto out_account;
> +	return 0;
> +
> +out_account:
> +	xdp_umem_unaccount_pages(umem);
> +out:
> +	put_pid(umem->pid);
> +	return err;
> +}
[...]
> +#ifndef XDP_UMEM_H_
> +#define XDP_UMEM_H_
> +
> +#include <linux/mm.h>
> +#include <linux/if_xdp.h>
> +#include <linux/workqueue.h>
> +
> +#include "xdp_umem_props.h"
> +
> +struct xdp_umem {
> +	struct page **pgs;
> +	struct xdp_umem_props props;
> +	u32 npgs;
> +	u32 frame_headroom;
> +	u32 nfpp_mask;
> +	u32 nfpplog2;
> +	u32 frame_size_log2;
> +	struct user_struct *user;
> +	struct pid *pid;
> +	unsigned long address;
> +	size_t size;
> +	atomic_t users;

Convert to refcnt_t?

> +	struct work_struct work;
> +};
> +
> +int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
> +void xdp_get_umem(struct xdp_umem *umem);
> +void xdp_put_umem(struct xdp_umem *umem);
> +int xdp_umem_create(struct xdp_umem **umem);
> +
> +#endif /* XDP_UMEM_H_ */
[...]
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> new file mode 100644
> index 000000000000..84e0e867febb
[...]
> +static struct xdp_sock *xdp_sk(struct sock *sk)
> +{
> +	return (struct xdp_sock *)sk;
> +}
> +
> +static int xsk_release(struct socket *sock)
> +{
> +	struct sock *sk = sock->sk;
> +	struct net *net;
> +
> +	if (!sk)
> +		return 0;
> +
> +	net = sock_net(sk);
> +
> +	local_bh_disable();
> +	sock_prot_inuse_add(net, sk->sk_prot, -1);
> +	local_bh_enable();
> +
> +	sock_orphan(sk);
> +	sock->sk = NULL;
> +
> +	sk_refcnt_debug_release(sk);
> +	sock_put(sk);
> +
> +	return 0;
> +}
> +
> +static int xsk_setsockopt(struct socket *sock, int level, int optname,
> +			  char __user *optval, unsigned int optlen)
> +{
> +	struct sock *sk = sock->sk;
> +	struct xdp_sock *xs = xdp_sk(sk);
> +	int err;
> +
> +	if (level != SOL_XDP)
> +		return -ENOPROTOOPT;
> +
> +	switch (optname) {
> +	case XDP_UMEM_REG:
> +	{
> +		struct xdp_umem_reg mr;
> +		struct xdp_umem *umem;
> +
> +		if (xs->umem)

Does this need READ_ONCE() or needs to go under the lock?

> +			return -EBUSY;
> +
> +		if (copy_from_user(&mr, optval, sizeof(mr)))
> +			return -EFAULT;
> +
> +		mutex_lock(&xs->mutex);
> +		err = xdp_umem_create(&umem);
> +
> +		err = xdp_umem_reg(umem, &mr);
> +		if (err) {
> +			kfree(umem);

The kfree() here begs for having a proper destructor such that once you extend
xdp_umem_create() these spots are not missed when further cleanups have to be
performed on dismantle.

> +			mutex_unlock(&xs->mutex);
> +			return err;
> +		}
> +
> +		/* Make sure umem is ready before it can be seen by others */
> +		smp_wmb();
> +
> +		xs->umem = umem;
> +		mutex_unlock(&xs->mutex);
> +		return 0;
> +	}
> +	default:
> +		break;
> +	}
> +
> +	return -ENOPROTOOPT;
> +}
> +
> +static struct proto xsk_proto = {
> +	.name =		"XDP",
> +	.owner =	THIS_MODULE,
> +	.obj_size =	sizeof(struct xdp_sock),
> +};
> +
> +static const struct proto_ops xsk_proto_ops = {
> +	.family =	PF_XDP,
> +	.owner =	THIS_MODULE,
> +	.release =	xsk_release,
> +	.bind =		sock_no_bind,
> +	.connect =	sock_no_connect,
> +	.socketpair =	sock_no_socketpair,
> +	.accept =	sock_no_accept,
> +	.getname =	sock_no_getname,
> +	.poll =		sock_no_poll,
> +	.ioctl =	sock_no_ioctl,
> +	.listen =	sock_no_listen,
> +	.shutdown =	sock_no_shutdown,
> +	.setsockopt =	xsk_setsockopt,
> +	.getsockopt =	sock_no_getsockopt,
> +	.sendmsg =	sock_no_sendmsg,
> +	.recvmsg =	sock_no_recvmsg,
> +	.mmap =		sock_no_mmap,
> +	.sendpage =	sock_no_sendpage,

Nit: would have been nice to properly align the '='

> +};
> +
> +static void xsk_destruct(struct sock *sk)
> +{
> +	struct xdp_sock *xs = xdp_sk(sk);
> +
> +	if (!sock_flag(sk, SOCK_DEAD))
> +		return;
> +
> +	xdp_put_umem(xs->umem);
> +
> +	sk_refcnt_debug_dec(sk);
> +}
> +
> +static int xsk_create(struct net *net, struct socket *sock, int protocol,
> +		      int kern)
> +{
> +	struct sock *sk;
> +	struct xdp_sock *xs;
> +
> +	if (!ns_capable(net->user_ns, CAP_NET_RAW))
> +		return -EPERM;
> +	if (sock->type != SOCK_RAW)
> +		return -ESOCKTNOSUPPORT;
> +
> +	if (protocol)
> +		return -EPROTONOSUPPORT;
> +
> +	sock->state = SS_UNCONNECTED;
[...]

^ permalink raw reply

* Re: [RFC PATCH 5/5] net: macb: Add WOL support with ARP
From: Claudiu Beznea @ 2018-05-04 12:17 UTC (permalink / raw)
  To: harinikatakamlinux, nicolas.ferre, davem
  Cc: netdev, linux-kernel, harinik, michals, appanad
In-Reply-To: <1521726700-22634-6-git-send-email-harinikatakamlinux@gmail.com>



On 22.03.2018 15:51, harinikatakamlinux@gmail.com wrote:
> From: Harini Katakam <harinik@xilinx.com>
> 
> This patch enables ARP wake event support in GEM through the following:
> 
> -> WOL capability can be selected based on the SoC/GEM IP version rather
> than a devictree property alone. Hence add a new capability property and
> set device as "wakeup capable" in probe in this case.
> -> Wake source selection can be done via ethtool or by enabling wakeup
> in /sys/devices/platform/..../ethx/power/
> This patch adds default wake source as ARP and the existing selection of
> WOL using magic packet remains unchanged.
> -> When GEM is the wake device with ARP as the wake event, the current
> IP address to match is written to WOL register along with other
> necessary confuguration required for MAC to recognize an ARP event.
> -> While RX needs to remain enabled, there is no need to process the
> actual wake packet - hence tie off all RX queues to avoid unnecessary
> processing by DMA in the background. 

Why is this different for magic packet vs ARP packet?

This tie off is done using a
> dummy buffer descriptor with used bit set. (There is no other provision
> to disable RX DMA in the GEM IP version in ZynqMP)
> -> TX is disabled and all interrupts except WOL on Q0 are disabled.
> Clear the WOL interrupt as no other action is required from driver.
> Power management of the SoC will already have got the event and will
> take care of initiating resume.
> -> Upon resume ARP WOL config is cleared and macb is reinitialized.
> 
> Signed-off-by: Harini Katakam <harinik@xilinx.com>
> ---
>  drivers/net/ethernet/cadence/macb.h      |   6 ++
>  drivers/net/ethernet/cadence/macb_main.c | 130 +++++++++++++++++++++++++++++--
>  2 files changed, 131 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
> index 9e7fb14..e18ff34 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -93,6 +93,7 @@
>  #define GEM_SA3T		0x009C /* Specific3 Top */
>  #define GEM_SA4B		0x00A0 /* Specific4 Bottom */
>  #define GEM_SA4T		0x00A4 /* Specific4 Top */
> +#define GEM_WOL			0x00B8 /* Wake on LAN */
>  #define GEM_EFTSH		0x00e8 /* PTP Event Frame Transmitted Seconds Register 47:32 */
>  #define GEM_EFRSH		0x00ec /* PTP Event Frame Received Seconds Register 47:32 */
>  #define GEM_PEFTSH		0x00f0 /* PTP Peer Event Frame Transmitted Seconds Register 47:32 */
> @@ -398,6 +399,8 @@
>  #define MACB_PDRSFT_SIZE	1
>  #define MACB_SRI_OFFSET		26 /* TSU Seconds Register Increment */
>  #define MACB_SRI_SIZE		1
> +#define GEM_WOL_OFFSET		28 /* Enable wake-on-lan interrupt in GEM */
> +#define GEM_WOL_SIZE		1
>  
>  /* Timer increment fields */
>  #define MACB_TI_CNS_OFFSET	0
> @@ -635,6 +638,7 @@
>  #define MACB_CAPS_USRIO_DISABLED		0x00000010
>  #define MACB_CAPS_JUMBO				0x00000020
>  #define MACB_CAPS_GEM_HAS_PTP			0x00000040
> +#define MACB_CAPS_WOL				0x00000080

I think would be better to have this as part of bp->wol and use it properly
in suspend/resume hooks.

>  #define MACB_CAPS_FIFO_MODE			0x10000000
>  #define MACB_CAPS_GIGABIT_MODE_AVAILABLE	0x20000000
>  #define MACB_CAPS_SG_DISABLED			0x40000000
> @@ -1147,6 +1151,8 @@ struct macb {
>  	unsigned int		num_queues;
>  	unsigned int		queue_mask;
>  	struct macb_queue	queues[MACB_MAX_QUEUES];
> +	dma_addr_t		rx_ring_tieoff_dma;
> +	struct macb_dma_desc	*rx_ring_tieoff;
>  
>  	spinlock_t		lock;
>  	struct platform_device	*pdev;
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index bca91bd..9902654 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -36,6 +36,7 @@
>  #include <linux/udp.h>
>  #include <linux/tcp.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/inetdevice.h>
>  #include "macb.h"
>  
>  #define MACB_RX_BUFFER_SIZE	128
> @@ -1400,6 +1401,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
>  	spin_lock(&bp->lock);
>  
>  	while (status) {
> +		if (status & GEM_BIT(WOL)) {
> +			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> +				queue_writel(queue, ISR, GEM_BIT(WOL));
> +			break;
> +		}
> +
>  		/* close possible race with dev_close */
>  		if (unlikely(!netif_running(dev))) {
>  			queue_writel(queue, IDR, -1);
> @@ -1900,6 +1907,12 @@ static void macb_free_consistent(struct macb *bp)
>  		queue->rx_ring = NULL;
>  	}
>  
> +	if (bp->rx_ring_tieoff) {
> +		dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
> +				  bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
> +		bp->rx_ring_tieoff = NULL;
> +	}
> +
>  	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
>  		kfree(queue->tx_skb);
>  		queue->tx_skb = NULL;
> @@ -1979,6 +1992,14 @@ static int macb_alloc_consistent(struct macb *bp)
>  			   "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
>  			   size, (unsigned long)queue->rx_ring_dma, queue->rx_ring);
>  	}
> +	/* Allocate one dummy descriptor to tie off RX queues when required */
> +	bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
> +						macb_dma_desc_get_size(bp),
> +						&bp->rx_ring_tieoff_dma,
> +						GFP_KERNEL);
> +	if (!bp->rx_ring_tieoff)
> +		goto out_err;
> +
>  	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
>  		goto out_err;
>  
> @@ -1989,6 +2010,34 @@ static int macb_alloc_consistent(struct macb *bp)
>  	return -ENOMEM;
>  }
>  
> +static void macb_init_tieoff(struct macb *bp)
> +{
> +	struct macb_dma_desc *d = bp->rx_ring_tieoff;
> +
> +	/* Setup a wrapping descriptor with no free slots
> +	 * (WRAP and USED) to tie off/disable unused RX queues.
> +	 */
> +	macb_set_addr(bp, d, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
> +	d->ctrl = 0;
> +}
> +
> +static inline void macb_rx_tieoff(struct macb *bp)
> +{
> +	struct macb_queue *queue = bp->queues;
> +	unsigned int q;
> +
> +	for (q = 0, queue = bp->queues; q < bp->num_queues;
> +	     ++q, ++queue) {
> +		queue_writel(queue, RBQP,
> +			     lower_32_bits(bp->rx_ring_tieoff_dma));
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> +		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
> +			queue_writel(queue, RBQPH,
> +				     upper_32_bits(bp->rx_ring_tieoff_dma));
> +#endif
> +	}
> +}
> +
>  static void gem_init_rings(struct macb *bp)
>  {
>  	struct macb_queue *queue;
> @@ -2011,6 +2060,7 @@ static void gem_init_rings(struct macb *bp)
>  
>  		gem_rx_refill(queue);
>  	}
> +	macb_init_tieoff(bp);
>  
>  }
>  
> @@ -2653,6 +2703,13 @@ static void macb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
>  		if (bp->wol & MACB_WOL_ENABLED)
>  			wol->wolopts |= WAKE_MAGIC;
>  	}
> +
> +	if (bp->caps & MACB_CAPS_WOL) {
> +		wol->supported = WAKE_ARP;
> +
> +		if (bp->wol & MACB_WOL_ENABLED)
> +			wol->wolopts |= WAKE_ARP;
> +	}
>  }
>  
>  static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
> @@ -2660,10 +2717,11 @@ static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
>  	struct macb *bp = netdev_priv(netdev);
>  
>  	if (!(bp->wol & MACB_WOL_HAS_MAGIC_PACKET) ||
> -	    (wol->wolopts & ~WAKE_MAGIC))
> +	    !(bp->caps & MACB_CAPS_WOL) ||
> +	    (wol->wolopts & ~WAKE_MAGIC) || (wol->wolopts & ~WAKE_ARP))
>  		return -EOPNOTSUPP;

So, both flags WAKE_MAGIC and WAKE_ARP needs to be set in order to use
Wake on Lan? Shouldn't this be exclusive? I mean if only one is set to
use that one?

Also, wouldn't be better to have all Wake on LAN capabilities in the same
place? I mean either bp->wol or bp->caps??


>  
> -	if (wol->wolopts & WAKE_MAGIC)
> +	if (wol->wolopts & (WAKE_MAGIC | WAKE_ARP))
>  		bp->wol |= MACB_WOL_ENABLED;
>  	else
>  		bp->wol &= ~MACB_WOL_ENABLED;
> @@ -3895,7 +3953,7 @@ static const struct macb_config np4_config = {
>  static const struct macb_config zynqmp_config = {
>  	.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
>  			MACB_CAPS_JUMBO |
> -			MACB_CAPS_GEM_HAS_PTP,
> +			MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_WOL,
>  	.dma_burst_length = 16,
>  	.clk_init = macb_clk_init,
>  	.init = macb_init,
> @@ -4093,6 +4151,9 @@ static int macb_probe(struct platform_device *pdev)
>  
>  	phy_attached_info(phydev);
>  
> +	if (bp->caps & MACB_CAPS_WOL)
> +		device_set_wakeup_capable(&bp->dev->dev, 1);
> +

I think it is better to have this in bp->wol to keep all the wakeup related
events in the same place.

>  	netdev_info(dev, "Cadence %s rev 0x%08x at 0x%08lx irq %d (%pM)\n",
>  		    macb_is_gem(bp) ? "GEM" : "MACB", macb_readl(bp, MID),
>  		    dev->base_addr, dev->irq, dev->dev_addr);
> @@ -4170,16 +4231,58 @@ static int __maybe_unused macb_suspend(struct device *dev)
>  	struct macb_queue *queue = bp->queues;
>  	unsigned long flags;
>  	unsigned int q;
> +	u32 ctrl, arpipmask;
>  
>  	if (!netif_running(netdev))
>  		return 0;
>  
>  
> -	if (bp->wol & MACB_WOL_ENABLED) {
> +	if ((bp->wol & MACB_WOL_ENABLED) &&
> +	    (bp->wol & MACB_WOL_HAS_MAGIC_PACKET)) {

If you will also introduce the other 2 wakeup supported sources of GEM GXL you
will end up having also a new else and conditioning device_may_wakeup() below
if-else condition with a bp->caps & MACB_CAPS_WOL.

I thinking that having something like this will be simpler:
	if (bp->wol & MACB_WOL_ENABLED) {
		if (bp->wol & MACB_WOL_HAS_MAGIC_PACKET)
			macb_configure_magic_pkt();
		if (bp->wol & MACB_WOL_HAS_ARP_PACKET)
			macb_configure_arp_pkt();
	}

What do you think?

>  		macb_writel(bp, IER, MACB_BIT(WOL));
>  		macb_writel(bp, WOL, MACB_BIT(MAG));
>  		enable_irq_wake(bp->queues[0].irq);
>  		netif_device_detach(netdev);
> +	} else if (device_may_wakeup(&bp->dev->dev)) {
> +		/* Use ARP as default wake source */
> +		spin_lock_irqsave(&bp->lock, flags);
> +		ctrl = macb_readl(bp, NCR);
> +		/* Disable TX as is it not required;
> +		 * Disable RX to change BD pointers and enable again
> +		 */
> +		ctrl &= ~(MACB_BIT(TE) | MACB_BIT(RE));
> +		macb_writel(bp, NCR, ctrl);
> +		/* Tie all RX queues */
> +		macb_rx_tieoff(bp);
> +		ctrl = macb_readl(bp, NCR);
> +		ctrl |= MACB_BIT(RE);
> +		macb_writel(bp, NCR, ctrl);
> +		/* Broadcast should be enabled for ARP wake event */
> +		gem_writel(bp, NCFGR, gem_readl(bp, NCFGR) & ~MACB_BIT(NBC));
> +		macb_writel(bp, TSR, -1);
> +		macb_writel(bp, RSR, -1);
> +		macb_readl(bp, ISR);
> +		if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> +			macb_writel(bp, ISR, -1);
> +
> +		/* Enable WOL (Q0 only) and disable all other interrupts */
> +		queue = bp->queues;
> +		queue_writel(queue, IER, GEM_BIT(WOL));
> +		for (q = 0, queue = bp->queues; q < bp->num_queues;
> +		     ++q, ++queue) {
> +			queue_writel(queue, IDR, MACB_RX_INT_FLAGS |
> +						 MACB_TX_INT_FLAGS |
> +						 MACB_BIT(HRESP));
> +		}
> +
> +		arpipmask = cpu_to_be32p(&bp->dev->ip_ptr->ifa_list->ifa_local)
> +					 & 0xFFFF;
> +		gem_writel(bp, WOL, MACB_BIT(ARP) | arpipmask);
> +		spin_unlock_irqrestore(&bp->lock, flags);
> +		enable_irq_wake(bp->queues[0].irq);
> +		netif_device_detach(netdev);
> +		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> +			napi_disable(&queue->napi);

Is all this really necessary?
I mean, wouldn't be enough the adaption of previous approach? Looking over the initial
approach we have this:

	if (bp->wol & MACB_WOL_ENABLED) {
		macb_writel(bp, IER, MACB_BIT(WOL));
		macb_writel(bp, WOL, MACB_BIT(MAG));
		enable_irq_wake(bp->queues[0].irq);
		netif_device_detach(netdev);
	}

Wouldn't it work if you will change it in something like this:

	u32 wolmask, arpipmask = 0;

	if (bp->wol & MACB_WOL_ENABLED) {
		macb_writel(bp, IER, MACB_BIT(WOL));

		if (bp->wol & MACB_WOL_HAS_ARP_PACKET) {
			/* Enable broadcast. */
			gem_writel(bp, NCFGR, gem_readl(bp, NCFGR) & ~MACB_BIT(NBC));
			arpipmask = cpu_to_be32p(&bp->dev->ip_ptr->ifa_list->ifa_local) & 0xFFFF;
			wolmask = arpipmask | MACB_BIT(ARP);
		} else {
			wolmask = MACB_BIT(MAG);
		}

		macb_writel(bp, WOL, wolmask);
		enable_irq_wake(bp->queues[0].irq);
		netif_device_detach(netdev);
	}

I cannot find anything particular for ARP WOL events in datasheet. Also, 
I cannot find something related to DMA activity while WOL is active

Thank you,
Claudiu

>  	} else {
>  		netif_device_detach(netdev);
>  		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> @@ -4206,16 +4309,33 @@ static int __maybe_unused macb_resume(struct device *dev)
>  	struct macb *bp = netdev_priv(netdev);
>  	struct macb_queue *queue = bp->queues;
>  	unsigned int q;
> +	unsigned long flags;
>  
>  	if (!netif_running(netdev))
>  		return 0;
>  
>  	pm_runtime_force_resume(dev);
>  
> -	if (bp->wol & MACB_WOL_ENABLED) {
> +	if ((bp->wol & MACB_WOL_ENABLED) &&
> +	    (bp->wol & MACB_WOL_HAS_MAGIC_PACKET)) {
>  		macb_writel(bp, IDR, MACB_BIT(WOL));
>  		macb_writel(bp, WOL, 0);
>  		disable_irq_wake(bp->queues[0].irq);
> +	} else if (device_may_wakeup(&bp->dev->dev)) {
> +		/* Resume after ARP wake event */
> +		spin_lock_irqsave(&bp->lock, flags);
> +		queue = bp->queues;
> +		queue_writel(queue, IDR, GEM_BIT(WOL));
> +		gem_writel(bp, WOL, 0);
> +		/* Clear Q0 ISR as WOL was enabled on Q0 */
> +		if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
> +			macb_writel(bp, ISR, -1);
> +		disable_irq_wake(bp->queues[0].irq);
> +		spin_unlock_irqrestore(&bp->lock, flags);
> +		macb_writel(bp, NCR, MACB_BIT(MPE));
> +		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> +			napi_enable(&queue->napi);
> +		netif_carrier_on(netdev);
>  	} else {
>  		macb_writel(bp, NCR, MACB_BIT(MPE));
>  		for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
> 

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: Add support for U32 TC filter using Flexible RX Parser
From: kbuild test robot @ 2018-05-04 12:04 UTC (permalink / raw)
  To: Jose Abreu
  Cc: kbuild-all, netdev, Jose Abreu, David S. Miller, Joao Pinto,
	Vitor Soares, Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <9f57f98ef35360619001882fdcf0d7ef0558863f.1525351447.git.joabreu@synopsys.com>

Hi Jose,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Jose-Abreu/net-stmmac-Add-support-for-U32-TC-filter-using-Flexible-RX-Parser/20180504-164205
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:104:14: sparse: incorrect type in assignment (different base types) @@    expected unsigned int [unsigned] [usertype] data @@    got ed int [unsigned] [usertype] data @@
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:104:14:    expected unsigned int [unsigned] [usertype] data
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:104:14:    got restricted __be32 [usertype] val
>> drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:105:14: sparse: incorrect type in assignment (different base types) @@    expected unsigned int [unsigned] [usertype] mask @@    got ed int [unsigned] [usertype] mask @@
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:105:14:    expected unsigned int [unsigned] [usertype] mask
   drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c:105:14:    got restricted __be32 [usertype] mask

vim +104 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c

    89	
    90	static int tc_fill_entry(struct stmmac_priv *priv,
    91			struct tc_cls_u32_offload *cls)
    92	{
    93		struct stmmac_tc_entry *entry, *frag = NULL;
    94		struct tc_u32_sel *sel = cls->knode.sel;
    95		u32 off, data, mask, real_off, rem;
    96		u32 prio = cls->common.prio;
    97		int ret;
    98	
    99		/* Only 1 match per entry */
   100		if (sel->nkeys <= 0 || sel->nkeys > 1)
   101			return -EINVAL;
   102	
   103		off = sel->keys[0].off << sel->offshift;
 > 104		data = sel->keys[0].val;
 > 105		mask = sel->keys[0].mask;
   106	
   107		switch (ntohs(cls->common.protocol)) {
   108		case ETH_P_ALL:
   109			break;
   110		case ETH_P_IP:
   111			off += ETH_HLEN;
   112			break;
   113		default:
   114			return -EINVAL;
   115		}
   116	
   117		if (off > priv->tc_off_max)
   118			return -EINVAL;
   119	
   120		real_off = off / 4;
   121		rem = off % 4;
   122	
   123		entry = tc_find_entry(priv, cls, true);
   124		if (!entry)
   125			return -EINVAL;
   126	
   127		if (rem) {
   128			frag = tc_find_entry(priv, cls, true);
   129			if (!frag) {
   130				ret = -EINVAL;
   131				goto err_unuse;
   132			}
   133	
   134			entry->frag_ptr = frag;
   135			entry->val.match_en = (mask << (rem * 8)) &
   136				GENMASK(31, rem * 8);
   137			entry->val.match_data = (data << (rem * 8)) &
   138				GENMASK(31, rem * 8);
   139			entry->val.frame_offset = real_off;
   140			entry->prio = prio;
   141	
   142			frag->val.match_en = (mask >> (rem * 8)) &
   143				GENMASK(rem * 8 - 1, 0);
   144			frag->val.match_data = (data >> (rem * 8)) &
   145				GENMASK(rem * 8 - 1, 0);
   146			frag->val.frame_offset = real_off + 1;
   147			frag->prio = prio;
   148			frag->is_frag = true;
   149		} else {
   150			entry->frag_ptr = NULL;
   151			entry->val.match_en = mask;
   152			entry->val.match_data = data;
   153			entry->val.frame_offset = real_off;
   154			entry->prio = prio;
   155		}
   156	
   157		ret = tc_fill_actions(entry, frag, cls);
   158		if (ret)
   159			goto err_unuse;
   160	
   161		return 0;
   162	
   163	err_unuse:
   164		if (frag)
   165			frag->in_use = false;
   166		entry->in_use = false;
   167		return ret;
   168	}
   169	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* Re: [PATCH bpf-next v3 00/15] Introducing AF_XDP support
From: Magnus Karlsson @ 2018-05-04 11:22 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Daniel Borkmann, Björn Töpel, Karlsson, Magnus,
	Alexander Duyck, Alexander Duyck, John Fastabend,
	Alexei Starovoitov, Jesper Dangaard Brouer, Willem de Bruijn,
	Michael S. Tsirkin, Network Development, Björn Töpel,
	michael.lundkvist, Brandeburg, Jesse, Singhai, Anjali,
	Zhang, Qi Z
In-Reply-To: <20180503233819.2tplfjd32auudys2@ast-mbp>

On Fri, May 4, 2018 at 1:38 AM, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> On Fri, May 04, 2018 at 12:49:09AM +0200, Daniel Borkmann wrote:
>> On 05/02/2018 01:01 PM, Björn Töpel wrote:
>> > From: Björn Töpel <bjorn.topel@intel.com>
>> >
>> > This patch set introduces a new address family called AF_XDP that is
>> > optimized for high performance packet processing and, in upcoming
>> > patch sets, zero-copy semantics. In this patch set, we have removed
>> > all zero-copy related code in order to make it smaller, simpler and
>> > hopefully more review friendly. This patch set only supports copy-mode
>> > for the generic XDP path (XDP_SKB) for both RX and TX and copy-mode
>> > for RX using the XDP_DRV path. Zero-copy support requires XDP and
>> > driver changes that Jesper Dangaard Brouer is working on. Some of his
>> > work has already been accepted. We will publish our zero-copy support
>> > for RX and TX on top of his patch sets at a later point in time.
>>
>> +1, would be great to see it land this cycle. Saw few minor nits here
>> and there but nothing to hold it up, for the series:
>>
>> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
>>
>> Thanks everyone!
>
> Great stuff!
>
> Applied to bpf-next, with one condition.
> Upcoming zero-copy patches for both RX and TX need to be posted
> and reviewed within this release window.
> If netdev community as a whole won't be able to agree on the zero-copy
> bits we'd need to revert this feature before the next merge window.

Thanks everyone for reviewing this. Highly appreciated.

Just so we understand the purpose correctly:

1: Do you want to see the ZC patches in order to verify that the user
space API holds? If so, we can produce an additional RFC  patch set
using a big chunk of code that we had in RFC V1. We are not proud of
this code since it is clunky, but it hopefully proves the point with
the uapi being the same.

2: And/Or are you worried about us all (the netdev community) not
agreeing on a way to implement ZC internally in the drivers and the
XDP infrastructure? This is not going to be possible to finish during
this cycle since we do not like the implementation we had in RFC V1.
Too intrusive and now we also have nicer abstractions from Jesper that
we can use and extend to provide a (hopefully) much cleaner and less
intrusive solution.

Just so that we focus on the right proof points.

> Few other minor nits:
> patch 3:
> +struct xdp_ring {
> +       __u32 producer __attribute__((aligned(64)));
> +       __u32 consumer __attribute__((aligned(64)));
> +};
> It kinda begs for ____cacheline_aligned_in_smp to be introduced for uapi headers.

Agreed.

> patch 5:
> +struct sockaddr_xdp {
> +       __u16 sxdp_family;
> +       __u32 sxdp_ifindex;
> Not great to have a hole in uapi struct. Please fix it in the follow up.

You are correct. Will fix.

> patch 7:
> Has a lot of synchronize_net(). I think udpate/delete side
> can be improved to avoid them. Otherwise users may unknowingly DoS.

OK. Could you please elaborate on what kind of DoS attacks can be
performed with this, so we can come up with the right solution here?

Thanks: Magnus

> As the next steps I suggest to prioritize the highest to ship
> zero-copy rx/tx patches and to add selftests.
>
> Thanks!
>

^ permalink raw reply

* Re: [PATCH] bpf: fix possible spectre-v1 in find_and_alloc_map()
From: Mark Rutland @ 2018-05-04 10:49 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: linux-kernel, Alexei Starovoitov, Dan Carpenter, Peter Zijlstra,
	netdev
In-Reply-To: <28363c0d-0b78-681c-d9ea-908671b0067e@iogearbox.net>

On Fri, May 04, 2018 at 02:16:31AM +0200, Daniel Borkmann wrote:
> On 05/03/2018 06:04 PM, Mark Rutland wrote:
> > It's possible for userspace to control attr->map_type. Sanitize it when
> > using it as an array index to prevent an out-of-bounds value being used
> > under speculation.
> > 
> > Found by smatch.
> > 
> > Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Dan Carpenter <dan.carpenter@oracle.com>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Cc: netdev@vger.kernel.org
> 
> Applied to bpf tree, thanks Mark!

Cheers!

> I've also just submitted one for BPF progs
> (http://patchwork.ozlabs.org/patch/908385/) which is same situation.

That looks good to me. That case doesn't show up in my smatch results so
far, but I might just need a few more build iterations.

Thanks,
Mark.

^ permalink raw reply

* Re: [PATCH bpf-next v4 3/4] bpf: selftest additions for SOCKHASH
From: Quentin Monnet @ 2018-05-04 10:11 UTC (permalink / raw)
  To: John Fastabend, borkmann, ast; +Cc: netdev
In-Reply-To: <1525372108-8690-4-git-send-email-john.fastabend@gmail.com>

2018-05-03 11:28:27 UTC-0700 ~ John Fastabend <john.fastabend@gmail.com>
> This runs existing SOCKMAP tests with SOCKHASH map type. To do this
> we push programs into include file and build two BPF programs. One
> for SOCKHASH and one for SOCKMAP.
> 
> We then run the entire test suite with each type.
> 
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  tools/include/uapi/linux/bpf.h                     |  6 ++++-
>  tools/testing/selftests/bpf/Makefile               |  3 ++-
>  tools/testing/selftests/bpf/test_sockhash_kern.c   |  4 ++++
>  tools/testing/selftests/bpf/test_sockmap.c         | 27 ++++++++++++++++------
>  .../{test_sockmap_kern.c => test_sockmap_kern.h}   | 10 ++++----
>  5 files changed, 36 insertions(+), 14 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/test_sockhash_kern.c
>  rename tools/testing/selftests/bpf/{test_sockmap_kern.c => test_sockmap_kern.h} (97%)
> 
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index da77a93..5cb983d 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -116,6 +116,7 @@ enum bpf_map_type {
>  	BPF_MAP_TYPE_DEVMAP,
>  	BPF_MAP_TYPE_SOCKMAP,
>  	BPF_MAP_TYPE_CPUMAP,
> +	BPF_MAP_TYPE_SOCKHASH,
>  };
>  
>  enum bpf_prog_type {
> @@ -1835,7 +1836,10 @@ struct bpf_stack_build_id {
>  	FN(msg_pull_data),		\
>  	FN(bind),			\
>  	FN(xdp_adjust_tail),		\
> -	FN(skb_get_xfrm_state),
> +	FN(skb_get_xfrm_state),		\
> +	FN(sock_hash_update),		\
> +	FN(msg_redirect_hash),		\
> +	FN(sk_redirect_hash),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call

Thanks for documenting the helpers in include/uapi/linux/bpf.h! However
the doc is missing in the update to the bpf.h file under tools/ in this
patch, could you please fix it?

Best regards,
Quentin

^ permalink raw reply

* [PATCH REPOST] xen/9pfs: don't inclide rwlock.h directly.
From: Sebastian Andrzej Siewior @ 2018-05-04 10:03 UTC (permalink / raw)
  To: netdev
  Cc: tglx, Sebastian Andrzej Siewior, Eric Van Hensbergen, Ron Minnich,
	Latchesar Ionkov, David S. Miller, v9fs-developer

rwlock.h should not be included directly. Instead linux/splinlock.h
should be included. One thing it does is to break the RT build.

Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: v9fs-developer@lists.sourceforge.net
Cc: netdev@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 net/9p/trans_xen.c |    1 -
 1 file changed, 1 deletion(-)

--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -38,7 +38,6 @@
 
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <linux/rwlock.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>

^ permalink raw reply

* [PATCH net-next v2] net: core: rework basic flow dissection helper
From: Paolo Abeni @ 2018-05-04  9:32 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Eric Dumazet, Jason Wang

When the core networking needs to detect the transport offset in a given
packet and parse it explicitly, a full-blown flow_keys struct is used for
storage.
This patch introduces a smaller keys store, rework the basic flow dissect
helper to use it, and apply this new helper where possible - namely in
skb_probe_transport_header(). The used flow dissector data structures
are renamed to match more closely the new role.

The above gives ~50% performance improvement in micro benchmarking around
skb_probe_transport_header() and ~30% around eth_get_headlen(), mostly due
to the smaller memset. Small, but measurable improvement is measured also
in macro benchmarking.

v1 -> v2: use the new helper in eth_get_headlen() and skb_get_poff(),
  as per DaveM suggestion

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h       | 18 ++++++++++--------
 include/net/flow_dissector.h |  7 ++++++-
 net/core/flow_dissector.c    | 17 +++++++++--------
 net/ethernet/eth.c           |  6 +++---
 4 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 908d66e55b14..693564a9a979 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1171,7 +1171,7 @@ void __skb_get_hash(struct sk_buff *skb);
 u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-		   const struct flow_keys *keys, int hlen);
+		   const struct flow_keys_basic *keys, int hlen);
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 			    void *data, int hlen_proto);
 
@@ -1208,13 +1208,14 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
 				  NULL, 0, 0, 0, flags);
 }
 
-static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow,
-						  void *data, __be16 proto,
-						  int nhoff, int hlen,
-						  unsigned int flags)
+static inline bool
+skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb,
+				 struct flow_keys_basic *flow, void *data,
+				 __be16 proto, int nhoff, int hlen,
+				 unsigned int flags)
 {
 	memset(flow, 0, sizeof(*flow));
-	return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow,
+	return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow,
 				  data, proto, nhoff, hlen, flags);
 }
 
@@ -2350,11 +2351,12 @@ static inline void skb_pop_mac_header(struct sk_buff *skb)
 static inline void skb_probe_transport_header(struct sk_buff *skb,
 					      const int offset_hint)
 {
-	struct flow_keys keys;
+	struct flow_keys_basic keys;
 
 	if (skb_transport_header_was_set(skb))
 		return;
-	else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
+
+	if (skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
 		skb_set_transport_header(skb, keys.control.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 9a074776f70b..e2f6e5c928bb 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -226,6 +226,11 @@ struct flow_dissector {
 	unsigned short int offset[FLOW_DISSECTOR_KEY_MAX];
 };
 
+struct flow_keys_basic {
+	struct flow_dissector_key_control control;
+	struct flow_dissector_key_basic basic;
+};
+
 struct flow_keys {
 	struct flow_dissector_key_control control;
 #define FLOW_KEYS_HASH_START_FIELD basic
@@ -244,7 +249,7 @@ __be32 flow_get_u32_src(const struct flow_keys *flow);
 __be32 flow_get_u32_dst(const struct flow_keys *flow);
 
 extern struct flow_dissector flow_keys_dissector;
-extern struct flow_dissector flow_keys_buf_dissector;
+extern struct flow_dissector flow_keys_basic_dissector;
 
 /* struct flow_keys_digest:
  *
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d29f09bc5ff9..030d4ca177fb 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1253,7 +1253,7 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
-		   const struct flow_keys *keys, int hlen)
+		   const struct flow_keys_basic *keys, int hlen)
 {
 	u32 poff = keys->control.thoff;
 
@@ -1314,9 +1314,9 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
  */
 u32 skb_get_poff(const struct sk_buff *skb)
 {
-	struct flow_keys keys;
+	struct flow_keys_basic keys;
 
-	if (!skb_flow_dissect_flow_keys(skb, &keys, 0))
+	if (!skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
 		return 0;
 
 	return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
@@ -1403,7 +1403,7 @@ static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
 	},
 };
 
-static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = {
 	{
 		.key_id = FLOW_DISSECTOR_KEY_CONTROL,
 		.offset = offsetof(struct flow_keys, control),
@@ -1417,7 +1417,8 @@ static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
 struct flow_dissector flow_keys_dissector __read_mostly;
 EXPORT_SYMBOL(flow_keys_dissector);
 
-struct flow_dissector flow_keys_buf_dissector __read_mostly;
+struct flow_dissector flow_keys_basic_dissector __read_mostly;
+EXPORT_SYMBOL(flow_keys_basic_dissector);
 
 static int __init init_default_flow_dissectors(void)
 {
@@ -1427,9 +1428,9 @@ static int __init init_default_flow_dissectors(void)
 	skb_flow_dissector_init(&flow_keys_dissector_symmetric,
 				flow_keys_dissector_symmetric_keys,
 				ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
-	skb_flow_dissector_init(&flow_keys_buf_dissector,
-				flow_keys_buf_dissector_keys,
-				ARRAY_SIZE(flow_keys_buf_dissector_keys));
+	skb_flow_dissector_init(&flow_keys_basic_dissector,
+				flow_keys_basic_dissector_keys,
+				ARRAY_SIZE(flow_keys_basic_dissector_keys));
 	return 0;
 }
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index eaeba9b99a73..ee28440f57c5 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -128,15 +128,15 @@ u32 eth_get_headlen(void *data, unsigned int len)
 {
 	const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
 	const struct ethhdr *eth = (const struct ethhdr *)data;
-	struct flow_keys keys;
+	struct flow_keys_basic keys;
 
 	/* this should never happen, but better safe than sorry */
 	if (unlikely(len < sizeof(*eth)))
 		return len;
 
 	/* parse any remaining L2/L3 headers, check for L4 */
-	if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
-					    sizeof(*eth), len, flags))
+	if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto,
+					      sizeof(*eth), len, flags))
 		return max_t(u32, keys.control.thoff, sizeof(*eth));
 
 	/* parse for any L4 headers */
-- 
2.14.3

^ permalink raw reply related

* Re: [PATCH ipsec-next] xfrm: use a dedicated slab cache for struct xfrm_state
From: Steffen Klassert @ 2018-05-04  9:31 UTC (permalink / raw)
  To: Mathias Krause; +Cc: Herbert Xu, David S. Miller, netdev
In-Reply-To: <1525337707-5281-1-git-send-email-minipli@googlemail.com>

On Thu, May 03, 2018 at 10:55:07AM +0200, Mathias Krause wrote:
> struct xfrm_state is rather large (768 bytes here) and therefore wastes
> quite a lot of memory as it falls into the kmalloc-1024 slab cache,
> leaving 256 bytes of unused memory per XFRM state object -- a net waste
> of 25%.
> 
> Using a dedicated slab cache for struct xfrm_state reduces the level of
> internal fragmentation to a minimum.
> 
> On my configuration SLUB chooses to create a slab cache covering 4
> pages holding 21 objects, resulting in an average memory waste of ~13
> bytes per object -- a net waste of only 1.6%.
> 
> In my tests this led to memory savings of roughly 2.3MB for 10k XFRM
> states.
> 
> Signed-off-by: Mathias Krause <minipli@googlemail.com>

Applied, thanks Mathias!

^ permalink raw reply

* [PATCH] selftests: net: add udpgso* to TEST_GEN_FILES
From: Anders Roxell @ 2018-05-04  9:17 UTC (permalink / raw)
  To: davem, shuah; +Cc: netdev, linux-kselftest, linux-kernel, Anders Roxell

The generated files udpgso* shouldn't be part of TEST_PROGS, they are
used by udpgso.sh and udpgsp_bench.sh. They should be added to the
TEST_GEN_FILES to get installed without being added to the main
run_kselftest.sh script.

Fixes: 3a687bef148d ("selftests: udp gso benchmark")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 tools/testing/selftests/net/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 44895de1a0c4..f0363387ef2f 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -11,9 +11,9 @@ TEST_GEN_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_FILES += tcp_mmap tcp_inq
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
-TEST_GEN_PROGS += udpgso udpgso_bench_tx udpgso_bench_rx
 
 include ../lib.mk
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH net] sctp: delay the authentication for the duplicated cookie-echo chunk
From: Xin Long @ 2018-05-04  9:05 UTC (permalink / raw)
  To: network dev, linux-sctp; +Cc: davem, Marcelo Ricardo Leitner, Neil Horman

Now sctp only delays the authentication for the normal cookie-echo
chunk by setting chunk->auth_chunk in sctp_endpoint_bh_rcv(). But
for the duplicated one with auth, in sctp_assoc_bh_rcv(), it does
authentication first based on the old asoc, which will definitely
fail due to the different auth info in the old asoc.

The duplicated cookie-echo chunk will create a new asoc with the
auth info from this chunk, and the authentication should also be
done with the new asoc's auth info for all of the collision 'A',
'B' and 'D'. Otherwise, the duplicated cookie-echo chunk with auth
will never pass the authentication and create the new connection.

This issue exists since very beginning, and this fix is to make
sctp_assoc_bh_rcv() follow the way sctp_assoc_bh_rcv() does for
the normal cookie-echo chunk to delay the authentication.

While at it, remove the unused params from sctp_sf_authenticate()
and define sctp_auth_chunk_verify() used for all the places that
do the delayed authentication.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/sctp/associola.c    | 30 ++++++++++++++++-
 net/sctp/sm_statefuns.c | 86 ++++++++++++++++++++++++++-----------------------
 2 files changed, 75 insertions(+), 41 deletions(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 837806d..a47179d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1024,8 +1024,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 	struct sctp_endpoint *ep;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
-	int state;
+	int first_time = 1;	/* is this the first time through the loop */
 	int error = 0;
+	int state;
 
 	/* The association should be held so we should be safe. */
 	ep = asoc->ep;
@@ -1036,6 +1037,30 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		state = asoc->state;
 		subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
 
+		/* If the first chunk in the packet is AUTH, do special
+		 * processing specified in Section 6.3 of SCTP-AUTH spec
+		 */
+		if (first_time && subtype.chunk == SCTP_CID_AUTH) {
+			struct sctp_chunkhdr *next_hdr;
+
+			next_hdr = sctp_inq_peek(inqueue);
+			if (!next_hdr)
+				goto normal;
+
+			/* If the next chunk is COOKIE-ECHO, skip the AUTH
+			 * chunk while saving a pointer to it so we can do
+			 * Authentication later (during cookie-echo
+			 * processing).
+			 */
+			if (next_hdr->type == SCTP_CID_COOKIE_ECHO) {
+				chunk->auth_chunk = skb_clone(chunk->skb,
+							      GFP_ATOMIC);
+				chunk->auth = 1;
+				continue;
+			}
+		}
+
+normal:
 		/* SCTP-AUTH, Section 6.3:
 		 *    The receiver has a list of chunk types which it expects
 		 *    to be received only after an AUTH-chunk.  This list has
@@ -1074,6 +1099,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		/* If there is an error on chunk, discard this packet. */
 		if (error && chunk)
 			chunk->pdiscard = 1;
+
+		if (first_time)
+			first_time = 0;
 	}
 	sctp_association_put(asoc);
 }
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 28c070e..c9ae340 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -153,10 +153,7 @@ static enum sctp_disposition sctp_sf_violation_chunk(
 					struct sctp_cmd_seq *commands);
 
 static enum sctp_ierror sctp_sf_authenticate(
-					struct net *net,
-					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const union sctp_subtype type,
 					struct sctp_chunk *chunk);
 
 static enum sctp_disposition __sctp_sf_do_9_1_abort(
@@ -626,6 +623,38 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
 	return SCTP_DISPOSITION_CONSUME;
 }
 
+static bool sctp_auth_chunk_verify(struct net *net, struct sctp_chunk *chunk,
+				   const struct sctp_association *asoc)
+{
+	struct sctp_chunk auth;
+
+	if (!chunk->auth_chunk)
+		return true;
+
+	/* SCTP-AUTH:  auth_chunk pointer is only set when the cookie-echo
+	 * is supposed to be authenticated and we have to do delayed
+	 * authentication.  We've just recreated the association using
+	 * the information in the cookie and now it's much easier to
+	 * do the authentication.
+	 */
+
+	/* Make sure that we and the peer are AUTH capable */
+	if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
+		return false;
+
+	/* set-up our fake chunk so that we can process it */
+	auth.skb = chunk->auth_chunk;
+	auth.asoc = chunk->asoc;
+	auth.sctp_hdr = chunk->sctp_hdr;
+	auth.chunk_hdr = (struct sctp_chunkhdr *)
+				skb_push(chunk->auth_chunk,
+					 sizeof(struct sctp_chunkhdr));
+	skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
+	auth.transport = chunk->transport;
+
+	return sctp_sf_authenticate(asoc, &auth) == SCTP_IERROR_NO_ERROR;
+}
+
 /*
  * Respond to a normal COOKIE ECHO chunk.
  * We are the side that is being asked for an association.
@@ -763,37 +792,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 	if (error)
 		goto nomem_init;
 
-	/* SCTP-AUTH:  auth_chunk pointer is only set when the cookie-echo
-	 * is supposed to be authenticated and we have to do delayed
-	 * authentication.  We've just recreated the association using
-	 * the information in the cookie and now it's much easier to
-	 * do the authentication.
-	 */
-	if (chunk->auth_chunk) {
-		struct sctp_chunk auth;
-		enum sctp_ierror ret;
-
-		/* Make sure that we and the peer are AUTH capable */
-		if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
-			sctp_association_free(new_asoc);
-			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-		}
-
-		/* set-up our fake chunk so that we can process it */
-		auth.skb = chunk->auth_chunk;
-		auth.asoc = chunk->asoc;
-		auth.sctp_hdr = chunk->sctp_hdr;
-		auth.chunk_hdr = (struct sctp_chunkhdr *)
-					skb_push(chunk->auth_chunk,
-						 sizeof(struct sctp_chunkhdr));
-		skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
-		auth.transport = chunk->transport;
-
-		ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
-		if (ret != SCTP_IERROR_NO_ERROR) {
-			sctp_association_free(new_asoc);
-			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-		}
+	if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) {
+		sctp_association_free(new_asoc);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1797,13 +1798,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
 	if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
 		goto nomem;
 
+	if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+		return SCTP_DISPOSITION_DISCARD;
+
 	/* Make sure no new addresses are being added during the
 	 * restart.  Though this is a pretty complicated attack
 	 * since you'd have to get inside the cookie.
 	 */
-	if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) {
+	if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands))
 		return SCTP_DISPOSITION_CONSUME;
-	}
 
 	/* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes
 	 * the peer has restarted (Action A), it MUST NOT setup a new
@@ -1912,6 +1915,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
 	if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC))
 		goto nomem;
 
+	if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
+		return SCTP_DISPOSITION_DISCARD;
+
 	/* Update the content of current association.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
@@ -2009,6 +2015,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
 	 * a COOKIE ACK.
 	 */
 
+	if (!sctp_auth_chunk_verify(net, chunk, asoc))
+		return SCTP_DISPOSITION_DISCARD;
+
 	/* Don't accidentally move back into established state. */
 	if (asoc->state < SCTP_STATE_ESTABLISHED) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -4171,10 +4180,7 @@ enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
  * The return value is the disposition of the chunk.
  */
 static enum sctp_ierror sctp_sf_authenticate(
-					struct net *net,
-					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
-					const union sctp_subtype type,
 					struct sctp_chunk *chunk)
 {
 	struct sctp_shared_key *sh_key = NULL;
@@ -4275,7 +4281,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
 						  commands);
 
 	auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
-	error = sctp_sf_authenticate(net, ep, asoc, type, chunk);
+	error = sctp_sf_authenticate(asoc, chunk);
 	switch (error) {
 	case SCTP_IERROR_AUTH_BAD_HMAC:
 		/* Generate the ERROR chunk and discard the rest
-- 
2.1.0

^ permalink raw reply related

* [PATCH v2 net-next] net: stmmac: Add support for U32 TC filter using Flexible RX Parser
From: Jose Abreu @ 2018-05-04  9:01 UTC (permalink / raw)
  To: netdev
  Cc: Jose Abreu, David S. Miller, Joao Pinto, Vitor Soares,
	Giuseppe Cavallaro, Alexandre Torgue, Jakub Kicinski

This adds support for U32 filter by using an HW only feature called
Flexible RX Parser. This allow us to match any given packet field with a
pattern and accept/reject or even route the packet to a specific DMA
channel.

Right now we only support acception or rejection of frame and we only
support simple rules. Though, the Parser has the flexibility of jumping to
specific rules as an if condition so complex rules can be established.

This is only supported in GMAC5.10+.

The following commands can be used to test this code:

	1) Setup an ingress qdisk:
	# tc qdisc add dev eth0 handle ffff: ingress

	2) Setup a filter (e.g. filter by IP):
	# tc filter add dev eth0 parent ffff: protocol ip u32 match ip \
		src 192.168.0.3 skip_sw action drop

In every tests performed we always used the "skip_sw" flag to make sure
only the RX Parser was involved.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Vitor Soares <soares@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Jakub Kicinski <kubakici@wp.pl>
---
Changes from v1:
	- Follow Linux network coding style (David)
	- Use tc_cls_can_offload_and_chain0() (Jakub)
---
 drivers/net/ethernet/stmicro/stmmac/Makefile      |    2 +-
 drivers/net/ethernet/stmicro/stmmac/common.h      |    5 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4.h      |    4 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c |    1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c  |    3 +
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c      |  195 ++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h      |   13 +
 drivers/net/ethernet/stmicro/stmmac/hwif.c        |    8 +
 drivers/net/ethernet/stmicro/stmmac/hwif.h        |   25 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   29 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   59 ++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c   |  295 +++++++++++++++++++++
 12 files changed, 636 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c

diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index e3b578b..68e9e26 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -5,7 +5,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o	\
 	      dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o	\
 	      mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o	\
 	      dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \
-	      $(stmmac-y)
+	      stmmac_tc.o $(stmmac-y)
 
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 627e905..a679cb7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -353,6 +353,10 @@ struct dma_features {
 	unsigned int rx_fifo_size;
 	/* Automotive Safety Package */
 	unsigned int asp;
+	/* RX Parser */
+	unsigned int frpsel;
+	unsigned int frpbs;
+	unsigned int frpes;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -412,6 +416,7 @@ struct mac_device_info {
 	const struct stmmac_dma_ops *dma;
 	const struct stmmac_mode_ops *mode;
 	const struct stmmac_hwtimestamp *ptp;
+	const struct stmmac_tc_ops *tc;
 	struct mii_regs mii;	/* MII register Addresses */
 	struct mac_link link;
 	void __iomem *pcsr;     /* vpointer to device CSRs */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 03eab90..6330a55 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -194,6 +194,9 @@ enum power_event {
 
 /* MAC HW features3 bitmap */
 #define GMAC_HW_FEAT_ASP		GENMASK(29, 28)
+#define GMAC_HW_FEAT_FRPES		GENMASK(14, 13)
+#define GMAC_HW_FEAT_FRPBS		GENMASK(12, 11)
+#define GMAC_HW_FEAT_FRPSEL		BIT(10)
 
 /* MAC HW ADDR regs */
 #define GMAC_HI_DCS			GENMASK(18, 16)
@@ -202,6 +205,7 @@ enum power_event {
 
 /*  MTL registers */
 #define MTL_OPERATION_MODE		0x00000c00
+#define MTL_FRPE			BIT(15)
 #define MTL_OPERATION_SCHALG_MASK	GENMASK(6, 5)
 #define MTL_OPERATION_SCHALG_WRR	(0x0 << 5)
 #define MTL_OPERATION_SCHALG_WFQ	(0x1 << 5)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 7289b3b..a7121a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -795,6 +795,7 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
 	.safety_feat_config = dwmac5_safety_feat_config,
 	.safety_feat_irq_status = dwmac5_safety_feat_irq_status,
 	.safety_feat_dump = dwmac5_safety_feat_dump,
+	.rxp_config = dwmac5_rxp_config,
 };
 
 int dwmac4_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index d37d457..117c3a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -379,6 +379,9 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 
 	/* 5.10 Features */
 	dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28;
+	dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13;
+	dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11;
+	dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10;
 }
 
 /* Enable/disable TSO feature and set MSS */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 2978550..b2becb8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -7,6 +7,7 @@
 #include "common.h"
 #include "dwmac4.h"
 #include "dwmac5.h"
+#include "stmmac.h"
 
 struct dwmac5_error_desc {
 	bool valid;
@@ -299,3 +300,197 @@ int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
 		*desc = dwmac5_all_errors[module].desc[offset].desc;
 	return 0;
 }
+
+static int dwmac5_rxp_disable(void __iomem *ioaddr)
+{
+	u32 val;
+	int ret;
+
+	val = readl(ioaddr + MTL_OPERATION_MODE);
+	val &= ~MTL_FRPE;
+	writel(val, ioaddr + MTL_OPERATION_MODE);
+
+	ret = readl_poll_timeout(ioaddr + MTL_RXP_CONTROL_STATUS, val,
+			val & RXPI, 1, 10000);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+static void dwmac5_rxp_enable(void __iomem *ioaddr)
+{
+	u32 val;
+
+	val = readl(ioaddr + MTL_OPERATION_MODE);
+	val |= MTL_FRPE;
+	writel(val, ioaddr + MTL_OPERATION_MODE);
+}
+
+static int dwmac5_rxp_update_single_entry(void __iomem *ioaddr,
+					  struct stmmac_tc_entry *entry,
+					  int pos)
+{
+	int ret, i;
+
+	for (i = 0; i < (sizeof(entry->val) / sizeof(u32)); i++) {
+		int real_pos = pos * (sizeof(entry->val) / sizeof(u32)) + i;
+		u32 val;
+
+		/* Wait for ready */
+		ret = readl_poll_timeout(ioaddr + MTL_RXP_IACC_CTRL_STATUS,
+				val, !(val & STARTBUSY), 1, 10000);
+		if (ret)
+			return ret;
+
+		/* Write data */
+		val = *((u32 *)&entry->val + i);
+		writel(val, ioaddr + MTL_RXP_IACC_DATA);
+
+		/* Write pos */
+		val = real_pos & ADDR;
+		writel(val, ioaddr + MTL_RXP_IACC_CTRL_STATUS);
+
+		/* Write OP */
+		val |= WRRDN;
+		writel(val, ioaddr + MTL_RXP_IACC_CTRL_STATUS);
+
+		/* Start Write */
+		val |= STARTBUSY;
+		writel(val, ioaddr + MTL_RXP_IACC_CTRL_STATUS);
+
+		/* Wait for done */
+		ret = readl_poll_timeout(ioaddr + MTL_RXP_IACC_CTRL_STATUS,
+				val, !(val & STARTBUSY), 1, 10000);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static struct stmmac_tc_entry *
+dwmac5_rxp_get_next_entry(struct stmmac_tc_entry *entries, unsigned int count,
+			  u32 curr_prio)
+{
+	struct stmmac_tc_entry *entry;
+	u32 min_prio = ~0x0;
+	int i, min_prio_idx;
+	bool found = false;
+
+	for (i = count - 1; i >= 0; i--) {
+		entry = &entries[i];
+
+		/* Do not update unused entries */
+		if (!entry->in_use)
+			continue;
+		/* Do not update already updated entries (i.e. fragments) */
+		if (entry->in_hw)
+			continue;
+		/* Let last entry be updated last */
+		if (entry->is_last)
+			continue;
+		/* Do not return fragments */
+		if (entry->is_frag)
+			continue;
+		/* Check if we already checked this prio */
+		if (entry->prio < curr_prio)
+			continue;
+		/* Check if this is the minimum prio */
+		if (entry->prio < min_prio) {
+			min_prio = entry->prio;
+			min_prio_idx = i;
+			found = true;
+		}
+	}
+
+	if (found)
+		return &entries[min_prio_idx];
+	return NULL;
+}
+
+int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
+		      unsigned int count)
+{
+	struct stmmac_tc_entry *entry, *frag;
+	int i, ret, nve = 0;
+	u32 curr_prio = 0;
+	u32 old_val, val;
+
+	/* Force disable RX */
+	old_val = readl(ioaddr + GMAC_CONFIG);
+	val = old_val & ~GMAC_CONFIG_RE;
+	writel(val, ioaddr + GMAC_CONFIG);
+
+	/* Disable RX Parser */
+	ret = dwmac5_rxp_disable(ioaddr);
+	if (ret)
+		goto re_enable;
+
+	/* Set all entries as NOT in HW */
+	for (i = 0; i < count; i++) {
+		entry = &entries[i];
+		entry->in_hw = false;
+	}
+
+	/* Update entries by reverse order */
+	while (1) {
+		entry = dwmac5_rxp_get_next_entry(entries, count, curr_prio);
+		if (!entry)
+			break;
+
+		curr_prio = entry->prio;
+		frag = entry->frag_ptr;
+
+		/* Set special fragment requirements */
+		if (frag) {
+			entry->val.af = 0;
+			entry->val.rf = 0;
+			entry->val.nc = 1;
+			entry->val.ok_index = nve + 2;
+		}
+
+		ret = dwmac5_rxp_update_single_entry(ioaddr, entry, nve);
+		if (ret)
+			goto re_enable;
+
+		entry->table_pos = nve++;
+		entry->in_hw = true;
+
+		if (frag && !frag->in_hw) {
+			ret = dwmac5_rxp_update_single_entry(ioaddr, frag, nve);
+			if (ret)
+				goto re_enable;
+			frag->table_pos = nve++;
+			frag->in_hw = true;
+		}
+	}
+
+	if (!nve)
+		goto re_enable;
+
+	/* Update all pass entry */
+	for (i = 0; i < count; i++) {
+		entry = &entries[i];
+		if (!entry->is_last)
+			continue;
+
+		ret = dwmac5_rxp_update_single_entry(ioaddr, entry, nve);
+		if (ret)
+			goto re_enable;
+
+		entry->table_pos = nve++;
+	}
+
+	/* Assume n. of parsable entries == n. of valid entries */
+	val = (nve << 16) & NPE;
+	val |= nve & NVE;
+	writel(val, ioaddr + MTL_RXP_CONTROL_STATUS);
+
+	/* Enable RX Parser */
+	dwmac5_rxp_enable(ioaddr);
+
+re_enable:
+	/* Re-enable RX */
+	writel(old_val, ioaddr + GMAC_CONFIG);
+	return ret;
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index bd4c466..cc810af 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -11,6 +11,17 @@
 #define PRTYEN				BIT(1)
 #define TMOUTEN				BIT(0)
 
+#define MTL_RXP_CONTROL_STATUS		0x00000ca0
+#define RXPI				BIT(31)
+#define NPE				GENMASK(23, 16)
+#define NVE				GENMASK(7, 0)
+#define MTL_RXP_IACC_CTRL_STATUS	0x00000cb0
+#define STARTBUSY			BIT(31)
+#define RXPEIEC				GENMASK(22, 21)
+#define RXPEIEE				BIT(20)
+#define WRRDN				BIT(16)
+#define ADDR				GENMASK(15, 0)
+#define MTL_RXP_IACC_DATA		0x00000cb4
 #define MTL_ECC_CONTROL			0x00000cc0
 #define TSOEE				BIT(4)
 #define MRXPEE				BIT(3)
@@ -48,5 +59,7 @@ int dwmac5_safety_feat_irq_status(struct net_device *ndev,
 		struct stmmac_safety_stats *stats);
 int dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
 			int index, unsigned long *count, const char **desc);
+int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
+		      unsigned int count);
 
 #endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 2b0a7e7..9acc8d2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -77,6 +77,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 	const void *mac;
 	const void *hwtimestamp;
 	const void *mode;
+	const void *tc;
 	int (*setup)(struct stmmac_priv *priv);
 	int (*quirks)(struct stmmac_priv *priv);
 } stmmac_hw[] = {
@@ -90,6 +91,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 		.mac = &dwmac100_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
+		.tc = NULL,
 		.setup = dwmac100_setup,
 		.quirks = stmmac_dwmac1_quirks,
 	}, {
@@ -101,6 +103,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 		.mac = &dwmac1000_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
+		.tc = NULL,
 		.setup = dwmac1000_setup,
 		.quirks = stmmac_dwmac1_quirks,
 	}, {
@@ -112,6 +115,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 		.mac = &dwmac4_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = NULL,
+		.tc = NULL,
 		.setup = dwmac4_setup,
 		.quirks = stmmac_dwmac4_quirks,
 	}, {
@@ -123,6 +127,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 		.mac = &dwmac410_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
+		.tc = NULL,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -134,6 +139,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 		.mac = &dwmac410_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
+		.tc = NULL,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}, {
@@ -145,6 +151,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 		.mac = &dwmac510_ops,
 		.hwtimestamp = &stmmac_ptp,
 		.mode = &dwmac4_ring_mode_ops,
+		.tc = &dwmac510_tc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
 	}
@@ -196,6 +203,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
 		mac->mac = entry->mac;
 		mac->ptp = entry->hwtimestamp;
 		mac->mode = entry->mode;
+		mac->tc = entry->tc;
 
 		priv->hw = mac;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index bfad616..b7539a1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -5,10 +5,12 @@
 #ifndef __STMMAC_HWIF_H__
 #define __STMMAC_HWIF_H__
 
+#include <linux/netdevice.h>
+
 #define stmmac_do_void_callback(__priv, __module, __cname,  __arg0, __args...) \
 ({ \
 	int __result = -EINVAL; \
-	if ((__priv)->hw->__module->__cname) { \
+	if ((__priv)->hw->__module && (__priv)->hw->__module->__cname) { \
 		(__priv)->hw->__module->__cname((__arg0), ##__args); \
 		__result = 0; \
 	} \
@@ -17,7 +19,7 @@
 #define stmmac_do_callback(__priv, __module, __cname,  __arg0, __args...) \
 ({ \
 	int __result = -EINVAL; \
-	if ((__priv)->hw->__module->__cname) \
+	if ((__priv)->hw->__module && (__priv)->hw->__module->__cname) \
 		__result = (__priv)->hw->__module->__cname((__arg0), ##__args); \
 	__result; \
 })
@@ -232,6 +234,7 @@ struct stmmac_dma_ops {
 struct net_device;
 struct rgmii_adv;
 struct stmmac_safety_stats;
+struct stmmac_tc_entry;
 
 /* Helpers to program the MAC core */
 struct stmmac_ops {
@@ -301,6 +304,9 @@ struct stmmac_ops {
 			struct stmmac_safety_stats *stats);
 	int (*safety_feat_dump)(struct stmmac_safety_stats *stats,
 			int index, unsigned long *count, const char **desc);
+	/* Flexible RX Parser */
+	int (*rxp_config)(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
+			  unsigned int count);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -365,6 +371,8 @@ struct stmmac_ops {
 	stmmac_do_callback(__priv, mac, safety_feat_irq_status, __args)
 #define stmmac_safety_feat_dump(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, safety_feat_dump, __args)
+#define stmmac_rxp_config(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, rxp_config, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
@@ -419,6 +427,18 @@ struct stmmac_mode_ops {
 	stmmac_do_void_callback(__priv, mode, clean_desc3, __args)
 
 struct stmmac_priv;
+struct tc_cls_u32_offload;
+
+struct stmmac_tc_ops {
+	int (*init)(struct stmmac_priv *priv);
+	int (*setup_cls_u32)(struct stmmac_priv *priv,
+			     struct tc_cls_u32_offload *cls);
+};
+
+#define stmmac_tc_init(__priv, __args...) \
+	stmmac_do_callback(__priv, tc, init, __args)
+#define stmmac_tc_setup_cls_u32(__priv, __args...) \
+	stmmac_do_callback(__priv, tc, setup_cls_u32, __args)
 
 extern const struct stmmac_ops dwmac100_ops;
 extern const struct stmmac_dma_ops dwmac100_dma_ops;
@@ -429,6 +449,7 @@ struct stmmac_mode_ops {
 extern const struct stmmac_ops dwmac410_ops;
 extern const struct stmmac_dma_ops dwmac410_dma_ops;
 extern const struct stmmac_ops dwmac510_ops;
+extern const struct stmmac_tc_ops dwmac510_tc_ops;
 
 #define GMAC_VERSION		0x00000020	/* GMAC CORE Version */
 #define GMAC4_VERSION		0x00000110	/* GMAC4+ CORE Version */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 2443f20..42fc76e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -76,6 +76,30 @@ struct stmmac_rx_queue {
 	struct napi_struct napi ____cacheline_aligned_in_smp;
 };
 
+struct stmmac_tc_entry {
+	bool in_use;
+	bool in_hw;
+	bool is_last;
+	bool is_frag;
+	void *frag_ptr;
+	unsigned int table_pos;
+	u32 handle;
+	u32 prio;
+	struct {
+		u32 match_data;
+		u32 match_en;
+		u8 af:1;
+		u8 rf:1;
+		u8 im:1;
+		u8 nc:1;
+		u8 res1:4;
+		u8 frame_offset;
+		u8 ok_index;
+		u8 dma_ch_no;
+		u32 res2;
+	} __packed val;
+};
+
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
 	u32 tx_count_frames;
@@ -151,6 +175,11 @@ struct stmmac_priv {
 	unsigned long state;
 	struct workqueue_struct *wq;
 	struct work_struct service_task;
+
+	/* TC Handling */
+	unsigned int tc_entries_max;
+	unsigned int tc_off_max;
+	struct stmmac_tc_entry *tc_entries;
 };
 
 enum stmmac_state {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0135fd3..84b29ef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -45,6 +45,7 @@
 #include <linux/seq_file.h>
 #endif /* CONFIG_DEBUG_FS */
 #include <linux/net_tstamp.h>
+#include <net/pkt_cls.h>
 #include "stmmac_ptp.h"
 #include "stmmac.h"
 #include <linux/reset.h>
@@ -3786,6 +3787,58 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	return ret;
 }
 
+static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				    void *cb_priv)
+{
+	struct stmmac_priv *priv = cb_priv;
+	int ret = -EOPNOTSUPP;
+
+	stmmac_disable_all_queues(priv);
+
+	switch (type) {
+	case TC_SETUP_CLSU32:
+		if (tc_cls_can_offload_and_chain0(priv->dev, type_data))
+			ret = stmmac_tc_setup_cls_u32(priv, priv, type_data);
+		break;
+	default:
+		break;
+	}
+
+	stmmac_enable_all_queues(priv);
+	return ret;
+}
+
+static int stmmac_setup_tc_block(struct stmmac_priv *priv,
+				 struct tc_block_offload *f)
+{
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb,
+				priv, priv);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+			   void *type_data)
+{
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return stmmac_setup_tc_block(priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 {
 	struct stmmac_priv *priv = netdev_priv(ndev);
@@ -4024,6 +4077,7 @@ static void stmmac_exit_fs(struct net_device *dev)
 	.ndo_set_rx_mode = stmmac_set_rx_mode,
 	.ndo_tx_timeout = stmmac_tx_timeout,
 	.ndo_do_ioctl = stmmac_ioctl,
+	.ndo_setup_tc = stmmac_setup_tc,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = stmmac_poll_controller,
 #endif
@@ -4223,6 +4277,11 @@ int stmmac_dvr_probe(struct device *device,
 	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			    NETIF_F_RXCSUM;
 
+	ret = stmmac_tc_init(priv, priv);
+	if (!ret) {
+		ndev->hw_features |= NETIF_F_HW_TC;
+	}
+
 	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
 		ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
 		priv->tso = true;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
new file mode 100644
index 0000000..881c94b
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
+ * stmmac TC Handling (HW only)
+ */
+
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include "common.h"
+#include "dwmac4.h"
+#include "dwmac5.h"
+#include "stmmac.h"
+
+static void tc_fill_all_pass_entry(struct stmmac_tc_entry *entry)
+{
+	memset(entry, 0, sizeof(*entry));
+	entry->in_use = true;
+	entry->is_last = true;
+	entry->is_frag = false;
+	entry->prio = ~0x0;
+	entry->handle = 0;
+	entry->val.match_data = 0x0;
+	entry->val.match_en = 0x0;
+	entry->val.af = 1;
+	entry->val.dma_ch_no = 0x0;
+}
+
+static struct stmmac_tc_entry *tc_find_entry(struct stmmac_priv *priv,
+					     struct tc_cls_u32_offload *cls,
+					     bool free)
+{
+	struct stmmac_tc_entry *entry, *first = NULL, *dup = NULL;
+	u32 loc = cls->knode.handle;
+	int i;
+
+	for (i = 0; i < priv->tc_entries_max; i++) {
+		entry = &priv->tc_entries[i];
+		if (!entry->in_use && !first && free)
+			first = entry;
+		if (entry->handle == loc && !free)
+			dup = entry;
+	}
+
+	if (dup)
+		return dup;
+	if (first) {
+		first->handle = loc;
+		first->in_use = true;
+
+		/* Reset HW values */
+		memset(&first->val, 0, sizeof(first->val));
+	}
+
+	return first;
+}
+
+static int tc_fill_actions(struct stmmac_tc_entry *entry,
+			   struct stmmac_tc_entry *frag,
+			   struct tc_cls_u32_offload *cls)
+{
+	struct stmmac_tc_entry *action_entry = entry;
+	const struct tc_action *act;
+	struct tcf_exts *exts;
+	LIST_HEAD(actions);
+
+	exts = cls->knode.exts;
+	if (!tcf_exts_has_actions(exts))
+		return -EINVAL;
+	if (frag)
+		action_entry = frag;
+
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(act, &actions, list) {
+		/* Accept */
+		if (is_tcf_gact_ok(act)) {
+			action_entry->val.af = 1;
+			break;
+		}
+		/* Drop */
+		if (is_tcf_gact_shot(act)) {
+			action_entry->val.rf = 1;
+			break;
+		}
+
+		/* Unsupported */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int tc_fill_entry(struct stmmac_priv *priv,
+			 struct tc_cls_u32_offload *cls)
+{
+	struct stmmac_tc_entry *entry, *frag = NULL;
+	struct tc_u32_sel *sel = cls->knode.sel;
+	u32 off, data, mask, real_off, rem;
+	u32 prio = cls->common.prio;
+	int ret;
+
+	/* Only 1 match per entry */
+	if (sel->nkeys <= 0 || sel->nkeys > 1)
+		return -EINVAL;
+
+	off = sel->keys[0].off << sel->offshift;
+	data = sel->keys[0].val;
+	mask = sel->keys[0].mask;
+
+	switch (ntohs(cls->common.protocol)) {
+	case ETH_P_ALL:
+		break;
+	case ETH_P_IP:
+		off += ETH_HLEN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (off > priv->tc_off_max)
+		return -EINVAL;
+
+	real_off = off / 4;
+	rem = off % 4;
+
+	entry = tc_find_entry(priv, cls, true);
+	if (!entry)
+		return -EINVAL;
+
+	if (rem) {
+		frag = tc_find_entry(priv, cls, true);
+		if (!frag) {
+			ret = -EINVAL;
+			goto err_unuse;
+		}
+
+		entry->frag_ptr = frag;
+		entry->val.match_en = (mask << (rem * 8)) &
+			GENMASK(31, rem * 8);
+		entry->val.match_data = (data << (rem * 8)) &
+			GENMASK(31, rem * 8);
+		entry->val.frame_offset = real_off;
+		entry->prio = prio;
+
+		frag->val.match_en = (mask >> (rem * 8)) &
+			GENMASK(rem * 8 - 1, 0);
+		frag->val.match_data = (data >> (rem * 8)) &
+			GENMASK(rem * 8 - 1, 0);
+		frag->val.frame_offset = real_off + 1;
+		frag->prio = prio;
+		frag->is_frag = true;
+	} else {
+		entry->frag_ptr = NULL;
+		entry->val.match_en = mask;
+		entry->val.match_data = data;
+		entry->val.frame_offset = real_off;
+		entry->prio = prio;
+	}
+
+	ret = tc_fill_actions(entry, frag, cls);
+	if (ret)
+		goto err_unuse;
+
+	return 0;
+
+err_unuse:
+	if (frag)
+		frag->in_use = false;
+	entry->in_use = false;
+	return ret;
+}
+
+static void tc_unfill_entry(struct stmmac_priv *priv,
+			    struct tc_cls_u32_offload *cls)
+{
+	struct stmmac_tc_entry *entry;
+
+	entry = tc_find_entry(priv, cls, false);
+	if (!entry)
+		return;
+
+	entry->in_use = false;
+	if (entry->frag_ptr) {
+		entry = entry->frag_ptr;
+		entry->is_frag = false;
+		entry->in_use = false;
+	}
+}
+
+static int tc_config_knode(struct stmmac_priv *priv,
+			   struct tc_cls_u32_offload *cls)
+{
+	int ret;
+
+	ret = tc_fill_entry(priv, cls);
+	if (ret)
+		return ret;
+
+	ret = stmmac_rxp_config(priv, priv->hw->pcsr, priv->tc_entries,
+			priv->tc_entries_max);
+	if (ret)
+		goto err_unfill;
+
+	return 0;
+
+err_unfill:
+	tc_unfill_entry(priv, cls);
+	return ret;
+}
+
+static int tc_delete_knode(struct stmmac_priv *priv,
+			   struct tc_cls_u32_offload *cls)
+{
+	int ret;
+
+	/* Set entry and fragments as not used */
+	tc_unfill_entry(priv, cls);
+
+	ret = stmmac_rxp_config(priv, priv->hw->pcsr, priv->tc_entries,
+			priv->tc_entries_max);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int tc_setup_cls_u32(struct stmmac_priv *priv,
+			    struct tc_cls_u32_offload *cls)
+{
+	switch (cls->command) {
+	case TC_CLSU32_REPLACE_KNODE:
+		tc_unfill_entry(priv, cls);
+		/* Fall through */
+	case TC_CLSU32_NEW_KNODE:
+		return tc_config_knode(priv, cls);
+	case TC_CLSU32_DELETE_KNODE:
+		return tc_delete_knode(priv, cls);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int tc_init(struct stmmac_priv *priv)
+{
+	struct dma_features *dma_cap = &priv->dma_cap;
+	unsigned int count;
+
+	if (!dma_cap->frpsel)
+		return -EINVAL;
+
+	switch (dma_cap->frpbs) {
+	case 0x0:
+		priv->tc_off_max = 64;
+		break;
+	case 0x1:
+		priv->tc_off_max = 128;
+		break;
+	case 0x2:
+		priv->tc_off_max = 256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dma_cap->frpes) {
+	case 0x0:
+		count = 64;
+		break;
+	case 0x1:
+		count = 128;
+		break;
+	case 0x2:
+		count = 256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Reserve one last filter which lets all pass */
+	priv->tc_entries_max = count;
+	priv->tc_entries = devm_kzalloc(priv->device,
+			sizeof(*priv->tc_entries) * count, GFP_KERNEL);
+	if (!priv->tc_entries)
+		return -ENOMEM;
+
+	tc_fill_all_pass_entry(&priv->tc_entries[count - 1]);
+
+	dev_info(priv->device, "Enabling HW TC (entries=%d, max_off=%d)\n",
+			priv->tc_entries_max, priv->tc_off_max);
+	return 0;
+}
+
+const struct stmmac_tc_ops dwmac510_tc_ops = {
+	.init = tc_init,
+	.setup_cls_u32 = tc_setup_cls_u32,
+};
-- 
1.7.1

^ permalink raw reply related

* Re: i.MX6S/DL and QCA8334 switch using DSA driver - CPU port not working
From: Michal Vokáč @ 2018-05-04  8:45 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev, Vivien Didelot, Florian Fainelli
In-Reply-To: <20180430132025.GF10066@lunn.ch>

On 30.4.2018 15:20, Andrew Lunn wrote:
>> Using rgmii-id for the port is not valid as the qca8k driver does not support
>> that mode. It only supports rgmii and sgmii. I think this is actually not
>> correct. When phy-mode is set to rgmii for port the qca8k driver configures
>> internal delays in the switch. So it behaves like rgmii-id I think.
>>
>> Should not it be:
>>
>> --- a/drivers/net/dsa/qca8k.c
>> +++ b/drivers/net/dsa/qca8k.c
>> @@ -474,7 +474,7 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode)
>>   	 * PHY or MAC.
>>   	 */
>>   	switch (mode) {
>> -	case PHY_INTERFACE_MODE_RGMII:
>> +	case PHY_INTERFACE_MODE_RGMII_ID:
>>   	qca8k_write(priv, reg,
>> 		    QCA8K_PORT_PAD_RGMII_EN |
>> 		    QCA8K_PORT_PAD_RGMII_TX_DELAY(3) |
> 
> We have to be careful cleaning this up. It has the potential to break
> existing boards when using an old device tree blob.

Oh, I see. Thanks for pointing this out.

Some news to the problem with the non-working CPU port.
Andrew, thank you very mych for the ideas how to debug the issue.
I tried what you suggested but have no luck. FYI Now I am doing all my tests
with linux-stable.

First of all I tried to make work my old phy driver for the switch with latest
kernel. It works on v4.1.46 but did not on v4.17-rc2 - no IP@ on eth0.
So a very same issue as I have with the DSA. Bisecting the kernel picked:

d5c3d84 ("net: phy: Avoid polling PHY with PHY_IGNORE_INTERRUPTS")

Fixed that by using PHY_POLL in my driver. I was hoping that I may have similar
issue when using DSA but it looks OK. This is with the DSA enabled:

  # dmesg | grep PHY
  [    3.452536] Generic PHY 2188000.ethernet-1:01: attached PHY driver [Generic PHY] (mii_bus:phy_addr=2188000.ethernet-1:01, irq=POLL)
  [    3.453437] Generic PHY 2188000.ethernet-1:02: attached PHY driver [Generic PHY] (mii_bus:phy_addr=2188000.ethernet-1:02, irq=POLL)
  [   20.769281] Generic PHY fixed-0:00: attached PHY driver [Generic PHY] (mii_bus:phy_addr=fixed-0:00, irq=POLL)

Anyway, now I am sure that I can use RGMII interface with mainline when I am
not using DSA and phy-mode is set to rgmii and I use QCA8K_PORT_PAD_RGMII_TX_DELAY(2)
and QCA8K_PORT_PAD_RGMII_RX_DELAY(2).

To debug the non-working CPU port with DSA I tried these kernel versions:

  - v4.8-rc6-1085-g6b93fb4 - NOT OK
    - Can not go lower than this version. qca8k driver was introduced here.
  - 4.9.84 - NOT OK
  - 4.17-rc2 - NOT OK

Some RGMII delay tunning attempts with v4.17-rc2:

phy-mode (fec)	Rx/Tx delay	result
--------------------------------------
rgmii		0/0		NOT OK
rgmii		1/1		NOT OK
rgmii		2/2		NOT OK
rgmii		3/3		NOT OK
rgmii-id	0/0		NOT OK
rgmii-id	1/1		NOT OK
rgmii-id	2/2		NOT OK
rgmii-id	3/3		NOT OK

I am out of ideas how to further debug this.
Any additional adivce will be much appreciated.

Thanks, Michal.

^ permalink raw reply

* Re: [PATCH net] macmace: Set platform device coherent_dma_mask
From: Michael Schmitz @ 2018-05-04  8:16 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Christoph Hellwig, Finn Thain, David S. Miller, linux-m68k,
	netdev, Linux Kernel Mailing List
In-Reply-To: <CAMuHMdW+waQSQxLvBXEZkhCAj_kh=4SR1hZ4FAeC8YWXtRKg1A@mail.gmail.com>

Hi Geert,

Am 04.05.2018 um 19:24 schrieb Geert Uytterhoeven:
> Hi Michael,
> 
>>> Yes, that would be useful.  The other assumption could be that
>>> platform devices always allow an all-0xff dma mask.
>>
>> That's not always true (Atari NCR5380 SCSI and floppy would use a 24
>> bit DMA mask). We use bounce buffers allocated from a dedicated lowmem
>> pool there currently, and for all I know don't use the DMA API yet.
>>
>> I bet that is a rare exception though. Setting the default DMA mask
>> for platform devices to all-0xff and letting the few odd drivers force
>> a different setting seems the best way forward.
> 
> I'd say that's usually a property of the platform, not of the device?

Right - I was thinking 'm68k' as platform, not a particular machine like
Mac or Falcon (the 24 bit mask only applies to that particular model
anyway).

> So IMHO it belongs in the platform code, not in the device driver code.

OK - let's have a default mask of 64 bit, and allow machine specific
platform_init() to override using a new helper function.

Cheers,

	Michael

> Gr{oetje,eeting}s,
> 
>                         Geert
> 

^ permalink raw reply

* Re: Repeating "unregister_netdevice: waiting for lo to become free" caused by upstream 76da0704507bb ("ipv6: only call ip6_route_dev_notify() once for NETDEV_UNREGISTER")
From: Rafał Miłecki @ 2018-05-04  7:54 UTC (permalink / raw)
  To: Konstantin Khlebnikov, WANG Cong, David S. Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Network Development, jeffy,
	David Ahern
  Cc: Greg Kroah-Hartman, Stable, Dan Streetman, Dan Streetman,
	Mathias Tillman
In-Reply-To: <07b74ef0-5ce6-b391-7b0f-59685350e802@gmail.com>

On 25 April 2018 at 16:44, Rafał Miłecki <zajec5@gmail.com> wrote:
> On 25.04.2018 16:30, Konstantin Khlebnikov wrote:
>>
>> On 25.04.2018 17:16, Rafał Miłecki wrote:
>>>
>>> On 23.04.2018 15:08, Rafał Miłecki wrote:
>>>>
>>>> I've just updated my kernel 4.4.x and noticed a regression. Bisecting
>>>> pointed me to the commit 2417da3f4d6bc ("ipv6: only call
>>>> ip6_route_dev_notify() once for NETDEV_UNREGISTER") [0] which is
>>>> backport of upstream 76da0704507bb. That backported commit has
>>>> appeared in a 4.4.103.
>>>>
>>>> I use OpenWrt/LEDE [1] distribution and LXC [2] 1.1.5. After stopping
>>>> a container I start getting these messages:
>>>> [  229.419188] unregister_netdevice: waiting for lo to become free.
>>>> Usage count = 1
>>>> [  239.660408] unregister_netdevice: waiting for lo to become free.
>>>> Usage count = 1
>>>> [  249.839189] unregister_netdevice: waiting for lo to become free.
>>>> Usage count = 1
>>>> (...)
>>>>
>>>> Trying to start LXC nevertheless results in lxc-start command hang
>>>> around network configuration. Trying to query LXC state afterwards
>>>> results in a lxc-info command hang too.
>>>>
>>>> I tried Googling for this issue and found similar reports:
>>>> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1729637
>>>> https://github.com/fnproject/fn/issues/686
>>>>
>>>> https://lime-technology.com/forums/topic/66863-kernelunregister_netdevice-waiting-for-lo-to-become-free-usage-count-1/
>>>> all of them related to the Docker, which is probably a similar use
>>>> case to the LXC.
>>>>
>>>> I couldn't find any reference to commit 76da0704507bb that could
>>>> suggest fixing the problem I'm seeing.
>>>>
>>>> Does anyone have an idea what is the issue I'm seeing about? Or even
>>>> better, how to fix it? Can I provide any additional info that would
>>>> help?
>>>>
>>>>
>>>> [0]
>>>> https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit/?h=linux-4.4.y&id=2417da3f4d6bc4fc6c77f613f0e2264090892aa5
>>>> [1] https://openwrt.org/
>>>> [2] https://linuxcontainers.org/
>>>
>>>
>>> Today I tried 4.14.34 to see if that helps. Unfortunately it doesn't. I
>>> still experience the same problem.
>>>
>>>  From reading various reports regarding that "unregister_netdevice:
>>> waiting for lo to become free" message it appears the problem is caused
>>> by a leaking dst refcnt somewhere in the kernel code.
>>>
>>> I found links to few commit fixing leaks at various places:
>>> 4a31a6b19f9dd ("sctp: fix dst refcnt leak in sctp_v4_get_dst")
>>> 957d761cf91cd ("sctp: fix dst refcnt leak in sctp_v6_get_dst()")
>>> 4ee806d51176b ("net: tcp: close sock if net namespace is exiting")
>>> d747a7a51b009 ("tcp: reset sk_rx_dst in tcp_disconnect()")
>>> 751eb6b6042a5 ("ipv6: addrconf: fix dev refcont leak when DAD failed")
>>>
>>> All above patches are present in the linux-v4.4.y and are part of kernel
>>> 4.4.124 I use. So it seems I'm facing yet another dst refcnt leak.
>>>
>>> Could commit 2417da3f4d6bc ("ipv6: only call ip6_route_dev_notify() once
>>> for NETDEV_UNREGISTER") introduce a new dst refcnt leak? Or does it only
>>> expost existing one?
>>
>>
>> Mathias Tillman reported this as "4.4.103 linux kernel regression".
>> Last message in that thread (which I couldn't find in mailing list
>> archives) had:
>> | As it turns out, it's due to a patch in the Turris Omnia/OpenWRT code
>> that adds a in6_dev_get call without calling in6_dev_put.
>
>
> Wow, this is very helpful, thank you!
>
> Somehow I didn't even think about OpenWrt downstream patches. Too bad
> this wasn't reported to the OpenWrt community, I spent 2 days on this.
> There is indeed:
> target/linux/generic/patches-4.4/670-ipv6-allow-rejecting-with-source-address-failed-policy.patch
> [PATCH 1/2] ipv6: allow rejecting with "source address failed policy"
>
> I'll move this issue discussion to the OpenWrt/LEDE now, I hope we can
> sort it out.

For a reference it has been fixed in OpenWrt/LEDE by Felix in:

1) master branch:
https://git.openwrt.org/?p=openwrt/openwrt.git;a=commitdiff;h=58f7b5b96c301176d639540df4723c798af2a999

2) lede-17.01 branch
https://git.openwrt.org/?p=openwrt/openwrt.git;a=commitdiff;h=999bb66b20b03c753801ecebf1ec2a03c6a63c96

-- 
Rafał

^ permalink raw reply

* Double-lock bug in drivers/isdn/hardware/mISDN/hfcmulti.c
From: Iago Abal @ 2018-05-04  7:27 UTC (permalink / raw)
  To: Karsten Keil, David S. Miller, Stephen Hemminger, Johannes Berg,
	Arvind Yadav, Kees Cook, netdev

Hi,

There is a potential double-lock sequence starting from hfc_remove_pci().

Forward trace:

  1. hfc_remove_pci()  LOCKS spin_lock_irqsave(&HFClock, flags) at 5284
  2. hfc_remove_pci()  calls release_card(card)                 at 5285
  3. release_card()    calls release_port(hc, hc->chan[ch].dch) at 4674
  4. release_port()    calls plxsd_checksync(hc, 1)             at 4595
  5. plxsd_checksync() calls hfcmulti_resync(hc, ..., rm)       at 1036 or 1044
  6. hfcmulti_resync() LOCKS spin_lock_irqsave(&HFClock, flags) at 933

NB: Bug found by static analysis thanks to EBA
(https://github.com/IagoAbal/eba).

Hope it helps,

-- iago

^ permalink raw reply

* Re: DSA switch
From: Ran Shalit @ 2018-05-04  7:26 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev
In-Reply-To: <CAJ2oMh+0pKsX9w2=upu8SQud8Jyn1WhoAQiREwwu9+ehzFPJVA@mail.gmail.com>

On Fri, May 4, 2018 at 9:59 AM, Ran Shalit <ranshalit@gmail.com> wrote:
> On Fri, May 4, 2018 at 12:05 AM, Andrew Lunn <andrew@lunn.ch> wrote:
>>> I am using kernel 2.6.37, but I think it is not kernel issue, but more
>>> bad patches done on kernel.
>>> It is based on TI's kernel, but with some custom modifications on
>>> driver's switch, to make it work with TI's cpsw switch.
>>> Seems like someone made some bad patch, I'll continue investigating it.
>>> You can ignore the question...
>>>
>>> Many thanks a lot for the help,
>>> Ran
>>
>> There is no DSA driver for the cpsw. Are you just using the cpsw to
>> pass frames to a switch which is supported by DSA?
>>
>> In theory, mainline CPSW should just work for passing frames to an
>> external switch. So why not just use mainline?
>>
>
> It seems that the bridge functions OK,
> so I rather keep on working with it, instead of doing too many
> dramatically changes in the custom kernel of TI's which works with our
> chip (dm8148).
>
> Yet, I would like to ask about the bridge:
> Can a bridge also be used with dsa switch when ports are connected to
> different subnets ?
>
> Regards,
> Ran



I also see that there is no bridge function in /drivers/net/dsa files
in our kernel (2.6.37)
I can't find any reference to *bridge* or function used in bridge patch:
https://patchwork.ozlabs.org/patch/16578/

So, how is it that bridge worked in my system ?
Does it mean that it actually does the bridging not in the switch but
in the kernel ip stack ?

Thank you,
ran

^ permalink raw reply

* Re: [PATCH net] nsh: fix infinite loop
From: Jiri Benc @ 2018-05-04  7:23 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S . Miller, netdev, Eric Dumazet
In-Reply-To: <20180503203754.60611-1-edumazet@google.com>

On Thu,  3 May 2018 13:37:54 -0700, Eric Dumazet wrote:
> diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
> index d7da99a0b0b852d7459eed9ac6d3cdf3d49a1a1c..9696ef96b719bf24625adea2a959deac1d2a975f 100644
> --- a/net/nsh/nsh.c
> +++ b/net/nsh/nsh.c
> @@ -57,6 +57,8 @@ int nsh_pop(struct sk_buff *skb)
>  		return -ENOMEM;
>  	nh = (struct nshhdr *)(skb->data);
>  	length = nsh_hdr_len(nh);
> +	if (length < NSH_BASE_HDR_LEN)
> +		return -EINVAL;
>  	inner_proto = tun_p_to_eth_p(nh->np);
>  	if (!pskb_may_pull(skb, length))
>  		return -ENOMEM;
> @@ -90,6 +92,8 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
>  	if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
>  		goto out;
>  	nsh_len = nsh_hdr_len(nsh_hdr(skb));
> +	if (nsh_len < NSH_BASE_HDR_LEN)
> +		goto out;
>  	if (unlikely(!pskb_may_pull(skb, nsh_len)))
>  		goto out;
>  

Acked-by: Jiri Benc <jbenc@redhat.com>

Thanks, Eric, and shame on me!

 Jiri

^ permalink raw reply

* Re: [PATCH net] macmace: Set platform device coherent_dma_mask
From: Geert Uytterhoeven @ 2018-05-04  7:24 UTC (permalink / raw)
  To: Michael Schmitz
  Cc: Christoph Hellwig, Finn Thain, David S. Miller, linux-m68k,
	netdev, Linux Kernel Mailing List
In-Reply-To: <CAOmrzk+y5pmm2anaw15pch--y_gqoUO4NQemSbSanAXpEnttkg@mail.gmail.com>

Hi Michael,

On Thu, May 3, 2018 at 10:24 PM, Michael Schmitz <schmitzmic@gmail.com> wrote:
> On Thu, May 3, 2018 at 8:51 PM, Christoph Hellwig <hch@lst.de> wrote:
>> On Thu, May 03, 2018 at 10:46:56AM +0200, Geert Uytterhoeven wrote:
>>> Perhaps you can add a new helper (platform_device_register_simple_dma()?)
>>> that takes the DMA mask, too?
>>> With people setting the mask to kill the WARNING splat, this may become
>>> more common.
>>>
>>> struct platform_device_info already has a dma_mask field, but
>>> platform_device_register_resndata() explicitly sets it to zero.
>>
>> Yes, that would be useful.  The other assumption could be that
>> platform devices always allow an all-0xff dma mask.
>
> That's not always true (Atari NCR5380 SCSI and floppy would use a 24
> bit DMA mask). We use bounce buffers allocated from a dedicated lowmem
> pool there currently, and for all I know don't use the DMA API yet.
>
> I bet that is a rare exception though. Setting the default DMA mask
> for platform devices to all-0xff and letting the few odd drivers force
> a different setting seems the best way forward.

I'd say that's usually a property of the platform, not of the device?
So IMHO it belongs in the platform code, not in the device driver code.

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* [PATCH] net: sched: cls: fix a potential missing-check bug
From: Wenwen Wang @ 2018-05-04  7:05 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: Kangjie Lu, Jamal Hadi Salim, Cong Wang, Jiri Pirko,
	David S. Miller, open list:TC subsystem, open list

In rsvp_change(), the value of f->res.classid is checked to be no more
than 255. Otherwise, the execution will goto errout. This is enforced by a
if-statement check. However, in the following execution, f->res.classid is
assigned with a new value returned from gen_tunnel(), and the new value
is only checked against 0. Given that gen_tunnel() may return a value
larger than 255 based on data, the new value of f->res.classid should
be re-checked.

This patch adds a re-check to ensure the new value of f->res.classid is not
great than 255; otherwise, an error code will be returned.

Signed-off-by: Wenwen Wang <wang6495@umn.edu>
---
 net/sched/cls_rsvp.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 4f12976..7ced8fc 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -590,6 +590,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		if (f->res.classid == 0 &&
 		    (f->res.classid = gen_tunnel(data)) == 0)
 			goto errout;
+
+		if (f->res.classid > 255)
+			goto errout;
 	}

 	for (sp = &data->ht[h1];
-- 
2.7.4

^ permalink raw reply related

* Re: DSA switch
From: Ran Shalit @ 2018-05-04  6:59 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: netdev
In-Reply-To: <20180503210545.GJ17027@lunn.ch>

On Fri, May 4, 2018 at 12:05 AM, Andrew Lunn <andrew@lunn.ch> wrote:
>> I am using kernel 2.6.37, but I think it is not kernel issue, but more
>> bad patches done on kernel.
>> It is based on TI's kernel, but with some custom modifications on
>> driver's switch, to make it work with TI's cpsw switch.
>> Seems like someone made some bad patch, I'll continue investigating it.
>> You can ignore the question...
>>
>> Many thanks a lot for the help,
>> Ran
>
> There is no DSA driver for the cpsw. Are you just using the cpsw to
> pass frames to a switch which is supported by DSA?
>
> In theory, mainline CPSW should just work for passing frames to an
> external switch. So why not just use mainline?
>

It seems that the bridge functions OK,
so I rather keep on working with it, instead of doing too many
dramatically changes in the custom kernel of TI's which works with our
chip (dm8148).

Yet, I would like to ask about the bridge:
Can a bridge also be used with dsa switch when ports are connected to
different subnets ?

Regards,
Ran

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox