Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH net-next] fib_rules: rename FRA_PROTOCOL to FRA_ORIGINATOR
From: David Miller @ 2018-03-22 16:39 UTC (permalink / raw)
  To: nicolas.dichtel; +Cc: netdev, sharpd
In-Reply-To: <20180320170453.27577-1-nicolas.dichtel@6wind.com>

From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 20 Mar 2018 18:04:53 +0100

> As the comment said, this attribute defines the originator of the rule,
> it's not really a (network) protocol.
> Let's rename it accordingly to avoid confusion (difference between
> FRA_PROTOCOL and FRA_IP_PROTO was not obvious).
> 
> CC: Donald Sharp <sharpd@cumulusnetworks.com>
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> ---
> 
> FRA_PROTOCOL exists only in net-next for now, thus it's still possible to
> rename it.

I think by renaming this, and thus losing the connection with how the
term "protocol" is used in our other routing and rule interfaces and
datastructures, this will be creating more confusion than it will be
fixing.

So I won't be applying this.

Thanks.

^ permalink raw reply

* Re: [PATCH v2 1/8] page_frag_cache: Remove pfmemalloc bool
From: Alexander Duyck @ 2018-03-22 16:39 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Matthew Wilcox, Netdev, linux-mm, Jesper Dangaard Brouer,
	Eric Dumazet
In-Reply-To: <20180322153157.10447-2-willy@infradead.org>

On Thu, Mar 22, 2018 at 8:31 AM, Matthew Wilcox <willy@infradead.org> wrote:
> From: Matthew Wilcox <mawilcox@microsoft.com>
>
> Save 4/8 bytes by moving the pfmemalloc indicator from its own bool
> to the top bit of pagecnt_bias.  This has no effect on the fastpath
> of the allocator since the pagecnt_bias cannot go negative.  It's
> a couple of extra instructions in the slowpath.
>
> Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>

So I was just thinking about this and it would probably make more
sense to look at addressing this after you take care of your
conversion from size/offset to a mask. One thing with the mask is that
it should never reach 64K since that is the largest page size if I
recall. With that being the case we could look at dropping mask to a
u16 value and then add a u16 flags field where you could store things
like this. Then you could avoid having to do the masking and math you
are having to do below.

> ---
>  include/linux/mm_types.h | 4 +++-
>  mm/page_alloc.c          | 8 +++++---
>  net/core/skbuff.c        | 5 ++---
>  3 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index fd1af6b9591d..a63b138ad1a4 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -218,6 +218,7 @@ struct page {
>
>  #define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
>  #define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
> +#define PFC_MEMALLOC                   (1U << 31)
>
>  struct page_frag_cache {
>         void * va;
> @@ -231,9 +232,10 @@ struct page_frag_cache {
>          * containing page->_refcount every time we allocate a fragment.
>          */
>         unsigned int            pagecnt_bias;
> -       bool pfmemalloc;
>  };
>
> +#define page_frag_cache_pfmemalloc(pfc)        ((pfc)->pagecnt_bias & PFC_MEMALLOC)
> +
>  typedef unsigned long vm_flags_t;
>
>  /*
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 635d7dd29d7f..61366f23e8c8 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4395,16 +4395,18 @@ void *page_frag_alloc(struct page_frag_cache *nc,
>                 page_ref_add(page, size - 1);
>
>                 /* reset page count bias and offset to start of new frag */
> -               nc->pfmemalloc = page_is_pfmemalloc(page);
>                 nc->pagecnt_bias = size;
> +               if (page_is_pfmemalloc(page))
> +                       nc->pagecnt_bias |= PFC_MEMALLOC;
>                 nc->offset = size;
>         }
>
>         offset = nc->offset - fragsz;
>         if (unlikely(offset < 0)) {
> +               unsigned int pagecnt_bias = nc->pagecnt_bias & ~PFC_MEMALLOC;
>                 page = virt_to_page(nc->va);
>
> -               if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
> +               if (!page_ref_sub_and_test(page, pagecnt_bias))
>                         goto refill;
>
>  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> @@ -4415,7 +4417,7 @@ void *page_frag_alloc(struct page_frag_cache *nc,
>                 set_page_count(page, size);
>
>                 /* reset page count bias and offset to start of new frag */
> -               nc->pagecnt_bias = size;
> +               nc->pagecnt_bias = size | (nc->pagecnt_bias - pagecnt_bias);
>                 offset = size - fragsz;
>         }
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 0bb0d8877954..54bbde8f7541 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -412,7 +412,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
>
>         nc = this_cpu_ptr(&netdev_alloc_cache);
>         data = page_frag_alloc(nc, len, gfp_mask);
> -       pfmemalloc = nc->pfmemalloc;
> +       pfmemalloc = page_frag_cache_pfmemalloc(nc);
>
>         local_irq_restore(flags);
>
> @@ -485,8 +485,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
>                 return NULL;
>         }
>
> -       /* use OR instead of assignment to avoid clearing of bits in mask */
> -       if (nc->page.pfmemalloc)
> +       if (page_frag_cache_pfmemalloc(&nc->page))
>                 skb->pfmemalloc = 1;
>         skb->head_frag = 1;
>
> --
> 2.16.2
>

^ permalink raw reply

* Re: [bpf-next V4 PATCH 09/15] mlx5: register a memory model when XDP is enabled
From: Tariq Toukan @ 2018-03-22 16:37 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, netdev, BjörnTöpel,
	magnus.karlsson
  Cc: eugenia, Jason Wang, John Fastabend, Eran Ben Elisha,
	Saeed Mahameed, galp, Daniel Borkmann, Alexei Starovoitov,
	Tariq Toukan
In-Reply-To: <152172851384.20979.8822446808531686268.stgit@firesoul>



On 22/03/2018 4:21 PM, Jesper Dangaard Brouer wrote:
> Now all the users of ndo_xdp_xmit have been converted to use xdp_return_frame.
> This enable a different memory model, thus activating another code path
> in the xdp_return_frame API.
> 
> V2: Fixed issues pointed out by Tariq.
> 
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>   drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    8 ++++++++
>   1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index da94c8cba5ee..2e4ca0f15b62 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -506,6 +506,14 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>   		rq->mkey_be = c->mkey_be;
>   	}
>   
> +	/* This must only be activate for order-0 pages */
> +	if (rq->xdp_prog) {
> +		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
> +						 MEM_TYPE_PAGE_ORDER0, NULL);
> +		if (err)
> +			goto err_rq_wq_destroy;
> +	}
> +
>   	for (i = 0; i < wq_sz; i++) {
>   		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
>   
> 

Reviewed-by: Tariq Toukan <tariqt@mellanox.com>

^ permalink raw reply

* Re: [PATCH v3 net-next] selftests: Add multipath tests for onlink flag
From: David Miller @ 2018-03-22 16:37 UTC (permalink / raw)
  To: dsahern; +Cc: netdev
In-Reply-To: <20180320170430.31543-1-dsahern@gmail.com>

From: David Ahern <dsahern@gmail.com>
Date: Tue, 20 Mar 2018 10:04:30 -0700

> Add multipath tests for onlink flag: one test with onlink added to
> both nexthops, then tests with onlink added to only 1 nexthop.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
> v3
> - no change; resend because of patchworks shows v2 as superseded
> 
> v2
> - add ipv6 test for onlink flag set globally as well as per-nexthop

Applied, thanks David.

So great to see all of these new tests lately!

^ permalink raw reply

* Re: [bpf-next V4 PATCH 01/15] mlx5: basic XDP_REDIRECT forward support
From: Tariq Toukan @ 2018-03-22 16:36 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, netdev, BjörnTöpel,
	magnus.karlsson
  Cc: eugenia, Jason Wang, John Fastabend, Eran Ben Elisha,
	Saeed Mahameed, galp, Daniel Borkmann, Alexei Starovoitov,
	Tariq Toukan
In-Reply-To: <152172847318.20979.18419058830882883814.stgit@firesoul>



On 22/03/2018 4:21 PM, Jesper Dangaard Brouer wrote:
> This implements basic XDP redirect support in mlx5 driver.
> 
> Notice that the ndo_xdp_xmit() is NOT implemented, because that API
> need some changes that this patchset is working towards.
> 
> The main purpose of this patch is have different drivers doing
> XDP_REDIRECT to show how different memory models behave in a cross
> driver world.
> 
> Update(pre-RFCv2 Tariq): Need to DMA unmap page before xdp_do_redirect,
> as the return API does not exist yet to to keep this mapped.
> 
> Update(pre-RFCv3 Saeed): Don't mix XDP_TX and XDP_REDIRECT flushing,
> introduce xdpsq.db.redirect_flush boolian.
> 
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>   drivers/net/ethernet/mellanox/mlx5/core/en.h    |    1 +
>   drivers/net/ethernet/mellanox/mlx5/core/en_rx.c |   27 ++++++++++++++++++++---
>   2 files changed, 25 insertions(+), 3 deletions(-)
> 

Reviewed-by: Tariq Toukan <tariqt@mellanox.com>

^ permalink raw reply

* Re: [PATCH net] ppp: avoid loop in xmit recursion detection code
From: David Miller @ 2018-03-22 16:36 UTC (permalink / raw)
  To: g.nault; +Cc: netdev, paulus, xuheng333, xeb, stephen
In-Reply-To: <a3460dc8e9e324a6e2fbfeadd359eb5da8c248ac.1521560762.git.g.nault@alphalink.fr>

From: Guillaume Nault <g.nault@alphalink.fr>
Date: Tue, 20 Mar 2018 16:49:26 +0100

> We already detect situations where a PPP channel sends packets back to
> its upper PPP device. While this is enough to avoid deadlocking on xmit
> locks, this doesn't prevent packets from looping between the channel
> and the unit.
> 
> The problem is that ppp_start_xmit() enqueues packets in ppp->file.xq
> before checking for xmit recursion. Therefore, __ppp_xmit_process()
> might dequeue a packet from ppp->file.xq and send it on the channel
> which, in turn, loops it back on the unit. Then ppp_start_xmit()
> queues the packet back to ppp->file.xq and __ppp_xmit_process() picks
> it up and sends it again through the channel. Therefore, the packet
> will loop between __ppp_xmit_process() and ppp_start_xmit() until some
> other part of the xmit path drops it.
> 
> For L2TP, we rapidly fill the skb's headroom and pppol2tp_xmit() drops
> the packet after a few iterations. But PPTP reallocates the headroom
> if necessary, letting the loop run and exhaust the machine resources
> (as reported in https://bugzilla.kernel.org/show_bug.cgi?id=199109).
> 
> Fix this by letting __ppp_xmit_process() enqueue the skb to
> ppp->file.xq, so that we can check for recursion before adding it to
> the queue. Now ppp_xmit_process() can drop the packet when recursion is
> detected.
> 
> __ppp_channel_push() is a bit special. It calls __ppp_xmit_process()
> without having any actual packet to send. This is used by
> ppp_output_wakeup() to re-enable transmission on the parent unit (for
> implementations like ppp_async.c, where the .start_xmit() function
> might not consume the skb, leaving it in ppp->xmit_pending and
> disabling transmission).
> Therefore, __ppp_xmit_process() needs to handle the case where skb is
> NULL, dequeuing as many packets as possible from ppp->file.xq.
> 
> Reported-by: xu heng <xuheng333@zoho.com>
> Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit")
> Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>

Applied and queued up for -stable, thank you.

^ permalink raw reply

* Re: [PATCH v2 6/8] page_frag_cache: Use a mask instead of offset
From: Alexander Duyck @ 2018-03-22 16:22 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Matthew Wilcox, Netdev, linux-mm, Jesper Dangaard Brouer,
	Eric Dumazet
In-Reply-To: <20180322153157.10447-7-willy@infradead.org>

On Thu, Mar 22, 2018 at 8:31 AM, Matthew Wilcox <willy@infradead.org> wrote:
> From: Matthew Wilcox <mawilcox@microsoft.com>
>
> By combining 'va' and 'offset' into 'addr' and using a mask instead,
> we can save a compare-and-branch in the fast-path of the allocator.
> This removes 4 instructions on x86 (both 32 and 64 bit).

What is the point of renaming "va"? I'm seeing a lot of unneeded
renaming in these patches that doesn't really seem needed and is just
making things harder to review.

> We can avoid storing the mask at all if we know that we're only allocating
> a single page.  This shrinks page_frag_cache from 12 to 8 bytes on 32-bit
> CONFIG_BASE_SMALL build.
>
> Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>

So I am not really a fan of CONFIG_BASE_SMALL in general, so
advertising gains in size is just going back down the reducing size at
the expense of performance train of thought.

Do we know for certain that a higher order page is always aligned to
the size of the higher order page itself? That is one thing I have
never been certain about. I know for example there are head and tail
pages so I was never certain if it was possible to create a higher
order page that is not aligned to to the size of the page itself.

If we can get away with making that assumption then yes I would say
this is probably an optimization, though I think we would be better
off without the CONFIG_BASE_SMALL bits.

> ---
>  include/linux/mm_types.h | 12 +++++++-----
>  mm/page_alloc.c          | 40 +++++++++++++++-------------------------
>  2 files changed, 22 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 0defff9e3c0e..ebe93edec752 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -225,12 +225,9 @@ struct page {
>  #define PFC_MEMALLOC                   (1U << 31)
>
>  struct page_frag_cache {
> -       void * va;
> +       void *addr;
>  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -       __u16 offset;
> -       __u16 size;
> -#else
> -       __u32 offset;
> +       unsigned int mask;

So this is just an akward layout. You now have essentially:
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
#else
    unsigned int mask;
#endif

>  #endif
>         /* we maintain a pagecount bias, so that we dont dirty cache line
>          * containing page->_refcount every time we allocate a fragment.
> @@ -239,6 +236,11 @@ struct page_frag_cache {
>  };
>
>  #define page_frag_cache_pfmemalloc(pfc)        ((pfc)->pagecnt_bias & PFC_MEMALLOC)
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +#define page_frag_cache_mask(pfc)      (pfc)->mask
> +#else
> +#define page_frag_cache_mask(pfc)      (~PAGE_MASK)
> +#endif
>
>  typedef unsigned long vm_flags_t;
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 5a2e3e293079..d15a5348a8e4 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4336,22 +4336,19 @@ EXPORT_SYMBOL(free_pages);
>   * drivers to provide a backing region of memory for use as either an
>   * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
>   */
> -static struct page *__page_frag_cache_refill(struct page_frag_cache *pfc,
> +static void *__page_frag_cache_refill(struct page_frag_cache *pfc,
>                                              gfp_t gfp_mask)
>  {
>         unsigned int size = PAGE_SIZE;
>         struct page *page = NULL;
> -       struct page *old = pfc->va ? virt_to_page(pfc->va) : NULL;
> +       struct page *old = pfc->addr ? virt_to_head_page(pfc->addr) : NULL;
>         gfp_t gfp = gfp_mask;
>         unsigned int pagecnt_bias = pfc->pagecnt_bias & ~PFC_MEMALLOC;
>
>         /* If all allocations have been freed, we can reuse this page */
>         if (old && page_ref_sub_and_test(old, pagecnt_bias)) {
>                 page = old;
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -               /* if size can vary use size else just use PAGE_SIZE */
> -               size = pfc->size;
> -#endif
> +               size = page_frag_cache_mask(pfc) + 1;
>                 /* Page count is 0, we can safely set it */
>                 set_page_count(page, size);
>                 goto reset;
> @@ -4364,27 +4361,24 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *pfc,
>                                 PAGE_FRAG_CACHE_MAX_ORDER);PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>         if (page)
>                 size = PAGE_FRAG_CACHE_MAX_SIZE;
> -       pfc->size = size;
> +       pfc->mask = size - 1;
>  #endif
>         if (unlikely(!page))
>                 page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
>         if (!page) {
> -               pfc->va = NULL;
> +               pfc->addr = NULL;
>                 return NULL;
>         }
>
> -       pfc->va = page_address(page);
> -
>         /* Using atomic_set() would break get_page_unless_zero() users. */
>         page_ref_add(page, size - 1);

You could just use the pfc->mask here instead of size - 1 just to
avoid having to do the subtraction more than once assuming the
compiler doesn't optimize it.

>  reset:
> -       /* reset page count bias and offset to start of new frag */
>         pfc->pagecnt_bias = size;
>         if (page_is_pfmemalloc(page))
>                 pfc->pagecnt_bias |= PFC_MEMALLOC;
> -       pfc->offset = size;
> +       pfc->addr = page_address(page) + size;
>
> -       return page;
> +       return pfc->addr;
>  }
>
>  void __page_frag_cache_drain(struct page *page, unsigned int count)
> @@ -4405,24 +4399,20 @@ EXPORT_SYMBOL(__page_frag_cache_drain);
>  void *page_frag_alloc(struct page_frag_cache *pfc,
>                       unsigned int size, gfp_t gfp_mask)
>  {
> -       struct page *page;
> -       int offset;
> +       void *addr = pfc->addr;
> +       unsigned int offset = (unsigned long)addr & page_frag_cache_mask(pfc);
>
> -       if (unlikely(!pfc->va)) {
> -refill:
> -               page = __page_frag_cache_refill(pfc, gfp_mask);
> -               if (!page)
> +       if (unlikely(offset < size)) {
> +               addr = __page_frag_cache_refill(pfc, gfp_mask);
> +               if (!addr)
>                         return NULL;
>         }
>
> -       offset = pfc->offset - size;
> -       if (unlikely(offset < 0))
> -               goto refill;
> -
> +       addr -= size;
> +       pfc->addr = addr;
>         pfc->pagecnt_bias--;
> -       pfc->offset = offset;
>
> -       return pfc->va + offset;
> +       return addr;
>  }
>  EXPORT_SYMBOL(page_frag_alloc);
>
> --
> 2.16.2
>

^ permalink raw reply

* Re: [PATCH net] ipv6: sr: fix scheduling in RCU when creating seg6 lwtunnel state
From: David Miller @ 2018-03-22 16:22 UTC (permalink / raw)
  To: eric.dumazet; +Cc: dav.lebrun, netdev, dlebrun, roopa
In-Reply-To: <61d63ddf-eefa-3eca-25b4-51e2c9db2fc7@gmail.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Mar 2018 10:20:15 -0700

> 
> 
> On 03/20/2018 10:11 AM, David Lebrun wrote:
>> On 20/03/18 15:07, Eric Dumazet wrote:
>>> This is not the proper fix.
>>>
>>> Control path holds RTNL and can sleeep if needed.
>>>
>>> RCU should be avoided in lwtunnel_build_state()
>>>
>> 
>> +Roopa
>> 
>> In lwtunnel_build_state(), the RCU protects the lwtunnel_encap_ops "ops" which is rcu-dereferenced. Moreover, the lwtunnel_state_alloc() function, which is used in all build_state functions, also uses GFP_ATOMIC, so this seemed a proper fix, or at least proper mitigation.
>> 
>> Do you suggest that the lwtunnel_encap_ops can be protected in a different way, not requiring RCU ?
> 
> Yes, GFP_ATOMIC might be an 'easy fix' for net tree,
> but for the future, GFP_KERNEL allocations make more sense in control path.

Agreed.

I'll apply this to net and queue it up for -stable, but long term making
this able to use GFP_KERNEL properly is the real way to go about it.

^ permalink raw reply

* Re: [PATCH v2 bpf-next 4/8] tracepoint: compute num_args at build time
From: Steven Rostedt @ 2018-03-22 16:14 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Linus Torvalds, David Miller, Daniel Borkmann, Peter Zijlstra,
	Network Development, kernel-team, Linux API
In-Reply-To: <f630fbce-dbe1-d0c3-072d-47215d5afaf3@fb.com>

On Thu, 22 Mar 2018 08:51:16 -0700
Alexei Starovoitov <ast@fb.com> wrote:

> So I definitely support the idea of build time warn for large
> number of args.

I'm more for a build time error for large number of args.

-- Steve

^ permalink raw reply

* Re: [PATCH 1/2] bpf: Remove struct bpf_verifier_env argument from print_bpf_insn
From: Daniel Borkmann @ 2018-03-22 16:07 UTC (permalink / raw)
  To: Jiri Olsa, Quentin Monnet; +Cc: Jiri Olsa, Alexei Starovoitov, lkml, netdev
In-Reply-To: <20180322155723.GD3206@krava>

On 03/22/2018 04:57 PM, Jiri Olsa wrote:
> On Thu, Mar 22, 2018 at 03:35:42PM +0000, Quentin Monnet wrote:
>> 2018-03-22 14:32 UTC+0100 ~ Jiri Olsa <jolsa@redhat.com>
>>> On Thu, Mar 22, 2018 at 10:34:18AM +0100, Daniel Borkmann wrote:
>>>> On 03/21/2018 07:37 PM, Jiri Olsa wrote:
>>>>> On Wed, Mar 21, 2018 at 05:25:33PM +0000, Quentin Monnet wrote:
>>>>>> 2018-03-21 16:02 UTC+0100 ~ Jiri Olsa <jolsa@kernel.org>
>>>>>>> We use print_bpf_insn in user space (bpftool and soon perf),
>>>>>>> so it'd be nice to keep it generic and strip it off the kernel
>>>>>>> struct bpf_verifier_env argument.
>>>>>>>
>>>>>>> This argument can be safely removed, because its users can
>>>>>>> use the struct bpf_insn_cbs::private_data to pass it.
>>>>>>>
>>>>>>> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
>>>>>>> ---
>>>>>>>  kernel/bpf/disasm.c   | 52 +++++++++++++++++++++++++--------------------------
>>>>>>>  kernel/bpf/disasm.h   |  5 +----
>>>>>>>  kernel/bpf/verifier.c |  6 +++---
>>>>>>>  3 files changed, 30 insertions(+), 33 deletions(-)
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>>>>>> index c6eff108aa99..9f27d3fa7259 100644
>>>>>>> --- a/kernel/bpf/verifier.c
>>>>>>> +++ b/kernel/bpf/verifier.c
>>>>>>> @@ -202,8 +202,7 @@ EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
>>>>>>>   * generic for symbol export. The function was renamed, but not the calls in
>>>>>>>   * the verifier to avoid complicating backports. Hence the alias below.
>>>>>>>   */
>>>>>>> -static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
>>>>>>> -				   const char *fmt, ...)
>>>>>>> +static __printf(2, 3) void verbose(void *private_data, const char *fmt, ...)
>>>>>>>  	__attribute__((alias("bpf_verifier_log_write")));
>>>>>>
>>>>>> Just as a note, verbose() will be aliased to a function whose prototype
>>>>>> differs (bpf_verifier_log_write() still expects a struct
>>>>>> bpf_verifier_env as its first argument). I am not so familiar with
>>>>>> function aliases, could this change be a concern?
>>>>>
>>>>> yea, but as it was pointer for pointer switch I did not
>>>>> see any problem with that.. I'll check more
>>>>
>>>> Ok, holding off for now until we have clarification. Other option could also
>>>> be to make it void *private_data everywhere and for the kernel writer then
>>>> do struct bpf_verifier_env *env = private_data.
>>>
>>> can't find much info about the alias behaviour for this
>>> case.. so how about having separate function for the
>>> print_cb like below.. I still need to test it
>>
>> I have nothing against splitting the function. This is a solution I
>> considered for verbose() and bpf_verifier_log_write(), before switching
>> to function alias (on Daniel's suggestion) because the code would be
>> identical for the two separate functions at that time. But with your
>> change they would not have the same signature anymore, so it sounds
>> reasonable. Just a note below...
>>
>>>
>>> thanks,
>>> jirka
>>>
>>>
>>> ---
>>>  kernel/bpf/disasm.c   | 52 +++++++++++++++++++++++++--------------------------
>>>  kernel/bpf/disasm.h   |  5 +----
>>>  kernel/bpf/verifier.c | 41 +++++++++++++++++++++++++++++-----------
>>>  3 files changed, 57 insertions(+), 41 deletions(-)
>>>
>>
>> [...]
>>
>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>> index e9f7c20691c1..69bf7590877c 100644
>>> --- a/kernel/bpf/verifier.c
>>> +++ b/kernel/bpf/verifier.c
>>> @@ -168,23 +168,16 @@ struct bpf_call_arg_meta {
>>>  
>>>  static DEFINE_MUTEX(bpf_verifier_lock);
>>>  
>>> -/* log_level controls verbosity level of eBPF verifier.
>>> - * bpf_verifier_log_write() is used to dump the verification trace to the log,
>>> - * so the user can figure out what's wrong with the program
>>> - */
>>> -__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
>>> -					   const char *fmt, ...)
>>> +static void log_write(struct bpf_verifier_env *env, const char *fmt,
>>> +		      va_list args)
>>>  {
>>>  	struct bpf_verifer_log *log = &env->log;
>>>  	unsigned int n;
>>> -	va_list args;
>>>  
>>>  	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
>>>  		return;
>>>  
>>> -	va_start(args, fmt);
>>>  	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
>>> -	va_end(args);
>>>  
>>>  	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
>>>  		  "verifier log line truncated - local buffer too short\n");
>>> @@ -197,7 +190,32 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
>>>  	else
>>>  		log->ubuf = NULL;
>>>  }
>>> +
>>> +/* log_level controls verbosity level of eBPF verifier.
>>> + * bpf_verifier_log_write() is used to dump the verification trace to the log,
>>> + * so the user can figure out what's wrong with the program
>>> + */
>>> +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
>>> +					   const char *fmt, ...)
>>> +{
>>> +	va_list args;
>>> +
>>> +	va_start(args, fmt);
>>> +	log_write(env, fmt, args);
>>> +	va_end(args);
>>> +}
>>>  EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
>>> +
>>> +__printf(2, 3) static void print_ins(void *private_data,
>>> +				     const char *fmt, ...)
>>
>> Unless I am mistaken, you could just call this one "verbose()" and
>> simply remove the function alias?
> 
> right you are ;-) I haven't realized that struct bpf_verifier_env *env
> will cleanly cast to void * 
> 
> new version attached.. I'll make some tests and send full patch

When you do so, please make sure to send a full fresh series with the two
patches and also cover letter included, otherwise it's more fragile wrt
potentially applying the wrong patch from one of the replies. :-)

Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH v2 net 0/7] Aquantia atlantic hot fixes 03-2018
From: David Miller @ 2018-03-22 16:03 UTC (permalink / raw)
  To: igor.russkikh; +Cc: netdev, darcari, pavel.belous
In-Reply-To: <cover.1521544563.git.igor.russkikh@aquantia.com>

From: Igor Russkikh <igor.russkikh@aquantia.com>
Date: Tue, 20 Mar 2018 14:40:30 +0300

> This is a set of atlantic driver hot fixes for various areas:
> 
> Some issues with hardware reset covered,
> Fixed napi_poll flood happening on some traffic conditions,
> Allow system to change MAC address on live device,
> Add pci shutdown handler.
> 
> patch v2:
> - reverse christmas tree
> - remove driver private parameter, replacing it with define.

Series applied, thank you.

^ permalink raw reply

* Re: [PATCH v3 net-next 0/5] Add support for RDMA enhancements in cxgb4
From: David Miller @ 2018-03-22 16:00 UTC (permalink / raw)
  To: rajur; +Cc: netdev, nirranjan, indranil, venkatesh, swise, bharat
In-Reply-To: <20180320101142.20102-1-rajur@chelsio.com>

From: Raju Rangoju <rajur@chelsio.com>
Date: Tue, 20 Mar 2018 15:41:37 +0530

> Allocates the HW-resources and provide the necessary routines for the
> upper layer driver (rdma/iw_cxgb4) to enable the RDMA SRQ support for Chelsio adapters.
> 
> Advertise support for write with immediate work request
> Advertise support for write with completion
> 
> v3: modified memory allocation as per Stefano's suggestion
> 
> v2: fixed the patching issues and also
>     fixed the following based on review comments of Stefano Brivio
>  - using kvzalloc instead of vzalloc
>  - using #define instead of enum

Series applied, thank you.

^ permalink raw reply

* Re: [PATCH 1/2] bpf: Remove struct bpf_verifier_env argument from print_bpf_insn
From: Jiri Olsa @ 2018-03-22 15:57 UTC (permalink / raw)
  To: Quentin Monnet
  Cc: Daniel Borkmann, Jiri Olsa, Alexei Starovoitov, lkml, netdev
In-Reply-To: <efa1cd74-12f4-2ac6-cc9a-50669b4b69de@netronome.com>

On Thu, Mar 22, 2018 at 03:35:42PM +0000, Quentin Monnet wrote:
> 2018-03-22 14:32 UTC+0100 ~ Jiri Olsa <jolsa@redhat.com>
> > On Thu, Mar 22, 2018 at 10:34:18AM +0100, Daniel Borkmann wrote:
> >> On 03/21/2018 07:37 PM, Jiri Olsa wrote:
> >>> On Wed, Mar 21, 2018 at 05:25:33PM +0000, Quentin Monnet wrote:
> >>>> 2018-03-21 16:02 UTC+0100 ~ Jiri Olsa <jolsa@kernel.org>
> >>>>> We use print_bpf_insn in user space (bpftool and soon perf),
> >>>>> so it'd be nice to keep it generic and strip it off the kernel
> >>>>> struct bpf_verifier_env argument.
> >>>>>
> >>>>> This argument can be safely removed, because its users can
> >>>>> use the struct bpf_insn_cbs::private_data to pass it.
> >>>>>
> >>>>> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> >>>>> ---
> >>>>>  kernel/bpf/disasm.c   | 52 +++++++++++++++++++++++++--------------------------
> >>>>>  kernel/bpf/disasm.h   |  5 +----
> >>>>>  kernel/bpf/verifier.c |  6 +++---
> >>>>>  3 files changed, 30 insertions(+), 33 deletions(-)
> >>>>
> >>>> [...]
> >>>>
> >>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> >>>>> index c6eff108aa99..9f27d3fa7259 100644
> >>>>> --- a/kernel/bpf/verifier.c
> >>>>> +++ b/kernel/bpf/verifier.c
> >>>>> @@ -202,8 +202,7 @@ EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
> >>>>>   * generic for symbol export. The function was renamed, but not the calls in
> >>>>>   * the verifier to avoid complicating backports. Hence the alias below.
> >>>>>   */
> >>>>> -static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
> >>>>> -				   const char *fmt, ...)
> >>>>> +static __printf(2, 3) void verbose(void *private_data, const char *fmt, ...)
> >>>>>  	__attribute__((alias("bpf_verifier_log_write")));
> >>>>
> >>>> Just as a note, verbose() will be aliased to a function whose prototype
> >>>> differs (bpf_verifier_log_write() still expects a struct
> >>>> bpf_verifier_env as its first argument). I am not so familiar with
> >>>> function aliases, could this change be a concern?
> >>>
> >>> yea, but as it was pointer for pointer switch I did not
> >>> see any problem with that.. I'll check more
> >>
> >> Ok, holding off for now until we have clarification. Other option could also
> >> be to make it void *private_data everywhere and for the kernel writer then
> >> do struct bpf_verifier_env *env = private_data.
> > 
> > can't find much info about the alias behaviour for this
> > case.. so how about having separate function for the
> > print_cb like below.. I still need to test it
> 
> I have nothing against splitting the function. This is a solution I
> considered for verbose() and bpf_verifier_log_write(), before switching
> to function alias (on Daniel's suggestion) because the code would be
> identical for the two separate functions at that time. But with your
> change they would not have the same signature anymore, so it sounds
> reasonable. Just a note below...
> 
> > 
> > thanks,
> > jirka
> > 
> > 
> > ---
> >  kernel/bpf/disasm.c   | 52 +++++++++++++++++++++++++--------------------------
> >  kernel/bpf/disasm.h   |  5 +----
> >  kernel/bpf/verifier.c | 41 +++++++++++++++++++++++++++++-----------
> >  3 files changed, 57 insertions(+), 41 deletions(-)
> > 
> 
> [...]
> 
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index e9f7c20691c1..69bf7590877c 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -168,23 +168,16 @@ struct bpf_call_arg_meta {
> >  
> >  static DEFINE_MUTEX(bpf_verifier_lock);
> >  
> > -/* log_level controls verbosity level of eBPF verifier.
> > - * bpf_verifier_log_write() is used to dump the verification trace to the log,
> > - * so the user can figure out what's wrong with the program
> > - */
> > -__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
> > -					   const char *fmt, ...)
> > +static void log_write(struct bpf_verifier_env *env, const char *fmt,
> > +		      va_list args)
> >  {
> >  	struct bpf_verifer_log *log = &env->log;
> >  	unsigned int n;
> > -	va_list args;
> >  
> >  	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
> >  		return;
> >  
> > -	va_start(args, fmt);
> >  	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
> > -	va_end(args);
> >  
> >  	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
> >  		  "verifier log line truncated - local buffer too short\n");
> > @@ -197,7 +190,32 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
> >  	else
> >  		log->ubuf = NULL;
> >  }
> > +
> > +/* log_level controls verbosity level of eBPF verifier.
> > + * bpf_verifier_log_write() is used to dump the verification trace to the log,
> > + * so the user can figure out what's wrong with the program
> > + */
> > +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
> > +					   const char *fmt, ...)
> > +{
> > +	va_list args;
> > +
> > +	va_start(args, fmt);
> > +	log_write(env, fmt, args);
> > +	va_end(args);
> > +}
> >  EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
> > +
> > +__printf(2, 3) static void print_ins(void *private_data,
> > +				     const char *fmt, ...)
> 
> Unless I am mistaken, you could just call this one "verbose()" and
> simply remove the function alias?

right you are ;-) I haven't realized that struct bpf_verifier_env *env
will cleanly cast to void * 

new version attached.. I'll make some tests and send full patch

jirka


---
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 8740406df2cd..d6b76377cb6e 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -113,16 +113,16 @@ static const char *const bpf_jmp_string[16] = {
 };
 
 static void print_bpf_end_insn(bpf_insn_print_t verbose,
-			       struct bpf_verifier_env *env,
+			       void *private_data,
 			       const struct bpf_insn *insn)
 {
-	verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg,
+	verbose(private_data, "(%02x) r%d = %s%d r%d\n",
+		insn->code, insn->dst_reg,
 		BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le",
 		insn->imm, insn->dst_reg);
 }
 
 void print_bpf_insn(const struct bpf_insn_cbs *cbs,
-		    struct bpf_verifier_env *env,
 		    const struct bpf_insn *insn,
 		    bool allow_ptr_leaks)
 {
@@ -132,23 +132,23 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 	if (class == BPF_ALU || class == BPF_ALU64) {
 		if (BPF_OP(insn->code) == BPF_END) {
 			if (class == BPF_ALU64)
-				verbose(env, "BUG_alu64_%02x\n", insn->code);
+				verbose(cbs->private_data, "BUG_alu64_%02x\n", insn->code);
 			else
-				print_bpf_end_insn(verbose, env, insn);
+				print_bpf_end_insn(verbose, cbs->private_data, insn);
 		} else if (BPF_OP(insn->code) == BPF_NEG) {
-			verbose(env, "(%02x) r%d = %s-r%d\n",
+			verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
 				insn->code, insn->dst_reg,
 				class == BPF_ALU ? "(u32) " : "",
 				insn->dst_reg);
 		} else if (BPF_SRC(insn->code) == BPF_X) {
-			verbose(env, "(%02x) %sr%d %s %sr%d\n",
+			verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
 				insn->code, class == BPF_ALU ? "(u32) " : "",
 				insn->dst_reg,
 				bpf_alu_string[BPF_OP(insn->code) >> 4],
 				class == BPF_ALU ? "(u32) " : "",
 				insn->src_reg);
 		} else {
-			verbose(env, "(%02x) %sr%d %s %s%d\n",
+			verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
 				insn->code, class == BPF_ALU ? "(u32) " : "",
 				insn->dst_reg,
 				bpf_alu_string[BPF_OP(insn->code) >> 4],
@@ -157,46 +157,46 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 		}
 	} else if (class == BPF_STX) {
 		if (BPF_MODE(insn->code) == BPF_MEM)
-			verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n",
+			verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->dst_reg,
 				insn->off, insn->src_reg);
 		else if (BPF_MODE(insn->code) == BPF_XADD)
-			verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
+			verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->dst_reg, insn->off,
 				insn->src_reg);
 		else
-			verbose(env, "BUG_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_%02x\n", insn->code);
 	} else if (class == BPF_ST) {
 		if (BPF_MODE(insn->code) != BPF_MEM) {
-			verbose(env, "BUG_st_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
 			return;
 		}
-		verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n",
+		verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
 			insn->code,
 			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 			insn->dst_reg,
 			insn->off, insn->imm);
 	} else if (class == BPF_LDX) {
 		if (BPF_MODE(insn->code) != BPF_MEM) {
-			verbose(env, "BUG_ldx_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
 			return;
 		}
-		verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n",
+		verbose(cbs->private_data, "(%02x) r%d = *(%s *)(r%d %+d)\n",
 			insn->code, insn->dst_reg,
 			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 			insn->src_reg, insn->off);
 	} else if (class == BPF_LD) {
 		if (BPF_MODE(insn->code) == BPF_ABS) {
-			verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n",
+			verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[%d]\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->imm);
 		} else if (BPF_MODE(insn->code) == BPF_IND) {
-			verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
+			verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->src_reg, insn->imm);
@@ -212,12 +212,12 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			if (map_ptr && !allow_ptr_leaks)
 				imm = 0;
 
-			verbose(env, "(%02x) r%d = %s\n",
+			verbose(cbs->private_data, "(%02x) r%d = %s\n",
 				insn->code, insn->dst_reg,
 				__func_imm_name(cbs, insn, imm,
 						tmp, sizeof(tmp)));
 		} else {
-			verbose(env, "BUG_ld_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
 			return;
 		}
 	} else if (class == BPF_JMP) {
@@ -227,35 +227,35 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			char tmp[64];
 
 			if (insn->src_reg == BPF_PSEUDO_CALL) {
-				verbose(env, "(%02x) call pc%s\n",
+				verbose(cbs->private_data, "(%02x) call pc%s\n",
 					insn->code,
 					__func_get_name(cbs, insn,
 							tmp, sizeof(tmp)));
 			} else {
 				strcpy(tmp, "unknown");
-				verbose(env, "(%02x) call %s#%d\n", insn->code,
+				verbose(cbs->private_data, "(%02x) call %s#%d\n", insn->code,
 					__func_get_name(cbs, insn,
 							tmp, sizeof(tmp)),
 					insn->imm);
 			}
 		} else if (insn->code == (BPF_JMP | BPF_JA)) {
-			verbose(env, "(%02x) goto pc%+d\n",
+			verbose(cbs->private_data, "(%02x) goto pc%+d\n",
 				insn->code, insn->off);
 		} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
-			verbose(env, "(%02x) exit\n", insn->code);
+			verbose(cbs->private_data, "(%02x) exit\n", insn->code);
 		} else if (BPF_SRC(insn->code) == BPF_X) {
-			verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n",
+			verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
 				insn->code, insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
 				insn->src_reg, insn->off);
 		} else {
-			verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n",
+			verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
 				insn->code, insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
 				insn->imm, insn->off);
 		}
 	} else {
-		verbose(env, "(%02x) %s\n",
+		verbose(cbs->private_data, "(%02x) %s\n",
 			insn->code, bpf_class_string[class]);
 	}
 }
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
index 266fe8ee542b..e1324a834a24 100644
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -22,14 +22,12 @@
 #include <string.h>
 #endif
 
-struct bpf_verifier_env;
-
 extern const char *const bpf_alu_string[16];
 extern const char *const bpf_class_string[8];
 
 const char *func_id_name(int id);
 
-typedef __printf(2, 3) void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
+typedef __printf(2, 3) void (*bpf_insn_print_t)(void *private_data,
 						const char *, ...);
 typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
 					      const struct bpf_insn *insn);
@@ -45,7 +43,6 @@ struct bpf_insn_cbs {
 };
 
 void print_bpf_insn(const struct bpf_insn_cbs *cbs,
-		    struct bpf_verifier_env *env,
 		    const struct bpf_insn *insn,
 		    bool allow_ptr_leaks);
 #endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e9f7c20691c1..e93a6e48641b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -168,23 +168,16 @@ struct bpf_call_arg_meta {
 
 static DEFINE_MUTEX(bpf_verifier_lock);
 
-/* log_level controls verbosity level of eBPF verifier.
- * bpf_verifier_log_write() is used to dump the verification trace to the log,
- * so the user can figure out what's wrong with the program
- */
-__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
-					   const char *fmt, ...)
+static void log_write(struct bpf_verifier_env *env, const char *fmt,
+		      va_list args)
 {
 	struct bpf_verifer_log *log = &env->log;
 	unsigned int n;
-	va_list args;
 
 	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
 		return;
 
-	va_start(args, fmt);
 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
-	va_end(args);
 
 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 		  "verifier log line truncated - local buffer too short\n");
@@ -197,14 +190,30 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 	else
 		log->ubuf = NULL;
 }
-EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
-/* Historically bpf_verifier_log_write was called verbose, but the name was too
- * generic for symbol export. The function was renamed, but not the calls in
- * the verifier to avoid complicating backports. Hence the alias below.
+
+/* log_level controls verbosity level of eBPF verifier.
+ * bpf_verifier_log_write() is used to dump the verification trace to the log,
+ * so the user can figure out what's wrong with the program
  */
-static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
-				   const char *fmt, ...)
-	__attribute__((alias("bpf_verifier_log_write")));
+__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
+					   const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	log_write(env, fmt, args);
+	va_end(args);
+}
+EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
+
+__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	log_write(private_data, fmt, args);
+	va_end(args);
+}
 
 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 {
@@ -4600,10 +4609,11 @@ static int do_check(struct bpf_verifier_env *env)
 		if (env->log.level) {
 			const struct bpf_insn_cbs cbs = {
 				.cb_print	= verbose,
+				.private_data	= env,
 			};
 
 			verbose(env, "%d: ", insn_idx);
-			print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
+			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
 		}
 
 		if (bpf_prog_is_dev_bound(env->prog->aux)) {

^ permalink raw reply related

* Re: [PATCH net 0/4] s390/qeth: fixes 2018-03-20
From: David Miller @ 2018-03-22 15:52 UTC (permalink / raw)
  To: jwi; +Cc: netdev, linux-s390, schwidefsky, heiko.carstens, raspl, ubraun
In-Reply-To: <20180320065915.62076-1-jwi@linux.vnet.ibm.com>

From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Tue, 20 Mar 2018 07:59:11 +0100

> Please apply one final set of qeth patches for 4.16.
> All of these fix long-standing bugs, so please queue them up for -stable
> as well.

Series applied and queued up for -stable, thank you.

^ permalink raw reply

* Re: [PATCH v2 bpf-next 4/8] tracepoint: compute num_args at build time
From: Alexei Starovoitov @ 2018-03-22 15:51 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Linus Torvalds, David Miller, Daniel Borkmann, Peter Zijlstra,
	Network Development, kernel-team, Linux API
In-Reply-To: <20180322093605.51b067fe@gandalf.local.home>

On 3/22/18 6:36 AM, Steven Rostedt wrote:
> On Wed, 21 Mar 2018 15:05:46 -0700
> Alexei Starovoitov <ast@fb.com> wrote:
>
>> Like the only reason my patch is counting till 17 is because of
>> trace_iwlwifi_dev_ucode_error().
>> The next offenders are using 12 arguments:
>> trace_mc_event()
>> trace_mm_vmscan_lru_shrink_inactive()
>>
>> Clearly not every efficient usage of it:
>>          trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
>>                          nr_scanned, nr_reclaimed,
>>                          stat.nr_dirty,  stat.nr_writeback,
>>                          stat.nr_congested, stat.nr_immediate,
>>                          stat.nr_activate, stat.nr_ref_keep,
>>                          stat.nr_unmap_fail,
>>                          sc->priority, file);
>> could have passed &stat instead.
>
> Yes they should have, and if I was on the Cc for that patch, I would
> have yelled at them and told them that's exactly what they needed to do.
>
> Perhaps I should add something to keep any tracepoint from having more
> than 6 arguments. That should force a clean up quickly.

I was hesitant to do anything about iwlwifi_dev_ucode_error's 17 args,
because when the code is in such shape there are likely more
skeletons in the closet.
Turned out 'struct iwl_error_event_table' is defined twice with subtle
different field names and layout. While the same
trace_iwlwifi_dev_ucode_error() is used in what looks like two
different cases. I think I managed to refactor it from 17 args to 4
while keeping all bugs in place, but it really should be a job of
the author of the code to deal with such oddities.
Will send the 'fix' in the next respin.
So I definitely support the idea of build time warn for large
number of args.

^ permalink raw reply

* Re: [patch net-next RFC 00/12] devlink: introduce port flavours and common phys_port_name generation
From: Roopa Prabhu @ 2018-03-22 15:51 UTC (permalink / raw)
  To: David Ahern
  Cc: Jiri Pirko, netdev, David Miller, Ido Schimmel, Jakub Kicinski,
	mlxsw, Andrew Lunn, Vivien Didelot, Florian Fainelli,
	Michael Chan, ganeshgr, Saeed Mahameed, Simon Horman,
	pieter.jansenvanvuuren, John Hurley, Dirk van der Merwe,
	Alexander Duyck, Or Gerlitz, vijaya.guvva, satananda.burla,
	raghu.vatsavayi, felix.manlunas, Andy Gospodarek, sathya.perla,
	vasundhara-v.volam, tariqt, eranbe, Jeff Kirsher
In-Reply-To: <7217d1fb-665f-92cf-2704-364b91cb8248@gmail.com>

On Thu, Mar 22, 2018 at 8:34 AM, David Ahern <dsahern@gmail.com> wrote:
> On 3/22/18 4:55 AM, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>>
>> This patchset resolves 2 issues we have right now:
>> 1) There are many netdevices / ports in the system, for port, pf, vf
>>    represenatation but the user has no way to see which is which
>> 2) The ndo_get_phys_port_name is implemented in each driver separatelly,
>>    which may lead to inconsistent names between drivers.
>
> Similar to ndo_get_phys_port_{name,id}, devlink requires drivers to opt
> in with an implementation right, so you can't really force a solution to
> the consistent naming.
>
>>
>> This patchset introduces port flavours which should address the first
>> problem. I'm testing this with Netronome nfp hardware. When the user
>> has 2 physical ports, 1 pf, and 4 vfs, he should see something like this:
>> # devlink port
>> pci/0000:05:00.0/0: type eth netdev enp5s0np0 flavour physical number 0
>> pci/0000:05:00.0/268435456: type eth netdev eth0 flavour physical number 0
>> pci/0000:05:00.0/268435460: type eth netdev enp5s0np1 flavour physical number 1
>> pci/0000:05:00.0/536875008: type eth netdev eth2 flavour pf_rep number 536875008
>> pci/0000:05:00.0/536870912: type eth netdev eth1 flavour vf_rep number 0
>> pci/0000:05:00.0/536870976: type eth netdev eth3 flavour vf_rep number 1
>> pci/0000:05:00.0/536871040: type eth netdev eth4 flavour vf_rep number 2
>> pci/0000:05:00.0/536871104: type eth netdev eth5 flavour vf_rep number 3
>
> How about 'kind' instead of flavo{u}r?
>

'kind' is good too...but to not confuse it with logical device
rtnetlink 'kind', maybe 'physical_kind' or 'port_kind' or 'phys_kind'
or any-other-better--prefix-to-kind would be preferable

^ permalink raw reply

* Re: [PATCH net-next 0/4] r8169: series with smaller improvements w/o functional changes
From: David Miller @ 2018-03-22 15:48 UTC (permalink / raw)
  To: hkallweit1; +Cc: nic_swsd, netdev
In-Reply-To: <c151da57-5034-7966-8da9-232aa7e163b3@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Mar 2018 07:38:44 +0100

> This series includes smaller improvements w/o intended functional changes.

Series applied to net-next, thank you.

^ permalink raw reply

* Re: [PATCH v2 0/3] net: phy: Add general dummy stubs for MMD register access
From: David Miller @ 2018-03-22 15:43 UTC (permalink / raw)
  To: haokexin; +Cc: netdev, andrew, f.fainelli, claudiu.manoil
In-Reply-To: <20180320014454.12697-1-haokexin@gmail.com>

From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 20 Mar 2018 09:44:51 +0800

> v2:
> As suggested by Andrew:
>   - Add general dummy stubs
>   - Also use that for the micrel phy
> 
> This patch series fix the Ethernet broken on the mpc8315erdb board introduced
> by commit b6b5e8a69118 ("gianfar: Disable EEE autoneg by default").

Series applied, thank you.

^ permalink raw reply

* Re: [RFC PATCH] net: phy: Added device tree binding for dev-addr and dev-addr code check-up and usage
From: Andrew Lunn @ 2018-03-22 15:43 UTC (permalink / raw)
  To: Vicen?iu Galanopulo; +Cc: netdev
In-Reply-To: <HE1PR0402MB3578B23A4DEB394DECB68876EEAA0@HE1PR0402MB3578.eurprd04.prod.outlook.com>

> Just to make sure I understand. Do you want me to change the
> signature of all of_mdiobus_register_phy(), get_phy_device(),
> get_phy_id() and get_phy_c45_ids() and include the dev_addr
> parameter obtained from the device tree?  (a propagation of this
> parameter across all functions all the way to
> get_phy_c45_devs_in_pkg?)

Humm, i thought about this some more...

When calling mdio_read/mdio_write with C45 addresses, we pack both
parts in addr, and mask in MII_ADDR_C45. Try playing with that idea.
It might be less messy.

      Andrew

^ permalink raw reply

* Re: [PATCH v2 bpf-next 5/8] bpf: introduce BPF_RAW_TRACEPOINT
From: Alexei Starovoitov @ 2018-03-22 15:41 UTC (permalink / raw)
  To: Daniel Borkmann, davem
  Cc: torvalds, peterz, rostedt, netdev, kernel-team, linux-api
In-Reply-To: <580e6dcd-7deb-9fe2-5e37-6ce9ad0593a4@iogearbox.net>

On 3/22/18 2:43 AM, Daniel Borkmann wrote:
> On 03/21/2018 07:54 PM, Alexei Starovoitov wrote:
> [...]
>> @@ -546,6 +556,53 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
>>  void perf_trace_buf_update(void *record, u16 type);
>>  void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
>>
>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3);
>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4);
>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5);
>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6);
>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8);
>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8, u64 arg9);
>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10);
>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11);
>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
>> +void bpf_trace_run13(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13);
>> +void bpf_trace_run14(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14);
>> +void bpf_trace_run15(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15);
>> +void bpf_trace_run16(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16);
>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17);
>>  void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
>>  			       struct trace_event_call *call, u64 count,
>>  			       struct pt_regs *regs, struct hlist_head *head,
> [...]
>> @@ -896,3 +976,206 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
>>
>>  	return ret;
>>  }
>> +
>> +static __always_inline
>> +void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
>> +{
>> +	rcu_read_lock();
>> +	preempt_disable();
>> +	(void) BPF_PROG_RUN(prog, args);
>> +	preempt_enable();
>> +	rcu_read_unlock();
>> +}
>> +
>> +#define EVAL1(FN, X) FN(X)
>> +#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
>> +#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
>> +#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
>> +#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
>> +#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
>> +
>> +#define COPY(X) args[X - 1] = arg##X;
>> +
>> +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1)
>> +{
>> +	u64 args[1];
>> +
>> +	EVAL1(COPY, 1);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run1);
>> +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2)
>> +{
>> +	u64 args[2];
>> +
>> +	EVAL2(COPY, 1, 2);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run2);
>> +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3)
>> +{
>> +	u64 args[3];
>> +
>> +	EVAL3(COPY, 1, 2, 3);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run3);
>> +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4)
>> +{
>> +	u64 args[4];
>> +
>> +	EVAL4(COPY, 1, 2, 3, 4);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run4);
>> +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5)
>> +{
>> +	u64 args[5];
>> +
>> +	EVAL5(COPY, 1, 2, 3, 4, 5);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run5);
>> +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6)
>> +{
>> +	u64 args[6];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run6);
>> +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
>> +{
>> +	u64 args[7];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL1(COPY, 7);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run7);
>> +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8)
>> +{
>> +	u64 args[8];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL2(COPY, 7, 8);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run8);
>> +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		    u64 arg8, u64 arg9)
>> +{
>> +	u64 args[9];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL3(COPY, 7, 8, 9);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run9);
>> +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10)
>> +{
>> +	u64 args[10];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL4(COPY, 7, 8, 9, 10);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run10);
>> +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11)
>> +{
>> +	u64 args[11];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL5(COPY, 7, 8, 9, 10, 11);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run11);
>> +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12)
>> +{
>> +	u64 args[12];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run12);
>> +void bpf_trace_run17(struct bpf_prog *prog, u64 arg1, u64 arg2,
>> +		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
>> +		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12,
>> +		     u64 arg13, u64 arg14, u64 arg15, u64 arg16, u64 arg17)
>> +{
>> +	u64 args[17];
>> +
>> +	EVAL6(COPY, 1, 2, 3, 4, 5, 6);
>> +	EVAL6(COPY, 7, 8, 9, 10, 11, 12);
>> +	EVAL5(COPY, 13, 14, 15, 16, 17);
>> +	__bpf_trace_run(prog, args);
>> +}
>> +EXPORT_SYMBOL_GPL(bpf_trace_run17);
>
> Would be nice if we could generate all these above via macro, e.g. when we define
> a hard upper limit for max number of tracepoint args anyway, so this gets automatically
> adjusted as well. Maybe some of the logic from BPF_CALL_*() macros could be borrowed
> for this purpose.

I've thought about it, but couldn't figure out how to do it.
Suggestions are welcome.
The preprocessor cannot expand a constant N into N statements.
There gotta be something like:
...
#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
#define EVAL6(FN, X, Y...) FN(X) EVAL5(FN, Y)
for whatever maximum we will pick.
I picked 6 as a good compromise and used it twice in bpf_trace_run1x()
Similar thing possible for u64 arg1, u64 arg2, ...
but it will be harder to read.
Looking forward what you can come up with.

^ permalink raw reply

* [PATCH v5 2/2] ixgbevf: eliminate duplicate barriers on weakly-ordered archs
From: Sinan Kaya @ 2018-03-22 15:42 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: netdev, timur, sulrich, linux-arm-msm, linux-arm-kernel,
	Sinan Kaya, intel-wired-lan, linux-kernel
In-Reply-To: <1521733352-25266-1-git-send-email-okaya@codeaurora.org>

Code includes wmb() followed by writel() in multiple places. writel()
already has a barrier on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing the
register write.

Since code already has an explicit barrier call, changing writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 815cb1a..eaa930e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -725,7 +725,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
 		 * such as IA-64).
 		 */
 		wmb();
-		writel(i, rx_ring->tail);
+		writel_relaxed(i, rx_ring->tail);
 	}
 }
 
@@ -1232,7 +1232,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		 * know there are new descriptors to fetch.
 		 */
 		wmb();
-		writel(xdp_ring->next_to_use, xdp_ring->tail);
+		writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
 	}
 
 	u64_stats_update_begin(&rx_ring->syncp);
@@ -4004,7 +4004,7 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 	tx_ring->next_to_use = i;
 
 	/* notify HW of packet */
-	writel(i, tx_ring->tail);
+	writel_relaxed(i, tx_ring->tail);
 
 	return;
 dma_error:
-- 
2.7.4

^ permalink raw reply related

* [PATCH v5 1/2] ixgbevf: keep writel() closer to wmb()
From: Sinan Kaya @ 2018-03-22 15:42 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: netdev, timur, sulrich, linux-arm-msm, linux-arm-kernel,
	Sinan Kaya, intel-wired-lan, linux-kernel

Remove ixgbevf_write_tail() in favor of moving writel() close to
wmb().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h      | 5 -----
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +++---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index f712646..f97091d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -312,11 +312,6 @@ static inline u16 ixgbevf_desc_unused(struct ixgbevf_ring *ring)
 	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
 }
 
-static inline void ixgbevf_write_tail(struct ixgbevf_ring *ring, u32 value)
-{
-	writel(value, ring->tail);
-}
-
 #define IXGBEVF_RX_DESC(R, i)	\
 	(&(((union ixgbe_adv_rx_desc *)((R)->desc))[i]))
 #define IXGBEVF_TX_DESC(R, i)	\
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 3d9033f..815cb1a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -725,7 +725,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
 		 * such as IA-64).
 		 */
 		wmb();
-		ixgbevf_write_tail(rx_ring, i);
+		writel(i, rx_ring->tail);
 	}
 }
 
@@ -1232,7 +1232,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		 * know there are new descriptors to fetch.
 		 */
 		wmb();
-		ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use);
+		writel(xdp_ring->next_to_use, xdp_ring->tail);
 	}
 
 	u64_stats_update_begin(&rx_ring->syncp);
@@ -4004,7 +4004,7 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 	tx_ring->next_to_use = i;
 
 	/* notify HW of packet */
-	ixgbevf_write_tail(tx_ring, i);
+	writel(i, tx_ring->tail);
 
 	return;
 dma_error:
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH 1/2] bpf: Remove struct bpf_verifier_env argument from print_bpf_insn
From: Quentin Monnet @ 2018-03-22 15:35 UTC (permalink / raw)
  To: Jiri Olsa, Daniel Borkmann; +Cc: Jiri Olsa, Alexei Starovoitov, lkml, netdev
In-Reply-To: <20180322133222.GB3206@krava>

2018-03-22 14:32 UTC+0100 ~ Jiri Olsa <jolsa@redhat.com>
> On Thu, Mar 22, 2018 at 10:34:18AM +0100, Daniel Borkmann wrote:
>> On 03/21/2018 07:37 PM, Jiri Olsa wrote:
>>> On Wed, Mar 21, 2018 at 05:25:33PM +0000, Quentin Monnet wrote:
>>>> 2018-03-21 16:02 UTC+0100 ~ Jiri Olsa <jolsa@kernel.org>
>>>>> We use print_bpf_insn in user space (bpftool and soon perf),
>>>>> so it'd be nice to keep it generic and strip it off the kernel
>>>>> struct bpf_verifier_env argument.
>>>>>
>>>>> This argument can be safely removed, because its users can
>>>>> use the struct bpf_insn_cbs::private_data to pass it.
>>>>>
>>>>> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
>>>>> ---
>>>>>  kernel/bpf/disasm.c   | 52 +++++++++++++++++++++++++--------------------------
>>>>>  kernel/bpf/disasm.h   |  5 +----
>>>>>  kernel/bpf/verifier.c |  6 +++---
>>>>>  3 files changed, 30 insertions(+), 33 deletions(-)
>>>>
>>>> [...]
>>>>
>>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>>>> index c6eff108aa99..9f27d3fa7259 100644
>>>>> --- a/kernel/bpf/verifier.c
>>>>> +++ b/kernel/bpf/verifier.c
>>>>> @@ -202,8 +202,7 @@ EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
>>>>>   * generic for symbol export. The function was renamed, but not the calls in
>>>>>   * the verifier to avoid complicating backports. Hence the alias below.
>>>>>   */
>>>>> -static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
>>>>> -				   const char *fmt, ...)
>>>>> +static __printf(2, 3) void verbose(void *private_data, const char *fmt, ...)
>>>>>  	__attribute__((alias("bpf_verifier_log_write")));
>>>>
>>>> Just as a note, verbose() will be aliased to a function whose prototype
>>>> differs (bpf_verifier_log_write() still expects a struct
>>>> bpf_verifier_env as its first argument). I am not so familiar with
>>>> function aliases, could this change be a concern?
>>>
>>> yea, but as it was pointer for pointer switch I did not
>>> see any problem with that.. I'll check more
>>
>> Ok, holding off for now until we have clarification. Other option could also
>> be to make it void *private_data everywhere and for the kernel writer then
>> do struct bpf_verifier_env *env = private_data.
> 
> can't find much info about the alias behaviour for this
> case.. so how about having separate function for the
> print_cb like below.. I still need to test it

I have nothing against splitting the function. This is a solution I
considered for verbose() and bpf_verifier_log_write(), before switching
to function alias (on Daniel's suggestion) because the code would be
identical for the two separate functions at that time. But with your
change they would not have the same signature anymore, so it sounds
reasonable. Just a note below...

> 
> thanks,
> jirka
> 
> 
> ---
>  kernel/bpf/disasm.c   | 52 +++++++++++++++++++++++++--------------------------
>  kernel/bpf/disasm.h   |  5 +----
>  kernel/bpf/verifier.c | 41 +++++++++++++++++++++++++++++-----------
>  3 files changed, 57 insertions(+), 41 deletions(-)
> 

[...]

> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index e9f7c20691c1..69bf7590877c 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -168,23 +168,16 @@ struct bpf_call_arg_meta {
>  
>  static DEFINE_MUTEX(bpf_verifier_lock);
>  
> -/* log_level controls verbosity level of eBPF verifier.
> - * bpf_verifier_log_write() is used to dump the verification trace to the log,
> - * so the user can figure out what's wrong with the program
> - */
> -__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
> -					   const char *fmt, ...)
> +static void log_write(struct bpf_verifier_env *env, const char *fmt,
> +		      va_list args)
>  {
>  	struct bpf_verifer_log *log = &env->log;
>  	unsigned int n;
> -	va_list args;
>  
>  	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
>  		return;
>  
> -	va_start(args, fmt);
>  	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
> -	va_end(args);
>  
>  	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
>  		  "verifier log line truncated - local buffer too short\n");
> @@ -197,7 +190,32 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
>  	else
>  		log->ubuf = NULL;
>  }
> +
> +/* log_level controls verbosity level of eBPF verifier.
> + * bpf_verifier_log_write() is used to dump the verification trace to the log,
> + * so the user can figure out what's wrong with the program
> + */
> +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
> +					   const char *fmt, ...)
> +{
> +	va_list args;
> +
> +	va_start(args, fmt);
> +	log_write(env, fmt, args);
> +	va_end(args);
> +}
>  EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
> +
> +__printf(2, 3) static void print_ins(void *private_data,
> +				     const char *fmt, ...)

Unless I am mistaken, you could just call this one "verbose()" and
simply remove the function alias?

> +{
> +	va_list args;
> +
> +	va_start(args, fmt);
> +	log_write(private_data, fmt, args);
> +	va_end(args);
> +}
> +
>  /* Historically bpf_verifier_log_write was called verbose, but the name was too
>   * generic for symbol export. The function was renamed, but not the calls in
>   * the verifier to avoid complicating backports. Hence the alias below.
> @@ -4599,11 +4617,12 @@ static int do_check(struct bpf_verifier_env *env)
>  
>  		if (env->log.level) {
>  			const struct bpf_insn_cbs cbs = {
> -				.cb_print	= verbose,
> +				.cb_print	= print_ins,
> +				.private_data	= env,
>  			};
>  
>  			verbose(env, "%d: ", insn_idx);
> -			print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
> +			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
>  		}
>  
>  		if (bpf_prog_is_dev_bound(env->prog->aux)) {
> 

^ permalink raw reply

* Re: [patch net-next RFC 00/12] devlink: introduce port flavours and common phys_port_name generation
From: David Ahern @ 2018-03-22 15:34 UTC (permalink / raw)
  To: Jiri Pirko, netdev
  Cc: davem, idosch, jakub.kicinski, mlxsw, andrew, vivien.didelot,
	f.fainelli, michael.chan, ganeshgr, saeedm, simon.horman,
	pieter.jansenvanvuuren, john.hurley, dirk.vandermerwe,
	alexander.h.duyck, ogerlitz, vijaya.guvva, satananda.burla,
	raghu.vatsavayi, felix.manlunas, gospo, sathya.perla,
	vasundhara-v.volam, tariqt, eranbe, jeffrey.t.kirsher
In-Reply-To: <20180322105522.8186-1-jiri@resnulli.us>

On 3/22/18 4:55 AM, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> This patchset resolves 2 issues we have right now:
> 1) There are many netdevices / ports in the system, for port, pf, vf
>    represenatation but the user has no way to see which is which
> 2) The ndo_get_phys_port_name is implemented in each driver separatelly,
>    which may lead to inconsistent names between drivers.

Similar to ndo_get_phys_port_{name,id}, devlink requires drivers to opt
in with an implementation right, so you can't really force a solution to
the consistent naming.

> 
> This patchset introduces port flavours which should address the first
> problem. I'm testing this with Netronome nfp hardware. When the user
> has 2 physical ports, 1 pf, and 4 vfs, he should see something like this:
> # devlink port
> pci/0000:05:00.0/0: type eth netdev enp5s0np0 flavour physical number 0
> pci/0000:05:00.0/268435456: type eth netdev eth0 flavour physical number 0
> pci/0000:05:00.0/268435460: type eth netdev enp5s0np1 flavour physical number 1
> pci/0000:05:00.0/536875008: type eth netdev eth2 flavour pf_rep number 536875008
> pci/0000:05:00.0/536870912: type eth netdev eth1 flavour vf_rep number 0
> pci/0000:05:00.0/536870976: type eth netdev eth3 flavour vf_rep number 1
> pci/0000:05:00.0/536871040: type eth netdev eth4 flavour vf_rep number 2
> pci/0000:05:00.0/536871104: type eth netdev eth5 flavour vf_rep number 3

How about 'kind' instead of flavo{u}r?

^ permalink raw reply

* [PATCH net-next] bridge: Allow max MTU when multiple VLANs present
From: Chas Williams @ 2018-03-22 15:34 UTC (permalink / raw)
  To: davem; +Cc: netdev, stephen, Chas Williams

If the bridge is allowing multiple VLANs, some VLANs may have
different MTUs.  Instead of choosing the minimum MTU for the
bridge interface, choose the maximum MTU of the bridge members.
With this the user only needs to set a larger MTU on the member
ports that are participating in the large MTU VLANS.

Signed-off-by: Chas Williams <3chas3@gmail.com>
---
 net/bridge/br.c         |  2 +-
 net/bridge/br_device.c  |  2 +-
 net/bridge/br_if.c      | 26 ++++++++++++++++++++++----
 net/bridge/br_private.h |  2 +-
 4 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 7770481a6506..a3f95ab9d6a3 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 
 	switch (event) {
 	case NETDEV_CHANGEMTU:
-		dev_set_mtu(br->dev, br_min_mtu(br));
+		dev_set_mtu(br->dev, br_mtu(br));
 		break;
 
 	case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 1285ca30ab0a..278fc999d355 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,7 +224,7 @@ static void br_get_stats64(struct net_device *dev,
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	if (new_mtu > br_min_mtu(br))
+	if (new_mtu > br_mtu(br))
 		return -EINVAL;
 
 	dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9ba4ed65c52b..48dc4d2e2be3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -424,8 +424,18 @@ int br_del_bridge(struct net *net, const char *name)
 	return ret;
 }
 
+static bool min_mtu(int a, int b)
+{
+	return a < b ? 1 : 0;
+}
+
+static bool max_mtu(int a, int b)
+{
+	return a > b ? 1 : 0;
+}
+
 /* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-int br_min_mtu(const struct net_bridge *br)
+static int __br_mtu(const struct net_bridge *br, bool (compare_fn)(int, int))
 {
 	const struct net_bridge_port *p;
 	int mtu = 0;
@@ -436,13 +446,21 @@ int br_min_mtu(const struct net_bridge *br)
 		mtu = ETH_DATA_LEN;
 	else {
 		list_for_each_entry(p, &br->port_list, list) {
-			if (!mtu  || p->dev->mtu < mtu)
+			if (!mtu || compare_fn(p->dev->mtu, mtu))
 				mtu = p->dev->mtu;
 		}
 	}
 	return mtu;
 }
 
+int br_mtu(const struct net_bridge *br)
+{
+	if (br->vlan_enabled)
+		return __br_mtu(br, max_mtu);
+	else
+		return __br_mtu(br, min_mtu);
+}
+
 static void br_set_gso_limits(struct net_bridge *br)
 {
 	unsigned int gso_max_size = GSO_MAX_SIZE;
@@ -594,7 +612,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	if (changed_addr)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
 
-	dev_set_mtu(br->dev, br_min_mtu(br));
+	dev_set_mtu(br->dev, br_mtu(br));
 	br_set_gso_limits(br);
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +659,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	 */
 	del_nbp(p);
 
-	dev_set_mtu(br->dev, br_min_mtu(br));
+	dev_set_mtu(br->dev, br_mtu(br));
 	br_set_gso_limits(br);
 
 	spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8e13a64d8c99..048d5b51813b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -578,7 +578,7 @@ int br_del_bridge(struct net *net, const char *name);
 int br_add_if(struct net_bridge *br, struct net_device *dev,
 	      struct netlink_ext_ack *extack);
 int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_min_mtu(const struct net_bridge *br);
+int br_mtu(const struct net_bridge *br);
 netdev_features_t br_features_recompute(struct net_bridge *br,
 					netdev_features_t features);
 void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
-- 
2.13.6

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox