Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH bpf-next 2/7] lib: reciprocal_div: implement the improved algorithm on the paper mentioned
From: Song Liu @ 2018-06-26  6:21 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Alexei Starovoitov, Daniel Borkmann, oss-drivers, Networking,
	Jiong Wang
In-Reply-To: <20180625035421.2991-3-jakub.kicinski@netronome.com>

On Sun, Jun 24, 2018 at 8:54 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> From: Jiong Wang <jiong.wang@netronome.com>
>
> The newly added "reciprocal_value_adv" implements the advanced version of the
> algorithm described in Figure 4.2 of the paper except when the divisor has MSB
> set which would require u128 divide on host and actually could be easily
> handled before calling the new "reciprocal_value_adv".
>
> The advanced version requires more complex calculation to get the
> reciprocal multiplier and other control variables, but then could reduce
> the required emulation operations.
>
> It makes no sense to use this advanced version for host divide emulation,
> those extra complexities for calculating multiplier etc could completely
> waive our saving on emulation operations.
>
> However, it makes sense to use it for JIT divide code generation (for
> example eBPF JIT backends) for which we are willing to trade performance of
> JITed code with that of host. As shown by the following pseudo code, the
> required emulation operations could go down from 6 (the basic version) to 3
> or 4.
>
> To use the result of "reciprocal_value_adv", suppose we want to calculate
> n/d, the C-style pseudo code will be the following, it could be easily
> changed to real code generation for other JIT targets.
>
>   struct reciprocal_value_adv rvalue;
>   u8 pre_shift, exp;
>
>   if (d >= (1u << 31)) {
>     result = n >= d;
>     return;
>   }
>   rvalue = reciprocal_value_adv(d, 32)
>   exp = rvalue.exp;
>   if (rvalue.is_wide_m && !(d & 1)) {
>     pre_shift = fls(d & -d) - 1;
>     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
>   } else {
>     pre_shift = 0;
>   }
>
>   // code generation starts.
>   if (imm == 1 << exp) {
>     result = n >> exp;
>   } else if (rvalue.is_wide_m) {
>     // pre_shift must be zero when reached here.
>     t = (n * rvalue.m) >> 32;
>     result = n - t;
>     result >>= 1;
>     result += t;
>     result >>= rvalue.sh - 1;
>   } else {
>     if (pre_shift)
>       result = n >> pre_shift;
>     result = ((u64)result * rvalue.m) >> 32;
>     result >>= rvalue.sh;
>   }
>
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> ---
>  include/linux/reciprocal_div.h | 65 ++++++++++++++++++++++++++++++++++
>  lib/reciprocal_div.c           | 37 +++++++++++++++++++
>  2 files changed, 102 insertions(+)
>
> diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
> index e031e9f2f9d8..5a695e4697d3 100644
> --- a/include/linux/reciprocal_div.h
> +++ b/include/linux/reciprocal_div.h
> @@ -25,6 +25,9 @@ struct reciprocal_value {
>         u8 sh1, sh2;
>  };
>
> +/* "reciprocal_value" and "reciprocal_divide" together implement the basic
> + * version of the algorithm described in Figure 4.1 of the paper.
> + */
>  struct reciprocal_value reciprocal_value(u32 d);
>
>  static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
> @@ -33,4 +36,66 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
>         return (t + ((a - t) >> R.sh1)) >> R.sh2;
>  }
>
> +struct reciprocal_value_adv {
> +       u32 m;
> +       u8 sh, exp;
> +       bool is_wide_m;
> +};
> +
> +/* "reciprocal_value_adv" implements the advanced version of the algorithm
> + * described in Figure 4.2 of the paper except when dividend has MSB set which
> + * would require u128 divide on host and actually could be easily handled before
> + * calling "reciprocal_value_adv".
> + *
> + * The advanced version requires more complex calculation to get the reciprocal
> + * multiplier and other control variables, but then could reduce the required
> + * emulation operations.
> + *
> + * It makes no sense to use this advanced version for host divide emulation,
> + * those extra complexities for calculating multiplier etc could completely
> + * waive our saving on emulation operations.
> + *
> + * However, it makes sense to use it for JIT divide code generation for which
> + * we are willing to trade performance of JITed code with that of host. As shown
> + * by the following pseudo code, the required emulation operations could go down
> + * from 6 (the basic version) to 3 or 4.
> + *
> + * To use the result of "reciprocal_value_adv", suppose we want to calculate
> + * n/d:
> + *
> + *   struct reciprocal_value_adv rvalue;
> + *   u8 pre_shift, exp;
> + *
> + *   if (d >= (1u << 31)) {
> + *     result = n >= d;
> + *     return;
> + *   }
> + *   rvalue = reciprocal_value_adv(d, 32)
> + *   exp = rvalue.exp;
> + *   if (rvalue.is_wide_m && !(d & 1)) {
> + *     pre_shift = fls(d & -d) - 1;
> + *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
> + *   } else {
> + *     pre_shift = 0;
> + *   }
> + *
> + *   // code generation starts.
> + *   if (imm == 1 << exp) {
> + *     result = n >> exp;
> + *   } else if (rvalue.is_wide_m) {
> + *     // pre_shift must be zero when reached here.
> + *     t = (n * rvalue.m) >> 32;
> + *     result = n - t;
> + *     result >>= 1;
> + *     result += t;
> + *     result >>= rvalue.sh - 1;
> + *   } else {
> + *     if (pre_shift)
> + *       result = n >> pre_shift;
> + *     result = ((u64)result * rvalue.m) >> 32;
> + *     result >>= rvalue.sh;
> + *   }
> + */
> +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
> +
>  #endif /* _LINUX_RECIPROCAL_DIV_H */
> diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
> index fcb4ce682c6f..a41501ebad7c 100644
> --- a/lib/reciprocal_div.c
> +++ b/lib/reciprocal_div.c
> @@ -26,3 +26,40 @@ struct reciprocal_value reciprocal_value(u32 d)
>         return R;
>  }
>  EXPORT_SYMBOL(reciprocal_value);
> +
> +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec)
> +{
> +       struct reciprocal_value_adv R;
> +       u32 l, post_shift;
> +       u64 mhigh, mlow;
> +
> +       l = fls(d - 1);
> +       post_shift = l;
> +       /* NOTE: mlow/mhigh could overflow u64 when l == 32 which means d has
> +        * MSB set. This case needs to be handled before calling
> +        * "reciprocal_value_adv", please see the comment at
> +        * include/linux/reciprocal_div.h.
> +        */

Shall we handle the l == 32 case better? I guess the concern here is that
extra handling may slow down the fast path? If that's the case, we should at
least add a WARNING on the slow path.

Thanks,
Song


> +       mlow = 1ULL << (32 + l);
> +       do_div(mlow, d);
> +       mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
> +       do_div(mhigh, d);
> +
> +       for (; post_shift > 0; post_shift--) {
> +               u64 lo = mlow >> 1, hi = mhigh >> 1;
> +
> +               if (lo >= hi)
> +                       break;
> +
> +               mlow = lo;
> +               mhigh = hi;
> +       }
> +
> +       R.m = (u32)mhigh;
> +       R.sh = post_shift;
> +       R.exp = l;
> +       R.is_wide_m = mhigh > U32_MAX;
> +
> +       return R;
> +}
> +EXPORT_SYMBOL(reciprocal_value_adv);
> --
> 2.17.1
>

^ permalink raw reply

* Re: [PATCH bpf-next 3/7] nfp: bpf: rename umin/umax to umin_src/umax_src
From: Song Liu @ 2018-06-26  6:21 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Alexei Starovoitov, Daniel Borkmann, oss-drivers, Networking,
	Jiong Wang
In-Reply-To: <20180625035421.2991-4-jakub.kicinski@netronome.com>

On Sun, Jun 24, 2018 at 8:54 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> From: Jiong Wang <jiong.wang@netronome.com>
>
> The two fields are a copy of umin and umax info of bpf_insn->src_reg
> generated by verifier.
>
> Rename to make their meaning clear.
>
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  drivers/net/ethernet/netronome/nfp/bpf/jit.c      | 12 ++++++------
>  drivers/net/ethernet/netronome/nfp/bpf/main.h     | 10 +++++-----
>  drivers/net/ethernet/netronome/nfp/bpf/offload.c  |  2 +-
>  drivers/net/ethernet/netronome/nfp/bpf/verifier.c |  4 ++--
>  4 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> index 33111739b210..4a629e9b5c0f 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
> @@ -1772,8 +1772,8 @@ static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>         u8 dst, src;
>
>         dst = insn->dst_reg * 2;
> -       umin = meta->umin;
> -       umax = meta->umax;
> +       umin = meta->umin_src;
> +       umax = meta->umax_src;
>         if (umin == umax)
>                 return __shl_imm64(nfp_prog, dst, umin);
>
> @@ -1881,8 +1881,8 @@ static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>         u8 dst, src;
>
>         dst = insn->dst_reg * 2;
> -       umin = meta->umin;
> -       umax = meta->umax;
> +       umin = meta->umin_src;
> +       umax = meta->umax_src;
>         if (umin == umax)
>                 return __shr_imm64(nfp_prog, dst, umin);
>
> @@ -1995,8 +1995,8 @@ static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>         u8 dst, src;
>
>         dst = insn->dst_reg * 2;
> -       umin = meta->umin;
> -       umax = meta->umax;
> +       umin = meta->umin_src;
> +       umax = meta->umax_src;
>         if (umin == umax)
>                 return __ashr_imm64(nfp_prog, dst, umin);
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> index 654fe7823e5e..5975a19c28cb 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> @@ -263,8 +263,8 @@ struct nfp_bpf_reg_state {
>   * @func_id: function id for call instructions
>   * @arg1: arg1 for call instructions
>   * @arg2: arg2 for call instructions
> - * @umin: copy of core verifier umin_value.
> - * @umax: copy of core verifier umax_value.
> + * @umin_src: copy of core verifier umin_value for src opearnd.
> + * @umax_src: copy of core verifier umax_value for src operand.
>   * @off: index of first generated machine instruction (in nfp_prog.prog)
>   * @n: eBPF instruction number
>   * @flags: eBPF instruction extra optimization flags
> @@ -301,11 +301,11 @@ struct nfp_insn_meta {
>                         struct nfp_bpf_reg_state arg2;
>                 };
>                 /* We are interested in range info for some operands,
> -                * for example, the shift amount.
> +                * for example, the shift amount which is kept in src operand.
>                  */
>                 struct {
> -                       u64 umin;
> -                       u64 umax;
> +                       u64 umin_src;
> +                       u64 umax_src;
>                 };
>         };
>         unsigned int off;
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> index 7eae4c0266f8..856a0003bb75 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> @@ -191,7 +191,7 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
>                 meta->insn = prog[i];
>                 meta->n = i;
>                 if (is_mbpf_indir_shift(meta))
> -                       meta->umin = U64_MAX;
> +                       meta->umin_src = U64_MAX;
>
>                 list_add_tail(&meta->l, &nfp_prog->insns);
>         }
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> index 4bfeba7b21b2..e862b739441f 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> @@ -555,8 +555,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
>                 const struct bpf_reg_state *sreg =
>                         cur_regs(env) + meta->insn.src_reg;
>
> -               meta->umin = min(meta->umin, sreg->umin_value);
> -               meta->umax = max(meta->umax, sreg->umax_value);
> +               meta->umin_src = min(meta->umin_src, sreg->umin_value);
> +               meta->umax_src = max(meta->umax_src, sreg->umax_value);
>         }
>
>         return 0;
> --
> 2.17.1
>

^ permalink raw reply

* Re: [PATCH net-next] net: preserve sock reference when scrubbing the skb.
From: Eric Dumazet @ 2018-06-26  6:41 UTC (permalink / raw)
  To: Cong Wang, Flavio Leitner
  Cc: Linux Kernel Network Developers, Eric Dumazet, Paolo Abeni,
	David Miller, Florian Westphal, NetFilter
In-Reply-To: <CAM_iQpX_X4hhSSrhMZavLibobp6tgMEa_26T6j4QvACKg-HPvw@mail.gmail.com>



On 06/25/2018 09:15 PM, Cong Wang wrote:
> On Mon, Jun 25, 2018 at 8:59 AM Flavio Leitner <fbl@redhat.com> wrote:
>>
>> The sock reference is lost when scrubbing the packet and that breaks
>> TSQ (TCP Small Queues) and XPS (Transmit Packet Steering) causing
>> performance impacts of about 50% in a single TCP stream when crossing
>> network namespaces.
>>
>> XPS breaks because the queue mapping stored in the socket is not
>> available, so another random queue might be selected when the stack
>> needs to transmit something like a TCP ACK, or TCP Retransmissions.
>> That causes packet re-ordering and/or performance issues.
>>
>> TSQ breaks because it orphans the packet while it is still in the
>> host, so packets are queued contributing to the buffer bloat problem.
> 
> Why should TSQ in one stack care about buffer bloat in another stack?
> 
> Actually, I think the current behavior is correct, once the packet leaves
> its current stack (or netns), it should relieve the backpressure on TCP
> socket in this stack, whether it will be queued in another stack is beyond
> its concern. This breaks the isolation between networking stacks.
> 

We discussed this during netconf, Cong; nobody was against this planned removal.

When a packet is attached to a socket, we should keep the association as much as possible.

Only when a new association needs to be done, skb_orphan() needs to be called.

Doing this skb_orphan() too soon breaks back pressure in general. This is bad, since a socket
can evade SO_SNDBUF limits.

I am not sure why the patch is so complex, I would have simply removed the skb_orphan().

^ permalink raw reply

* Re: [patch net-next 6/9] net: sched: cls_flower: propagate chain teplate creation and destruction to drivers
From: Jiri Pirko @ 2018-06-26  6:40 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, davem, jhs, xiyou.wangcong, simon.horman, john.hurley,
	dsahern, mlxsw
In-Reply-To: <20180625220050.0ff6d44c@cakuba.netronome.com>

Tue, Jun 26, 2018 at 07:00:50AM CEST, jakub.kicinski@netronome.com wrote:
>On Mon, 25 Jun 2018 23:01:45 +0200, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> Introduce a couple of flower offload commands in order to propagate
>> template creation/destruction events down to device drivers.
>> Drivers may use this information to prepare HW in an optimal way
>> for future filter insertions.
>> 
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>
>> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
>> index d64d43843a3a..276ba25a09c3 100644
>> --- a/net/sched/cls_flower.c
>> +++ b/net/sched/cls_flower.c
>> @@ -1120,6 +1120,43 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
>>  	}
>>  }
>>  
>> +static void fl_hw_create_tmplt(struct tcf_chain *chain,
>> +			       struct fl_flow_tmplt *tmplt,
>> +			       struct netlink_ext_ack *extack)
>> +{
>> +	struct tc_cls_flower_offload cls_flower = {};
>> +	struct tcf_block *block = chain->block;
>> +	struct tcf_exts dummy_exts = { 0, };
>> +
>> +	cls_flower.common.chain_index = chain->index;
>
>Did you skip extack on purpose?

Oh, the extack is leftover. I will remove it in v2.

>
>> +	cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
>> +	cls_flower.cookie = (unsigned long) tmplt;
>> +	cls_flower.dissector = &tmplt->dissector;
>> +	cls_flower.mask = &tmplt->mask;
>> +	cls_flower.key = &tmplt->dummy_key;
>> +	cls_flower.exts = &dummy_exts;
>> +
>> +	/* We don't care if driver (any of them) fails to handle this
>> +	 * call. It serves just as a hint for it.
>> +	 */
>> +	tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
>> +			 &cls_flower, false);
>> +}

^ permalink raw reply

* Re: [PATCH] NFC: llcp: fix nfc_llcp_send_ui_frame() lockup
From: Eric Dumazet @ 2018-06-26  6:44 UTC (permalink / raw)
  To: Sergey Senozhatsky, Dmitry Vyukov
  Cc: Samuel Ortiz, David S. Miller, Steven Rostedt, Petr Mladek,
	syzkaller-bugs, linux-wireless, netdev, LKML, syzbot,
	Sergey Senozhatsky
In-Reply-To: <20180626051221.GC31439@jagdpanzerIV>



On 06/25/2018 10:12 PM, Sergey Senozhatsky wrote:
> On (06/26/18 07:07), Dmitry Vyukov wrote:
> [..]
>>>  #include <net/nfc/nfc.h>
>>> @@ -755,7 +756,8 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
>>>                 pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT,
>>>                                          frag_len + LLCP_HEADER_SIZE, &err);
>>>                 if (pdu == NULL) {
>>> -                       pr_err("Could not allocate PDU\n");
>>> +                       pr_err_ratelimited("Could not allocate PDU\n");
>>> +                       cond_resched();
>>>                         continue;
>>>                 }
>>
>>
>> But this thread is still in an infinite (unkillable?) loop? If yes, we
>> are waiting for the next syzbot report ;)
> 
> The loop is still infinite, correct, but we have a preemption point now.
> Sure, net people can come with a much better solution, I'll be happy to
> scratch my patch.
> 

This can not be the right solution, think about current thread being real time,
cond_resched() might be a nop.

We should probably not loop at all, or not use MSG_DONTWAIT.

(And remove this useless "Could not allocate PDU" message)

NFC maintainers should really take a look at this.

^ permalink raw reply

* Re: [patch net-next 0/9] net: sched: introduce chain templates support with offloading to mlxsw
From: Jiri Pirko @ 2018-06-26  6:43 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, davem, jhs, xiyou.wangcong, simon.horman, john.hurley,
	dsahern, mlxsw
In-Reply-To: <20180625215850.001276b8@cakuba.netronome.com>

Tue, Jun 26, 2018 at 06:58:50AM CEST, jakub.kicinski@netronome.com wrote:
>On Mon, 25 Jun 2018 23:01:39 +0200, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> For the TC clsact offload these days, some of HW drivers need
>> to hold a magic ball. The reason is, with the first inserted rule inside
>> HW they need to guess what fields will be used for the matching. If
>> later on this guess proves to be wrong and user adds a filter with a
>> different field to match, there's a problem. Mlxsw resolves it now with
>> couple of patterns. Those try to cover as many match fields as possible.
>> This approach is far from optimal, both performance-wise and scale-wise.
>> Also, there is a combination of filters that in certain order won't
>> succeed.
>> 
>> Most of the time, when user inserts filters in chain, he knows right away
>> how the filters are going to look like - what type and option will they
>> have. For example, he knows that he will only insert filters of type
>> flower matching destination IP address. He can specify a template that
>> would cover all the filters in the chain.
>
>Perhaps it's lack of sleep, but this paragraph threw me a little off
>the track.  IIUC the goal of this set is to provide a way to inform the
>HW about expected matches before any rule is programmed into the HW.
>Not before any rule is added to a particular chain.  One can just use
>the first rule in the chain to make a guess about the chain, but thanks
>to this set user can configure *all* chains before any rules are added.

The template is per-chain. User can use template for chain x and
not-use it for chain y. Up to him.

>
>And that's needed because once any rule is added the tcam config can no
>longer be easily modified?

Yes.

^ permalink raw reply

* Re: [PATCH bpf-next 4/7] nfp: bpf: copy range info for all operands of all ALU operations
From: Song Liu @ 2018-06-26  6:50 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Alexei Starovoitov, Daniel Borkmann, oss-drivers, Networking,
	Jiong Wang
In-Reply-To: <20180625035421.2991-5-jakub.kicinski@netronome.com>

On Sun, Jun 24, 2018 at 8:54 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> From: Jiong Wang <jiong.wang@netronome.com>
>
> NFP verifier hook is copying range information of the shift amount for
> indirect shift operation so optimized shift sequences could be generated.
>
> We want to use range info to do more things. For example, to decide whether
> multiplication and divide are supported on the given range.
>
> This patch simply lets the NFP verifier hook copy range info for all operands
> of all ALU operations.
>
> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Acked-by: Song Liu <songliubraving@fb.com>

> ---
>  drivers/net/ethernet/netronome/nfp/bpf/main.h | 33 +++++++------------
>  .../net/ethernet/netronome/nfp/bpf/offload.c  |  4 ++-
>  .../net/ethernet/netronome/nfp/bpf/verifier.c |  6 +++-
>  3 files changed, 20 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> index 5975a19c28cb..c985d0ac61a3 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
> @@ -265,6 +265,8 @@ struct nfp_bpf_reg_state {
>   * @arg2: arg2 for call instructions
>   * @umin_src: copy of core verifier umin_value for src opearnd.
>   * @umax_src: copy of core verifier umax_value for src operand.
> + * @umin_dst: copy of core verifier umin_value for dst opearnd.
> + * @umax_dst: copy of core verifier umax_value for dst operand.
>   * @off: index of first generated machine instruction (in nfp_prog.prog)
>   * @n: eBPF instruction number
>   * @flags: eBPF instruction extra optimization flags
> @@ -300,12 +302,15 @@ struct nfp_insn_meta {
>                         struct bpf_reg_state arg1;
>                         struct nfp_bpf_reg_state arg2;
>                 };
> -               /* We are interested in range info for some operands,
> -                * for example, the shift amount which is kept in src operand.
> +               /* We are interested in range info for operands of ALU
> +                * operations. For example, shift amount, multiplicand and
> +                * multiplier etc.
>                  */
>                 struct {
>                         u64 umin_src;
>                         u64 umax_src;
> +                       u64 umin_dst;
> +                       u64 umax_dst;
>                 };
>         };
>         unsigned int off;
> @@ -339,6 +344,11 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
>         return BPF_MODE(meta->insn.code);
>  }
>
> +static inline bool is_mbpf_alu(const struct nfp_insn_meta *meta)
> +{
> +       return mbpf_class(meta) == BPF_ALU64 || mbpf_class(meta) == BPF_ALU;
> +}
> +
>  static inline bool is_mbpf_load(const struct nfp_insn_meta *meta)
>  {
>         return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM);
> @@ -384,25 +394,6 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
>         return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
>  }
>
> -static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
> -{
> -       u8 code = meta->insn.code;
> -       bool is_alu, is_shift;
> -       u8 opclass, opcode;
> -
> -       opclass = BPF_CLASS(code);
> -       is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
> -       if (!is_alu)
> -               return false;
> -
> -       opcode = BPF_OP(code);
> -       is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
> -       if (!is_shift)
> -               return false;
> -
> -       return BPF_SRC(code) == BPF_X;
> -}
> -
>  /**
>   * struct nfp_prog - nfp BPF program
>   * @bpf: backpointer to the bpf app priv structure
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> index 856a0003bb75..78f44c4d95b4 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
> @@ -190,8 +190,10 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
>
>                 meta->insn = prog[i];
>                 meta->n = i;
> -               if (is_mbpf_indir_shift(meta))
> +               if (is_mbpf_alu(meta)) {
>                         meta->umin_src = U64_MAX;
> +                       meta->umin_dst = U64_MAX;
> +               }
>
>                 list_add_tail(&meta->l, &nfp_prog->insns);
>         }
> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> index e862b739441f..7bd9666bd8ff 100644
> --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
> @@ -551,12 +551,16 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
>         if (is_mbpf_xadd(meta))
>                 return nfp_bpf_check_xadd(nfp_prog, meta, env);
>
> -       if (is_mbpf_indir_shift(meta)) {
> +       if (is_mbpf_alu(meta)) {
>                 const struct bpf_reg_state *sreg =
>                         cur_regs(env) + meta->insn.src_reg;
> +               const struct bpf_reg_state *dreg =
> +                       cur_regs(env) + meta->insn.dst_reg;
>
>                 meta->umin_src = min(meta->umin_src, sreg->umin_value);
>                 meta->umax_src = max(meta->umax_src, sreg->umax_value);
> +               meta->umin_dst = min(meta->umin_dst, dreg->umin_value);
> +               meta->umax_dst = max(meta->umax_dst, dreg->umax_value);
>         }
>
>         return 0;
> --
> 2.17.1
>

^ permalink raw reply

* Re: [PATCH] NFC: llcp: fix nfc_llcp_send_ui_frame() lockup
From: Sergey Senozhatsky @ 2018-06-26  7:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Sergey Senozhatsky, Dmitry Vyukov, Samuel Ortiz, David S. Miller,
	Steven Rostedt, Petr Mladek, syzkaller-bugs, linux-wireless,
	netdev, LKML, syzbot, Sergey Senozhatsky
In-Reply-To: <8c410102-43ab-dfdb-0d71-2ee5951e1af8@gmail.com>

On (06/25/18 23:44), Eric Dumazet wrote:
> > The loop is still infinite, correct, but we have a preemption point now.
> > Sure, net people can come with a much better solution, I'll be happy to
> > scratch my patch.
> > 
> 
> This can not be the right solution, think about current thread being real time,
> cond_resched() might be a nop.
> NFC maintainers should really take a look at this.

I'm all for it.

	-ss

^ permalink raw reply

* Re: [patch net-next 0/9] net: sched: introduce chain templates support with offloading to mlxsw
From: Jakub Kicinski @ 2018-06-26  7:00 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: Linux Netdev List, David Miller, Jamal Hadi Salim, Cong Wang,
	Simon Horman, John Hurley, David Ahern, mlxsw
In-Reply-To: <20180626064355.GQ2161@nanopsycho>

On Mon, Jun 25, 2018 at 11:43 PM, Jiri Pirko <jiri@resnulli.us> wrote:
> Tue, Jun 26, 2018 at 06:58:50AM CEST, jakub.kicinski@netronome.com wrote:
>>On Mon, 25 Jun 2018 23:01:39 +0200, Jiri Pirko wrote:
>>> From: Jiri Pirko <jiri@mellanox.com>
>>>
>>> For the TC clsact offload these days, some of HW drivers need
>>> to hold a magic ball. The reason is, with the first inserted rule inside
>>> HW they need to guess what fields will be used for the matching. If
>>> later on this guess proves to be wrong and user adds a filter with a
>>> different field to match, there's a problem. Mlxsw resolves it now with
>>> couple of patterns. Those try to cover as many match fields as possible.
>>> This approach is far from optimal, both performance-wise and scale-wise.
>>> Also, there is a combination of filters that in certain order won't
>>> succeed.
>>>
>>> Most of the time, when user inserts filters in chain, he knows right away
>>> how the filters are going to look like - what type and option will they
>>> have. For example, he knows that he will only insert filters of type
>>> flower matching destination IP address. He can specify a template that
>>> would cover all the filters in the chain.
>>
>>Perhaps it's lack of sleep, but this paragraph threw me a little off
>>the track.  IIUC the goal of this set is to provide a way to inform the
>>HW about expected matches before any rule is programmed into the HW.
>>Not before any rule is added to a particular chain.  One can just use
>>the first rule in the chain to make a guess about the chain, but thanks
>>to this set user can configure *all* chains before any rules are added.
>
> The template is per-chain. User can use template for chain x and
> not-use it for chain y. Up to him.

Makes sense.

I can't help but wonder if it'd be better to associate the
constraints/rules with chains instead of creating a new "template"
object.  It seems more natural to create a chain with specific
constraints in place than add and delete template of which there can
be at most one to a chain...  Perhaps that's more about the user space
tc command line.  Anyway, not a strong objection, just a thought.

>>And that's needed because once any rule is added the tcam config can no
>>longer be easily modified?
>
> Yes.

^ permalink raw reply

* RE: [PATCH] fman: don't set node on dpaa-ethernet platform device
From: Madalin-cristian Bucur @ 2018-06-26  7:00 UTC (permalink / raw)
  To: David Miller, bas@daedalean.ai
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20180623.102724.1590504977410505871.davem@davemloft.net>

> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Saturday, June 23, 2018 4:27 AM
> To: bas@daedalean.ai
> Cc: netdev@vger.kernel.org; Madalin-cristian Bucur <madalin.bucur@nxp.com>;
> linux-kernel@vger.kernel.org
> Subject: Re: [PATCH] fman: don't set node on dpaa-ethernet platform device
> 
> From: Bas Vermeulen <bas@daedalean.ai>
> Date: Thu, 21 Jun 2018 13:42:22 +0200
> 
> > Setting dev->node to the mac_node in dpaa_eth_add_device during probe
> > causes the mac_probe to be called again for the dpaa-ethernet.* device
> > that was just added.
> >
> > Fix this by not setting dev->node, as it is not needed.
> >
> > Signed-off-by: Bas Vermeulen <bas@daedalean.ai>
> 
> This patch doesn't apply to the current sources.

Hi,

The line the patch is trying to remove was added in this commit

	commit a1a50c8e4c241a505b7270e1a3c6e50d94e794b1
	Author: Florian Fainelli <f.fainelli@gmail.com>
	Date:   Tue Aug 22 15:24:47 2017 -0700

	    fsl/man: Inherit parent device and of_node

and was already removed in this commit

	commit 48167c9ce0b91c068430345bf039c7be23fa2f3f
	Author: Madalin Bucur <madalin.bucur@nxp.com>
	Date:   Mon Oct 16 21:36:05 2017 +0300

	    fsl/fman: remove of_node

Regards,
Madalin

^ permalink raw reply

* Re: Financial Aid
From: M. M Fridman @ 2018-06-26  6:58 UTC (permalink / raw)




-- 
I Mikhail Fridman. has selected you specially as one of my beneficiaries
for my Charitable Donation, Just as I have declared on May 23, 2016 to give
my fortune as charity.

Reply as soon as possible with further directives.

Best Regards,
Mikhail Fridman.

^ permalink raw reply

* Re: [PATCH bpf-next 1/7] nfp: bpf: allow source ptr type be map ptr in memcpy optimization
From: Jakub Kicinski @ 2018-06-26  7:08 UTC (permalink / raw)
  To: Song Liu
  Cc: Alexei Starovoitov, Daniel Borkmann, oss-drivers, Networking,
	Jiong Wang
In-Reply-To: <CAPhsuW4yqanLp6WEYihwMwKny8VxZQTa3G-+nOLu_e4peokNGQ@mail.gmail.com>

On Mon, Jun 25, 2018 at 10:50 PM, Song Liu <liu.song.a23@gmail.com> wrote:
> On Sun, Jun 24, 2018 at 8:54 PM, Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
>> From: Jiong Wang <jiong.wang@netronome.com>
>>
>> Map read has been supported on NFP, this patch enables optimization for
>> memcpy from map to packet.
>>
>> This patch also fixed one latent bug which will cause copying from
>> unexpected address once memcpy for map pointer enabled.
>>
>> Reported-by: Mary Pham <mary.pham@netronome.com>
>> Reported-by: David Beckett <david.beckett@netronome.com>
>> Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
>> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
>> ---
>>  drivers/net/ethernet/netronome/nfp/bpf/jit.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
>> index 8a92088df0d7..33111739b210 100644
>> --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
>> +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
>> @@ -670,7 +670,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
>>         xfer_num = round_up(len, 4) / 4;
>>
>>         if (src_40bit_addr)
>> -               addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
>> +               addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
>>                               &off);
>
> Did this break other cases before this patch?
>
> I am sorry if this is a dumb question. I don't think I fully
> understand addr40_offset().

Only map memory uses 40 bit addressing right now, so the if was pretty
much dead code before the patch.

The memcpy optimization was left out of the initial map support due to
insufficient test coverage, I should have probably left more of the 40
bit addressing code out back then.

^ permalink raw reply

* Re: [patch net-next 0/9] net: sched: introduce chain templates support with offloading to mlxsw
From: Jiri Pirko @ 2018-06-26  7:12 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Linux Netdev List, David Miller, Jamal Hadi Salim, Cong Wang,
	Simon Horman, John Hurley, David Ahern, mlxsw
In-Reply-To: <CAJpBn1xigrmrHdfP4itPCPSnfkJVi3H3513K5KO=ZJEAnRUgNQ@mail.gmail.com>

Tue, Jun 26, 2018 at 09:00:45AM CEST, jakub.kicinski@netronome.com wrote:
>On Mon, Jun 25, 2018 at 11:43 PM, Jiri Pirko <jiri@resnulli.us> wrote:
>> Tue, Jun 26, 2018 at 06:58:50AM CEST, jakub.kicinski@netronome.com wrote:
>>>On Mon, 25 Jun 2018 23:01:39 +0200, Jiri Pirko wrote:
>>>> From: Jiri Pirko <jiri@mellanox.com>
>>>>
>>>> For the TC clsact offload these days, some of HW drivers need
>>>> to hold a magic ball. The reason is, with the first inserted rule inside
>>>> HW they need to guess what fields will be used for the matching. If
>>>> later on this guess proves to be wrong and user adds a filter with a
>>>> different field to match, there's a problem. Mlxsw resolves it now with
>>>> couple of patterns. Those try to cover as many match fields as possible.
>>>> This approach is far from optimal, both performance-wise and scale-wise.
>>>> Also, there is a combination of filters that in certain order won't
>>>> succeed.
>>>>
>>>> Most of the time, when user inserts filters in chain, he knows right away
>>>> how the filters are going to look like - what type and option will they
>>>> have. For example, he knows that he will only insert filters of type
>>>> flower matching destination IP address. He can specify a template that
>>>> would cover all the filters in the chain.
>>>
>>>Perhaps it's lack of sleep, but this paragraph threw me a little off
>>>the track.  IIUC the goal of this set is to provide a way to inform the
>>>HW about expected matches before any rule is programmed into the HW.
>>>Not before any rule is added to a particular chain.  One can just use
>>>the first rule in the chain to make a guess about the chain, but thanks
>>>to this set user can configure *all* chains before any rules are added.
>>
>> The template is per-chain. User can use template for chain x and
>> not-use it for chain y. Up to him.
>
>Makes sense.
>
>I can't help but wonder if it'd be better to associate the
>constraints/rules with chains instead of creating a new "template"
>object.  It seems more natural to create a chain with specific
>constraints in place than add and delete template of which there can
>be at most one to a chain...  Perhaps that's more about the user space
>tc command line.  Anyway, not a strong objection, just a thought.

Hmm. I don't think it is a good idea. User should see the template in a
"show" command per chain. We would have to have 2 show commands, one to
list the template objects and one to list templates per chains. It makes
things more complicated for no good reason. I think that this simple
chain-lock is easier and serves the purpose.

>
>>>And that's needed because once any rule is added the tcam config can no
>>>longer be easily modified?
>>
>> Yes.

^ permalink raw reply

* Re: [PATCH v2 net] nfp: cast sizeof() to int when comparing with error code
From: Julia Lawall @ 2018-06-26  7:21 UTC (permalink / raw)
  To: Joe Perches
  Cc: Chengguang Xu, jakub.kicinski, davem, LKML, cocci, oss-drivers,
	netdev, Dmitry Torokhov, linux-input, linux-s390
In-Reply-To: <ff0b2834b771a467246b9f7c1b4600c38f5bb463.camel@perches.com>



On Mon, 25 Jun 2018, Joe Perches wrote:

> On Tue, 2018-06-26 at 09:16 +0800, Chengguang Xu wrote:
> > sizeof() will return unsigned value so in the error check
> > negative error code will be always larger than sizeof().
>
> This looks like a general class of error in the kernel
> where a signed result that could be returning a -errno
> is tested against < or <= sizeof()
>
> A couple examples:
>
> drivers/input/mouse/elan_i2c_smbus.c:
>
> 		len = i2c_smbus_read_block_data(client,
> 						ETP_SMBUS_IAP_PASSWORD_READ,
> 						val);
> 		if (len < sizeof(u16)) {
>
> i2c_smbus_read_block_data can return a negative errno
>
>
> net/smc/smc_clc.c:
>
> 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
> 			     sizeof(struct smc_clc_msg_decline));
> 	if (len < sizeof(struct smc_clc_msg_decline))
>
> where kernel_sendmsg can return a negative errno
>
> There are probably others, I didn't look hard.
>
> Perhaps a cocci script to find these could be generated?

Currently there is a rule for comparison of unsigneds to 0.  It would be
reasonable to extend it for sizes.  I will see what it gives.

julia

^ permalink raw reply

* Re: [PATCH 1/2] sh_eth: fix *enum* RPADIR_BIT
From: Geert Uytterhoeven @ 2018-06-26  7:25 UTC (permalink / raw)
  To: Sergei Shtylyov; +Cc: netdev, David S. Miller, Linux-Renesas
In-Reply-To: <8c72d27f-8b1a-23cf-3f41-781944cd1388@cogentembedded.com>

Hi Sergei,

On Mon, Jun 25, 2018 at 10:37 PM Sergei Shtylyov
<sergei.shtylyov@cogentembedded.com> wrote:
> The *enum*  RPADIR_BIT  was declared in the commit 86a74ff21a7a ("net:
> sh_eth: add support for Renesas SuperH Ethernet") adding SH771x support,
> however the SH771x manual doesn't have the RPADIR register described and,
> moreover, tells why the padding insertion must not be used. The newer SoC
> manuals do have RPADIR documented, though with somewhat different layout --
> update the *enum* according to these manuals...
>
> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

Thanks for your patch!

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>

> --- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
> +++ net-next/drivers/net/ethernet/renesas/sh_eth.h
> @@ -403,8 +403,7 @@ enum DESC_I_BIT {
>
>  /* RPADIR */
>  enum RPADIR_BIT {
> -       RPADIR_PADS1 = 0x20000, RPADIR_PADS0 = 0x10000,
> -       RPADIR_PADR = 0x0003f,
> +       RPADIR_PADS = 0x1f0000, RPADIR_PADR = 0xffff,

Perhaps add some comments?

        RPADIR_PADS = 0x1f0000; /* Padding Size (insert N bytes of padding) */
        RPADIR_PADR = 0xffff;   /* Padding Slot (insert padding at byte N) */
>  };

Note that none of the RPADIR enums are actually used.

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH 2/2] sh_eth: remove sh_eth_cpu_data::rpadir_value
From: Geert Uytterhoeven @ 2018-06-26  7:25 UTC (permalink / raw)
  To: Sergei Shtylyov; +Cc: netdev, David S. Miller, Linux-Renesas
In-Reply-To: <d2b67329-7bed-80e2-76e8-d0514f8545ae@cogentembedded.com>

On Mon, Jun 25, 2018 at 10:38 PM Sergei Shtylyov
<sergei.shtylyov@cogentembedded.com> wrote:
> If RPADIR exists, the value written to it is always the same for all SoCs
> (and derived from NET_IP_ALIGN), so there has not  been any need to store
> it in the *struct* sh_eth_cpu_data...
>
> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH 00/14] ARM: davinci: step towards removing at24_platform_data
From: Bartosz Golaszewski @ 2018-06-26  7:44 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman, Linux ARM, Linux Kernel Mailing List,
	linux-omap, netdev, Bartosz Golaszewski
In-Reply-To: <20180625180237.GC17417@lunn.ch>

2018-06-25 20:02 GMT+02:00 Andrew Lunn <andrew@lunn.ch>:
>> With my patch 1/14 you'll get -EPROBE_DEFER from nvmem_cell_get() if
>> the nvmem provider is not yet registered. Will that help in your case?
>
> I don't think so. My driver instantiates the AT24 device. So if i get
> -EPROBE_DEFER, i need to cleanup the probe, and return -EPROBE_DEFER
> to the code. Which means i need to remove the AT24 device...
>
>        Andrew

Are you sure this is the correct approach? I understand that you want
to use something like board files for your machine? Wouldn't it be
better to register a platform device for at24, register a cell lookup
and then depend on that cell from your driver?

Would you mind describing your platform in detail a bit more?

Best regards,
Bartosz

^ permalink raw reply

* [patch net-next v2 0/9] net: sched: introduce chain templates support with offloading to mlxsw
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw

From: Jiri Pirko <jiri@mellanox.com>

For the TC clsact offload these days, some of HW drivers need
to hold a magic ball. The reason is, with the first inserted rule inside
HW they need to guess what fields will be used for the matching. If
later on this guess proves to be wrong and user adds a filter with a
different field to match, there's a problem. Mlxsw resolves it now with
couple of patterns. Those try to cover as many match fields as possible.
This approach is far from optimal, both performance-wise and scale-wise.
Also, there is a combination of filters that in certain order won't
succeed.

Most of the time, when user inserts filters in chain, he knows right away
how the filters are going to look like - what type and option will they
have. For example, he knows that he will only insert filters of type
flower matching destination IP address. He can specify a template that
would cover all the filters in the chain.

This patchset makes it possible for the user to provide such a
template to the kernel and propagates it all the way down to device
drivers.

See the examples below.

Create dummy device with clsact first:
# ip link add type dummy
# tc qdisc add dev dummy0 clsact

There is no template assigned by default:
# tc filter template show dev dummy0 ingress

Add a template of type flower allowing to insert rules matching on last
2 bytes of destination mac address:
# tc filter template add dev dummy0 ingress proto ip flower dst_mac 00:00:00:00:00:00/00:00:00:00:FF:FF

The template is now shown in the list:
# tc filter template show dev dummy0 ingress
filter flower chain 0
  dst_mac 00:00:00:00:00:00/00:00:00:00:ff:ff
  eth_type ipv4

Add another template, this time for chain number 22:
# tc filter template add dev dummy0 ingress proto ip chain 22 flower dst_ip 0.0.0.0/16
# tc filter template show dev dummy0 ingress
filter flower chain 0
  dst_mac 00:00:00:00:00:00/00:00:00:00:ff:ff
  eth_type ipv4
filter flower chain 22
  eth_type ipv4
  dst_ip 0.0.0.0/16

Add a filter that fits the template:
# tc filter add dev dummy0 ingress proto ip flower dst_mac aa:bb:cc:dd:ee:ff/00:00:00:00:00:0F action drop

Addition of filters that do not fit the template would fail:
# tc filter add dev dummy0 ingress proto ip flower dst_mac aa:11:22:33:44:55/00:00:00:FF:00:00 action drop
Error: Mask does not fit the template.
We have an error talking to the kernel, -1
# tc filter add dev dummy0 ingress proto ip flower dst_ip 10.0.0.1 action drop
Error: Mask does not fit the template.
We have an error talking to the kernel, -1

Additions of filters to chain 22:
# tc filter add dev dummy0 ingress proto ip chain 22 flower dst_ip 10.0.0.1/8 action drop
# tc filter add dev dummy0 ingress proto ip chain 22 flower dst_ip 10.0.0.1 action drop
Error: Mask does not fit the template.
We have an error talking to the kernel, -1
# tc filter add dev dummy0 ingress proto ip chain 22 flower dst_ip 10.0.0.1/24 action drop
Error: Mask does not fit the template.
We have an error talking to the kernel, -1

Removal of a template from non-empty chain would fail:
# tc filter template del dev dummy0 ingress
Error: The chain is not empty, unable to delete template.
We have an error talking to the kernel, -1

Once the chain is flushed, the template could be removed:
# tc filter del dev dummy0 ingress
# tc filter template del dev dummy0 ingress

---
v1->v2:
-patch 6:
  - remove leftover extack arg in fl_hw_create_tmplt()

Jiri Pirko (9):
  net: sched: push ops lookup bits into tcf_proto_lookup_ops()
  net: sched: introduce chain templates
  net: sched: cls_flower: move key/mask dumping into a separate function
  net: sched: cls_flower: change fl_init_dissector to accept mask and
    dissector
  net: sched: cls_flower: implement chain templates
  net: sched: cls_flower: propagate chain template creation and
    destruction to drivers
  mlxsw: spectrum: Implement chain template hinting
  selftests: forwarding: move shblock tc support check to a separate
    helper
  selftests: forwarding: add tests for TC chain templates

 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |   5 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  12 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c |  12 +-
 .../ethernet/mellanox/mlxsw/spectrum_acl_tcam.c    |  25 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  |  44 ++-
 include/net/pkt_cls.h                              |   2 +
 include/net/sch_generic.h                          |  14 +-
 include/uapi/linux/rtnetlink.h                     |   7 +
 net/sched/cls_api.c                                | 424 +++++++++++++++++++--
 net/sched/cls_basic.c                              |   2 +-
 net/sched/cls_bpf.c                                |   3 +-
 net/sched/cls_cgroup.c                             |   2 +-
 net/sched/cls_flow.c                               |   3 +-
 net/sched/cls_flower.c                             | 250 +++++++++---
 net/sched/cls_fw.c                                 |   3 +-
 net/sched/cls_matchall.c                           |   3 +-
 net/sched/cls_route.c                              |   2 +-
 net/sched/cls_rsvp.h                               |   3 +-
 net/sched/cls_tcindex.c                            |   2 +-
 net/sched/cls_u32.c                                |   2 +-
 security/selinux/nlmsgtab.c                        |   2 +-
 tools/testing/selftests/net/forwarding/lib.sh      |  12 +
 .../selftests/net/forwarding/tc_chaintemplates.sh  | 160 ++++++++
 .../selftests/net/forwarding/tc_shblocks.sh        |   2 +
 24 files changed, 900 insertions(+), 96 deletions(-)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_chaintemplates.sh

-- 
2.14.4

^ permalink raw reply

* [patch net-next v2 1/9] net: sched: push ops lookup bits into tcf_proto_lookup_ops()
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Push all bits that take care of ops lookup, including module loading
outside tcf_proto_create() function, into tcf_proto_lookup_ops()

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 net/sched/cls_api.c | 53 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index cdc3c87c53e6..db45931bbada 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -39,7 +39,7 @@ static DEFINE_RWLOCK(cls_mod_lock);
 
 /* Find classifier type by string name */
 
-static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
+static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
 {
 	const struct tcf_proto_ops *t, *res = NULL;
 
@@ -57,6 +57,33 @@ static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
 	return res;
 }
 
+static const struct tcf_proto_ops *
+tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+{
+	const struct tcf_proto_ops *ops;
+
+	ops = __tcf_proto_lookup_ops(kind);
+	if (ops)
+		return ops;
+#ifdef CONFIG_MODULES
+	rtnl_unlock();
+	request_module("cls_%s", kind);
+	rtnl_lock();
+	ops = __tcf_proto_lookup_ops(kind);
+	/* We dropped the RTNL semaphore in order to perform
+	 * the module load. So, even if we succeeded in loading
+	 * the module we have to replay the request. We indicate
+	 * this using -EAGAIN.
+	 */
+	if (ops) {
+		module_put(ops->owner);
+		return ERR_PTR(-EAGAIN);
+	}
+#endif
+	NL_SET_ERR_MSG(extack, "TC classifier not found");
+	return ERR_PTR(-ENOENT);
+}
+
 /* Register(unregister) new classifier type */
 
 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
@@ -133,27 +160,9 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	if (!tp)
 		return ERR_PTR(-ENOBUFS);
 
-	err = -ENOENT;
-	tp->ops = tcf_proto_lookup_ops(kind);
-	if (!tp->ops) {
-#ifdef CONFIG_MODULES
-		rtnl_unlock();
-		request_module("cls_%s", kind);
-		rtnl_lock();
-		tp->ops = tcf_proto_lookup_ops(kind);
-		/* We dropped the RTNL semaphore in order to perform
-		 * the module load. So, even if we succeeded in loading
-		 * the module we have to replay the request. We indicate
-		 * this using -EAGAIN.
-		 */
-		if (tp->ops) {
-			module_put(tp->ops->owner);
-			err = -EAGAIN;
-		} else {
-			NL_SET_ERR_MSG(extack, "TC classifier not found");
-			err = -ENOENT;
-		}
-#endif
+	tp->ops = tcf_proto_lookup_ops(kind, extack);
+	if (IS_ERR(tp->ops)) {
+		err = PTR_ERR(tp->ops);
 		goto errout;
 	}
 	tp->classify = tp->ops->classify;
-- 
2.14.4

^ permalink raw reply related

* [patch net-next v2 2/9] net: sched: introduce chain templates
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Introduce a group of new tc-rtnl commands to allow user to set per-chain
template. Templates lock down individual chains for particular
classifier type/options combinations. The classifier needs to support
templates, otherwise kernel would reply with error.

For example, to lock chain 22 to allow only filters of type
flower with destination mac address, user needs to do:
  chain 22 flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF

In case the chain already contains some filters it is not possible to
add or remove template. That is permitted only for empty chains.

Alongside with add/del commands, introduce also get/dump and
notifications.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/sch_generic.h      |  14 +-
 include/uapi/linux/rtnetlink.h |   7 +
 net/sched/cls_api.c            | 371 ++++++++++++++++++++++++++++++++++++++++-
 net/sched/cls_basic.c          |   2 +-
 net/sched/cls_bpf.c            |   3 +-
 net/sched/cls_cgroup.c         |   2 +-
 net/sched/cls_flow.c           |   3 +-
 net/sched/cls_flower.c         |   3 +-
 net/sched/cls_fw.c             |   3 +-
 net/sched/cls_matchall.c       |   3 +-
 net/sched/cls_route.c          |   2 +-
 net/sched/cls_rsvp.h           |   3 +-
 net/sched/cls_tcindex.c        |   2 +-
 net/sched/cls_u32.c            |   2 +-
 security/selinux/nlmsgtab.c    |   2 +-
 15 files changed, 405 insertions(+), 17 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6488daa32f82..f2a27d41fed5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -235,6 +235,8 @@ struct tcf_result {
 	};
 };
 
+struct tcf_chain;
+
 struct tcf_proto_ops {
 	struct list_head	head;
 	char			kind[IFNAMSIZ];
@@ -250,17 +252,25 @@ struct tcf_proto_ops {
 	int			(*change)(struct net *net, struct sk_buff *,
 					struct tcf_proto*, unsigned long,
 					u32 handle, struct nlattr **,
-					void **, bool,
+					void **, bool, void *tmplt_priv,
 					struct netlink_ext_ack *);
 	int			(*delete)(struct tcf_proto *tp, void *arg,
 					  bool *last,
 					  struct netlink_ext_ack *);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
 	void			(*bind_class)(void *, u32, unsigned long);
+	void *			(*tmplt_create)(struct net *net,
+						struct tcf_chain *chain,
+						struct nlattr **tca,
+						struct netlink_ext_ack *extack);
+	void			(*tmplt_destroy)(void *tmplt_priv);
 
 	/* rtnetlink specific */
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
 					struct sk_buff *skb, struct tcmsg*);
+	int			(*tmplt_dump)(struct sk_buff *skb,
+					      struct net *net,
+					      void *tmplt_priv);
 
 	struct module		*owner;
 };
@@ -299,6 +309,8 @@ struct tcf_chain {
 	struct tcf_block *block;
 	u32 index; /* chain index */
 	unsigned int refcnt;
+	const struct tcf_proto_ops *tmplt_ops;
+	void *tmplt_priv;
 };
 
 struct tcf_block {
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 7d8502313c99..45fd8cc1fdb2 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -150,6 +150,13 @@ enum {
 	RTM_NEWCACHEREPORT = 96,
 #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
 
+	RTM_NEWCHAINTMPLT = 100,
+#define RTM_NEWCHAINTMPLT RTM_NEWCHAINTMPLT
+	RTM_DELCHAINTMPLT,
+#define RTM_DELCHAINTMPLT RTM_DELCHAINTMPLT
+	RTM_GETCHAINTMPLT,
+#define RTM_GETCHAINTMPLT RTM_GETCHAINTMPLT
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index db45931bbada..0c88520f80f2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -227,7 +227,7 @@ static void tcf_chain_head_change(struct tcf_chain *chain,
 		tcf_chain_head_change_item(item, tp_head);
 }
 
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool destroy_template)
 {
 	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
 
@@ -238,6 +238,11 @@ static void tcf_chain_flush(struct tcf_chain *chain)
 		tp = rtnl_dereference(chain->filter_chain);
 		tcf_chain_put(chain);
 	}
+	if (destroy_template && chain->tmplt_ops) {
+		chain->tmplt_ops->tmplt_destroy(chain->tmplt_priv);
+		module_put(chain->tmplt_ops->owner);
+		tcf_chain_put(chain);
+	}
 }
 
 static void tcf_chain_destroy(struct tcf_chain *chain)
@@ -691,7 +696,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 			tcf_chain_hold(chain);
 
 		list_for_each_entry(chain, &block->chain_list, list)
-			tcf_chain_flush(chain);
+			tcf_chain_flush(chain, true);
 	}
 
 	tcf_block_offload_unbind(block, q, ei);
@@ -1191,9 +1196,15 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		goto errout;
 	}
 
+	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
+		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
+		err = -EINVAL;
+		goto errout;
+	}
+
 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
-			      extack);
+			      chain->tmplt_priv, extack);
 	if (err == 0) {
 		if (tp_created)
 			tcf_chain_tp_insert(chain, &chain_info, tp);
@@ -1274,7 +1285,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	if (prio == 0) {
 		tfilter_notify_chain(net, skb, block, q, parent, n,
 				     chain, RTM_DELTFILTER);
-		tcf_chain_flush(chain);
+		tcf_chain_flush(chain, false);
 		err = 0;
 		goto errout;
 	}
@@ -1570,6 +1581,354 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static int tc_tmplt_fill_node(struct tcf_chain *chain, struct net *net,
+			      struct sk_buff *skb, struct tcf_block *block,
+			      struct Qdisc *q, u32 parent,
+			      u32 portid, u32 seq, u16 flags, int event)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	const struct tcf_proto_ops *ops;
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+	void *priv;
+
+	ops = chain->tmplt_ops;
+	priv = chain->tmplt_priv;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+	if (!nlh)
+		goto out_nlmsg_trim;
+	tcm = nlmsg_data(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm__pad1 = 0;
+	tcm->tcm__pad2 = 0;
+	tcm->tcm_handle = 0;
+	if (q) {
+		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+		tcm->tcm_parent = parent;
+	} else {
+		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
+		tcm->tcm_block_index = block->index;
+	}
+
+	if (nla_put_string(skb, TCA_KIND, ops->kind))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+		goto nla_put_failure;
+	if (ops->tmplt_dump(skb, net, priv) < 0)
+		goto nla_put_failure;
+
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+	return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -EMSGSIZE;
+}
+
+static int tc_tmplt_notify(struct tcf_chain *chain, struct net *net,
+			   struct sk_buff *oskb, struct nlmsghdr *n,
+			   struct tcf_block *block, struct Qdisc *q,
+			   u32 parent, int event, bool unicast)
+{
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tc_tmplt_fill_node(chain, net, skb, block, q, parent, portid,
+			       n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (unicast)
+		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+			      n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tc_ctl_tmplt_add(struct tcf_chain *chain, struct net *net,
+			    struct sk_buff *skb, struct nlmsghdr *n,
+			    struct tcf_block *block, struct Qdisc *q,
+			    u32 parent, struct nlattr **tca,
+			    struct netlink_ext_ack *extack)
+{
+	const struct tcf_proto_ops *ops;
+	void *tmplt_priv;
+
+	if (chain->tmplt_ops) {
+		NL_SET_ERR_MSG(extack, "A template is already set for the chain");
+		return -EBUSY;
+	}
+	if (chain->filter_chain) {
+		NL_SET_ERR_MSG(extack, "The chain is not empty, unable to add template");
+		return -EBUSY;
+	}
+	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+		NL_SET_ERR_MSG(extack, "Need NLM_F_CREATE to create a new chain template");
+		return -ENOENT;
+	}
+	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+		NL_SET_ERR_MSG(extack, "Chain templates are not supported with this classifier");
+		return -EOPNOTSUPP;
+	}
+
+	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
+	if (IS_ERR(tmplt_priv)) {
+		module_put(ops->owner);
+		return PTR_ERR(tmplt_priv);
+	}
+	chain->tmplt_ops = ops;
+	chain->tmplt_priv = tmplt_priv;
+	tc_tmplt_notify(chain, net, skb, n, block, q, parent,
+			RTM_NEWCHAINTMPLT, false);
+	return 0;
+}
+
+static int tc_ctl_tmplt_del(struct tcf_chain *chain, struct net *net,
+			    struct sk_buff *skb, struct nlmsghdr *n,
+			    struct tcf_block *block, struct Qdisc *q,
+			    u32 parent, struct netlink_ext_ack *extack)
+{
+	const struct tcf_proto_ops *ops = chain->tmplt_ops;
+
+	if (!ops) {
+		NL_SET_ERR_MSG(extack, "Unable to delete template as this chain does not have template");
+		return -ENOENT;
+	}
+	if (chain->filter_chain) {
+		NL_SET_ERR_MSG(extack, "The chain is not empty, unable to delete template");
+		return -EBUSY;
+	}
+	if (!ops->tmplt_create) {
+		NL_SET_ERR_MSG(extack, "Chain templates are not supported with this classifier");
+		return -EOPNOTSUPP;
+	}
+	tc_tmplt_notify(chain, net, skb, n, block, q, parent,
+			RTM_DELCHAINTMPLT, false);
+	ops->tmplt_destroy(chain->tmplt_priv);
+	module_put(ops->owner);
+	chain->tmplt_ops = NULL;
+	chain->tmplt_priv = NULL;
+	return 0;
+}
+
+static int tc_ctl_tmplt_get(struct tcf_chain *chain, struct net *net,
+			    struct sk_buff *skb, struct nlmsghdr *n,
+			    struct tcf_block *block, struct Qdisc *q,
+			    u32 parent, struct netlink_ext_ack *extack)
+{
+	const struct tcf_proto_ops *ops = chain->tmplt_ops;
+	int err;
+
+	if (!ops) {
+		NL_SET_ERR_MSG(extack, "Unable to get template as this chain does not have template");
+		return -ENOENT;
+	}
+	err = tc_tmplt_notify(chain, net, skb, n, block, q, parent,
+			      RTM_NEWCHAINTMPLT, true);
+	if (err < 0)
+		NL_SET_ERR_MSG(extack, "Failed to send chain template notify message");
+	return err;
+}
+
+/* Add/delete/get a chain template */
+
+static int tc_ctl_tmplt(struct sk_buff *skb, struct nlmsghdr *n,
+			struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tca[TCA_MAX + 1];
+	struct tcmsg *t;
+	u32 parent;
+	u32 chain_index;
+	struct Qdisc *q = NULL;
+	struct tcf_chain *chain = NULL;
+	struct tcf_block *block;
+	unsigned long cl;
+	int err;
+
+	if (n->nlmsg_type != RTM_GETCHAINTMPLT &&
+	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+replay:
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+	if (err < 0)
+		return err;
+
+	t = nlmsg_data(n);
+	parent = t->tcm_parent;
+	cl = 0;
+
+	/* Find filter chain. */
+
+	block = tcf_block_find(net, &q, &parent, &cl,
+			       t->tcm_ifindex, t->tcm_block_index, extack);
+	if (IS_ERR(block)) {
+		err = PTR_ERR(block);
+		goto errout;
+	}
+
+	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+	if (chain_index > TC_ACT_EXT_VAL_MASK) {
+		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
+		err = -EINVAL;
+		goto errout;
+	}
+	chain = tcf_chain_get(block, chain_index,
+			      n->nlmsg_type == RTM_NEWCHAINTMPLT);
+	if (!chain) {
+		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
+		goto errout;
+	}
+
+	switch (n->nlmsg_type) {
+	case RTM_NEWCHAINTMPLT:
+		err = tc_ctl_tmplt_add(chain, net, skb, n, block,
+				       q, parent, tca, extack);
+		/* In case the chain template was successfully added,
+		 * take a reference to the chain. This ensures that
+		 * an empty chain with template does not disappear
+		 * at the end of this function.
+		 */
+		if (!err)
+			tcf_chain_hold(chain);
+		break;
+	case RTM_DELCHAINTMPLT:
+		err = tc_ctl_tmplt_del(chain, net, skb, n, block,
+				       q, parent, extack);
+		/* In case the chain template was successfully deleted,
+		 * put a reference to the chain previously taken
+		 * during template addition.
+		 */
+		if (!err)
+			tcf_chain_put(chain);
+		break;
+	case RTM_GETCHAINTMPLT:
+		err = tc_ctl_tmplt_get(chain, net, skb, n, block,
+				       q, parent, extack);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		NL_SET_ERR_MSG(extack, "Unsupported message type");
+		break;
+	}
+errout:
+	if (chain)
+		tcf_chain_put(chain);
+	if (err == -EAGAIN)
+		/* Replay the request. */
+		goto replay;
+	return err;
+}
+
+/* called with RTNL */
+static int tc_dump_tmplt(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tca[TCA_MAX + 1];
+	struct Qdisc *q = NULL;
+	struct tcf_block *block;
+	struct tcf_chain *chain;
+	struct tcmsg *tcm = nlmsg_data(cb->nlh);
+	long index_start;
+	long index;
+	u32 parent;
+	int err;
+
+	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
+		return skb->len;
+
+	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+	if (err)
+		return err;
+
+	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+		block = tcf_block_lookup(net, tcm->tcm_block_index);
+		if (!block)
+			goto out;
+		/* If we work with block index, q is NULL and parent value
+		 * will never be used in the following code. The check
+		 * in tcf_fill_node prevents it. However, compiler does not
+		 * see that far, so set parent to zero to silence the warning
+		 * about parent being uninitialized.
+		 */
+		parent = 0;
+	} else {
+		const struct Qdisc_class_ops *cops;
+		struct net_device *dev;
+		unsigned long cl = 0;
+
+		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+		if (!dev)
+			return skb->len;
+
+		parent = tcm->tcm_parent;
+		if (!parent) {
+			q = dev->qdisc;
+			parent = q->handle;
+		} else {
+			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+		}
+		if (!q)
+			goto out;
+		cops = q->ops->cl_ops;
+		if (!cops)
+			goto out;
+		if (!cops->tcf_block)
+			goto out;
+		if (TC_H_MIN(tcm->tcm_parent)) {
+			cl = cops->find(q, tcm->tcm_parent);
+			if (cl == 0)
+				goto out;
+		}
+		block = cops->tcf_block(q, cl, NULL);
+		if (!block)
+			goto out;
+		if (tcf_block_shared(block))
+			q = NULL;
+	}
+
+	index_start = cb->args[0];
+	index = 0;
+
+	list_for_each_entry(chain, &block->chain_list, list) {
+		if ((tca[TCA_CHAIN] &&
+		     nla_get_u32(tca[TCA_CHAIN]) != chain->index) ||
+		    !chain->tmplt_ops)
+			continue;
+		if (index < index_start) {
+			index++;
+			continue;
+		}
+		err = tc_tmplt_fill_node(chain, net, skb, block, q, parent,
+					 NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 RTM_NEWCHAINTMPLT);
+		if (err <= 0)
+			break;
+		index++;
+	}
+
+	cb->args[0] = index;
+
+out:
+	/* If we did no progress, the error (EMSGSIZE) is real */
+	if (skb->len == 0 && err)
+		return err;
+	return skb->len;
+}
+
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1795,6 +2154,10 @@ static int __init tc_filter_init(void)
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
 		      tc_dump_tfilter, 0);
+	rtnl_register(PF_UNSPEC, RTM_NEWCHAINTMPLT, tc_ctl_tmplt, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELCHAINTMPLT, tc_ctl_tmplt, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETCHAINTMPLT, tc_ctl_tmplt,
+		      tc_dump_tmplt, 0);
 
 	return 0;
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 95367f37098d..a690acac7e6e 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -168,7 +168,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
 			struct nlattr **tca, void **arg, bool ovr,
-			struct netlink_ext_ack *extack)
+			void *tmplt_priv, struct netlink_ext_ack *extack)
 {
 	int err;
 	struct basic_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 1aa7f6511065..363c43dfb894 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -455,7 +455,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 			  struct tcf_proto *tp, unsigned long base,
 			  u32 handle, struct nlattr **tca,
-			  void **arg, bool ovr, struct netlink_ext_ack *extack)
+			  void **arg, bool ovr, void *tmplt_priv,
+			  struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *oldprog = *arg;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3bc01bdde165..ca5d0315432c 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
-			     void **arg, bool ovr,
+			     void **arg, bool ovr, void *tmplt_priv,
 			     struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2bb043cd436b..2a21e26fcee0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work)
 static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, void *tmplt_priv,
+		       struct netlink_ext_ack *extack)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *fold, *fnew;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9e8b26a80fb3..09d6c6e67f9d 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -927,7 +927,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 static int fl_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca,
-		     void **arg, bool ovr, struct netlink_ext_ack *extack)
+		     void **arg, bool ovr, void *tmplt_priv,
+		     struct netlink_ext_ack *extack)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *fold = *arg;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 29eeeaf3ea44..a1d40d48aa24 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -250,7 +250,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca, void **arg,
-		     bool ovr, struct netlink_ext_ack *extack)
+		     bool ovr, void *tmplt_priv,
+		     struct netlink_ext_ack *extack)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = *arg;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 47b207ef7762..481e77cbf501 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -152,7 +152,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 static int mall_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, void *tmplt_priv,
+		       struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 	struct nlattr *tb[TCA_MATCHALL_MAX + 1];
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0404aa5fa7cb..321eb746fe01 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -468,7 +468,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 static int route4_change(struct net *net, struct sk_buff *in_skb,
 			 struct tcf_proto *tp, unsigned long base, u32 handle,
 			 struct nlattr **tca, void **arg, bool ovr,
-			 struct netlink_ext_ack *extack)
+			 void *tmplt_priv, struct netlink_ext_ack *extack)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter __rcu **fp;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index e9ccf7daea7d..371618720ef2 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -477,7 +477,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, void *tmplt_priv,
+		       struct netlink_ext_ack *extack)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	struct rsvp_filter *f, *nfp;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 32f4bbd82f35..d9fb5d56c60d 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -500,7 +500,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
-	       struct nlattr **tca, void **arg, bool ovr,
+	       struct nlattr **tca, void **arg, bool ovr, void *tmplt_priv,
 	       struct netlink_ext_ack *extack)
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index fb861f90fde6..b500ce62ef3c 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -903,7 +903,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
 		      struct nlattr **tca, void **arg, bool ovr,
-		      struct netlink_ext_ack *extack)
+		      void *tmplt_priv, struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 7b7433a1a34c..825777efc83e 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -159,7 +159,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 	switch (sclass) {
 	case SECCLASS_NETLINK_ROUTE_SOCKET:
 		/* RTM_MAX always point to RTM_SETxxxx, ie RTM_NEWxxx + 3 */
-		BUILD_BUG_ON(RTM_MAX != (RTM_NEWCACHEREPORT + 3));
+		BUILD_BUG_ON(RTM_MAX != (RTM_NEWCHAINTMPLT + 3));
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 				 sizeof(nlmsg_route_perms));
 		break;
-- 
2.14.4

^ permalink raw reply related

* [patch net-next v2 3/9] net: sched: cls_flower: move key/mask dumping into a separate function
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Push key/mask dumping from fl_dump() into a separate function
fl_dump_key(), that will be reused for template dumping.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 net/sched/cls_flower.c | 62 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 25 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 09d6c6e67f9d..76c5516357d5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1217,29 +1217,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
 }
 
-static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		   struct sk_buff *skb, struct tcmsg *t)
+static int fl_dump_key(struct sk_buff *skb, struct net *net,
+		       struct fl_flow_key *key, struct fl_flow_key *mask)
 {
-	struct cls_fl_filter *f = fh;
-	struct nlattr *nest;
-	struct fl_flow_key *key, *mask;
-
-	if (!f)
-		return skb->len;
-
-	t->tcm_handle = f->handle;
-
-	nest = nla_nest_start(skb, TCA_OPTIONS);
-	if (!nest)
-		goto nla_put_failure;
-
-	if (f->res.classid &&
-	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
-		goto nla_put_failure;
-
-	key = &f->key;
-	mask = &f->mask->key;
-
 	if (mask->indev_ifindex) {
 		struct net_device *dev;
 
@@ -1248,9 +1228,6 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			goto nla_put_failure;
 	}
 
-	if (!tc_skip_hw(f->flags))
-		fl_hw_update_stats(tp, f);
-
 	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
 			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
 			    sizeof(key->eth.dst)) ||
@@ -1404,6 +1381,41 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
 		goto nla_put_failure;
 
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
+		   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_fl_filter *f = fh;
+	struct nlattr *nest;
+	struct fl_flow_key *key, *mask;
+
+	if (!f)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (f->res.classid &&
+	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+		goto nla_put_failure;
+
+	key = &f->key;
+	mask = &f->mask->key;
+
+	if (fl_dump_key(skb, net, key, mask))
+		goto nla_put_failure;
+
+	if (!tc_skip_hw(f->flags))
+		fl_hw_update_stats(tp, f);
+
 	if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
 		goto nla_put_failure;
 
-- 
2.14.4

^ permalink raw reply related

* [patch net-next v2 4/9] net: sched: cls_flower: change fl_init_dissector to accept mask and dissector
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

This function is going to be used for templates as well, so we need to
pass the dissector and mask key pointers separately instead of passing
the whole struct fl_flow_mask.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 net/sched/cls_flower.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 76c5516357d5..9ce4375b3252 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -793,47 +793,48 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
 			FL_KEY_SET(keys, cnt, id, member);			\
 	} while(0);
 
-static void fl_init_dissector(struct fl_flow_mask *mask)
+static void fl_init_dissector(struct flow_dissector *dissector,
+			      struct fl_flow_key *mask)
 {
 	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
 	size_t cnt = 0;
 
 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_PORTS, tp);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_IP, ip);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_TCP, tcp);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ICMP, icmp);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ARP, arp);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_MPLS, mpls);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
-	if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
-	    FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
+	if (FL_KEY_IS_MASKED(mask, enc_ipv4) ||
+	    FL_KEY_IS_MASKED(mask, enc_ipv6))
 		FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
 			   enc_control);
-	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
 
-	skb_flow_dissector_init(&mask->dissector, keys, cnt);
+	skb_flow_dissector_init(dissector, keys, cnt);
 }
 
 static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
@@ -852,7 +853,7 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
 	if (err)
 		goto errout_free;
 
-	fl_init_dissector(newmask);
+	fl_init_dissector(&newmask->dissector, &newmask->key);
 
 	INIT_LIST_HEAD_RCU(&newmask->filters);
 
-- 
2.14.4

^ permalink raw reply related

* [patch net-next v2 5/9] net: sched: cls_flower: implement chain templates
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Use the previously introduced template extension and implement
callbacks to create, destroy and dump a chain template. The existing
parsing and dumping functions are re-used. Also, check if newly added
filters fit the template if it is set.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 net/sched/cls_flower.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 106 insertions(+), 1 deletion(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9ce4375b3252..d64d43843a3a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -70,6 +70,13 @@ struct fl_flow_mask {
 	struct list_head list;
 };
 
+struct fl_flow_tmplt {
+	struct fl_flow_key dummy_key;
+	struct fl_flow_key mask;
+	struct flow_dissector dissector;
+	struct tcf_chain *chain;
+};
+
 struct cls_fl_head {
 	struct rhashtable ht;
 	struct list_head masks;
@@ -144,6 +151,23 @@ static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
 		*lmkey++ = *lkey++ & *lmask++;
 }
 
+static bool fl_mask_fits_tmplt(struct fl_flow_tmplt *tmplt,
+			       struct fl_flow_mask *mask)
+{
+	const long *lmask = fl_key_get_start(&mask->key, mask);
+	const long *ltmplt;
+	int i;
+
+	if (!tmplt)
+		return true;
+	ltmplt = fl_key_get_start(&tmplt->mask, mask);
+	for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) {
+		if (~*ltmplt++ & *lmask++)
+			return false;
+	}
+	return true;
+}
+
 static void fl_clear_masked_range(struct fl_flow_key *key,
 				  struct fl_flow_mask *mask)
 {
@@ -902,6 +926,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 			struct cls_fl_filter *f, struct fl_flow_mask *mask,
 			unsigned long base, struct nlattr **tb,
 			struct nlattr *est, bool ovr,
+			struct fl_flow_tmplt *tmplt,
 			struct netlink_ext_ack *extack)
 {
 	int err;
@@ -922,6 +947,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 	fl_mask_update_range(mask);
 	fl_set_masked_key(&f->mkey, &f->key, mask);
 
+	if (!fl_mask_fits_tmplt(tmplt, mask)) {
+		NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -932,6 +962,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		     struct netlink_ext_ack *extack)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct fl_flow_tmplt *tmplt = tmplt_priv;
 	struct cls_fl_filter *fold = *arg;
 	struct cls_fl_filter *fnew;
 	struct nlattr **tb;
@@ -988,7 +1019,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	}
 
 	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
-			   extack);
+			   tmplt, extack);
 	if (err)
 		goto errout_idr;
 
@@ -1089,6 +1120,52 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
+static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
+			     struct nlattr **tca,
+			     struct netlink_ext_ack *extack)
+{
+	struct fl_flow_tmplt *tmplt;
+	struct nlattr **tb;
+	int err;
+
+	if (!tca[TCA_OPTIONS])
+		return ERR_PTR(-EINVAL);
+
+	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
+	if (!tb)
+		return ERR_PTR(-ENOBUFS);
+	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
+			       fl_policy, NULL);
+	if (err)
+		goto errout_tb;
+
+	tmplt = kzalloc(sizeof(*tmplt), GFP_KERNEL);
+	if (!tmplt)
+		goto errout_tb;
+	tmplt->chain = chain;
+	err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
+	if (err)
+		goto errout_tmplt;
+	kfree(tb);
+
+	fl_init_dissector(&tmplt->dissector, &tmplt->mask);
+
+	return tmplt;
+
+errout_tmplt:
+	kfree(tmplt);
+errout_tb:
+	kfree(tb);
+	return ERR_PTR(err);
+}
+
+static void fl_tmplt_destroy(void *tmplt_priv)
+{
+	struct fl_flow_tmplt *tmplt = tmplt_priv;
+
+	kfree(tmplt);
+}
+
 static int fl_dump_key_val(struct sk_buff *skb,
 			   void *val, int val_type,
 			   void *mask, int mask_type, int len)
@@ -1435,6 +1512,31 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	return -1;
 }
 
+static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
+{
+	struct fl_flow_tmplt *tmplt = tmplt_priv;
+	struct fl_flow_key *key, *mask;
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	key = &tmplt->dummy_key;
+	mask = &tmplt->mask;
+
+	if (fl_dump_key(skb, net, key, mask))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
 static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
 {
 	struct cls_fl_filter *f = fh;
@@ -1454,6 +1556,9 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.walk		= fl_walk,
 	.dump		= fl_dump,
 	.bind_class	= fl_bind_class,
+	.tmplt_create	= fl_tmplt_create,
+	.tmplt_destroy	= fl_tmplt_destroy,
+	.tmplt_dump	= fl_tmplt_dump,
 	.owner		= THIS_MODULE,
 };
 
-- 
2.14.4

^ permalink raw reply related

* [patch net-next v2 6/9] net: sched: cls_flower: propagate chain template creation and destruction to drivers
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Introduce a couple of flower offload commands in order to propagate
template creation/destruction events down to device drivers.
Drivers may use this information to prepare HW in an optimal way
for future filter insertions.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
v1->v2:
- remove leftover extack arg in fl_hw_create_tmplt()
---
 include/net/pkt_cls.h  |  2 ++
 net/sched/cls_flower.c | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3c1a2c47cd4..e83968cf9a70 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -715,6 +715,8 @@ enum tc_fl_command {
 	TC_CLSFLOWER_REPLACE,
 	TC_CLSFLOWER_DESTROY,
 	TC_CLSFLOWER_STATS,
+	TC_CLSFLOWER_TMPLT_CREATE,
+	TC_CLSFLOWER_TMPLT_DESTROY,
 };
 
 struct tc_cls_flower_offload {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d64d43843a3a..614dd558d5f1 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1120,6 +1120,42 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
+static void fl_hw_create_tmplt(struct tcf_chain *chain,
+			       struct fl_flow_tmplt *tmplt)
+{
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = chain->block;
+	struct tcf_exts dummy_exts = { 0, };
+
+	cls_flower.common.chain_index = chain->index;
+	cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
+	cls_flower.cookie = (unsigned long) tmplt;
+	cls_flower.dissector = &tmplt->dissector;
+	cls_flower.mask = &tmplt->mask;
+	cls_flower.key = &tmplt->dummy_key;
+	cls_flower.exts = &dummy_exts;
+
+	/* We don't care if driver (any of them) fails to handle this
+	 * call. It serves just as a hint for it.
+	 */
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
+}
+
+static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
+				struct fl_flow_tmplt *tmplt)
+{
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = chain->block;
+
+	cls_flower.common.chain_index = chain->index;
+	cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
+	cls_flower.cookie = (unsigned long) tmplt;
+
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
+}
+
 static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
 			     struct nlattr **tca,
 			     struct netlink_ext_ack *extack)
@@ -1150,6 +1186,8 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
 
 	fl_init_dissector(&tmplt->dissector, &tmplt->mask);
 
+	fl_hw_create_tmplt(chain, tmplt);
+
 	return tmplt;
 
 errout_tmplt:
@@ -1163,6 +1201,7 @@ static void fl_tmplt_destroy(void *tmplt_priv)
 {
 	struct fl_flow_tmplt *tmplt = tmplt_priv;
 
+	fl_hw_destroy_tmplt(tmplt->chain, tmplt);
 	kfree(tmplt);
 }
 
-- 
2.14.4

^ permalink raw reply related

* [patch net-next v2 8/9] selftests: forwarding: move shblock tc support check to a separate helper
From: Jiri Pirko @ 2018-06-26  7:59 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

The shared block support is only needed for tc_shblocks.sh. No need to
require that for other tests.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 tools/testing/selftests/net/forwarding/lib.sh         | 3 +++
 tools/testing/selftests/net/forwarding/tc_shblocks.sh | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7b18a53aa556..a736d1d7ecdb 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -28,7 +28,10 @@ check_tc_version()
 		echo "SKIP: iproute2 too old; tc is missing JSON support"
 		exit 1
 	fi
+}
 
+check_tc_shblock_support()
+{
 	tc filter help 2>&1 | grep block &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing shared block support"
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index b5b917203815..9826a446e2c0 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -105,6 +105,8 @@ cleanup()
 	ip link set $swp2 address $swp2origmac
 }
 
+check_tc_shblock_support
+
 trap cleanup EXIT
 
 setup_prepare
-- 
2.14.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox