Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 09/10] samples/bpf: use hugepages in xdpsock app
From: Kevin Laatz @ 2019-07-16  3:06 UTC (permalink / raw)
  To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
	jonathan.lemon
  Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190716030637.5634-1-kevin.laatz@intel.com>

This patch modifies xdpsock to use mmap instead of posix_memalign. With
this change, we can use hugepages when running the application in unaligned
chunks mode. Using hugepages makes it more likely that we have physically
contiguous memory, which supports the unaligned chunk mode better.

Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
 samples/bpf/xdpsock_user.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 8f220afd549a..958a27193582 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -69,6 +69,7 @@ static int opt_poll;
 static int opt_interval = 1;
 static u32 opt_umem_flags;
 static int opt_unaligned_chunks;
+static int opt_mmap_flags;
 static u32 opt_xdp_bind_flags;
 static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 static __u32 prog_id;
@@ -434,6 +435,7 @@ static void parse_command_line(int argc, char **argv)
 		case 'u':
 			opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNKS;
 			opt_unaligned_chunks = 1;
+			opt_mmap_flags = MAP_HUGETLB;
 			break;
 		case 'F':
 			opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -696,11 +698,14 @@ int main(int argc, char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
-			     NUM_FRAMES * opt_xsk_frame_size);
-	if (ret)
-		exit_with_error(ret);
-
+	/* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
+	bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
+		    PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
+	if (bufs == MAP_FAILED) {
+		printf("ERROR: mmap failed\n");
+		exit(EXIT_FAILURE);
+	}
        /* Create sockets... */
 	umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
 	xsks[num_socks++] = xsk_configure_socket(umem);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 06/10] libbpf: add flags to umem config
From: Kevin Laatz @ 2019-07-16  3:06 UTC (permalink / raw)
  To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
	jonathan.lemon
  Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190716030637.5634-1-kevin.laatz@intel.com>

This patch adds a 'flags' field to the umem_config and umem_reg structs.
This will allow for more options to be added for configuring umems.

The first use for the flags field is to add a flag for unaligned chunks
mode. These flags can either be user-provided or filled with a default.

Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

---
v2:
  - Removed the headroom check from this patch. It has moved to the
    previous patch.
---
 tools/include/uapi/linux/if_xdp.h | 4 ++++
 tools/lib/bpf/xsk.c               | 3 +++
 tools/lib/bpf/xsk.h               | 2 ++
 3 files changed, 9 insertions(+)

diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index faaa5ca2a117..594bcebb189c 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -17,6 +17,9 @@
 #define XDP_COPY	(1 << 1) /* Force copy-mode */
 #define XDP_ZEROCOPY	(1 << 2) /* Force zero-copy mode */
 
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNKS (1 << 0)
+
 struct sockaddr_xdp {
 	__u16 sxdp_family;
 	__u16 sxdp_flags;
@@ -53,6 +56,7 @@ struct xdp_umem_reg {
 	__u64 len; /* Length of packet data area */
 	__u32 chunk_size;
 	__u32 headroom;
+	__u32 flags;
 };
 
 struct xdp_statistics {
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index b33740221b7e..d5ff3fc39e32 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -116,6 +116,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
 		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+		cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
 		return;
 	}
 
@@ -123,6 +124,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
 	cfg->comp_size = usr_cfg->comp_size;
 	cfg->frame_size = usr_cfg->frame_size;
 	cfg->frame_headroom = usr_cfg->frame_headroom;
+	cfg->flags = usr_cfg->flags;
 }
 
 static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
@@ -182,6 +184,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
 	mr.len = size;
 	mr.chunk_size = umem->config.frame_size;
 	mr.headroom = umem->config.frame_headroom;
+	mr.flags = umem->config.flags;
 
 	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 	if (err) {
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 833a6e60d065..44a03d8c34b9 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -170,12 +170,14 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
 #define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
 #define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
 #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
 
 struct xsk_umem_config {
 	__u32 fill_size;
 	__u32 comp_size;
 	__u32 frame_size;
 	__u32 frame_headroom;
+	__u32 flags;
 };
 
 /* Flags for the libbpf_flags field. */
-- 
2.17.1


^ permalink raw reply related

* Re: [bpf-next RFC 2/6] tcp: add skb-less helpers to retrieve SYN cookie
From: Lorenz Bauer @ 2019-07-16 11:34 UTC (permalink / raw)
  To: Petar Penkov
  Cc: Networking, bpf, davem, Alexei Starovoitov, Daniel Borkmann,
	Eric Dumazet, sdf, Petar Penkov
In-Reply-To: <20190716002650.154729-3-ppenkov.kernel@gmail.com>

On Tue, 16 Jul 2019 at 01:27, Petar Penkov <ppenkov.kernel@gmail.com> wrote:
>
> From: Petar Penkov <ppenkov@google.com>
>
> This patch allows generation of a SYN cookie before an SKB has been
> allocated, as is the case at XDP.
>
> Signed-off-by: Petar Penkov <ppenkov@google.com>
> ---
>  include/net/tcp.h    | 11 ++++++
>  net/ipv4/tcp_input.c | 79 ++++++++++++++++++++++++++++++++++++++++++++
>  net/ipv4/tcp_ipv4.c  |  8 +++++
>  net/ipv6/tcp_ipv6.c  |  8 +++++
>  4 files changed, 106 insertions(+)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index cca3c59b98bf..a128e22c0d5d 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -414,6 +414,17 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
>                        int estab, struct tcp_fastopen_cookie *foc);
>  const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
>
> +/*
> + *     BPF SKB-less helpers
> + */
> +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
> +                        struct tcphdr *tch, u32 *cookie);
> +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
> +                        struct tcphdr *tch, u32 *cookie);
> +u16 tcp_get_syncookie(struct request_sock_ops *rsk_ops,
> +                     const struct tcp_request_sock_ops *af_ops,
> +                     struct sock *sk, void *iph, struct tcphdr *tch,
> +                     u32 *cookie);
>  /*
>   *     TCP v4 functions exported for the inet6 API
>   */
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 8892df6de1d4..1406d7e0953c 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3782,6 +3782,52 @@ static void smc_parse_options(const struct tcphdr *th,
>  #endif
>  }
>
> +/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
> + * value on success.
> + *
> + * Invoked for BPF SYN cookie generation, so th should be a SYN.
> + */
> +static u16 tcp_parse_mss_option(const struct net *net, const struct tcphdr *th,
> +                               u16 user_mss)

net seems unused?

> +{
> +       const unsigned char *ptr = (const unsigned char *)(th + 1);
> +       int length = (th->doff * 4) - sizeof(struct tcphdr);
> +       u16 mss = 0;
> +
> +       while (length > 0) {
> +               int opcode = *ptr++;
> +               int opsize;
> +
> +               switch (opcode) {
> +               case TCPOPT_EOL:
> +                       return mss;
> +               case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
> +                       length--;
> +                       continue;
> +               default:
> +                       if (length < 2)
> +                               return mss;
> +                       opsize = *ptr++;
> +                       if (opsize < 2) /* "silly options" */
> +                               return mss;
> +                       if (opsize > length)
> +                               return mss;     /* fail on partial options */
> +                       if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
> +                               u16 in_mss = get_unaligned_be16(ptr);
> +
> +                               if (in_mss) {
> +                                       if (user_mss && user_mss < in_mss)
> +                                               in_mss = user_mss;
> +                                       mss = in_mss;
> +                               }
> +                       }
> +                       ptr += opsize - 2;
> +                       length -= opsize;
> +               }
> +       }
> +       return mss;
> +}
> +
>  /* Look for tcp options. Normally only called on SYN and SYNACK packets.
>   * But, this can also be called on packets in the established flow when
>   * the fast version below fails.
> @@ -6464,6 +6510,39 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
>         }
>  }
>
> +u16 tcp_get_syncookie(struct request_sock_ops *rsk_ops,
> +                     const struct tcp_request_sock_ops *af_ops,
> +                     struct sock *sk, void *iph, struct tcphdr *th,
> +                     u32 *cookie)
> +{
> +       u16 mss = 0;
> +#ifdef CONFIG_SYN_COOKIES
> +       bool is_v4 = rsk_ops->family == AF_INET;
> +       struct tcp_sock *tp = tcp_sk(sk);
> +
> +       if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
> +           !inet_csk_reqsk_queue_is_full(sk))
> +               return 0;
> +
> +       if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
> +               return 0;
> +
> +       if (sk_acceptq_is_full(sk)) {
> +               NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
> +               return 0;
> +       }
> +
> +       mss = tcp_parse_mss_option(sock_net(sk), th, tp->rx_opt.user_mss);
> +       if (!mss)
> +               mss = af_ops->mss_clamp;
> +
> +       tcp_synq_overflow(sk);
> +       *cookie = is_v4 ? __cookie_v4_init_sequence(iph, th, &mss)
> +                       : __cookie_v6_init_sequence(iph, th, &mss);
> +#endif
> +       return mss;
> +}
> +
>  int tcp_conn_request(struct request_sock_ops *rsk_ops,
>                      const struct tcp_request_sock_ops *af_ops,
>                      struct sock *sk, struct sk_buff *skb)
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index d57641cb3477..0e06e59784bd 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1515,6 +1515,14 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
>         return sk;
>  }
>
> +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
> +                        struct tcphdr *tch, u32 *cookie)
> +{
> +       return tcp_get_syncookie(&tcp_request_sock_ops,
> +                                &tcp_request_sock_ipv4_ops, sk, iph, tch,
> +                                cookie);
> +}
> +
>  /* The socket must have it's spinlock held when we get
>   * here, unless it is a TCP_LISTEN socket.
>   *
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index d56a9019a0fe..ce46cdba54bc 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1058,6 +1058,14 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
>         return sk;
>  }
>
> +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
> +                        struct tcphdr *tch, u32 *cookie)
> +{
> +       return tcp_get_syncookie(&tcp6_request_sock_ops,
> +                                &tcp_request_sock_ipv6_ops, sk, iph, tch,
> +                                cookie);
> +}
> +
>  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>  {
>         if (skb->protocol == htons(ETH_P_IP))
> --
> 2.22.0.510.g264f2c817a-goog
>


-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

^ permalink raw reply

* Re: [bpf-next RFC 3/6] bpf: add bpf_tcp_gen_syncookie helper
From: Lorenz Bauer @ 2019-07-16 11:54 UTC (permalink / raw)
  To: Petar Penkov
  Cc: Networking, bpf, davem, Alexei Starovoitov, Daniel Borkmann,
	Eric Dumazet, sdf, Petar Penkov
In-Reply-To: <20190716002650.154729-4-ppenkov.kernel@gmail.com>

On Tue, 16 Jul 2019 at 01:27, Petar Penkov <ppenkov.kernel@gmail.com> wrote:
>
> From: Petar Penkov <ppenkov@google.com>
>
> This helper function allows BPF programs to try to generate SYN
> cookies, given a reference to a listener socket. The function works
> from XDP and with an skb context since bpf_skc_lookup_tcp can lookup a
> socket in both cases.
>
> Signed-off-by: Petar Penkov <ppenkov@google.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/uapi/linux/bpf.h | 30 ++++++++++++++++++-
>  net/core/filter.c        | 62 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 91 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6f68438aa4ed..abf4a85c76d1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2713,6 +2713,33 @@ union bpf_attr {
>   *             **-EPERM** if no permission to send the *sig*.
>   *
>   *             **-EAGAIN** if bpf program can try again.
> + *
> + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
> + *     Description
> + *             Try to issue a SYN cookie for the packet with corresponding
> + *             IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
> + *
> + *             *iph* points to the start of the IPv4 or IPv6 header, while
> + *             *iph_len* contains **sizeof**\ (**struct iphdr**) or
> + *             **sizeof**\ (**struct ip6hdr**).
> + *
> + *             *th* points to the start of the TCP header, while *th_len*
> + *             contains **sizeof**\ (**struct tcphdr**).
> + *
> + *     Return
> + *             On success, lower 32 bits hold the generated SYN cookie in
> + *             network order and the higher 32 bits hold the MSS value for that
> + *             cookie.

I prefer returning the cookie vs. directly creating the packet.
Returning a struct would
be nicest, but it doesn't seem worth it to extend the verifier for
that. Taking a pointer
to a result struct would also be good, but we're out of arguments :/

Nit: the MSS is only 16 bit?

> + *
> + *             On failure, the returned value is one of the following:
> + *
> + *             **-EINVAL** SYN cookie cannot be issued due to error
> + *
> + *             **-ENOENT** SYN cookie should not be issued (no SYN flood)
> + *
> + *             **-ENOTSUPP** kernel configuration does not enable SYN cookies
> + *
> + *             **-EPROTONOSUPPORT** *sk* family is not AF_INET/AF_INET6
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -2824,7 +2851,8 @@ union bpf_attr {
>         FN(strtoul),                    \
>         FN(sk_storage_get),             \
>         FN(sk_storage_delete),          \
> -       FN(send_signal),
> +       FN(send_signal),                \
> +       FN(tcp_gen_syncookie),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 47f6386fb17a..109fd1e286f4 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5850,6 +5850,64 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
>         .arg5_type      = ARG_CONST_SIZE,
>  };
>
> +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
> +          struct tcphdr *, th, u32, th_len)
> +{
> +#ifdef CONFIG_SYN_COOKIES
> +       u32 cookie;
> +       u16 mss;
> +
> +       if (unlikely(th_len < sizeof(*th)))
> +               return -EINVAL;
> +
> +       if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> +               return -EINVAL;
> +
> +       if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> +               return -EINVAL;

Should this be ENOENT instead?

> +
> +       if (!th->syn || th->ack || th->fin || th->rst)
> +               return -EINVAL;
> +
> +       switch (sk->sk_family) {
> +       case AF_INET:
> +               if (unlikely(iph_len < sizeof(struct iphdr)))
> +                       return -EINVAL;
> +               mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
> +               break;
> +
> +#if IS_BUILTIN(CONFIG_IPV6)
> +       case AF_INET6:
> +               if (unlikely(iph_len < sizeof(struct ipv6hdr)))
> +                       return -EINVAL;
> +               mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
> +               break;
> +#endif /* CONFIG_IPV6 */
> +
> +       default:
> +               return -EPROTONOSUPPORT;
> +       }
> +       if (mss <= 0)
> +               return -ENOENT;
> +
> +       return htonl(cookie) | ((u64)mss << 32);
> +#else
> +       return -ENOTSUPP;
> +#endif /* CONFIG_SYN_COOKIES */
> +}
> +
> +static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
> +       .func           = bpf_tcp_gen_syncookie,
> +       .gpl_only       = true, /* __cookie_v*_init_sequence() is GPL */
> +       .pkt_access     = true,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_SOCK_COMMON,
> +       .arg2_type      = ARG_PTR_TO_MEM,
> +       .arg3_type      = ARG_CONST_SIZE,
> +       .arg4_type      = ARG_PTR_TO_MEM,
> +       .arg5_type      = ARG_CONST_SIZE,
> +};
> +
>  #endif /* CONFIG_INET */
>
>  bool bpf_helper_changes_pkt_data(void *func)
> @@ -6135,6 +6193,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_tcp_check_syncookie_proto;
>         case BPF_FUNC_skb_ecn_set_ce:
>                 return &bpf_skb_ecn_set_ce_proto;
> +       case BPF_FUNC_tcp_gen_syncookie:
> +               return &bpf_tcp_gen_syncookie_proto;
>  #endif
>         default:
>                 return bpf_base_func_proto(func_id);
> @@ -6174,6 +6234,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_xdp_skc_lookup_tcp_proto;
>         case BPF_FUNC_tcp_check_syncookie:
>                 return &bpf_tcp_check_syncookie_proto;
> +       case BPF_FUNC_tcp_gen_syncookie:
> +               return &bpf_tcp_gen_syncookie_proto;
>  #endif
>         default:
>                 return bpf_base_func_proto(func_id);
> --
> 2.22.0.510.g264f2c817a-goog
>


-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

^ permalink raw reply

* Re: [bpf-next RFC 3/6] bpf: add bpf_tcp_gen_syncookie helper
From: Lorenz Bauer @ 2019-07-16 11:56 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Petar Penkov, Networking, bpf, davem, Alexei Starovoitov,
	Daniel Borkmann, Eric Dumazet, sdf, Petar Penkov
In-Reply-To: <b6ef24b0-0415-c67d-5a66-21f1c2530414@gmail.com>

On Tue, 16 Jul 2019 at 08:59, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > +             return -EINVAL;
> > +
> > +     if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
> > +             return -EINVAL;
> > +
> > +     if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
> > +             return -EINVAL;
> > +
> > +     if (!th->syn || th->ack || th->fin || th->rst)
> > +             return -EINVAL;
> > +
> > +     switch (sk->sk_family) {
>
> This is strange, because a dual stack listener will have sk->sk_family set to AF_INET6.
>
> What really matters here is if the packet is IPv4 or IPv6.
>
> So you need to look at iph->version instead.
>
> Then look if the socket family allows this packet to be processed
> (For example AF_INET6 sockets might prevent IPv4 packets, see sk->sk_ipv6only )

Does this apply for (the existing) tcp_check_syn_cookie as well?

-- 
Lorenz Bauer  |  Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com

^ permalink raw reply

* [PATCH bpf] bpf: fix narrower loads on s390
From: Ilya Leoshkevich @ 2019-07-16 11:59 UTC (permalink / raw)
  To: bpf, netdev; +Cc: gor, heiko.carstens, Ilya Leoshkevich

test_pkt_md_access is failing on s390, since the associated eBPF prog
returns TC_ACT_SHOT, which in turn happens because loading a part of a
struct __sk_buff field produces an incorrect result.

The problem is that when verifier emits the code to replace partial load
of a field with a full load, a shift and a bitwise AND, it assumes that
the machine is little endian.

Adjust shift count calculation to account for endianness.

Fixes: 31fd85816dbe ("bpf: permits narrower load from bpf program context fields")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 kernel/bpf/verifier.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5900cbb966b1..3f9353653558 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8616,8 +8616,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 
 		if (is_narrower_load && size < target_size) {
-			u8 shift = (off & (size_default - 1)) * 8;
-
+			u8 load_off = off & (size_default - 1);
+#ifdef __LITTLE_ENDIAN
+			u8 shift = load_off * 8;
+#else
+			u8 shift = (size_default - (load_off + size)) * 8;
+#endif
 			if (ctx_field_size <= 4) {
 				if (shift)
 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
-- 
2.21.0


^ permalink raw reply related

* [PATCH 00/14] pending doc patches for 5.3-rc
From: Mauro Carvalho Chehab @ 2019-07-16 12:10 UTC (permalink / raw)
  Cc: Mauro Carvalho Chehab, linux-scsi, esc.storagedev, linuxppc-dev,
	Jonathan Corbet, alsa-devel, kvm, linux-i2c, rcu, linux-pm,
	linux-doc, devicetree, linux-arch, linux-arm-kernel,
	linux-watchdog, x86, dri-devel, netdev, linux-crypto, linux-sh,
	linux-input, linux-pci

Those are the pending documentation patches after my pull request
for this branch:

    git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media.git tags/docs/v5.3-1

Patches 1 to 13 were already submitted, but got rebased. Patch 14
is a new fixup one.

Patches 1 and 2 weren't submitted before due to merge conflicts
that are now solved upstream;

Patch 3 fixes a series of random Documentation/* references that
are pointing to the wrong places.

Patch 4 fix a longstanding issue: every time a new book is added,
conf.py need changes, in order to allow generating a PDF file.
After the patch, conf.py will automatically recognize new books,
saving the trouble of keeping adding documents to it.

Patches 5 to 11 are due to fonts support when building translations.pdf.
The main focus is to add xeCJK support. While doing it, I discovered
some bugs at sphinx-pre-install script after running it with 7 different
distributions.

Patch 12 improves support for partial doc building. Currently, each
subdir needs to have its own conf.py, in order to support partial
doc build. After it, any Documentation subdir can be used to 
roduce html/pdf docs with:

	make SPHINXDIRS="foo bar" htmldocs
	(or pdfdocs, latexdocs, epubdocs, ...)

Patch 13 is a cleanup patch: it simply get rid of all those extra
conf.py files that  aren't needed anymore. The only extra config
file after it is this one:

	Documentation/media/conf_nitpick.py

With enables some extra optional Sphinx features.

Patch 14 adds Documentation/virtual to the main index.rst file
and add a new *.rst file that was orphaned there.

-

After this series, there's just one more patch meant to be applied
for 5.3, with is still waiting for some patches to be merged from
linux-next:

    https://git.linuxtv.org/mchehab/experimental.git/commit/?id=b1b5dc7d7bbfbbfdace2a248c6458301c6e34100


Mauro Carvalho Chehab (14):
  docs: powerpc: convert docs to ReST and rename to *.rst
  docs: power: add it to to the main documentation index
  docs: fix broken doc references due to renames
  docs: pdf: add all Documentation/*/index.rst to PDF output
  docs: conf.py: add CJK package needed by translations
  docs: conf.py: only use CJK if the font is available
  scripts/sphinx-pre-install: fix script for RHEL/CentOS
  scripts/sphinx-pre-install: don't use LaTeX with CentOS 7
  scripts/sphinx-pre-install: fix latexmk dependencies
  scripts/sphinx-pre-install: cleanup Gentoo checks
  scripts/sphinx-pre-install: seek for Noto CJK fonts for pdf output
  docs: load_config.py: avoid needing a conf.py just due to LaTeX docs
  docs: remove extra conf.py files
  docs: virtual: add it to the documentation body

 Documentation/PCI/pci-error-recovery.rst      |   5 +-
 Documentation/RCU/rculist_nulls.txt           |   2 +-
 Documentation/admin-guide/conf.py             |  10 --
 Documentation/conf.py                         |  30 +++-
 Documentation/core-api/conf.py                |  10 --
 Documentation/crypto/conf.py                  |  10 --
 Documentation/dev-tools/conf.py               |  10 --
 .../devicetree/bindings/arm/idle-states.txt   |   2 +-
 Documentation/doc-guide/conf.py               |  10 --
 Documentation/driver-api/80211/conf.py        |  10 --
 Documentation/driver-api/conf.py              |  10 --
 Documentation/driver-api/pm/conf.py           |  10 --
 Documentation/filesystems/conf.py             |  10 --
 Documentation/gpu/conf.py                     |  10 --
 Documentation/index.rst                       |   3 +
 Documentation/input/conf.py                   |  10 --
 Documentation/kernel-hacking/conf.py          |  10 --
 Documentation/locking/spinlocks.rst           |   4 +-
 Documentation/maintainer/conf.py              |  10 --
 Documentation/media/conf.py                   |  12 --
 Documentation/memory-barriers.txt             |   2 +-
 Documentation/networking/conf.py              |  10 --
 Documentation/power/index.rst                 |   2 +-
 .../{bootwrapper.txt => bootwrapper.rst}      |  28 +++-
 .../{cpu_families.txt => cpu_families.rst}    |  23 +--
 .../{cpu_features.txt => cpu_features.rst}    |   6 +-
 Documentation/powerpc/{cxl.txt => cxl.rst}    |  46 ++++--
 .../powerpc/{cxlflash.txt => cxlflash.rst}    |  10 +-
 .../{DAWR-POWER9.txt => dawr-power9.rst}      |  15 +-
 Documentation/powerpc/{dscr.txt => dscr.rst}  |  18 +-
 ...ecovery.txt => eeh-pci-error-recovery.rst} | 108 ++++++------
 ...ed-dump.txt => firmware-assisted-dump.rst} | 117 +++++++------
 Documentation/powerpc/{hvcs.txt => hvcs.rst}  | 108 ++++++------
 Documentation/powerpc/index.rst               |  34 ++++
 Documentation/powerpc/isa-versions.rst        |  15 +-
 .../powerpc/{mpc52xx.txt => mpc52xx.rst}      |  12 +-
 ...nv.txt => pci_iov_resource_on_powernv.rst} |  15 +-
 .../powerpc/{pmu-ebb.txt => pmu-ebb.rst}      |   1 +
 Documentation/powerpc/ptrace.rst              | 156 ++++++++++++++++++
 Documentation/powerpc/ptrace.txt              | 151 -----------------
 .../{qe_firmware.txt => qe_firmware.rst}      |  37 +++--
 .../{syscall64-abi.txt => syscall64-abi.rst}  |  29 ++--
 ...al_memory.txt => transactional_memory.rst} |  45 ++---
 Documentation/process/conf.py                 |  10 --
 Documentation/sh/conf.py                      |  10 --
 Documentation/sound/conf.py                   |  10 --
 Documentation/sphinx/load_config.py           |  27 ++-
 .../translations/ko_KR/memory-barriers.txt    |   2 +-
 Documentation/userspace-api/conf.py           |  10 --
 Documentation/virtual/kvm/index.rst           |   1 +
 Documentation/vm/conf.py                      |  10 --
 Documentation/watchdog/hpwdt.rst              |   2 +-
 Documentation/x86/conf.py                     |  10 --
 MAINTAINERS                                   |  14 +-
 arch/powerpc/kernel/exceptions-64s.S          |   2 +-
 drivers/gpu/drm/drm_modes.c                   |   2 +-
 drivers/i2c/busses/i2c-nvidia-gpu.c           |   2 +-
 drivers/scsi/hpsa.c                           |   4 +-
 drivers/soc/fsl/qe/qe.c                       |   2 +-
 drivers/tty/hvc/hvcs.c                        |   2 +-
 include/soc/fsl/qe/qe.h                       |   2 +-
 scripts/sphinx-pre-install                    | 118 ++++++++++---
 62 files changed, 738 insertions(+), 678 deletions(-)
 delete mode 100644 Documentation/admin-guide/conf.py
 delete mode 100644 Documentation/core-api/conf.py
 delete mode 100644 Documentation/crypto/conf.py
 delete mode 100644 Documentation/dev-tools/conf.py
 delete mode 100644 Documentation/doc-guide/conf.py
 delete mode 100644 Documentation/driver-api/80211/conf.py
 delete mode 100644 Documentation/driver-api/conf.py
 delete mode 100644 Documentation/driver-api/pm/conf.py
 delete mode 100644 Documentation/filesystems/conf.py
 delete mode 100644 Documentation/gpu/conf.py
 delete mode 100644 Documentation/input/conf.py
 delete mode 100644 Documentation/kernel-hacking/conf.py
 delete mode 100644 Documentation/maintainer/conf.py
 delete mode 100644 Documentation/media/conf.py
 delete mode 100644 Documentation/networking/conf.py
 rename Documentation/powerpc/{bootwrapper.txt => bootwrapper.rst} (93%)
 rename Documentation/powerpc/{cpu_families.txt => cpu_families.rst} (95%)
 rename Documentation/powerpc/{cpu_features.txt => cpu_features.rst} (97%)
 rename Documentation/powerpc/{cxl.txt => cxl.rst} (95%)
 rename Documentation/powerpc/{cxlflash.txt => cxlflash.rst} (98%)
 rename Documentation/powerpc/{DAWR-POWER9.txt => dawr-power9.rst} (95%)
 rename Documentation/powerpc/{dscr.txt => dscr.rst} (91%)
 rename Documentation/powerpc/{eeh-pci-error-recovery.txt => eeh-pci-error-recovery.rst} (82%)
 rename Documentation/powerpc/{firmware-assisted-dump.txt => firmware-assisted-dump.rst} (80%)
 rename Documentation/powerpc/{hvcs.txt => hvcs.rst} (91%)
 create mode 100644 Documentation/powerpc/index.rst
 rename Documentation/powerpc/{mpc52xx.txt => mpc52xx.rst} (91%)
 rename Documentation/powerpc/{pci_iov_resource_on_powernv.txt => pci_iov_resource_on_powernv.rst} (97%)
 rename Documentation/powerpc/{pmu-ebb.txt => pmu-ebb.rst} (99%)
 create mode 100644 Documentation/powerpc/ptrace.rst
 delete mode 100644 Documentation/powerpc/ptrace.txt
 rename Documentation/powerpc/{qe_firmware.txt => qe_firmware.rst} (95%)
 rename Documentation/powerpc/{syscall64-abi.txt => syscall64-abi.rst} (82%)
 rename Documentation/powerpc/{transactional_memory.txt => transactional_memory.rst} (93%)
 delete mode 100644 Documentation/process/conf.py
 delete mode 100644 Documentation/sh/conf.py
 delete mode 100644 Documentation/sound/conf.py
 delete mode 100644 Documentation/userspace-api/conf.py
 delete mode 100644 Documentation/vm/conf.py
 delete mode 100644 Documentation/x86/conf.py

-- 
2.21.0



^ permalink raw reply

* [PATCH 13/14] docs: remove extra conf.py files
From: Mauro Carvalho Chehab @ 2019-07-16 12:10 UTC (permalink / raw)
  Cc: Mauro Carvalho Chehab, Jonathan Corbet, Herbert Xu,
	David S. Miller, Maarten Lankhorst, Maxime Ripard, Sean Paul,
	David Airlie, Daniel Vetter, Dmitry Torokhov, Yoshinori Sato,
	Rich Felker, Jaroslav Kysela, Takashi Iwai, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, H. Peter Anvin, x86, linux-doc,
	linux-crypto, dri-devel, linux-input, netdev, linux-sh,
	alsa-devel
In-Reply-To: <cover.1563277838.git.mchehab+samsung@kernel.org>

Now that the latex_documents are handled automatically, we can
remove those extra conf.py files.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/admin-guide/conf.py      | 10 ----------
 Documentation/core-api/conf.py         | 10 ----------
 Documentation/crypto/conf.py           | 10 ----------
 Documentation/dev-tools/conf.py        | 10 ----------
 Documentation/doc-guide/conf.py        | 10 ----------
 Documentation/driver-api/80211/conf.py | 10 ----------
 Documentation/driver-api/conf.py       | 10 ----------
 Documentation/driver-api/pm/conf.py    | 10 ----------
 Documentation/filesystems/conf.py      | 10 ----------
 Documentation/gpu/conf.py              | 10 ----------
 Documentation/input/conf.py            | 10 ----------
 Documentation/kernel-hacking/conf.py   | 10 ----------
 Documentation/maintainer/conf.py       | 10 ----------
 Documentation/media/conf.py            | 12 ------------
 Documentation/networking/conf.py       | 10 ----------
 Documentation/process/conf.py          | 10 ----------
 Documentation/sh/conf.py               | 10 ----------
 Documentation/sound/conf.py            | 10 ----------
 Documentation/userspace-api/conf.py    | 10 ----------
 Documentation/vm/conf.py               | 10 ----------
 Documentation/x86/conf.py              | 10 ----------
 21 files changed, 212 deletions(-)
 delete mode 100644 Documentation/admin-guide/conf.py
 delete mode 100644 Documentation/core-api/conf.py
 delete mode 100644 Documentation/crypto/conf.py
 delete mode 100644 Documentation/dev-tools/conf.py
 delete mode 100644 Documentation/doc-guide/conf.py
 delete mode 100644 Documentation/driver-api/80211/conf.py
 delete mode 100644 Documentation/driver-api/conf.py
 delete mode 100644 Documentation/driver-api/pm/conf.py
 delete mode 100644 Documentation/filesystems/conf.py
 delete mode 100644 Documentation/gpu/conf.py
 delete mode 100644 Documentation/input/conf.py
 delete mode 100644 Documentation/kernel-hacking/conf.py
 delete mode 100644 Documentation/maintainer/conf.py
 delete mode 100644 Documentation/media/conf.py
 delete mode 100644 Documentation/networking/conf.py
 delete mode 100644 Documentation/process/conf.py
 delete mode 100644 Documentation/sh/conf.py
 delete mode 100644 Documentation/sound/conf.py
 delete mode 100644 Documentation/userspace-api/conf.py
 delete mode 100644 Documentation/vm/conf.py
 delete mode 100644 Documentation/x86/conf.py

diff --git a/Documentation/admin-guide/conf.py b/Documentation/admin-guide/conf.py
deleted file mode 100644
index 86f738953799..000000000000
--- a/Documentation/admin-guide/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel User Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'linux-user.tex', 'Linux Kernel User Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/core-api/conf.py b/Documentation/core-api/conf.py
deleted file mode 100644
index db1f7659f3da..000000000000
--- a/Documentation/core-api/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Core-API Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'core-api.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/crypto/conf.py b/Documentation/crypto/conf.py
deleted file mode 100644
index 4335d251ddf3..000000000000
--- a/Documentation/crypto/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Crypto API'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'crypto-api.tex', 'Linux Kernel Crypto API manual',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/dev-tools/conf.py b/Documentation/dev-tools/conf.py
deleted file mode 100644
index 7faafa3f7888..000000000000
--- a/Documentation/dev-tools/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Development tools for the kernel"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'dev-tools.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/doc-guide/conf.py b/Documentation/doc-guide/conf.py
deleted file mode 100644
index fd3731182d5a..000000000000
--- a/Documentation/doc-guide/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Documentation Guide'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'kernel-doc-guide.tex', 'Linux Kernel Documentation Guide',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/driver-api/80211/conf.py b/Documentation/driver-api/80211/conf.py
deleted file mode 100644
index 4424b4b0b9c3..000000000000
--- a/Documentation/driver-api/80211/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux 802.11 Driver Developer's Guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', '80211.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/driver-api/conf.py b/Documentation/driver-api/conf.py
deleted file mode 100644
index 202726d20088..000000000000
--- a/Documentation/driver-api/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux driver implementer's API guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'driver-api.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/driver-api/pm/conf.py b/Documentation/driver-api/pm/conf.py
deleted file mode 100644
index a89fac11272f..000000000000
--- a/Documentation/driver-api/pm/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Device Power Management"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'pm.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/filesystems/conf.py b/Documentation/filesystems/conf.py
deleted file mode 100644
index ea44172af5c4..000000000000
--- a/Documentation/filesystems/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Filesystems API"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'filesystems.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/gpu/conf.py b/Documentation/gpu/conf.py
deleted file mode 100644
index 1757b040fb32..000000000000
--- a/Documentation/gpu/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux GPU Driver Developer's Guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'gpu.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/input/conf.py b/Documentation/input/conf.py
deleted file mode 100644
index d2352fdc92ed..000000000000
--- a/Documentation/input/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux input driver subsystem"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'linux-input.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/kernel-hacking/conf.py b/Documentation/kernel-hacking/conf.py
deleted file mode 100644
index 3d8acf0f33ad..000000000000
--- a/Documentation/kernel-hacking/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Kernel Hacking Guides"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'kernel-hacking.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/maintainer/conf.py b/Documentation/maintainer/conf.py
deleted file mode 100644
index 81e9eb7a7884..000000000000
--- a/Documentation/maintainer/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Development Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'maintainer.tex', 'Linux Kernel Development Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/media/conf.py b/Documentation/media/conf.py
deleted file mode 100644
index 1f194fcd2cae..000000000000
--- a/Documentation/media/conf.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-# SPDX-License-Identifier: GPL-2.0
-
-project = 'Linux Media Subsystem Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'media.tex', 'Linux Media Subsystem Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/networking/conf.py b/Documentation/networking/conf.py
deleted file mode 100644
index 40f69e67a883..000000000000
--- a/Documentation/networking/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Networking Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'networking.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/process/conf.py b/Documentation/process/conf.py
deleted file mode 100644
index 1b01a80ad9ce..000000000000
--- a/Documentation/process/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Development Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'process.tex', 'Linux Kernel Development Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/sh/conf.py b/Documentation/sh/conf.py
deleted file mode 100644
index 1eb684a13ac8..000000000000
--- a/Documentation/sh/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "SuperH architecture implementation manual"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'sh.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/sound/conf.py b/Documentation/sound/conf.py
deleted file mode 100644
index 3f1fc5e74e7b..000000000000
--- a/Documentation/sound/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Sound Subsystem Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'sound.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/userspace-api/conf.py b/Documentation/userspace-api/conf.py
deleted file mode 100644
index 2eaf59f844e5..000000000000
--- a/Documentation/userspace-api/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux kernel user-space API guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'userspace-api.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/vm/conf.py b/Documentation/vm/conf.py
deleted file mode 100644
index 3b0b601af558..000000000000
--- a/Documentation/vm/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Memory Management Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'memory-management.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py
deleted file mode 100644
index 33c5c3142e20..000000000000
--- a/Documentation/x86/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "X86 architecture specific documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'x86.tex', project,
-     'The kernel development community', 'manual'),
-]
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH iproute2-rc 8/8] rdma: Document counter statistic
From: Leon Romanovsky @ 2019-07-16 12:19 UTC (permalink / raw)
  To: Gal Pressman
  Cc: Stephen Hemminger, netdev, David Ahern, Mark Zhang,
	RDMA mailing list
In-Reply-To: <92db561d-e89c-0e09-ef2e-9eb9535d504f@amazon.com>

On Tue, Jul 16, 2019 at 11:19:26AM +0300, Gal Pressman wrote:
> On 10/07/2019 10:24, Leon Romanovsky wrote:
> > +.SH "EXAMPLES"
> > +.PP
> > +rdma statistic show
> > +.RS 4
> > +Shows the state of the default counter of all RDMA devices on the system.
> > +.RE
> > +.PP
> > +rdma statistic show link mlx5_2/1
> > +.RS 4
> > +Shows the state of the default counter of specified RDMA port
> > +.RE
> > +.PP
> > +rdma statistic qp show
> > +.RS 4
> > +Shows the state of all qp counters of all RDMA devices on the system.
> > +.RE
> > +.PP
> > +rdma statistic qp show link mlx5_2/1
> > +.RS 4
> > +Shows the state of all qp counters of specified RDMA port.
> > +.RE
> > +.PP
> > +rdma statistic qp show link mlx5_2 pid 30489
> > +.RS 4
> > +Shows the state of all qp counters of specified RDMA port and belonging to pid 30489
> > +.RE
> > +.PP
> > +rdma statistic qp mode
> > +.RS 4
> > +List current counter mode on all deivces
>
> "deivces" -> "devices".

Thanks,

Stephen,

Can you please fix this typo while you are applying the series?

Thanks

^ permalink raw reply

* Re: [PATCH net-next] net: openvswitch: do not update max_headroom if new headroom is equal to old headroom
From: Taehee Yoo @ 2019-07-16 12:28 UTC (permalink / raw)
  To: Pravin Shelar; +Cc: David S. Miller, Linux Kernel Network Developers, ovs dev
In-Reply-To: <CAOrHB_DXSo37ZttVcW3fEuvB9_-2VtzZgf0JNq1ZhSfJJHSa7Q@mail.gmail.com>

On Tue, 16 Jul 2019 at 03:15, Pravin Shelar <pshelar@ovn.org> wrote:
>

Hi Pravin,

> On Fri, Jul 5, 2019 at 9:08 AM Taehee Yoo <ap420073@gmail.com> wrote:
> >
> > When a vport is deleted, the maximum headroom size would be changed.
> > If the vport which has the largest headroom is deleted,
> > the new max_headroom would be set.
> > But, if the new headroom size is equal to the old headroom size,
> > updating routine is unnecessary.
> >
> > Signed-off-by: Taehee Yoo <ap420073@gmail.com>
>
> Sorry for late reply. This patch looks good to me too.
>
> I am curious about reason to avoid adjustment to headroom. why are you
> trying to avoid unnecessary changes to headroom.
>

Thank you for the review!
The intention of this patch is to remove unnecessary overhead.
There is no other reason.

Thanks!

> Thanks,
> Pravin.

^ permalink raw reply

* [PATCH bpf] selftests/bpf: fix perf_buffer on s390
From: Ilya Leoshkevich @ 2019-07-16 12:58 UTC (permalink / raw)
  To: bpf, netdev; +Cc: gor, heiko.carstens, Ilya Leoshkevich

perf_buffer test fails for exactly the same reason test_attach_probe
used to fail: different nanosleep syscall kprobe name.

Reuse the test_attach_probe fix.

Fixes: ee5cf82ce04a ("selftests/bpf: test perf buffer API")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 .../testing/selftests/bpf/prog_tests/attach_probe.c  | 12 ++----------
 tools/testing/selftests/bpf/prog_tests/perf_buffer.c |  8 +-------
 tools/testing/selftests/bpf/test_progs.h             |  8 ++++++++
 3 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 47af4afc5013..5ecc267d98b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -21,14 +21,6 @@ ssize_t get_base_addr() {
 	return -EINVAL;
 }
 
-#ifdef __x86_64__
-#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
-#elif defined(__s390x__)
-#define SYS_KPROBE_NAME "__s390x_sys_nanosleep"
-#else
-#define SYS_KPROBE_NAME "sys_nanosleep"
-#endif
-
 void test_attach_probe(void)
 {
 	const char *kprobe_name = "kprobe/sys_nanosleep";
@@ -86,7 +78,7 @@ void test_attach_probe(void)
 
 	kprobe_link = bpf_program__attach_kprobe(kprobe_prog,
 						 false /* retprobe */,
-						 SYS_KPROBE_NAME);
+						 SYS_NANOSLEEP_KPROBE_NAME);
 	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
 		  "err %ld\n", PTR_ERR(kprobe_link))) {
 		kprobe_link = NULL;
@@ -94,7 +86,7 @@ void test_attach_probe(void)
 	}
 	kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog,
 						    true /* retprobe */,
-						    SYS_KPROBE_NAME);
+						    SYS_NANOSLEEP_KPROBE_NAME);
 	if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
 		  "err %ld\n", PTR_ERR(kretprobe_link))) {
 		kretprobe_link = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 3f1ef95865ff..3003fddc0613 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -5,12 +5,6 @@
 #include <sys/socket.h>
 #include <test_progs.h>
 
-#ifdef __x86_64__
-#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
-#else
-#define SYS_KPROBE_NAME "sys_nanosleep"
-#endif
-
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
 	int cpu_data = *(int *)data, duration = 0;
@@ -56,7 +50,7 @@ void test_perf_buffer(void)
 
 	/* attach kprobe */
 	link = bpf_program__attach_kprobe(prog, false /* retprobe */,
-					  SYS_KPROBE_NAME);
+					  SYS_NANOSLEEP_KPROBE_NAME);
 	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
 		goto out_close;
 
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f095e1d4c657..49e0f7d85643 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -92,3 +92,11 @@ int compare_map_keys(int map1_fd, int map2_fd);
 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
 int extract_build_id(char *build_id, size_t size);
 void *spin_lock_thread(void *arg);
+
+#ifdef __x86_64__
+#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
+#elif defined(__s390x__)
+#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
+#else
+#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
+#endif
-- 
2.21.0


^ permalink raw reply related

* Re: [EXT] [PATCH v1] net: fec: optionally reset PHY via a reset-controller
From: Sven Van Asbroeck @ 2019-07-16 13:19 UTC (permalink / raw)
  To: Andy Duan
  Cc: David S . Miller, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <VI1PR0402MB36009E99D7361583702B84DDFFCE0@VI1PR0402MB3600.eurprd04.prod.outlook.com>

Hi Andy,

On Mon, Jul 15, 2019 at 10:02 PM Andy Duan <fugang.duan@nxp.com> wrote:
>
> the phylib already can handle mii bus reset and phy device reset

That's a great suggestion, thank you !! I completely overlooked that code.
What will happen to the legacy phy reset code in fec? Are there many users left?

^ permalink raw reply

* [PATCH] libertas_tf: Use correct channel range in lbtf_geo_init
From: YueHaibing @ 2019-07-16 13:25 UTC (permalink / raw)
  To: kvalo, davem, lkundrak, derosier
  Cc: linux-kernel, netdev, linux-wireless, YueHaibing

It seems we should use 'range' instead of 'priv->range'
in lbtf_geo_init(), because 'range' is the corret one
related to current regioncode.

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: 691cdb49388b ("libertas_tf: command helper functions for libertas_tf")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 drivers/net/wireless/marvell/libertas_tf/cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/libertas_tf/cmd.c b/drivers/net/wireless/marvell/libertas_tf/cmd.c
index 1eacca0..a333172 100644
--- a/drivers/net/wireless/marvell/libertas_tf/cmd.c
+++ b/drivers/net/wireless/marvell/libertas_tf/cmd.c
@@ -65,7 +65,7 @@ static void lbtf_geo_init(struct lbtf_private *priv)
 			break;
 		}
 
-	for (ch = priv->range.start; ch < priv->range.end; ch++)
+	for (ch = range.start; ch < range.end; ch++)
 		priv->channels[CHAN_TO_IDX(ch)].flags = 0;
 }
 
-- 
2.7.4



^ permalink raw reply related

* Re: [PATCH] libertas_tf: Use correct channel range in lbtf_geo_init
From: Yuehaibing @ 2019-07-16 13:38 UTC (permalink / raw)
  To: kvalo, davem, lkundrak, derosier; +Cc: linux-kernel, netdev, linux-wireless
In-Reply-To: <20190716132534.11256-1-yuehaibing@huawei.com>

Pls ignore this, sorry

On 2019/7/16 21:25, YueHaibing wrote:
> It seems we should use 'range' instead of 'priv->range'
> in lbtf_geo_init(), because 'range' is the corret one
> related to current regioncode.
> 
> Reported-by: Hulk Robot <hulkci@huawei.com>
> Fixes: 691cdb49388b ("libertas_tf: command helper functions for libertas_tf")
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> ---
>  drivers/net/wireless/marvell/libertas_tf/cmd.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/wireless/marvell/libertas_tf/cmd.c b/drivers/net/wireless/marvell/libertas_tf/cmd.c
> index 1eacca0..a333172 100644
> --- a/drivers/net/wireless/marvell/libertas_tf/cmd.c
> +++ b/drivers/net/wireless/marvell/libertas_tf/cmd.c
> @@ -65,7 +65,7 @@ static void lbtf_geo_init(struct lbtf_private *priv)
>  			break;
>  		}
>  
> -	for (ch = priv->range.start; ch < priv->range.end; ch++)
> +	for (ch = range.start; ch < range.end; ch++)
>  		priv->channels[CHAN_TO_IDX(ch)].flags = 0;
>  }
>  
> 


^ permalink raw reply

* Re: IPv6 L2TP issues related to 93531c67
From: Paul Donohue @ 2019-07-16 13:56 UTC (permalink / raw)
  To: David Ahern; +Cc: David S. Miller, Alexey Kuznetsov, Hideaki YOSHIFUJI, netdev
In-Reply-To: <d6db74f5-5add-7500-1b7a-fa62302a455f@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2193 bytes --]

On Mon, Jul 15, 2019 at 12:55:48PM -0600, David Ahern wrote:
> As an FYI, gmail thinks your emails are spam.
Ugh.  Thanks for letting me know.  I'll look into it.

> On 7/15/19 10:18 AM, Paul Donohue wrote:
> > Reverting commit 93531c6743157d7e8c5792f8ed1a57641149d62c (identified by bisection) fixes this issue.
> That commit can not be reverted. It is a foundational piece for a lot of
> other changes. Did you mean the commit before it works and this commit
> fails?
Sorry, yes, I meant the commit before it works, and this one fails.  I did not try reverting this commit on a more recent kernel.

> > It is not obvious to me how commit 93531c6743157d7e8c5792f8ed1a57641149d62c causes this issue, or how it should be fixed.  Could someone take a look and point me in the right direction for further troubleshooting?
> Let's get a complete example that demonstrates the problem, and I can go
> from there. Can you take the attached script and update it so that it
> reflects the problem you are reporting? That script works on latest
> kernel as well as 4.14.133. It uses network namespaces for 2 hosts with
> a router between them.
> 
> Also, check the return of the fib lookups using:
>     perf record -e fib6:* -a
>     <run test, ctrl-c on the record>
>     perf script
> 
> Checkout the fib lookup parameters and result. Do they look correct to
> you for your setup?

Unfortunately, I have a fairly complicated setup, so it took me a while to figure out which pieces were relevant ... But I think I've finally got it.  The missing piece was IPsec.

After establishing an IPsec tunnel to carry the L2TP traffic, the first L2TP packet through the IPsec tunnel permanently breaks the associated L2TP tunnel.  Tearing down the IPsec tunnel does not restore functionality of the L2TP tunnel - I have to tear down and re-create the L2TP tunnel before it will work again.  In my real-world use case, I have two L2TP tunnels running over the same IPsec tunnel, and the first L2TP tunnel to send a packet after IPsec is established gets permanently broken, while the other L2TP tunnel works fine.

I've attached a modified version of the script which demonstrates this issue.

Thank you!
-Paul

[-- Attachment #2: l2tp.sh --]
[-- Type: application/x-sh, Size: 4977 bytes --]

^ permalink raw reply

* Re: [PATCH v3 0/3] kernel/notifier.c: avoid duplicate registration
From: Xiaoming Ni @ 2019-07-16 14:07 UTC (permalink / raw)
  To: Vasily Averin, gregkh@linuxfoundation.org
  Cc: adobriyan@gmail.com, akpm@linux-foundation.org,
	anna.schumaker@netapp.com, arjan@linux.intel.com,
	bfields@fieldses.org, chuck.lever@oracle.com, davem@davemloft.net,
	jlayton@kernel.org, luto@kernel.org, mingo@kernel.org,
	Nadia.Derbey@bull.net, paulmck@linux.vnet.ibm.com,
	semen.protsenko@linaro.org, stable@kernel.org,
	stern@rowland.harvard.edu, tglx@linutronix.de,
	torvalds@linux-foundation.org, trond.myklebust@hammerspace.com,
	viresh.kumar@linaro.org, Huangjianhui (Alex), Dailei,
	linux-kernel@vger.kernel.org, linux-nfs@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <e4753c70-de7c-063a-dc49-0edc7520ccd2@virtuozzo.com>



On 2019/7/16 18:20, Vasily Averin wrote:
> On 7/16/19 5:00 AM, Xiaoming Ni wrote:
>> On 2019/7/15 13:38, Vasily Averin wrote:
>>> On 7/14/19 5:45 AM, Xiaoming Ni wrote:
>>>> On 2019/7/12 22:07, gregkh@linuxfoundation.org wrote:
>>>>> On Fri, Jul 12, 2019 at 09:11:57PM +0800, Xiaoming Ni wrote:
>>>>>> On 2019/7/11 21:57, Vasily Averin wrote:
>>>>>>> On 7/11/19 4:55 AM, Nixiaoming wrote:
>>>>>>>> On Wed, July 10, 2019 1:49 PM Vasily Averin wrote:
>>>>>>>>> On 7/10/19 6:09 AM, Xiaoming Ni wrote:
>>>>>>>>>> Registering the same notifier to a hook repeatedly can cause the hook
>>>>>>>>>> list to form a ring or lose other members of the list.
>>>>>>>>>
>>>>>>>>> I think is not enough to _prevent_ 2nd register attempt,
>>>>>>>>> it's enough to detect just attempt and generate warning to mark host in bad state.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Duplicate registration is prevented in my patch, not just "mark host in bad state"
>>>>>>>>
>>>>>>>> Duplicate registration is checked and exited in notifier_chain_cond_register()
>>>>>>>>
>>>>>>>> Duplicate registration was checked in notifier_chain_register() but only 
>>>>>>>> the alarm was triggered without exiting. added by commit 831246570d34692e 
>>>>>>>> ("kernel/notifier.c: double register detection")
>>>>>>>>
>>>>>>>> My patch is like a combination of 831246570d34692e and notifier_chain_cond_register(),
>>>>>>>>  which triggers an alarm and exits when a duplicate registration is detected.
>>>>>>>>
>>>>>>>>> Unexpected 2nd register of the same hook most likely will lead to 2nd unregister,
>>>>>>>>> and it can lead to host crash in any time: 
>>>>>>>>> you can unregister notifier on first attempt it can be too early, it can be still in use.
>>>>>>>>> on the other hand you can never call 2nd unregister at all.
>>>>>>>>
>>>>>>>> Since the member was not added to the linked list at the time of the second registration, 
>>>>>>>> no linked list ring was formed. 
>>>>>>>> The member is released on the first unregistration and -ENOENT on the second unregistration.
>>>>>>>> After patching, the fault has been alleviated
>>>>>>>
>>>>>>> You are wrong here.
>>>>>>> 2nd notifier's registration is a pure bug, this should never happen.
>>>>>>> If you know the way to reproduce this situation -- you need to fix it. 
>>>>>>>
>>>>>>> 2nd registration can happen in 2 cases:
>>>>>>> 1) missed rollback, when someone forget to call unregister after successfull registration, 
>>>>>>> and then tried to call register again. It can lead to crash for example when according module will be unloaded.
>>>>>>> 2) some subsystem is registered twice, for example from  different namespaces.
>>>>>>> in this case unregister called during sybsystem cleanup in first namespace will incorrectly remove notifier used 
>>>>>>> in second namespace, it also can lead to unexpacted behaviour.
>>>>>>>
>>>>>> So in these two cases, is it more reasonable to trigger BUG() directly when checking for duplicate registration ?
>>>>>> But why does current notifier_chain_register() just trigger WARN() without exiting ?
>>>>>> notifier_chain_cond_register() direct exit without triggering WARN() ?
>>>>>
>>>>> It should recover from this, if it can be detected.  The main point is
>>>>> that not all apis have to be this "robust" when used within the kernel
>>>>> as we do allow for the callers to know what they are doing :)
>>>>>
>>>> In the notifier_chain_register(), the condition ( (*nl) == n) is the same registration of the same hook.
>>>>  We can intercept this situation and avoid forming a linked list ring to make the API more rob
>>>
>>> Once again -- yes, you CAN prevent list corruption, but you CANNOT recover the host and return it back to safe state.
>>> If double register event was detected -- it means you have bug in kernel.
>>>
>>> Yes, you can add BUG here and crash the host immediately, but I prefer to use warning in such situations.
>>>
>>>>> If this does not cause any additional problems or slow downs, it's
>>>>> probably fine to add.
>>>>>
>>>> Notifier_chain_register() is not a system hotspot function.
>>>> At the same time, there is already a WARN_ONCE judgment. There is no new judgment in the new patch.
>>>> It only changes the processing under the condition of (*nl) == n, which will not cause performance problems.
>>>> At the same time, avoiding the formation of a link ring can make the system more robust.
>>>
>>> I disagree, 
>>> yes, node will have correct list, but anyway node will work wrong and can crash the host in any time.
>>
>> Sorry, my description is not accurate.
>>
>> My patch feature does not prevent users from repeatedly registering hooks.
>> But avoiding the chain ring caused by the user repeatedly registering the hook
>>
>> There are no modules for duplicate registration hooks in the current system.
>> But considering that not all modules are in the kernel source tree,
>> In order to improve the robustness of the kernel API, we should avoid the linked list ring caused by repeated registration.
>> Or in order to improve the efficiency of problem location, when the duplicate registration is checked, the system crashes directly.
> 
> Detect of duplicate registration means an unrecoverable error,
> from this point of view it makes sense to replace WARN_ONCE by BUG_ON.
>  
>> On the other hand, the difference between notifier_chain_register() and notifier_chain_cond_register() for duplicate registrations is confusing:
>> Blocking the formation of the linked list ring in notifier_chain_cond_register()
>> There is no interception of the linked list ring in notifier_chain_register(), just an alarm.
>> Give me the illusion: Isn't notifier_chain_register() allowed to create a linked list ring?
> 
> I'm not sure that I understood your question correctly but will try to answer.
> As far as I see all callers of notifier_chain_cond_register checks return value, expect possible failure and handle it somehow.
> On the other hand callers of notifier_chain_register() in many cases do not check return value and always expect success.
> The goal of original WARN_ONCE -- to detect possible misuse of notifiers and it seems for me it correctly handles this task.
>
Notifier_chain_cond_register() has only one return value: 0
At the same time, it is only called by blocking_notifier_chain_cond_register().
In the function comment of blocking_notifier_chain_cond_register there is " Currently always returns zero."
Therefore, the user cannot check whether the hook has duplicate registration or other errors by checking the return value.

If the interceptor list ring is added to notifier_chain_register(), notifier_chain_register()
 And notifier_chain_cond_register() becomes redundant code, we can delete one of them

> 
> .
> 


^ permalink raw reply

* [PATCH v2] libertas_tf: Use correct channel range in lbtf_geo_init
From: YueHaibing @ 2019-07-16 14:42 UTC (permalink / raw)
  To: kvalo, davem, lkundrak, derosier
  Cc: linux-kernel, netdev, linux-wireless, YueHaibing

It seems we should use 'range' instead of 'priv->range'
in lbtf_geo_init(), because 'range' is the corret one
related to current regioncode.

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: 691cdb49388b ("libertas_tf: command helper functions for libertas_tf")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
v2: fix compile error
---
 drivers/net/wireless/marvell/libertas_tf/cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/libertas_tf/cmd.c b/drivers/net/wireless/marvell/libertas_tf/cmd.c
index 1eacca0..a333172 100644
--- a/drivers/net/wireless/marvell/libertas_tf/cmd.c
+++ b/drivers/net/wireless/marvell/libertas_tf/cmd.c
@@ -65,7 +65,7 @@ static void lbtf_geo_init(struct lbtf_private *priv)
 			break;
 		}
 
-	for (ch = priv->range.start; ch < priv->range.end; ch++)
+	for (ch = range->start; ch < range->end; ch++)
 		priv->channels[CHAN_TO_IDX(ch)].flags = 0;
 }
 
-- 
2.7.4



^ permalink raw reply related

* [PATCH] skbuff: fix compilation warnings in skb_dump()
From: Qian Cai @ 2019-07-16 14:54 UTC (permalink / raw)
  To: davem; +Cc: willemb, clang-built-linux, netdev, linux-kernel, Qian Cai

The commit 6413139dfc64 ("skbuff: increase verbosity when dumping skb
data") introduced a few compilation warnings.

net/core/skbuff.c:766:32: warning: format specifies type 'unsigned
short' but the argument has type 'unsigned int' [-Wformat]
                       level, sk->sk_family, sk->sk_type,
sk->sk_protocol);
                                             ^~~~~~~~~~~
net/core/skbuff.c:766:45: warning: format specifies type 'unsigned
short' but the argument has type 'unsigned int' [-Wformat]
                       level, sk->sk_family, sk->sk_type,
sk->sk_protocol);
^~~~~~~~~~~~~~~

Fix them by using the proper types, and also fix some checkpatch
warnings by using pr_info().

WARNING: printk() should include KERN_<LEVEL> facility level
+		printk("%ssk family=%hu type=%u proto=%u\n",

Fixes: 6413139dfc64 ("skbuff: increase verbosity when dumping skb data")
Signed-off-by: Qian Cai <cai@lca.pw>
---
 net/core/skbuff.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6f1e31f674a3..fa1e78f7bb96 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -740,30 +740,30 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
 	has_mac = skb_mac_header_was_set(skb);
 	has_trans = skb_transport_header_was_set(skb);
 
-	printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
-	       "mac=(%d,%d) net=(%d,%d) trans=%d\n"
-	       "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
-	       "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
-	       "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
-	       level, skb->len, headroom, skb_headlen(skb), tailroom,
-	       has_mac ? skb->mac_header : -1,
-	       has_mac ? skb_mac_header_len(skb) : -1,
-	       skb->network_header,
-	       has_trans ? skb_network_header_len(skb) : -1,
-	       has_trans ? skb->transport_header : -1,
-	       sh->tx_flags, sh->nr_frags,
-	       sh->gso_size, sh->gso_type, sh->gso_segs,
-	       skb->csum, skb->ip_summed, skb->csum_complete_sw,
-	       skb->csum_valid, skb->csum_level,
-	       skb->hash, skb->sw_hash, skb->l4_hash,
-	       ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
+	pr_info("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
+		"mac=(%d,%d) net=(%d,%d) trans=%d\n"
+		"shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
+		"csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
+		"hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
+		level, skb->len, headroom, skb_headlen(skb), tailroom,
+		has_mac ? skb->mac_header : -1,
+		has_mac ? skb_mac_header_len(skb) : -1,
+		skb->network_header,
+		has_trans ? skb_network_header_len(skb) : -1,
+		has_trans ? skb->transport_header : -1,
+		sh->tx_flags, sh->nr_frags,
+		sh->gso_size, sh->gso_type, sh->gso_segs,
+		skb->csum, skb->ip_summed, skb->csum_complete_sw,
+		skb->csum_valid, skb->csum_level,
+		skb->hash, skb->sw_hash, skb->l4_hash,
+		ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
 
 	if (dev)
-		printk("%sdev name=%s feat=0x%pNF\n",
-		       level, dev->name, &dev->features);
+		pr_info("%sdev name=%s feat=0x%pNF\n",
+			level, dev->name, &dev->features);
 	if (sk)
-		printk("%ssk family=%hu type=%hu proto=%hu\n",
-		       level, sk->sk_family, sk->sk_type, sk->sk_protocol);
+		pr_info("%ssk family=%hu type=%u proto=%u\n",
+			level, sk->sk_family, sk->sk_type, sk->sk_protocol);
 
 	if (full_pkt && headroom)
 		print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
@@ -801,7 +801,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
 	}
 
 	if (full_pkt && skb_has_frag_list(skb)) {
-		printk("skb fraglist:\n");
+		pr_info("skb fraglist:\n");
 		skb_walk_frags(skb, list_skb)
 			skb_dump(level, list_skb, true);
 	}
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH bpf v2] selftests/bpf: skip nmi test when perf hw events are disabled
From: Andrii Nakryiko @ 2019-07-16 14:57 UTC (permalink / raw)
  To: Ilya Leoshkevich; +Cc: bpf, Networking, gor, heiko.carstens, Y Song
In-Reply-To: <20190716105634.21827-1-iii@linux.ibm.com>

On Tue, Jul 16, 2019 at 3:56 AM Ilya Leoshkevich <iii@linux.ibm.com> wrote:
>
> Some setups (e.g. virtual machines) might run with hardware perf events
> disabled. If this is the case, skip the test_send_signal_nmi test.
>
> Add a separate test involving a software perf event. This allows testing
> the perf event path regardless of hardware perf event support.
>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---

LGTM!

Acked-by: Andrii Nakryiko <andriin@fb.com>

>
> v1->v2: Skip the test instead of using a software event.
> Add a separate test with a software event.
>
>  .../selftests/bpf/prog_tests/send_signal.c    | 33 ++++++++++++++++++-
>  1 file changed, 32 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
> index 67cea1686305..54218ee3c004 100644
> --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
> +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
> @@ -173,6 +173,18 @@ static int test_send_signal_tracepoint(void)
>         return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
>  }
>

[...]

^ permalink raw reply

* Re: [PATCH] skbuff: fix compilation warnings in skb_dump()
From: Willem de Bruijn @ 2019-07-16 15:04 UTC (permalink / raw)
  To: Qian Cai
  Cc: David Miller, Willem de Bruijn, clang-built-linux,
	Network Development, LKML
In-Reply-To: <1563288840-1913-1-git-send-email-cai@lca.pw>

On Tue, Jul 16, 2019 at 4:56 PM Qian Cai <cai@lca.pw> wrote:
>
> The commit 6413139dfc64 ("skbuff: increase verbosity when dumping skb
> data") introduced a few compilation warnings.
>
> net/core/skbuff.c:766:32: warning: format specifies type 'unsigned
> short' but the argument has type 'unsigned int' [-Wformat]
>                        level, sk->sk_family, sk->sk_type,
> sk->sk_protocol);
>                                              ^~~~~~~~~~~
> net/core/skbuff.c:766:45: warning: format specifies type 'unsigned
> short' but the argument has type 'unsigned int' [-Wformat]
>                        level, sk->sk_family, sk->sk_type,
> sk->sk_protocol);
> ^~~~~~~~~~~~~~~

Ah, I looked at sk_family (skc_family), which is type unsigned short.

But sk_type and sk_protocol are defined as

 unsigned int            sk_padding : 1,
                                sk_kern_sock : 1,
                                sk_no_check_tx : 1,
                                sk_no_check_rx : 1,
                                sk_userlocks : 4,
                                sk_protocol  : 8,
                                sk_type      : 16;

So %u is indeed needed instead of %hu.

> Fix them by using the proper types, and also fix some checkpatch
> warnings by using pr_info().
>
> WARNING: printk() should include KERN_<LEVEL> facility level
> +               printk("%ssk family=%hu type=%u proto=%u\n",

Converting printk to pr_info lowers all levels to KERN_INFO.

skb_dump takes an explicit parameter level to be able to log at
KERN_ERR or KERN_WARNING

I would like to avoid those checkpatch warnings, but this is not the
right approach.

> Fixes: 6413139dfc64 ("skbuff: increase verbosity when dumping skb data")

Thanks. For a v2, please mark the target branch, as [PATCH net v2].

^ permalink raw reply

* RE: [PATCH net 2/4] tcp: tcp_fragment() should apply sane memory limits
From: Prout, Andrew - LLSC - MITLL @ 2019-07-16 15:13 UTC (permalink / raw)
  To: Eric Dumazet, Christoph Paasch
  Cc: David S . Miller, netdev, Greg Kroah-Hartman, Jonathan Looney,
	Neal Cardwell, Tyler Hicks, Yuchung Cheng, Bruce Curtis,
	Jonathan Lemon, Dustin Marquess
In-Reply-To: <adec774ed16540c6b627c2f607f3e216@ll.mit.edu>

On 7/11/2019 11:15 PM, Prout, Andrew - LLSC - MITLL wrote: 
> I in no way intended to imply that I had confirmed the small SO_SNDBUF was a prerequisite to our problem. With my synthetic test, I was looking for a concise reproducer and trying things to > stress the system.

I've looked into this some more: I am now able to reproduce it consistently (on 4.14.132) with my synthetic test seemingly regardless of the SO_SNDBUF size (tested 128KiB, 512KiB, 1MiB and 10MiB). The key change to make it reproducible was to use sendfile() instead of send() - this is what samba was doing in our real failure case. Looking at the code, it appears that sendfile() calls into the zerocopy functions which only check SNDBUF for memory they allocate (the skb structure) - they'll happily "overfill" the send buffer with zerocopy'd pages.

In my tests I can see the send buffer used reported by netstat getting to much larger values than the SO_SNDBUF value I set (with a 10MiB SO_SNDBUF, it got up to ~52MiB on a stalled connection). This of course easily causes the CVE-2019-11478 fix to false alarm and the TCP connection to stall.

^ permalink raw reply

* Re: [PATCH net] net: fix use-after-free in __netif_receive_skb_core
From: Sabrina Dubroca @ 2019-07-16 15:26 UTC (permalink / raw)
  To: Edward Cree; +Cc: netdev, Andreas Steinmetz
In-Reply-To: <8166b82f-8430-1441-32e7-540c1829754e@solarflare.com>

2019-07-12, 16:29:48 +0100, Edward Cree wrote:
> On 10/07/2019 23:47, Sabrina Dubroca wrote:
> > 2019-07-10, 16:07:43 +0100, Edward Cree wrote:
> >> On 10/07/2019 14:52, Sabrina Dubroca wrote:
> >>> -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
> >>> +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
> >>>  				    struct packet_type **ppt_prev)
> >>>  {
> >>>  	struct packet_type *ptype, *pt_prev;
> >>>  	rx_handler_func_t *rx_handler;
> >>> +	struct sk_buff *skb = *pskb;
> >> Would it not be simpler just to change all users of skb to *pskb?
> >> Then you avoid having to keep doing "*pskb = skb;" whenever skb changes
> >>  (with concomitant risk of bugs if one gets missed).
> > Yes, that would be less risky. I wrote a version of the patch that did
> > exactly that, but found it really too ugly (both the patch and the
> > resulting code).
> If you've still got that version (or can dig it out of your reflog), can
>  you post it so we can see just how ugly it turns out?

No, I didn't even commit it. Rewrote it now, it's rewriting over 1/4
of the lines. Considering that the patch would need to go in stable, I
don't think that's appropriate.

(This has been only compiled, not actually tested)

diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..5fb2a15d42e1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4799,7 +4799,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 	return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 				    struct packet_type **ppt_prev)
 {
 	struct packet_type *ptype, *pt_prev;
@@ -4809,21 +4809,21 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	net_timestamp_check(!netdev_tstamp_prequeue, skb);
+	net_timestamp_check(!netdev_tstamp_prequeue, *pskb);
 
-	trace_netif_receive_skb(skb);
+	trace_netif_receive_skb(*pskb);
 
-	orig_dev = skb->dev;
+	orig_dev = (*pskb)->dev;
 
-	skb_reset_network_header(skb);
-	if (!skb_transport_header_was_set(skb))
-		skb_reset_transport_header(skb);
-	skb_reset_mac_len(skb);
+	skb_reset_network_header(*pskb);
+	if (!skb_transport_header_was_set(*pskb))
+		skb_reset_transport_header(*pskb);
+	skb_reset_mac_len(*pskb);
 
 	pt_prev = NULL;
 
 another_round:
-	skb->skb_iif = skb->dev->ifindex;
+	(*pskb)->skb_iif = (*pskb)->dev->ifindex;
 
 	__this_cpu_inc(softnet_data.processed);
 
@@ -4831,22 +4831,22 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 		int ret2;
 
 		preempt_disable();
-		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
+		ret2 = do_xdp_generic(rcu_dereference((*pskb)->dev->xdp_prog), *pskb);
 		preempt_enable();
 
 		if (ret2 != XDP_PASS)
 			return NET_RX_DROP;
-		skb_reset_mac_len(skb);
+		skb_reset_mac_len(*pskb);
 	}
 
-	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
-	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
-		skb = skb_vlan_untag(skb);
-		if (unlikely(!skb))
+	if ((*pskb)->protocol == cpu_to_be16(ETH_P_8021Q) ||
+	    (*pskb)->protocol == cpu_to_be16(ETH_P_8021AD)) {
+		*pskb = skb_vlan_untag(*pskb);
+		if (unlikely(!*pskb))
 			goto out;
 	}
 
-	if (skb_skip_tc_classify(skb))
+	if (skb_skip_tc_classify(*pskb))
 		goto skip_classify;
 
 	if (pfmemalloc)
@@ -4854,50 +4854,50 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		if (pt_prev)
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 		pt_prev = ptype;
 	}
 
-	list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
+	list_for_each_entry_rcu(ptype, &(*pskb)->dev->ptype_all, list) {
 		if (pt_prev)
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 		pt_prev = ptype;
 	}
 
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
 	if (static_branch_unlikely(&ingress_needed_key)) {
-		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
-		if (!skb)
+		*pskb = sch_handle_ingress(*pskb, &pt_prev, &ret, orig_dev);
+		if (!*pskb)
 			goto out;
 
-		if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+		if (nf_ingress(*pskb, &pt_prev, &ret, orig_dev) < 0)
 			goto out;
 	}
 #endif
-	skb_reset_tc(skb);
+	skb_reset_tc(*pskb);
 skip_classify:
-	if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
+	if (pfmemalloc && !skb_pfmemalloc_protocol(*pskb))
 		goto drop;
 
-	if (skb_vlan_tag_present(skb)) {
+	if (skb_vlan_tag_present(*pskb)) {
 		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		if (vlan_do_receive(&skb))
+		if (vlan_do_receive(pskb))
 			goto another_round;
-		else if (unlikely(!skb))
+		else if (unlikely(!*pskb))
 			goto out;
 	}
 
-	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	rx_handler = rcu_dereference((*pskb)->dev->rx_handler);
 	if (rx_handler) {
 		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
+			ret = deliver_skb(*pskb, pt_prev, orig_dev);
 			pt_prev = NULL;
 		}
-		switch (rx_handler(&skb)) {
+		switch (rx_handler(pskb)) {
 		case RX_HANDLER_CONSUMED:
 			ret = NET_RX_SUCCESS;
 			goto out;
@@ -4912,29 +4912,29 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 		}
 	}
 
-	if (unlikely(skb_vlan_tag_present(skb))) {
+	if (unlikely(skb_vlan_tag_present(*pskb))) {
 check_vlan_id:
-		if (skb_vlan_tag_get_id(skb)) {
+		if (skb_vlan_tag_get_id(*pskb)) {
 			/* Vlan id is non 0 and vlan_do_receive() above couldn't
 			 * find vlan device.
 			 */
-			skb->pkt_type = PACKET_OTHERHOST;
-		} else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
-			   skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+			(*pskb)->pkt_type = PACKET_OTHERHOST;
+		} else if ((*pskb)->protocol == cpu_to_be16(ETH_P_8021Q) ||
+			   (*pskb)->protocol == cpu_to_be16(ETH_P_8021AD)) {
 			/* Outer header is 802.1P with vlan 0, inner header is
 			 * 802.1Q or 802.1AD and vlan_do_receive() above could
 			 * not find vlan dev for vlan id 0.
 			 */
-			__vlan_hwaccel_clear_tag(skb);
-			skb = skb_vlan_untag(skb);
-			if (unlikely(!skb))
+			__vlan_hwaccel_clear_tag(*pskb);
+			*pskb = skb_vlan_untag(*pskb);
+			if (unlikely(!*pskb))
 				goto out;
-			if (vlan_do_receive(&skb))
+			if (vlan_do_receive(pskb))
 				/* After stripping off 802.1P header with vlan 0
 				 * vlan dev is found for inner header.
 				 */
 				goto another_round;
-			else if (unlikely(!skb))
+			else if (unlikely(!*pskb))
 				goto out;
 			else
 				/* We have stripped outer 802.1P vlan 0 header.
@@ -4944,40 +4944,40 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 				goto check_vlan_id;
 		}
 		/* Note: we might in the future use prio bits
-		 * and set skb->priority like in vlan_do_receive()
+		 * and set (*pskb)->priority like in vlan_do_receive()
 		 * For the time being, just ignore Priority Code Point
 		 */
-		__vlan_hwaccel_clear_tag(skb);
+		__vlan_hwaccel_clear_tag(*pskb);
 	}
 
-	type = skb->protocol;
+	type = (*pskb)->protocol;
 
 	/* deliver only exact match when indicated */
 	if (likely(!deliver_exact)) {
-		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+		deliver_ptype_list_skb(*pskb, &pt_prev, orig_dev, type,
 				       &ptype_base[ntohs(type) &
 						   PTYPE_HASH_MASK]);
 	}
 
-	deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+	deliver_ptype_list_skb(*pskb, &pt_prev, orig_dev, type,
 			       &orig_dev->ptype_specific);
 
-	if (unlikely(skb->dev != orig_dev)) {
-		deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
-				       &skb->dev->ptype_specific);
+	if (unlikely((*pskb)->dev != orig_dev)) {
+		deliver_ptype_list_skb(*pskb, &pt_prev, orig_dev, type,
+				       &(*pskb)->dev->ptype_specific);
 	}
 
 	if (pt_prev) {
-		if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+		if (unlikely(skb_orphan_frags_rx(*pskb, GFP_ATOMIC)))
 			goto drop;
 		*ppt_prev = pt_prev;
 	} else {
 drop:
 		if (!deliver_exact)
-			atomic_long_inc(&skb->dev->rx_dropped);
+			atomic_long_inc(&(*pskb)->dev->rx_dropped);
 		else
-			atomic_long_inc(&skb->dev->rx_nohandler);
-		kfree_skb(skb);
+			atomic_long_inc(&(*pskb)->dev->rx_nohandler);
+		kfree_skb(*pskb);
 		/* Jamal, now you will not able to escape explaining
 		 * me how you were going to use this. :-)
 		 */
@@ -4994,7 +4994,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
 	struct packet_type *pt_prev = NULL;
 	int ret;
 
-	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+	ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
 	if (pt_prev)
 		ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
 					 skb->dev, pt_prev, orig_dev);
@@ -5072,7 +5072,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 		struct packet_type *pt_prev = NULL;
 
 		skb_list_del_init(skb);
-		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		__netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
 		if (!pt_prev)
 			continue;
 		if (pt_curr != pt_prev || od_curr != orig_dev) {


> Here's a thought, how about switching round the return-vs-pass-by-pointer
>  and writing:
> 
> static struct sk_buff *__netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
>                                                 struct packet_type **ppt_prev, int *ret)
> ?
> (Although, you might want to rename 'ret' in that case.)
> 
> Does that make things any less ugly?

That seems more reasonable (also only compiled so far):

diff --git a/net/core/dev.c b/net/core/dev.c
index fc676b2610e3..e09b6a14851c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4799,8 +4799,8 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 	return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
-				    struct packet_type **ppt_prev)
+static struct sk_buff *__netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+						struct packet_type **ppt_prev, int *retp)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
@@ -4834,8 +4834,10 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 		ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
 		preempt_enable();
 
-		if (ret2 != XDP_PASS)
-			return NET_RX_DROP;
+		if (ret2 != XDP_PASS) {
+			*retp = NET_RX_DROP;
+			return NULL;
+		}
 		skb_reset_mac_len(skb);
 	}
 
@@ -4985,7 +4987,8 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
 	}
 
 out:
-	return ret;
+	*retp = ret;
+	return skb;
 }
 
 static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
@@ -4994,7 +4997,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
 	struct packet_type *pt_prev = NULL;
 	int ret;
 
-	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+	skb = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev, &ret);
 	if (pt_prev)
 		ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
 					 skb->dev, pt_prev, orig_dev);
@@ -5070,9 +5073,10 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 	list_for_each_entry_safe(skb, next, head, list) {
 		struct net_device *orig_dev = skb->dev;
 		struct packet_type *pt_prev = NULL;
+		int ret;
 
 		skb_list_del_init(skb);
-		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		skb = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev, &ret);
 		if (!pt_prev)
 			continue;
 		if (pt_curr != pt_prev || od_curr != orig_dev) {


-- 
Sabrina

^ permalink raw reply related

* [PULL] virtio, vhost: fixes, features, performance
From: Michael S. Tsirkin @ 2019-07-16 15:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: kvm, virtualization, netdev, linux-kernel, aarcange,
	bharat.bhushan, bhelgaas, linux-arm-kernel, linux-mm,
	linux-parisc, davem, eric.auger, gustavo, hch, ihor.matushchak,
	James.Bottomley, jasowang, jean-philippe.brucker, jglisse, mst,
	natechancellor

The following changes since commit c1ea02f15ab5efb3e93fc3144d895410bf79fcf2:

  vhost: scsi: add weight support (2019-05-27 11:08:23 -0400)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

for you to fetch changes up to 5e663f0410fa2f355042209154029842ba1abd43:

  virtio-mmio: add error check for platform_get_irq (2019-07-11 16:22:29 -0400)

----------------------------------------------------------------
virtio, vhost: fixes, features, performance

new iommu device
vhost guest memory access using vmap (just meta-data for now)
minor fixes

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

Note: due to code driver changes the driver-core tree, the following
patch is needed when merging tree with commit 92ce7e83b4e5
("driver_find_device: Unify the match function with
class_find_device()") in the driver-core tree:

From: Nathan Chancellor <natechancellor@gmail.com>
Subject: [PATCH] iommu/virtio: Constify data parameter in viommu_match_node

After commit 92ce7e83b4e5 ("driver_find_device: Unify the match
function with class_find_device()") in the driver-core tree.

Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---
 drivers/iommu/virtio-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 4620dd221ffd..433f4d2ee956 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -839,7 +839,7 @@ static void viommu_put_resv_regions(struct device *dev, struct list_head *head)
 static struct iommu_ops viommu_ops;
 static struct virtio_driver virtio_iommu_drv;

-static int viommu_match_node(struct device *dev, void *data)
+static int viommu_match_node(struct device *dev, const void *data)
 {
 	return dev->parent->fwnode == data;
 }

----------------------------------------------------------------
Gustavo A. R. Silva (1):
      scsi: virtio_scsi: Use struct_size() helper

Ihor Matushchak (1):
      virtio-mmio: add error check for platform_get_irq

Jason Wang (6):
      vhost: generalize adding used elem
      vhost: fine grain userspace memory accessors
      vhost: rename vq_iotlb_prefetch() to vq_meta_prefetch()
      vhost: introduce helpers to get the size of metadata area
      vhost: factor out setting vring addr and num
      vhost: access vq metadata through kernel virtual address

Jean-Philippe Brucker (7):
      dt-bindings: virtio-mmio: Add IOMMU description
      dt-bindings: virtio: Add virtio-pci-iommu node
      of: Allow the iommu-map property to omit untranslated devices
      PCI: OF: Initialize dev->fwnode appropriately
      iommu: Add virtio-iommu driver
      iommu/virtio: Add probe request
      iommu/virtio: Add event queue

Michael S. Tsirkin (1):
      vhost: fix clang build warning

 Documentation/devicetree/bindings/virtio/iommu.txt |   66 ++
 Documentation/devicetree/bindings/virtio/mmio.txt  |   30 +
 MAINTAINERS                                        |    7 +
 drivers/iommu/Kconfig                              |   11 +
 drivers/iommu/Makefile                             |    1 +
 drivers/iommu/virtio-iommu.c                       | 1158 ++++++++++++++++++++
 drivers/of/base.c                                  |   10 +-
 drivers/pci/of.c                                   |    8 +
 drivers/scsi/virtio_scsi.c                         |    2 +-
 drivers/vhost/net.c                                |    4 +-
 drivers/vhost/vhost.c                              |  850 +++++++++++---
 drivers/vhost/vhost.h                              |   43 +-
 drivers/virtio/virtio_mmio.c                       |    7 +-
 include/uapi/linux/virtio_ids.h                    |    1 +
 include/uapi/linux/virtio_iommu.h                  |  161 +++
 15 files changed, 2228 insertions(+), 131 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/virtio/iommu.txt
 create mode 100644 drivers/iommu/virtio-iommu.c
 create mode 100644 include/uapi/linux/virtio_iommu.h

^ permalink raw reply related

* Re: [PATCH ghak90 V6 02/10] audit: add container id
From: Richard Guy Briggs @ 2019-07-16 15:37 UTC (permalink / raw)
  To: Paul Moore
  Cc: Tycho Andersen, Serge E. Hallyn, containers, linux-api,
	Linux-Audit Mailing List, linux-fsdevel, LKML, netdev,
	netfilter-devel, sgrubb, omosnace, dhowells, simo, Eric Paris,
	ebiederm, nhorman
In-Reply-To: <CAHC9VhT0V+xi_6nAR5TsM2vs34LbgMeO=-W+MS_kqiXRRzneZQ@mail.gmail.com>

On 2019-07-15 17:09, Paul Moore wrote:
> On Mon, Jul 8, 2019 at 2:12 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > On 2019-05-30 19:26, Paul Moore wrote:
> 
> ...
> 
> > > I like the creativity, but I worry that at some point these
> > > limitations are going to be raised (limits have a funny way of doing
> > > that over time) and we will be in trouble.  I say "trouble" because I
> > > want to be able to quickly do an audit container ID comparison and
> > > we're going to pay a penalty for these larger values (we'll need this
> > > when we add multiple auditd support and the requisite record routing).
> > >
> > > Thinking about this makes me also realize we probably need to think a
> > > bit longer about audit container ID conflicts between orchestrators.
> > > Right now we just take the value that is given to us by the
> > > orchestrator, but if we want to allow multiple container orchestrators
> > > to work without some form of cooperation in userspace (I think we have
> > > to assume the orchestrators will not talk to each other) we likely
> > > need to have some way to block reuse of an audit container ID.  We
> > > would either need to prevent the orchestrator from explicitly setting
> > > an audit container ID to a currently in use value, or instead generate
> > > the audit container ID in the kernel upon an event triggered by the
> > > orchestrator (e.g. a write to a /proc file).  I suspect we should
> > > start looking at the idr code, I think we will need to make use of it.
> >
> > To address this, I'd suggest that it is enforced to only allow the
> > setting of descendants and to maintain a master list of audit container
> > identifiers (with a hash table if necessary later) that includes the
> > container owner.
> 
> We're discussing the audit container ID management policy elsewhere in
> this thread so I won't comment on that here, but I did want to say
> that we will likely need something better than a simple list of audit
> container IDs from the start.  It's common for systems to have
> thousands of containers now (or multiple thousands), which tells me
> that a list is a poor choice.  You mentioned a hash table, so I would
> suggest starting with that over the list for the initial patchset.

I saw that as an internal incremental improvement that did not affect
the API, so I wanted to keep things a bit simpler (as you've requested
in the past) to get this going, and add that enhancement later.

I'll start working on it now.  The hash table would simply point to
lists anyways unless you can recommend a better approach.

> paul moore

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

^ permalink raw reply

* [PATCH net v2] skbuff: fix compilation warnings in skb_dump()
From: Qian Cai @ 2019-07-16 15:43 UTC (permalink / raw)
  To: davem; +Cc: willemb, joe, clang-built-linux, netdev, linux-kernel, Qian Cai

The commit 6413139dfc64 ("skbuff: increase verbosity when dumping skb
data") introduced a few compilation warnings.

net/core/skbuff.c:766:32: warning: format specifies type 'unsigned
short' but the argument has type 'unsigned int' [-Wformat]
                       level, sk->sk_family, sk->sk_type,
sk->sk_protocol);
                                             ^~~~~~~~~~~
net/core/skbuff.c:766:45: warning: format specifies type 'unsigned
short' but the argument has type 'unsigned int' [-Wformat]
                       level, sk->sk_family, sk->sk_type,
sk->sk_protocol);
^~~~~~~~~~~~~~~

Fix them by using the proper types.

Fixes: 6413139dfc64 ("skbuff: increase verbosity when dumping skb data")
Signed-off-by: Qian Cai <cai@lca.pw>
---

v2: Drop the checkpatch fix as it seems a bit more involved that it does not
    even like passing a variable to it, i.e., printk(level "msg"). Also,
    print_hex_dump() seems need a fix to be complete which can probably be done
    later.

 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6f1e31f674a3..0338820ee0ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -762,7 +762,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
 		printk("%sdev name=%s feat=0x%pNF\n",
 		       level, dev->name, &dev->features);
 	if (sk)
-		printk("%ssk family=%hu type=%hu proto=%hu\n",
+		printk("%ssk family=%hu type=%u proto=%u\n",
 		       level, sk->sk_family, sk->sk_type, sk->sk_protocol);
 
 	if (full_pkt && headroom)
-- 
1.8.3.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox