Netdev List
 help / color / mirror / Atom feed
* [PATCH iproute2] iproute: build more easily on Android
From: Lorenzo Colitti @ 2017-10-02 17:03 UTC (permalink / raw)
  To: netdev; +Cc: stephen, enh, Lorenzo Colitti

iproute2 contains a bunch of kernel headers, including uapi ones.
Android's libc uses uapi headers almost directly, and uses a
script to fix kernel types that don't match what userspace
expects.

For example: https://issuetracker.google.com/36987220 reports
that Android's struct ip_mreq_source contains "__be32 imr_multiaddr"
rather than "struct in_addr imr_multiaddr". The script addresses
this by replacing the uapi struct definition with a #include
<bits/ip_mreq.h> which contains the traditional userspace
definition.

Unfortunately, when we compile iproute2, this definition
conflicts with the one in iproute2's linux/in.h.

Historically we've just solved this problem by running "git rm"
on all the iproute2 include/linux headers that break Android's
libc.  However, deleting the files in this way makes it harder to
keep up with upstream, because every upstream change to
an include file causes a merge conflict with the delete.

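This patch fixes the problem by moving the iproute2 linux headers
from include/linux to include/uapi/linux and adding
-I../include/uapi to CFLAGS in the Makefile, so the regular build
still finds them. A build that must use the libc's own kernel
headers, such as Android's, can then leave include/uapi out of its
include path instead of deleting the files.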

Tested: compiles on ubuntu trusty (glibc)

Signed-off-by: Elliott Hughes <enh@google.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 Makefile                                             | 2 +-
 include/{ => uapi}/linux/atm.h                       | 0
 include/{ => uapi}/linux/atmapi.h                    | 0
 include/{ => uapi}/linux/atmarp.h                    | 0
 include/{ => uapi}/linux/atmdev.h                    | 0
 include/{ => uapi}/linux/atmioc.h                    | 0
 include/{ => uapi}/linux/atmsap.h                    | 0
 include/{ => uapi}/linux/bpf.h                       | 0
 include/{ => uapi}/linux/bpf_common.h                | 0
 include/{ => uapi}/linux/can.h                       | 0
 include/{ => uapi}/linux/can/netlink.h               | 0
 include/{ => uapi}/linux/can/vxcan.h                 | 0
 include/{ => uapi}/linux/devlink.h                   | 0
 include/{ => uapi}/linux/elf-em.h                    | 0
 include/{ => uapi}/linux/fib_rules.h                 | 0
 include/{ => uapi}/linux/filter.h                    | 0
 include/{ => uapi}/linux/fou.h                       | 0
 include/{ => uapi}/linux/gen_stats.h                 | 0
 include/{ => uapi}/linux/genetlink.h                 | 0
 include/{ => uapi}/linux/hdlc/ioctl.h                | 0
 include/{ => uapi}/linux/icmpv6.h                    | 0
 include/{ => uapi}/linux/if.h                        | 0
 include/{ => uapi}/linux/if_addr.h                   | 0
 include/{ => uapi}/linux/if_addrlabel.h              | 0
 include/{ => uapi}/linux/if_alg.h                    | 0
 include/{ => uapi}/linux/if_arp.h                    | 0
 include/{ => uapi}/linux/if_bonding.h                | 0
 include/{ => uapi}/linux/if_bridge.h                 | 0
 include/{ => uapi}/linux/if_ether.h                  | 0
 include/{ => uapi}/linux/if_link.h                   | 0
 include/{ => uapi}/linux/if_macsec.h                 | 0
 include/{ => uapi}/linux/if_packet.h                 | 0
 include/{ => uapi}/linux/if_tun.h                    | 0
 include/{ => uapi}/linux/if_tunnel.h                 | 0
 include/{ => uapi}/linux/if_vlan.h                   | 0
 include/{ => uapi}/linux/ife.h                       | 0
 include/{ => uapi}/linux/ila.h                       | 0
 include/{ => uapi}/linux/in.h                        | 0
 include/{ => uapi}/linux/in6.h                       | 0
 include/{ => uapi}/linux/in_route.h                  | 0
 include/{ => uapi}/linux/inet_diag.h                 | 0
 include/{ => uapi}/linux/ip.h                        | 0
 include/{ => uapi}/linux/ip6_tunnel.h                | 0
 include/{ => uapi}/linux/ipsec.h                     | 0
 include/{ => uapi}/linux/kernel.h                    | 0
 include/{ => uapi}/linux/l2tp.h                      | 0
 include/{ => uapi}/linux/libc-compat.h               | 0
 include/{ => uapi}/linux/limits.h                    | 0
 include/{ => uapi}/linux/lwtunnel.h                  | 0
 include/{ => uapi}/linux/magic.h                     | 0
 include/{ => uapi}/linux/mpls.h                      | 0
 include/{ => uapi}/linux/mpls_iptunnel.h             | 0
 include/{ => uapi}/linux/neighbour.h                 | 0
 include/{ => uapi}/linux/net_namespace.h             | 0
 include/{ => uapi}/linux/netconf.h                   | 0
 include/{ => uapi}/linux/netdevice.h                 | 0
 include/{ => uapi}/linux/netfilter.h                 | 0
 include/{ => uapi}/linux/netfilter/ipset/ip_set.h    | 0
 include/{ => uapi}/linux/netfilter/x_tables.h        | 0
 include/{ => uapi}/linux/netfilter/xt_set.h          | 0
 include/{ => uapi}/linux/netfilter/xt_tcpudp.h       | 0
 include/{ => uapi}/linux/netfilter_ipv4.h            | 0
 include/{ => uapi}/linux/netfilter_ipv4/ip_tables.h  | 0
 include/{ => uapi}/linux/netfilter_ipv6.h            | 0
 include/{ => uapi}/linux/netfilter_ipv6/ip6_tables.h | 0
 include/{ => uapi}/linux/netlink.h                   | 0
 include/{ => uapi}/linux/netlink_diag.h              | 0
 include/{ => uapi}/linux/packet_diag.h               | 0
 include/{ => uapi}/linux/param.h                     | 0
 include/{ => uapi}/linux/pfkeyv2.h                   | 0
 include/{ => uapi}/linux/pkt_cls.h                   | 0
 include/{ => uapi}/linux/pkt_sched.h                 | 0
 include/{ => uapi}/linux/posix_types.h               | 0
 include/{ => uapi}/linux/rtnetlink.h                 | 0
 include/{ => uapi}/linux/sctp.h                      | 0
 include/{ => uapi}/linux/seg6.h                      | 0
 include/{ => uapi}/linux/seg6_genl.h                 | 0
 include/{ => uapi}/linux/seg6_hmac.h                 | 0
 include/{ => uapi}/linux/seg6_iptunnel.h             | 0
 include/{ => uapi}/linux/seg6_local.h                | 0
 include/{ => uapi}/linux/sock_diag.h                 | 0
 include/{ => uapi}/linux/socket.h                    | 0
 include/{ => uapi}/linux/sockios.h                   | 0
 include/{ => uapi}/linux/stddef.h                    | 0
 include/{ => uapi}/linux/sysinfo.h                   | 0
 include/{ => uapi}/linux/tc_act/tc_bpf.h             | 0
 include/{ => uapi}/linux/tc_act/tc_connmark.h        | 0
 include/{ => uapi}/linux/tc_act/tc_csum.h            | 0
 include/{ => uapi}/linux/tc_act/tc_defact.h          | 0
 include/{ => uapi}/linux/tc_act/tc_gact.h            | 0
 include/{ => uapi}/linux/tc_act/tc_ife.h             | 0
 include/{ => uapi}/linux/tc_act/tc_ipt.h             | 0
 include/{ => uapi}/linux/tc_act/tc_mirred.h          | 0
 include/{ => uapi}/linux/tc_act/tc_nat.h             | 0
 include/{ => uapi}/linux/tc_act/tc_pedit.h           | 0
 include/{ => uapi}/linux/tc_act/tc_sample.h          | 0
 include/{ => uapi}/linux/tc_act/tc_skbedit.h         | 0
 include/{ => uapi}/linux/tc_act/tc_skbmod.h          | 0
 include/{ => uapi}/linux/tc_act/tc_tunnel_key.h      | 0
 include/{ => uapi}/linux/tc_act/tc_vlan.h            | 0
 include/{ => uapi}/linux/tc_ematch/tc_em_cmp.h       | 0
 include/{ => uapi}/linux/tc_ematch/tc_em_meta.h      | 0
 include/{ => uapi}/linux/tc_ematch/tc_em_nbyte.h     | 0
 include/{ => uapi}/linux/tcp.h                       | 0
 include/{ => uapi}/linux/tcp_metrics.h               | 0
 include/{ => uapi}/linux/tipc.h                      | 0
 include/{ => uapi}/linux/tipc_netlink.h              | 0
 include/{ => uapi}/linux/types.h                     | 0
 include/{ => uapi}/linux/unix_diag.h                 | 0
 include/{ => uapi}/linux/veth.h                      | 0
 include/{ => uapi}/linux/xfrm.h                      | 0
 111 files changed, 1 insertion(+), 1 deletion(-)
 rename include/{ => uapi}/linux/atm.h (100%)
 rename include/{ => uapi}/linux/atmapi.h (100%)
 rename include/{ => uapi}/linux/atmarp.h (100%)
 rename include/{ => uapi}/linux/atmdev.h (100%)
 rename include/{ => uapi}/linux/atmioc.h (100%)
 rename include/{ => uapi}/linux/atmsap.h (100%)
 rename include/{ => uapi}/linux/bpf.h (100%)
 rename include/{ => uapi}/linux/bpf_common.h (100%)
 rename include/{ => uapi}/linux/can.h (100%)
 rename include/{ => uapi}/linux/can/netlink.h (100%)
 rename include/{ => uapi}/linux/can/vxcan.h (100%)
 rename include/{ => uapi}/linux/devlink.h (100%)
 rename include/{ => uapi}/linux/elf-em.h (100%)
 rename include/{ => uapi}/linux/fib_rules.h (100%)
 rename include/{ => uapi}/linux/filter.h (100%)
 rename include/{ => uapi}/linux/fou.h (100%)
 rename include/{ => uapi}/linux/gen_stats.h (100%)
 rename include/{ => uapi}/linux/genetlink.h (100%)
 rename include/{ => uapi}/linux/hdlc/ioctl.h (100%)
 rename include/{ => uapi}/linux/icmpv6.h (100%)
 rename include/{ => uapi}/linux/if.h (100%)
 rename include/{ => uapi}/linux/if_addr.h (100%)
 rename include/{ => uapi}/linux/if_addrlabel.h (100%)
 rename include/{ => uapi}/linux/if_alg.h (100%)
 rename include/{ => uapi}/linux/if_arp.h (100%)
 rename include/{ => uapi}/linux/if_bonding.h (100%)
 rename include/{ => uapi}/linux/if_bridge.h (100%)
 rename include/{ => uapi}/linux/if_ether.h (100%)
 rename include/{ => uapi}/linux/if_link.h (100%)
 rename include/{ => uapi}/linux/if_macsec.h (100%)
 rename include/{ => uapi}/linux/if_packet.h (100%)
 rename include/{ => uapi}/linux/if_tun.h (100%)
 rename include/{ => uapi}/linux/if_tunnel.h (100%)
 rename include/{ => uapi}/linux/if_vlan.h (100%)
 rename include/{ => uapi}/linux/ife.h (100%)
 rename include/{ => uapi}/linux/ila.h (100%)
 rename include/{ => uapi}/linux/in.h (100%)
 rename include/{ => uapi}/linux/in6.h (100%)
 rename include/{ => uapi}/linux/in_route.h (100%)
 rename include/{ => uapi}/linux/inet_diag.h (100%)
 rename include/{ => uapi}/linux/ip.h (100%)
 rename include/{ => uapi}/linux/ip6_tunnel.h (100%)
 rename include/{ => uapi}/linux/ipsec.h (100%)
 rename include/{ => uapi}/linux/kernel.h (100%)
 rename include/{ => uapi}/linux/l2tp.h (100%)
 rename include/{ => uapi}/linux/libc-compat.h (100%)
 rename include/{ => uapi}/linux/limits.h (100%)
 rename include/{ => uapi}/linux/lwtunnel.h (100%)
 rename include/{ => uapi}/linux/magic.h (100%)
 rename include/{ => uapi}/linux/mpls.h (100%)
 rename include/{ => uapi}/linux/mpls_iptunnel.h (100%)
 rename include/{ => uapi}/linux/neighbour.h (100%)
 rename include/{ => uapi}/linux/net_namespace.h (100%)
 rename include/{ => uapi}/linux/netconf.h (100%)
 rename include/{ => uapi}/linux/netdevice.h (100%)
 rename include/{ => uapi}/linux/netfilter.h (100%)
 rename include/{ => uapi}/linux/netfilter/ipset/ip_set.h (100%)
 rename include/{ => uapi}/linux/netfilter/x_tables.h (100%)
 rename include/{ => uapi}/linux/netfilter/xt_set.h (100%)
 rename include/{ => uapi}/linux/netfilter/xt_tcpudp.h (100%)
 rename include/{ => uapi}/linux/netfilter_ipv4.h (100%)
 rename include/{ => uapi}/linux/netfilter_ipv4/ip_tables.h (100%)
 rename include/{ => uapi}/linux/netfilter_ipv6.h (100%)
 rename include/{ => uapi}/linux/netfilter_ipv6/ip6_tables.h (100%)
 rename include/{ => uapi}/linux/netlink.h (100%)
 rename include/{ => uapi}/linux/netlink_diag.h (100%)
 rename include/{ => uapi}/linux/packet_diag.h (100%)
 rename include/{ => uapi}/linux/param.h (100%)
 rename include/{ => uapi}/linux/pfkeyv2.h (100%)
 rename include/{ => uapi}/linux/pkt_cls.h (100%)
 rename include/{ => uapi}/linux/pkt_sched.h (100%)
 rename include/{ => uapi}/linux/posix_types.h (100%)
 rename include/{ => uapi}/linux/rtnetlink.h (100%)
 rename include/{ => uapi}/linux/sctp.h (100%)
 rename include/{ => uapi}/linux/seg6.h (100%)
 rename include/{ => uapi}/linux/seg6_genl.h (100%)
 rename include/{ => uapi}/linux/seg6_hmac.h (100%)
 rename include/{ => uapi}/linux/seg6_iptunnel.h (100%)
 rename include/{ => uapi}/linux/seg6_local.h (100%)
 rename include/{ => uapi}/linux/sock_diag.h (100%)
 rename include/{ => uapi}/linux/socket.h (100%)
 rename include/{ => uapi}/linux/sockios.h (100%)
 rename include/{ => uapi}/linux/stddef.h (100%)
 rename include/{ => uapi}/linux/sysinfo.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_bpf.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_connmark.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_csum.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_defact.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_gact.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_ife.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_ipt.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_mirred.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_nat.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_pedit.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_sample.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_skbedit.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_skbmod.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_tunnel_key.h (100%)
 rename include/{ => uapi}/linux/tc_act/tc_vlan.h (100%)
 rename include/{ => uapi}/linux/tc_ematch/tc_em_cmp.h (100%)
 rename include/{ => uapi}/linux/tc_ematch/tc_em_meta.h (100%)
 rename include/{ => uapi}/linux/tc_ematch/tc_em_nbyte.h (100%)
 rename include/{ => uapi}/linux/tcp.h (100%)
 rename include/{ => uapi}/linux/tcp_metrics.h (100%)
 rename include/{ => uapi}/linux/tipc.h (100%)
 rename include/{ => uapi}/linux/tipc_netlink.h (100%)
 rename include/{ => uapi}/linux/types.h (100%)
 rename include/{ => uapi}/linux/unix_diag.h (100%)
 rename include/{ => uapi}/linux/veth.h (100%)
 rename include/{ => uapi}/linux/xfrm.h (100%)

diff --git a/Makefile b/Makefile
index 75c0e57006..6ad9610430 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,7 @@ CCOPTS = -O2
 WFLAGS := -Wall -Wstrict-prototypes  -Wmissing-prototypes
 WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2
 
-CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS)
+CFLAGS := $(WFLAGS) $(CCOPTS) -I../include -I../include/uapi $(DEFINES) $(CFLAGS)
 YACCFLAGS = -d -t -v
 
 SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man
diff --git a/include/linux/atm.h b/include/uapi/linux/atm.h
similarity index 100%
rename from include/linux/atm.h
rename to include/uapi/linux/atm.h
diff --git a/include/linux/atmapi.h b/include/uapi/linux/atmapi.h
similarity index 100%
rename from include/linux/atmapi.h
rename to include/uapi/linux/atmapi.h
diff --git a/include/linux/atmarp.h b/include/uapi/linux/atmarp.h
similarity index 100%
rename from include/linux/atmarp.h
rename to include/uapi/linux/atmarp.h
diff --git a/include/linux/atmdev.h b/include/uapi/linux/atmdev.h
similarity index 100%
rename from include/linux/atmdev.h
rename to include/uapi/linux/atmdev.h
diff --git a/include/linux/atmioc.h b/include/uapi/linux/atmioc.h
similarity index 100%
rename from include/linux/atmioc.h
rename to include/uapi/linux/atmioc.h
diff --git a/include/linux/atmsap.h b/include/uapi/linux/atmsap.h
similarity index 100%
rename from include/linux/atmsap.h
rename to include/uapi/linux/atmsap.h
diff --git a/include/linux/bpf.h b/include/uapi/linux/bpf.h
similarity index 100%
rename from include/linux/bpf.h
rename to include/uapi/linux/bpf.h
diff --git a/include/linux/bpf_common.h b/include/uapi/linux/bpf_common.h
similarity index 100%
rename from include/linux/bpf_common.h
rename to include/uapi/linux/bpf_common.h
diff --git a/include/linux/can.h b/include/uapi/linux/can.h
similarity index 100%
rename from include/linux/can.h
rename to include/uapi/linux/can.h
diff --git a/include/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
similarity index 100%
rename from include/linux/can/netlink.h
rename to include/uapi/linux/can/netlink.h
diff --git a/include/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h
similarity index 100%
rename from include/linux/can/vxcan.h
rename to include/uapi/linux/can/vxcan.h
diff --git a/include/linux/devlink.h b/include/uapi/linux/devlink.h
similarity index 100%
rename from include/linux/devlink.h
rename to include/uapi/linux/devlink.h
diff --git a/include/linux/elf-em.h b/include/uapi/linux/elf-em.h
similarity index 100%
rename from include/linux/elf-em.h
rename to include/uapi/linux/elf-em.h
diff --git a/include/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
similarity index 100%
rename from include/linux/fib_rules.h
rename to include/uapi/linux/fib_rules.h
diff --git a/include/linux/filter.h b/include/uapi/linux/filter.h
similarity index 100%
rename from include/linux/filter.h
rename to include/uapi/linux/filter.h
diff --git a/include/linux/fou.h b/include/uapi/linux/fou.h
similarity index 100%
rename from include/linux/fou.h
rename to include/uapi/linux/fou.h
diff --git a/include/linux/gen_stats.h b/include/uapi/linux/gen_stats.h
similarity index 100%
rename from include/linux/gen_stats.h
rename to include/uapi/linux/gen_stats.h
diff --git a/include/linux/genetlink.h b/include/uapi/linux/genetlink.h
similarity index 100%
rename from include/linux/genetlink.h
rename to include/uapi/linux/genetlink.h
diff --git a/include/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h
similarity index 100%
rename from include/linux/hdlc/ioctl.h
rename to include/uapi/linux/hdlc/ioctl.h
diff --git a/include/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
similarity index 100%
rename from include/linux/icmpv6.h
rename to include/uapi/linux/icmpv6.h
diff --git a/include/linux/if.h b/include/uapi/linux/if.h
similarity index 100%
rename from include/linux/if.h
rename to include/uapi/linux/if.h
diff --git a/include/linux/if_addr.h b/include/uapi/linux/if_addr.h
similarity index 100%
rename from include/linux/if_addr.h
rename to include/uapi/linux/if_addr.h
diff --git a/include/linux/if_addrlabel.h b/include/uapi/linux/if_addrlabel.h
similarity index 100%
rename from include/linux/if_addrlabel.h
rename to include/uapi/linux/if_addrlabel.h
diff --git a/include/linux/if_alg.h b/include/uapi/linux/if_alg.h
similarity index 100%
rename from include/linux/if_alg.h
rename to include/uapi/linux/if_alg.h
diff --git a/include/linux/if_arp.h b/include/uapi/linux/if_arp.h
similarity index 100%
rename from include/linux/if_arp.h
rename to include/uapi/linux/if_arp.h
diff --git a/include/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
similarity index 100%
rename from include/linux/if_bonding.h
rename to include/uapi/linux/if_bonding.h
diff --git a/include/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
similarity index 100%
rename from include/linux/if_bridge.h
rename to include/uapi/linux/if_bridge.h
diff --git a/include/linux/if_ether.h b/include/uapi/linux/if_ether.h
similarity index 100%
rename from include/linux/if_ether.h
rename to include/uapi/linux/if_ether.h
diff --git a/include/linux/if_link.h b/include/uapi/linux/if_link.h
similarity index 100%
rename from include/linux/if_link.h
rename to include/uapi/linux/if_link.h
diff --git a/include/linux/if_macsec.h b/include/uapi/linux/if_macsec.h
similarity index 100%
rename from include/linux/if_macsec.h
rename to include/uapi/linux/if_macsec.h
diff --git a/include/linux/if_packet.h b/include/uapi/linux/if_packet.h
similarity index 100%
rename from include/linux/if_packet.h
rename to include/uapi/linux/if_packet.h
diff --git a/include/linux/if_tun.h b/include/uapi/linux/if_tun.h
similarity index 100%
rename from include/linux/if_tun.h
rename to include/uapi/linux/if_tun.h
diff --git a/include/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
similarity index 100%
rename from include/linux/if_tunnel.h
rename to include/uapi/linux/if_tunnel.h
diff --git a/include/linux/if_vlan.h b/include/uapi/linux/if_vlan.h
similarity index 100%
rename from include/linux/if_vlan.h
rename to include/uapi/linux/if_vlan.h
diff --git a/include/linux/ife.h b/include/uapi/linux/ife.h
similarity index 100%
rename from include/linux/ife.h
rename to include/uapi/linux/ife.h
diff --git a/include/linux/ila.h b/include/uapi/linux/ila.h
similarity index 100%
rename from include/linux/ila.h
rename to include/uapi/linux/ila.h
diff --git a/include/linux/in.h b/include/uapi/linux/in.h
similarity index 100%
rename from include/linux/in.h
rename to include/uapi/linux/in.h
diff --git a/include/linux/in6.h b/include/uapi/linux/in6.h
similarity index 100%
rename from include/linux/in6.h
rename to include/uapi/linux/in6.h
diff --git a/include/linux/in_route.h b/include/uapi/linux/in_route.h
similarity index 100%
rename from include/linux/in_route.h
rename to include/uapi/linux/in_route.h
diff --git a/include/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
similarity index 100%
rename from include/linux/inet_diag.h
rename to include/uapi/linux/inet_diag.h
diff --git a/include/linux/ip.h b/include/uapi/linux/ip.h
similarity index 100%
rename from include/linux/ip.h
rename to include/uapi/linux/ip.h
diff --git a/include/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h
similarity index 100%
rename from include/linux/ip6_tunnel.h
rename to include/uapi/linux/ip6_tunnel.h
diff --git a/include/linux/ipsec.h b/include/uapi/linux/ipsec.h
similarity index 100%
rename from include/linux/ipsec.h
rename to include/uapi/linux/ipsec.h
diff --git a/include/linux/kernel.h b/include/uapi/linux/kernel.h
similarity index 100%
rename from include/linux/kernel.h
rename to include/uapi/linux/kernel.h
diff --git a/include/linux/l2tp.h b/include/uapi/linux/l2tp.h
similarity index 100%
rename from include/linux/l2tp.h
rename to include/uapi/linux/l2tp.h
diff --git a/include/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
similarity index 100%
rename from include/linux/libc-compat.h
rename to include/uapi/linux/libc-compat.h
diff --git a/include/linux/limits.h b/include/uapi/linux/limits.h
similarity index 100%
rename from include/linux/limits.h
rename to include/uapi/linux/limits.h
diff --git a/include/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
similarity index 100%
rename from include/linux/lwtunnel.h
rename to include/uapi/linux/lwtunnel.h
diff --git a/include/linux/magic.h b/include/uapi/linux/magic.h
similarity index 100%
rename from include/linux/magic.h
rename to include/uapi/linux/magic.h
diff --git a/include/linux/mpls.h b/include/uapi/linux/mpls.h
similarity index 100%
rename from include/linux/mpls.h
rename to include/uapi/linux/mpls.h
diff --git a/include/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h
similarity index 100%
rename from include/linux/mpls_iptunnel.h
rename to include/uapi/linux/mpls_iptunnel.h
diff --git a/include/linux/neighbour.h b/include/uapi/linux/neighbour.h
similarity index 100%
rename from include/linux/neighbour.h
rename to include/uapi/linux/neighbour.h
diff --git a/include/linux/net_namespace.h b/include/uapi/linux/net_namespace.h
similarity index 100%
rename from include/linux/net_namespace.h
rename to include/uapi/linux/net_namespace.h
diff --git a/include/linux/netconf.h b/include/uapi/linux/netconf.h
similarity index 100%
rename from include/linux/netconf.h
rename to include/uapi/linux/netconf.h
diff --git a/include/linux/netdevice.h b/include/uapi/linux/netdevice.h
similarity index 100%
rename from include/linux/netdevice.h
rename to include/uapi/linux/netdevice.h
diff --git a/include/linux/netfilter.h b/include/uapi/linux/netfilter.h
similarity index 100%
rename from include/linux/netfilter.h
rename to include/uapi/linux/netfilter.h
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
similarity index 100%
rename from include/linux/netfilter/ipset/ip_set.h
rename to include/uapi/linux/netfilter/ipset/ip_set.h
diff --git a/include/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h
similarity index 100%
rename from include/linux/netfilter/x_tables.h
rename to include/uapi/linux/netfilter/x_tables.h
diff --git a/include/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h
similarity index 100%
rename from include/linux/netfilter/xt_set.h
rename to include/uapi/linux/netfilter/xt_set.h
diff --git a/include/linux/netfilter/xt_tcpudp.h b/include/uapi/linux/netfilter/xt_tcpudp.h
similarity index 100%
rename from include/linux/netfilter/xt_tcpudp.h
rename to include/uapi/linux/netfilter/xt_tcpudp.h
diff --git a/include/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h
similarity index 100%
rename from include/linux/netfilter_ipv4.h
rename to include/uapi/linux/netfilter_ipv4.h
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h
similarity index 100%
rename from include/linux/netfilter_ipv4/ip_tables.h
rename to include/uapi/linux/netfilter_ipv4/ip_tables.h
diff --git a/include/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h
similarity index 100%
rename from include/linux/netfilter_ipv6.h
rename to include/uapi/linux/netfilter_ipv6.h
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h
similarity index 100%
rename from include/linux/netfilter_ipv6/ip6_tables.h
rename to include/uapi/linux/netfilter_ipv6/ip6_tables.h
diff --git a/include/linux/netlink.h b/include/uapi/linux/netlink.h
similarity index 100%
rename from include/linux/netlink.h
rename to include/uapi/linux/netlink.h
diff --git a/include/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h
similarity index 100%
rename from include/linux/netlink_diag.h
rename to include/uapi/linux/netlink_diag.h
diff --git a/include/linux/packet_diag.h b/include/uapi/linux/packet_diag.h
similarity index 100%
rename from include/linux/packet_diag.h
rename to include/uapi/linux/packet_diag.h
diff --git a/include/linux/param.h b/include/uapi/linux/param.h
similarity index 100%
rename from include/linux/param.h
rename to include/uapi/linux/param.h
diff --git a/include/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h
similarity index 100%
rename from include/linux/pfkeyv2.h
rename to include/uapi/linux/pfkeyv2.h
diff --git a/include/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
similarity index 100%
rename from include/linux/pkt_cls.h
rename to include/uapi/linux/pkt_cls.h
diff --git a/include/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
similarity index 100%
rename from include/linux/pkt_sched.h
rename to include/uapi/linux/pkt_sched.h
diff --git a/include/linux/posix_types.h b/include/uapi/linux/posix_types.h
similarity index 100%
rename from include/linux/posix_types.h
rename to include/uapi/linux/posix_types.h
diff --git a/include/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
similarity index 100%
rename from include/linux/rtnetlink.h
rename to include/uapi/linux/rtnetlink.h
diff --git a/include/linux/sctp.h b/include/uapi/linux/sctp.h
similarity index 100%
rename from include/linux/sctp.h
rename to include/uapi/linux/sctp.h
diff --git a/include/linux/seg6.h b/include/uapi/linux/seg6.h
similarity index 100%
rename from include/linux/seg6.h
rename to include/uapi/linux/seg6.h
diff --git a/include/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h
similarity index 100%
rename from include/linux/seg6_genl.h
rename to include/uapi/linux/seg6_genl.h
diff --git a/include/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h
similarity index 100%
rename from include/linux/seg6_hmac.h
rename to include/uapi/linux/seg6_hmac.h
diff --git a/include/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
similarity index 100%
rename from include/linux/seg6_iptunnel.h
rename to include/uapi/linux/seg6_iptunnel.h
diff --git a/include/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
similarity index 100%
rename from include/linux/seg6_local.h
rename to include/uapi/linux/seg6_local.h
diff --git a/include/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
similarity index 100%
rename from include/linux/sock_diag.h
rename to include/uapi/linux/sock_diag.h
diff --git a/include/linux/socket.h b/include/uapi/linux/socket.h
similarity index 100%
rename from include/linux/socket.h
rename to include/uapi/linux/socket.h
diff --git a/include/linux/sockios.h b/include/uapi/linux/sockios.h
similarity index 100%
rename from include/linux/sockios.h
rename to include/uapi/linux/sockios.h
diff --git a/include/linux/stddef.h b/include/uapi/linux/stddef.h
similarity index 100%
rename from include/linux/stddef.h
rename to include/uapi/linux/stddef.h
diff --git a/include/linux/sysinfo.h b/include/uapi/linux/sysinfo.h
similarity index 100%
rename from include/linux/sysinfo.h
rename to include/uapi/linux/sysinfo.h
diff --git a/include/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
similarity index 100%
rename from include/linux/tc_act/tc_bpf.h
rename to include/uapi/linux/tc_act/tc_bpf.h
diff --git a/include/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h
similarity index 100%
rename from include/linux/tc_act/tc_connmark.h
rename to include/uapi/linux/tc_act/tc_connmark.h
diff --git a/include/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h
similarity index 100%
rename from include/linux/tc_act/tc_csum.h
rename to include/uapi/linux/tc_act/tc_csum.h
diff --git a/include/linux/tc_act/tc_defact.h b/include/uapi/linux/tc_act/tc_defact.h
similarity index 100%
rename from include/linux/tc_act/tc_defact.h
rename to include/uapi/linux/tc_act/tc_defact.h
diff --git a/include/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h
similarity index 100%
rename from include/linux/tc_act/tc_gact.h
rename to include/uapi/linux/tc_act/tc_gact.h
diff --git a/include/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h
similarity index 100%
rename from include/linux/tc_act/tc_ife.h
rename to include/uapi/linux/tc_act/tc_ife.h
diff --git a/include/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h
similarity index 100%
rename from include/linux/tc_act/tc_ipt.h
rename to include/uapi/linux/tc_act/tc_ipt.h
diff --git a/include/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
similarity index 100%
rename from include/linux/tc_act/tc_mirred.h
rename to include/uapi/linux/tc_act/tc_mirred.h
diff --git a/include/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h
similarity index 100%
rename from include/linux/tc_act/tc_nat.h
rename to include/uapi/linux/tc_act/tc_nat.h
diff --git a/include/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h
similarity index 100%
rename from include/linux/tc_act/tc_pedit.h
rename to include/uapi/linux/tc_act/tc_pedit.h
diff --git a/include/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h
similarity index 100%
rename from include/linux/tc_act/tc_sample.h
rename to include/uapi/linux/tc_act/tc_sample.h
diff --git a/include/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
similarity index 100%
rename from include/linux/tc_act/tc_skbedit.h
rename to include/uapi/linux/tc_act/tc_skbedit.h
diff --git a/include/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
similarity index 100%
rename from include/linux/tc_act/tc_skbmod.h
rename to include/uapi/linux/tc_act/tc_skbmod.h
diff --git a/include/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
similarity index 100%
rename from include/linux/tc_act/tc_tunnel_key.h
rename to include/uapi/linux/tc_act/tc_tunnel_key.h
diff --git a/include/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
similarity index 100%
rename from include/linux/tc_act/tc_vlan.h
rename to include/uapi/linux/tc_act/tc_vlan.h
diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/uapi/linux/tc_ematch/tc_em_cmp.h
similarity index 100%
rename from include/linux/tc_ematch/tc_em_cmp.h
rename to include/uapi/linux/tc_ematch/tc_em_cmp.h
diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/uapi/linux/tc_ematch/tc_em_meta.h
similarity index 100%
rename from include/linux/tc_ematch/tc_em_meta.h
rename to include/uapi/linux/tc_ematch/tc_em_meta.h
diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/uapi/linux/tc_ematch/tc_em_nbyte.h
similarity index 100%
rename from include/linux/tc_ematch/tc_em_nbyte.h
rename to include/uapi/linux/tc_ematch/tc_em_nbyte.h
diff --git a/include/linux/tcp.h b/include/uapi/linux/tcp.h
similarity index 100%
rename from include/linux/tcp.h
rename to include/uapi/linux/tcp.h
diff --git a/include/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
similarity index 100%
rename from include/linux/tcp_metrics.h
rename to include/uapi/linux/tcp_metrics.h
diff --git a/include/linux/tipc.h b/include/uapi/linux/tipc.h
similarity index 100%
rename from include/linux/tipc.h
rename to include/uapi/linux/tipc.h
diff --git a/include/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
similarity index 100%
rename from include/linux/tipc_netlink.h
rename to include/uapi/linux/tipc_netlink.h
diff --git a/include/linux/types.h b/include/uapi/linux/types.h
similarity index 100%
rename from include/linux/types.h
rename to include/uapi/linux/types.h
diff --git a/include/linux/unix_diag.h b/include/uapi/linux/unix_diag.h
similarity index 100%
rename from include/linux/unix_diag.h
rename to include/uapi/linux/unix_diag.h
diff --git a/include/linux/veth.h b/include/uapi/linux/veth.h
similarity index 100%
rename from include/linux/veth.h
rename to include/uapi/linux/veth.h
diff --git a/include/linux/xfrm.h b/include/uapi/linux/xfrm.h
similarity index 100%
rename from include/linux/xfrm.h
rename to include/uapi/linux/xfrm.h
-- 
2.14.2.822.g60be5d43e6-goog

^ permalink raw reply related

* Re: [PATCH 00/18] use ARRAY_SIZE macro
From: Zhi Wang @ 2017-10-02 17:05 UTC (permalink / raw)
  To: Jérémy Lefaure
  Cc: alsa-devel, nouveau, dri-devel, dm-devel, brcm80211-dev-list,
	devel, linux-scsi, linux-rdma, amd-gfx, Jason Gunthorpe,
	linux-acpi, linux-video, intel-wired-lan, linux-media, intel-gfx,
	ecryptfs, linux-nfs, linux-raid, openipmi-developer,
	intel-gvt-dev, devel, brcm80211-dev-list.pdl, netdev, linux-usb,
	linux-wireless, linux-kernel, linux-integrity
In-Reply-To: <20171001193101.8898-1-jeremy.lefaure@lse.epita.fr>


[-- Attachment #1.1: Type: text/plain, Size: 1908 bytes --]

Thanks for the patch! :)

2017-10-01 22:30 GMT+03:00 Jérémy Lefaure <jeremy.lefaure@lse.epita.fr>:

> Hi everyone,
> Using ARRAY_SIZE improves the code readability. I used coccinelle (I
> made a change to the array_size.cocci file [1]) to find several places
> where ARRAY_SIZE could be used instead of other macros or sizeof
> division.
>
> I tried to divide the changes into a patch per subsystem (except for
> staging). If one of the patches should be split into several patches, let
> me know.
>
> In order to reduce the size of the To: and Cc: lines, each patch of the
> series is sent only to the maintainers and lists concerned by the patch.
> This cover letter is sent to every list concerned by this series.
>
> This series is based on linux-next next-20170929. Each patch has been
> tested by building the relevant files with W=1.
>
> This series contains the following patches:
> [PATCH 01/18] sound: use ARRAY_SIZE
> [PATCH 02/18] tracing/filter: use ARRAY_SIZE
> [PATCH 03/18] media: use ARRAY_SIZE
> [PATCH 04/18] IB/mlx5: Use ARRAY_SIZE
> [PATCH 05/18] net: use ARRAY_SIZE
> [PATCH 06/18] drm: use ARRAY_SIZE
> [PATCH 07/18] scsi: bfa: use ARRAY_SIZE
> [PATCH 08/18] ecryptfs: use ARRAY_SIZE
> [PATCH 09/18] nfsd: use ARRAY_SIZE
> [PATCH 10/18] orangefs: use ARRAY_SIZE
> [PATCH 11/18] dm space map metadata: use ARRAY_SIZE
> [PATCH 12/18] x86: use ARRAY_SIZE
> [PATCH 13/18] tpm: use ARRAY_SIZE
> [PATCH 14/18] ipmi: use ARRAY_SIZE
> [PATCH 15/18] acpi: use ARRAY_SIZE
> [PATCH 16/18] media: staging: atomisp: use ARRAY_SIZE
> [PATCH 17/18] staging: rtl8723bs: use ARRAY_SIZE
> [PATCH 18/18] staging: rtlwifi: use ARRAY_SIZE
>
>
> [1]: https://lkml.org/lkml/2017/9/13/689
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

[-- Attachment #1.2: Type: text/html, Size: 2555 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply

* Re: [next-queue PATCH v2 3/5] net/sched: Introduce the user API for the CBS shaper
From: Vinicius Costa Gomes @ 2017-10-02 17:07 UTC (permalink / raw)
  To: Cong Wang
  Cc: Linux Kernel Network Developers, intel-wired-lan,
	Jamal Hadi Salim, Jiri Pirko, andre.guedes, Ivan Briano,
	Jesus Sanchez-Palencia, boon.leong.ong, richardcochran,
	Henrik Austad, levipearson, rodney.cummings
In-Reply-To: <CAM_iQpWGHE7hgwEZDO+oRGgWdrdYYofnHfuQq3fMOO-yFj7NSw@mail.gmail.com>

Hi,

Cong Wang <xiyou.wangcong@gmail.com> writes:

> On Fri, Sep 29, 2017 at 5:26 PM, Vinicius Costa Gomes
> <vinicius.gomes@intel.com> wrote:
>> Export the API necessary for configuring the CBS shaper (implemented
>> in the next patch) via the tc tool.
>
> This one can be folded into patch 4/5.

Will do.


Cheers,

^ permalink raw reply

* Re: [kernel-hardening] [PATCH 0/2] capability controlled user-namespaces
From: Serge E. Hallyn @ 2017-10-02 17:14 UTC (permalink / raw)
  To: Mahesh Bandewar
  Cc: LKML, Netdev, Kernel-hardening, Linux API, Kees Cook,
	Serge Hallyn, Eric W . Biederman, Eric Dumazet, David Miller,
	Mahesh Bandewar
In-Reply-To: <20170929230952.29673-1-mahesh-bmGAjcP2qsnk1uMJSBkQmQ@public.gmane.org>

Quoting Mahesh Bandewar (mahesh-bmGAjcP2qsnk1uMJSBkQmQ@public.gmane.org):
> From: Mahesh Bandewar <maheshb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> 
> [Same as the previous RFC series sent on 9/21]
> 
> TL;DR version
> -------------
> Creating a sandbox environment with namespaces is challenging
> considering what these sandboxed processes can engage into. e.g.
> CVE-2017-6074, CVE-2017-7184, CVE-2017-7308 etc. just to name few.
> Current form of user-namespaces, however, if changed a bit can allow
> us to create a sandbox environment without locking down user-
> namespaces.
> 
> Detailed version
> ----------------

Hi,

still struggling with how I feel about the idea in general.

So is the intent mainly that if/when there comes an 0-day which allows
users with CAP_NET_ADMIN in any namespace to gain privilege on the host,
then this can be used as a stop-gap measure until there is a proper fix?

Otherwise, do you have any guidance for how people should use this?

IMO it should be heavily discouraged to use this tool as a regular
day to day configuration, as I'm not sure there is any "educated"
decision to be made, even by those who are in the know, about what
to put in this set.

> Problem
> -------
> User-namespaces in the current form have increased the attack surface as
> any process can acquire capabilities which are not available to them (by
> default) by performing combination of clone()/unshare()/setns() syscalls.
> 
>     #define _GNU_SOURCE
>     #include <stdio.h>
>     #include <sched.h>
>     #include <netinet/in.h>
> 
>     int main(int ac, char **av)
>     {
>         int sock = -1;
> 
>         printf("Attempting to open RAW socket before unshare()...\n");
>         sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
>         if (sock < 0) {
>             perror("socket() SOCK_RAW failed: ");
>         } else {
>             printf("Successfully opened RAW-Sock before unshare().\n");
>             close(sock);
>             sock = -1;
>         }
> 
>         if (unshare(CLONE_NEWUSER | CLONE_NEWNET) < 0) {
>             perror("unshare() failed: ");
>             return 1;
>         }
> 
>         printf("Attempting to open RAW socket after unshare()...\n");
>         sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
>         if (sock < 0) {
>             perror("socket() SOCK_RAW failed: ");
>         } else {
>             printf("Successfully opened RAW-Sock after unshare().\n");
>             close(sock);
>             sock = -1;
>         }
> 
>         return 0;
>     }
> 
> The above example shows how easy it is to acquire NET_RAW capabilities
> and once acquired, these processes could take benefit of above mentioned
> or similar issues discovered/undiscovered with malicious intent. Note
> that this is just an example and the problem/solution is not limited
> to NET_RAW capability *only*. 
> 
> The easiest fix one can apply here is to lock-down user-namespaces which
> many of the distros do (i.e. don't allow users to create user namespaces),
> but unfortunately that prevents everyone from using them.
> 
> Approach
> --------
> Introduce a notion of 'controlled' user-namespaces. Every process on
> the host is allowed to create user-namespaces (governed by the limit
> imposed by per-ns sysctl) however, mark user-namespaces created by
> sandboxed processes as 'controlled'. Use this 'mark' at the time of
> capability check in conjunction with a global capability whitelist.
> If the capability is not whitelisted, processes that belong to 
> controlled user-namespaces will not be allowed.
> 
> Once a user-ns is marked as 'controlled'; all its child user-
> namespaces are marked as 'controlled' too.
> 
> A global whitelist is list of capabilities governed by the
> sysctl which is available to (privileged) user in init-ns to modify
> while it's applicable to all controlled user-namespaces on the host.
> 
> Marking user-namespaces controlled without modifying the whitelist is
> equivalent of the current behavior. The default value of whitelist includes
> all capabilities so that the compatibility is maintained. However it gives
> admins fine-grained ability to control various capabilities system wide
> without locking down user-namespaces.
> 
> Please see individual patches in this series.
> 
> Mahesh Bandewar (2):
>   capability: introduce sysctl for controlled user-ns capability
>     whitelist
>   userns: control capabilities of some user namespaces
> 
>  Documentation/sysctl/kernel.txt | 21 +++++++++++++++++
>  include/linux/capability.h      |  4 ++++
>  include/linux/user_namespace.h  | 20 ++++++++++++++++
>  kernel/capability.c             | 52 +++++++++++++++++++++++++++++++++++++++++
>  kernel/sysctl.c                 |  5 ++++
>  kernel/user_namespace.c         |  3 +++
>  security/commoncap.c            |  8 +++++++
>  7 files changed, 113 insertions(+)
> 
> -- 
> 2.14.2.822.g60be5d43e6-goog

^ permalink raw reply

* Re: [PATCH V4] r8152: add Linksys USB3GIGV1 id
From: Grant Grundler @ 2017-10-02 17:21 UTC (permalink / raw)
  To: David Miller
  Cc: Grant Grundler, Hayes Wang, Oliver Neukum,
	linux-usb@vger.kernel.org, LKML, netdev
In-Reply-To: <20171001.223954.160035131695050852.davem@davemloft.net>

On Sun, Oct 1, 2017 at 10:39 PM, David Miller <davem@davemloft.net> wrote:
> From: Grant Grundler <grundler@chromium.org>
> Date: Thu, 28 Sep 2017 11:35:00 -0700
>
>> This linksys dongle by default comes up in cdc_ether mode.
>> This patch allows r8152 to claim the device:
>>    Bus 002 Device 002: ID 13b1:0041 Linksys
>>
>> Signed-off-by: Grant Grundler <grundler@chromium.org>
>
> Applied, thanks.

thanks David, Doug, Oliver! :)

cheers,
grant

^ permalink raw reply

* Re: v4.14-rc2/arm64 kernel BUG at net/core/skbuff.c:2626
From: Mark Rutland @ 2017-10-02 17:21 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Eric Dumazet, LKML, netdev, linux-arm-kernel, syzkaller,
	David S. Miller, Willem de Bruijn
In-Reply-To: <1506955708.8061.5.camel@edumazet-glaptop3.roam.corp.google.com>

On Mon, Oct 02, 2017 at 07:48:28AM -0700, Eric Dumazet wrote:
> Please try the following fool proof patch.
>
> This is what I had in my local tree back in August but could not
> conclude on the syzkaller bug I was working on.
> 
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 681e33998e03b609fdca83a83e0fc62a3fee8c39..e51d777797a927058760a1ab7af00579f7488cb5 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -732,7 +732,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>  		room = 576;
>  	room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
>  	room -= sizeof(struct icmphdr);
> -
> +	if (room < 0)
> +		goto ende;
>  	icmp_param.data_len = skb_in->len - icmp_param.offset;
>  	if (icmp_param.data_len > room)
>  		icmp_param.data_len = room;
> 

Unfortunately, with this applied I still see the issue.

Syzkaller came up with a minimized reproducer [1], which can trigger the
issue near instantly under syz-execprog. If there's anything that would
help to narrow this down, I'm more than happy to give it a go.

Thanks,
Mark.

[1] https://www.kernel.org/pub/linux/kernel/people/mark/bugs/20171002-skb_clone-misaligned-atomic/syzkaller.repro

^ permalink raw reply

* Re: v4.14-rc2/arm64 kernel BUG at net/core/skbuff.c:2626
From: Eric Dumazet @ 2017-10-02 17:27 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Eric Dumazet, LKML, netdev, linux-arm-kernel, syzkaller,
	David S. Miller, Willem de Bruijn
In-Reply-To: <20171002172131.GA3360@leverpostej>

On Mon, Oct 2, 2017 at 10:21 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Mon, Oct 02, 2017 at 07:48:28AM -0700, Eric Dumazet wrote:
>> Please try the following fool proof patch.
>>
>> This is what I had in my local tree back in August but could not
>> conclude on the syzkaller bug I was working on.
>>
>> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
>> index 681e33998e03b609fdca83a83e0fc62a3fee8c39..e51d777797a927058760a1ab7af00579f7488cb5 100644
>> --- a/net/ipv4/icmp.c
>> +++ b/net/ipv4/icmp.c
>> @@ -732,7 +732,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
>>               room = 576;
>>       room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
>>       room -= sizeof(struct icmphdr);
>> -
>> +     if (room < 0)
>> +             goto ende;
>>       icmp_param.data_len = skb_in->len - icmp_param.offset;
>>       if (icmp_param.data_len > room)
>>               icmp_param.data_len = room;
>>
>
> Unfortunately, with this applied I still see the issue.
>
> Syzkaller came up with a minimized reproducer [1], which can trigger the
> issue near instantly under syz-execprog. If there's anything that would
> help to narrow this down, I'm more than happy to give it a go.
>
> Thanks,
> Mark.
>
> [1] https://www.kernel.org/pub/linux/kernel/people/mark/bugs/20171002-skb_clone-misaligned-atomic/syzkaller.repro

Note that I was not trying to address the misaligned stuff.

Only this :

------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:2626!
Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
Modules linked in:
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.14.0-rc2-00001-gd7ad33d #115
Hardware name: linux,dummy-virt (DT)
task: ffff80003a901a80 task.stack: ffff80003a908000
PC is at skb_copy_and_csum_bits+0x8dc/0xae0 net/core/skbuff.c:2626
LR is at skb_copy_and_csum_bits+0x8dc/0xae0 net/core/skbuff.c:2626

^ permalink raw reply

* Re: v4.14-rc2/arm64 kernel BUG at net/core/skbuff.c:2626
From: Mark Rutland @ 2017-10-02 17:34 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Eric Dumazet, LKML, netdev, linux-arm-kernel, syzkaller,
	David S. Miller, Willem de Bruijn
In-Reply-To: <CANn89iKXGx2AmaYtqaD_CTvxgG2xC6vbuuNigixtUM82fExODQ@mail.gmail.com>

On Mon, Oct 02, 2017 at 10:27:15AM -0700, Eric Dumazet wrote:
> On Mon, Oct 2, 2017 at 10:21 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> > On Mon, Oct 02, 2017 at 07:48:28AM -0700, Eric Dumazet wrote:
> >> Please try the following fool proof patch.
> >>
> >> This is what I had in my local tree back in August but could not
> >> conclude on the syzkaller bug I was working on.
> >>
> >> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> >> index 681e33998e03b609fdca83a83e0fc62a3fee8c39..e51d777797a927058760a1ab7af00579f7488cb5 100644
> >> --- a/net/ipv4/icmp.c
> >> +++ b/net/ipv4/icmp.c
> >> @@ -732,7 +732,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
> >>               room = 576;
> >>       room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
> >>       room -= sizeof(struct icmphdr);
> >> -
> >> +     if (room < 0)
> >> +             goto ende;
> >>       icmp_param.data_len = skb_in->len - icmp_param.offset;
> >>       if (icmp_param.data_len > room)
> >>               icmp_param.data_len = room;
> >>
> >
> > Unfortunately, with this applied I still see the issue.
> >
> > Syzkaller came up with a minimized reproducer [1], which can trigger the
> > issue near instantly under syz-execprog. If there's anything that would
> > help to narrow this down, I'm more than happy to give it a go.
> >
> > Thanks,
> > Mark.
> >
> > [1] https://www.kernel.org/pub/linux/kernel/people/mark/bugs/20171002-skb_clone-misaligned-atomic/syzkaller.repro
> 
> Note that I was not trying to address the misaligned stuff.

Aargh, I put the reproducer in the wrong folder thanks to tab-completing
my kup command. :/

The reproducer linked above is for the kernel BUG at
net/core/skbuff.c:2626.

I've uploaded a copy into the relevant bug directory [1], but that'll
take a little while to sync out. I'll drop it from the misalignment bug
folder once that's visible to all.

Sorry about that!

Thanks,
Mark.

[1] https://www.kernel.org/pub/linux/kernel/people/mark/bugs/20171002-skbuff-bug/

^ permalink raw reply

* Re: [PATCH iproute2] iproute: build more easily on Android
From: Stephen Hemminger @ 2017-10-02 17:36 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev, enh
In-Reply-To: <20171002170337.42235-1-lorenzo@google.com>

On Tue,  3 Oct 2017 02:03:37 +0900
Lorenzo Colitti <lorenzo@google.com> wrote:

> iproute2 contains a bunch of kernel headers, including uapi ones.
> Android's libc uses uapi headers almost directly, and uses a
> script to fix kernel types that don't match what userspace
> expects.
> 
> For example: https://issuetracker.google.com/36987220 reports
> that our struct ip_mreq_source contains "__be32 imr_multiaddr"
> rather than "struct in_addr imr_multiaddr". The script addresses
> this by replacing the uapi struct definition with a #include
> <bits/ip_mreq.h> which contains the traditional userspace
> definition.
> 
> Unfortunately, when we compile iproute2, this definition
> conflicts with the one in iproute2's linux/in.h.
> 
> Historically we've just solved this problem by running "git rm"
> on all the iproute2 include/linux headers that break Android's
> libc.  However, deleting the files in this way makes it harder to
> keep up with upstream, because every upstream change to
> an include file causes a merge conflict with the delete.
> 
> This patch fixes the problem by moving the iproute2 linux headers
> from include/linux to include/uapi/linux.
> 
> Tested: compiles on ubuntu trusty (glibc)
> 
> Signed-off-by: Elliott Hughes <enh@google.com>
> Signed-off-by: Lorenzo Colitti <lorenzo@google.com>

Rather than moving everything, why not make the kernel headers directory
configurable as part of the configure script setup process?

^ permalink raw reply

* Re: [PATCHv4 iproute2 2/2] lib/libnetlink: update rtnl_talk to support malloc buff at run time
From: Stephen Hemminger @ 2017-10-02 17:37 UTC (permalink / raw)
  To: Hangbin Liu; +Cc: netdev, Michal Kubecek, Phil Sutter, Hangbin Liu
In-Reply-To: <1506605626-1744-3-git-send-email-haliu@redhat.com>

On Thu, 28 Sep 2017 21:33:46 +0800
Hangbin Liu <haliu@redhat.com> wrote:

> From: Hangbin Liu <liuhangbin@gmail.com>
> 
> This is an update for 460c03f3f3cc ("iplink: double the buffer size also in
> iplink_get()"). After update, we will not need to double the buffer size
> every time when VFs number increased.
> 
> With call like rtnl_talk(&rth, &req.n, NULL, 0), we can simply remove the
> length parameter.
> 
> With call like rtnl_talk(&rth, nlh, nlh, sizeof(req)), I add a new variable
> answer to avoid overwriting data in nlh, because it may have more info after
> nlh. Also this will avoid the nlh buffer not being large enough.
> 
> We need to free answer after using.
> 
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> Signed-off-by: Phil Sutter <phil@nwl.cc>
> ---

Most of the uses of rtnl_talk() don't need this peek and dynamic sizing.
Can only those places that need that be targeted?

^ permalink raw reply

* Re: [PATCH net-next 01/12] qed: Add ll2 option to limit the number of bds per packet
From: David Miller @ 2017-10-02 17:56 UTC (permalink / raw)
  To: Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA
In-Reply-To: <1506932638-26268-2-git-send-email-Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

From: Michal Kalderon <Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Date: Mon, 2 Oct 2017 11:23:47 +0300

> +		p_pkt = (void *)((u8 *)p_tx->descq_array + desc_size * i);

Hmmm... this is definitely a red flag.

> @@ -63,17 +63,14 @@ struct qed_ll2_rx_packet {
>  struct qed_ll2_tx_packet {
>  	struct list_head list_entry;
>  	u16 bd_used;
> -	u16 vlan;
> -	u16 l4_hdr_offset_w;
> -	u8 bd_flags;
>  	bool notify_fw;
>  	void *cookie;
> -
> +	/* Flexible Array of bds_set determined by max_bds_per_packet */
>  	struct {
>  		struct core_tx_bd *txq_bd;
>  		dma_addr_t tx_frag;
>  		u16 frag_len;
> -	} bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET];
> +	} bds_set[1];
>  };

If you do this then you have to make the ->descq_array a void pointer
or something.

Otherwise someone will try to access it as an array and it will
explode because the elements of the array are of a variable size.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next 02/12] qed: Add ll2 ability of opening a secondary queue
From: David Miller @ 2017-10-02 17:56 UTC (permalink / raw)
  To: Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA,
	Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA
In-Reply-To: <1506932638-26268-3-git-send-email-Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

From: Michal Kalderon <Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Date: Mon, 2 Oct 2017 11:23:48 +0300

> When more than one ll2 queue is opened ( that is not an OOO queue )
> ll2 code does not have enough information to determine whether
> the queue is the main one or not, so a new field is added to the
> acquire input data to expose the control of determining whether
> the queue is the main queue or a secondary queue.
> 
> Signed-off-by: Michal Kalderon <Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Ariel Elior <Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
> ---
>  drivers/net/ethernet/qlogic/qed/qed_ll2.c | 7 ++++++-
>  drivers/net/ethernet/qlogic/qed/qed_ll2.h | 1 +
>  include/linux/qed/qed_ll2_if.h            | 1 +
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
> index 10e3a43..1dd0cca 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
> @@ -894,7 +894,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
>  	p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
>  	p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
>  	p_ramrod->queue_id = p_ll2_conn->queue_id;
> -	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
> +	p_ramrod->main_func_queue = p_ll2_conn->main_func_queue;
>  
>  	if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
>  	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
> @@ -1265,6 +1265,11 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
>  
>  	p_ll2_info->tx_dest = (data->input.tx_dest == QED_LL2_TX_DEST_NW) ?
>  			      CORE_TX_DEST_NW : CORE_TX_DEST_LB;
> +	if (data->input.conn_type == QED_LL2_TYPE_OOO ||
> +	    data->input.secondary_queue)
> +		p_ll2_info->main_func_queue = false;
> +	else
> +		p_ll2_info->main_func_queue = true;
 ...
> +	u8 main_func_queue;

If these things are bools please use the 'bool' type.

Thank you.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next 0/3] bridge: neigh msg proxy and flood suppression support
From: Stephen Hemminger @ 2017-10-02 18:02 UTC (permalink / raw)
  To: Roopa Prabhu
  Cc: davem@davemloft.net, netdev@vger.kernel.org, Nikolay Aleksandrov,
	bridge
In-Reply-To: <CAJieiUim2XLMGAomb3S5AeWfYqjxV_raetedWcA_PBiaGPRHWg@mail.gmail.com>

On Mon, 2 Oct 2017 07:49:09 -0700
Roopa Prabhu <roopa@cumulusnetworks.com> wrote:

> On Sun, Oct 1, 2017 at 9:36 PM, Roopa Prabhu <roopa@cumulusnetworks.com> wrote:
> > From: Roopa Prabhu <roopa@cumulusnetworks.com>
> >
> > This series implements arp and nd suppression in the bridge
> > driver for ethernet vpns. It implements rfc7432, section 10
> > https://tools.ietf.org/html/rfc7432#section-10
> > for ethernet VPN deployments. It is similar to the existing
> > BR_ARP_PROXY flag but has a few semantic differences to conform
> > to EVPN standard. In case of EVPN, it is mainly used to avoid flooding to
> > tunnel ports like vxlan/mpls. Unlike the existing flags it suppresses flood
> > of all neigh discovery packets (arp, nd) to tunnel ports.
> >
> > Roopa Prabhu (3):
> >   bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd
> >     flood
> >   neigh arp suppress first
> >   bridge: suppress nd messages from going to BR_NEIGH_SUPPRESS ports
> >  
> 
> pls ignore, shows conflict applying over recent net-next bridge
> changes. Will rebase and submit v2.

Ok, but the concept looks good.

^ permalink raw reply

* Re: [PATCH net-next 0/2] flow_dissector: dissect tunnel info
From: David Miller @ 2017-10-02 18:06 UTC (permalink / raw)
  To: simon.horman; +Cc: jiri, jhs, xiyou.wangcong, netdev, oss-drivers
In-Reply-To: <1506933676-20121-1-git-send-email-simon.horman@netronome.com>

From: Simon Horman <simon.horman@netronome.com>
Date: Mon,  2 Oct 2017 10:41:14 +0200

> Move dissection of tunnel info from the flower classifier to the flow
> dissector where all other dissection occurs.  This should not have any
> behavioural effect on other users of the flow dissector.

Series applied, thanks Simon.

^ permalink raw reply

* Re: [kernel-hardening] [PATCH 0/2] capability controlled user-namespaces
From: Mahesh Bandewar (महेश बंडेवार) @ 2017-10-02 18:12 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Mahesh Bandewar, LKML, Netdev, Kernel-hardening, Linux API,
	Kees Cook, Eric W . Biederman, Eric Dumazet, David Miller
In-Reply-To: <20171002171410.GA19611-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>

On Mon, Oct 2, 2017 at 10:14 AM, Serge E. Hallyn <serge-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org> wrote:
> Quoting Mahesh Bandewar (mahesh-bmGAjcP2qsnk1uMJSBkQmQ@public.gmane.org):
>> From: Mahesh Bandewar <maheshb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>>
>> [Same as the previous RFC series sent on 9/21]
>>
>> TL;DR version
>> -------------
>> Creating a sandbox environment with namespaces is challenging
>> considering what these sandboxed processes can engage into. e.g.
>> CVE-2017-6074, CVE-2017-7184, CVE-2017-7308 etc. just to name few.
>> Current form of user-namespaces, however, if changed a bit can allow
>> us to create a sandbox environment without locking down user-
>> namespaces.
>>
>> Detailed version
>> ----------------
>
> Hi,
>
> still struggling with how I feel about the idea in general.
>
> So is the intent mainly that if/when there comes an 0-day which allows
> users with CAP_NET_ADMIN in any namespace to gain privilege on the host,
> then this can be used as a stop-gap measure until there is a proper fix?
>
Thanks for looking at this, Serge.

Yes, but at the same time it's not just limited to NET_ADMIN but could
be any of the current capabilities.

> Otherwise, do you have any guidance for how people should use this?
>
> IMO it should be heavily discouraged to use this tool as a regular
> day to day configuration, as I'm not sure there is any "educated"
> decision to be made, even by those who are in the know, about what
> to put in this set.
>
I think that really depends on the environment. e.g. in certain
sandboxes third-party / semi-trusted workload is executed where network
resource is not used. In that environment I can easily take off
NET_ADMIN and NET_RAW without affecting anything there. At the same
time I won't have to worry about 0-day related to these two
capabilities. I would say the Admins at these places are in the best
place to decide what they can take-off safely and what they cannot.
Even if they decide not to take-off anything, having a tool at hand to
gain control is important when the next 0-day strikes us that can be
exploited using any of the currently used capabilities.

However, you are absolutely right in terms of using it as a stop-gap
measure to protect environment until it's fixed and the capability in
question can not be safely taken off permanently without hampering
operations.

thanks,
--mahesh..

[...]

^ permalink raw reply

* Re: [PATCH net-next] selftests: rtnetlink.sh: add vxlan and fou test cases
From: David Miller @ 2017-10-02 18:15 UTC (permalink / raw)
  To: fw; +Cc: netdev
In-Reply-To: <20171002100529.602-1-fw@strlen.de>

From: Florian Westphal <fw@strlen.de>
Date: Mon,  2 Oct 2017 12:05:29 +0200

> fou test lifted from ip-fou man page.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>

I love seeing new testcases ;-)

Applied, thanks.

^ permalink raw reply

* Re: [RFC net-next 1/5] net: dsa: Add infrastructure to support LAG
From: Florian Fainelli @ 2017-10-02 18:19 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: netdev, vivien.didelot, jiri, idosch, Woojung.Huh, john,
	sean.wang
In-Reply-To: <20171002020327.GA21593@lunn.ch>

On 10/01/2017 07:03 PM, Andrew Lunn wrote:
> On Sun, Oct 01, 2017 at 12:46:35PM -0700, Florian Fainelli wrote:
>> Add the necessary logic to support network device events targeting LAG events,
>> this is loosely inspired from mlxsw/spectrum.c.
>>
>> In the process we change dsa_slave_changeupper() to be more generic and be called
>> from both LAG events as well as normal bridge enslaving events paths.
>>
>> The DSA layer takes care of managing the LAG group identifiers, how many LAGs
>> may be supported by a switch, and how many members per LAG are supported by a
>> switch device. When a LAG group is identified, the port is then configured to
>> be a part of that group. When a LAG group no longer has any users, we remove it
>> and we tell the drivers whether it is safe to disable trunking altogether.
>>
>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
>> ---
>>  include/net/dsa.h  |  25 +++++++++
>>  net/dsa/dsa2.c     |  12 ++++
>>  net/dsa/dsa_priv.h |   7 +++
>>  net/dsa/port.c     |  92 +++++++++++++++++++++++++++++++
>>  net/dsa/slave.c    | 157 +++++++++++++++++++++++++++++++++++++++++++++++++----
>>  net/dsa/switch.c   |  30 ++++++++++
>>  6 files changed, 312 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/net/dsa.h b/include/net/dsa.h
>> index 10dceccd9ce8..247ea58add68 100644
>> --- a/include/net/dsa.h
>> +++ b/include/net/dsa.h
>> @@ -182,12 +182,20 @@ struct dsa_port {
>>  	u8			stp_state;
>>  	struct net_device	*bridge_dev;
>>  	struct devlink_port	devlink_port;
>> +	u8			lag_id;
>> +	bool			lagged;
>>  	/*
>>  	 * Original copy of the master netdev ethtool_ops
>>  	 */
>>  	const struct ethtool_ops *orig_ethtool_ops;
>>  };
>>  
>> +struct dsa_lag_group {
>> +	/* Used to know when we can disable lag on the switch */
>> +	unsigned int		ref_count;
> 
> Hi Florian
> 
> In what contexts is ref_count manipulated? Normally you would use
> refcount_t and the operations in linux/refcount.h. But if you know
> there is some other protection, e.g. rtnl, an unsigned int is O.K.
> Maybe scatter some assert_RTNL() in the code?

Hi Andrew,

This is called with rtnl held, but this is a good point. In fact, I
don't think we need the reference count at all, what I am going to
propose now is that we just maintain a bitmask of port members per lag
group (along with the reference to the lag device) and when the hamming
weight of that bitmask is 1, that means we were removing the last port of
the LAG group and we can stop using that LAG group. This also allows us
to remove the port_lag_member operation, since we would be maintaining
that at the DSA layer now.

> 
>> +static bool dsa_slave_lag_check(struct net_device *dev, struct net_device *lag_dev,
>> +				struct netdev_lag_upper_info *lag_upper_info)
>> +{
>> +	struct dsa_slave_priv *p = netdev_priv(dev);
>> +	u8 lag_id;
>> +
>> +	/* No more lag identifiers available or already in use */
>> +	if (dsa_switch_lag_get_index(p->dp->ds, lag_dev, &lag_id) != 0)
>> +		return false;
>> +
>> +	if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
>> +		return false;
> 
> I wonder if the driver needs to decide this? Can different hardware
> support different tx_types?

That is a valid point. For instance, the b53/bcm_sf2 switches can only
do MAC DA and SA, SA only, DA only hashing, but you can't do hashing at
a higher level than L2 addresses, this does appear to be something that
the driver should indeed decide.
-- 
Florian

^ permalink raw reply

* Re: [patch net 0/2] mlxsw: Fixes in GRE offloading
From: David Miller @ 2017-10-02 18:19 UTC (permalink / raw)
  To: jiri; +Cc: netdev, petrm, idosch, mlxsw
In-Reply-To: <20171002101457.1462-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@resnulli.us>
Date: Mon,  2 Oct 2017 12:14:55 +0200

> From: Jiri Pirko <jiri@mellanox.com>
> 
> Petr says:
> 
> This patchset fixes a couple unrelated problems in offloading IP-in-IP tunnels
> in mlxsw driver.
> 
> - The first patch fixes a potential reference-counting problem that might lead
>   to a kernel crash.
> 
> - The second patch associates IPIP next hops with their loopback RIFs. Besides
>   being the right thing to do, it also fixes a problem where offloaded IPv6
>   routes that forward to IP-in-IP netdevices were not flagged as such.

Series applied.

^ permalink raw reply

* Re: [patch net-next 0/2] mlxsw: Fixlets
From: David Miller @ 2017-10-02 18:20 UTC (permalink / raw)
  To: jiri; +Cc: netdev, petrm, idosch, mlxsw
In-Reply-To: <20171002102158.2443-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@resnulli.us>
Date: Mon,  2 Oct 2017 12:21:56 +0200

> From: Jiri Pirko <jiri@mellanox.com>
> 
> Couple of small nit fixes from Petr

Series applied.

^ permalink raw reply

* Re: [PATCH v3 00/19] Thunderbolt networking
From: David Miller @ 2017-10-02 18:25 UTC (permalink / raw)
  To: mika.westerberg
  Cc: gregkh, andreas.noever, michael.jamet, yehezkel.bernat,
	amir.jer.levy, Mario.Limonciello, lukas, andriy.shevchenko,
	andrew, netdev, linux-kernel
In-Reply-To: <20171002103846.64602-1-mika.westerberg@linux.intel.com>

From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon,  2 Oct 2017 13:38:27 +0300

> In addition to tunneling PCIe, Display Port and USB traffic, Thunderbolt
> allows connecting two hosts (domains) over a Thunderbolt cable. It is
> possible to tunnel arbitrary data packets over such connection using
> high-speed DMA rings available in the Thunderbolt host controller.

Series applied to net-next, thanks!

^ permalink raw reply

* Re: Fw: [Bug 197099] New: Kernel panic in interrupt [l2tp_ppp]
From: SviMik @ 2017-10-02 18:35 UTC (permalink / raw)
  To: netdev
In-Reply-To: <1506952566.8061.3.camel@edumazet-glaptop3.roam.corp.google.com>

Hi, James!

No, I'm suffering from kernel panics since I started using 4.x
kernels. See my current collection:
http://svimik.com/hdmmsk1kp1.png
http://svimik.com/hdmmsk2kp2.png
http://svimik.com/hdmmsk2kp3.png
http://svimik.com/hdmmsk2kp4.png
http://svimik.com/hdmmsk2kp5.png
http://svimik.com/hdmmsk7kp1.png

Screenshots are from three different machines, kernels from 4.8.13 to 4.13.4.

2017-10-02 16:56 GMT+03:00 Eric Dumazet <eric.dumazet@gmail.com>:
> CC svimik@gmail.com so that he is aware of this netdev thread.
>
> On Mon, 2017-10-02 at 14:32 +0100, James Chapman wrote:
>> This seems to be a NULL pointer exception caused by tunnel->sock being
>> NULL at the call to bh_lock_sock() in l2tp_xmit_skb() at
>> l2tp_core.c:1135.
>>
>> tunnel->sock is set NULL in l2tp_core's tunnel socket destructor.
>>
>> At the moment, I don't understand how this happens because
>> pppol2tp_xmit() does a sock_hold() on the tunnel socket before
>> l2tp_xmit_skb() is called. I'm still looking at this.
>>
>> Has this problem only recently started happening?
>>
>>
>>
>>
>>
>> On 1 October 2017 at 18:21, Stephen Hemminger
>> <stephen@networkplumber.org> wrote:
>> >
>> >
>> > Begin forwarded message:
>> >
>> > Date: Sun, 01 Oct 2017 16:22:33 +0000
>> > From: bugzilla-daemon@bugzilla.kernel.org
>> > To: stephen@networkplumber.org
>> > Subject: [Bug 197099] New: Kernel panic in interrupt [l2tp_ppp]
>> >
>> >
>> > https://bugzilla.kernel.org/show_bug.cgi?id=197099
>> >
>> >             Bug ID: 197099
>> >            Summary: Kernel panic in interrupt [l2tp_ppp]
>> >            Product: Networking
>> >            Version: 2.5
>> >     Kernel Version: 4.8.13-1.el6.elrepo.x86_64
>> >           Hardware: x86-64
>> >                 OS: Linux
>> >               Tree: Mainline
>> >             Status: NEW
>> >           Severity: normal
>> >           Priority: P1
>> >          Component: Other
>> >           Assignee: stephen@networkplumber.org
>> >           Reporter: svimik@gmail.com
>> >         Regression: No
>> >
>> > Created attachment 258685
>> >   --> https://bugzilla.kernel.org/attachment.cgi?id=258685&action=edit
>> > stacktrace screenshot
>> >
>> > Hello!
>> >
>> > Getting kernel panics on multiple servers. Since it mentions l2tp_core,
>> > l2tp_ppp and ppp_generic, I decided to report it to Networking (correct me if
>> > I'm wrong).
>> >
>> > Unfortunately I'm still struggling with making kdump work, so the trace
>> > screenshot is all I have at this moment. The only hope is that this stacktrace
>> > means something to the guys that wrote the code.
>> >
>> > --
>> > You are receiving this mail because:
>> > You are the assignee for the bug.
>
>

^ permalink raw reply

* Re: [RFC net-next 0/5] TSN: Add qdisc-based config interfaces for traffic shapers
From: Levi Pearson @ 2017-10-02 18:45 UTC (permalink / raw)
  To: Rodney Cummings
  Cc: Linux Kernel Network Developers, Vinicius Costa Gomes,
	Henrik Austad, richardcochran, jesus.sanchez-palencia,
	andre.guedes
In-Reply-To: <DM2PR0401MB1389A950EAC6FB54D4186B9C927E0@DM2PR0401MB1389.namprd04.prod.outlook.com>

Hi Rodney,

Some archives seem to have threaded it, but I have CC'd the
participants I saw in the original discussion thread since they may
not otherwise notice it amongst the normal traffic.

On Fri, Sep 29, 2017 at 2:44 PM, Rodney Cummings <rodney.cummings@ni.com> wrote:
> Hi,
>
> I am posting my reply to this thread after subscribing, so I apologize
> if the archive happens to attach it to the wrong thread.
>
> First, I'd like to say that I strongly support this RFC.
> We need Linux interfaces for IEEE 802.1 TSN features.
>
> Although I haven't looked in detail, the proposal for CBS looks good.
> My questions/concerns are more related to future work, such as for 802.1Qbv
> (scheduled traffic).
>
> 1. Question: From an 802.1 perspective, is this RFC intended to support
> end-station (e.g. NIC in host), bridges (i.e. DSA), or both?
>
> This is very important to clarify, because the usage of this interface
> will be very different for one or the other.
>
> For a bridge, the user code typically represents a remote management
> protocol (e.g. SNMP, NETCONF, RESTCONF), and this interface is
> expected to align with the specifications of 802.1Q clause 12,
> which serves as the information model for management. Historically,
> a standard kernel interface for management hasn't been viewed as
> essential, but I suppose it wouldn't hurt.

I don't think the proposal was meant to cover the case of non-local
switch hardware, but in addition to dsa and switchdev switch ICs
managed by embedded Linux-running SoCs, there are SoCs with embedded
small port count switches or even plain multiple NICs with software
bridging. Many of these embedded small port count switches have FQTSS
hardware that could potentially be configured by the proposed cbs
qdisc. This blurs the line somewhat between what is a "bridge" and
what is an "end-station" in 802.1Q terminology, but nevertheless these
devices exist, sometimes acting as an endpoint + a real bridge and
sometimes as just a system with multiple network interfaces.

> For an end station, the user code can be an implementation of SRP
> (802.1Q clause 35), or it can be an application-specific
> protocol (e.g. industrial fieldbus) that exchanges data according
> to P802.1Qcc clause 46. Either way, the top-level user interface
> is designed for individual streams, not queues and shapers. That
> implies some translation code between that top-level interface
> and this sort of kernel interface.
>
> As a specific end-station example, for CBS, 802.1Q-2014 subclause
> 34.6.1 requires "per-stream queues" in the Talker end-station.
> I don't see 34.6.1 represented in the proposed RFC, but that's
> okay... maybe per-stream queues are implemented in user code.
> Nevertheless, if that is the assumption, I think we need to
> clarify, especially in examples.

You're correct that the FQTSS credit-based shaping algorithm requires
per-stream shaping by Talker endpoints as well, but this is in
addition to the per-class shaping provided by most hardware shaping
implementations that I'm aware of in endpoint network hardware. I
agree that we need to document the need to provide this, but it can
definitely be built on top of the current proposal.

I believe the per-stream shaping could be managed either by a user
space application that manages all use of a streaming traffic class,
or through an additional qdisc module that performs per-stream
management on top of the proposed cbs qdisc, ensuring that the
frames-per-observation interval aspect of each stream's reservation is
obeyed. This becomes a fairly simple qdisc to implement on top of a
per-traffic class shaper, and could even be implemented with the help
of the timestamp that the SO_TXTIME proposal adds to skbuffs, but I
think keeping the layers separate provides more flexibility to
implementations and keeps management of various kinds of hardware
offload support simpler as well.

> 2. Suggestion: Do not assume that a time-aware (i.e. scheduled)
> end-station will always use 802.1Qbv.
>
> For those who are subscribed to the 802.1 mailing list,
> I'd suggest a read of draft P802.1Qcc/D1.6, subclause U.1
> of Annex U. Subclause U.1 assumes that bridges in the network use
> 802.1Qbv, and then it poses the question of what an end-station
> Talker should do. If the end-station also uses 802.1Qbv,
> and that end-station transmits multiple streams, 802.1Qbv is
> a bad implementation. The reason is that the scheduling
> (i.e. order in time) of each stream cannot be controlled, which
> in turn means that the CNC (network manager) cannot optimize
> the 802.1Qbv schedules in bridges. The preferred technique
> is to use "per-stream scheduling" in each Talker, so that
> the CNC can create optimal schedules (i.e. best determinism).
>
> I'm aware of a small number of proprietary CNC implementations for
> 802.1Qbv in bridges, and they are generally assuming per-stream
> scheduling in end-stations (Talkers).
>
> The i210 NIC's LaunchTime can be used to implement per-stream
> scheduling. I haven't looked at SO_TXTIME in detail, but it sounds
> like per-stream scheduling. If so, then we already have the
> fundamental building blocks for a complete implementation
> of a time-aware end-station.
>
> If we answer the preceding question #1 as "end-station only",
> I would recommend avoiding 802.1Qbv in this interface. There
> isn't really anything wrong with it per-se, but it would lead
> developers down the wrong path.

In some situations, such as device nodes that each incorporate a small
port count switch for the purpose of daisy-chaining a segment of the
network, "end stations" must do a limited subset of local bridge
management as well. I'm not sure how common this is going to be for
industrial control applications, but I know there are audio and
automotive applications built this way.

One particular device I am working with now provides all network
access through a DSA switch chip with hardware Qbv support in addition
to hardware Qav support. The SoC attached to it has no hardware timed
launch (SO_TXTIME) support. In this case, although the proposed
interface for Qbv is not *sufficient* to make a working time-aware end
station, it does provide a usable building block to provide one. As
with the credit-based shaping system, Talkers must provide an
additional level of per-stream shaping as well, but this is largely
(absent the jitter calculations, which are sort of a middle-level
concern) independent of what sort of hardware offload of the
scheduling is provided.

Both Qbv windows and timed launch support do roughly the same thing;
they *delay* the launch of a hardware-queued frame so it can egress at
a precisely specified time, and at least with the i210 and Qbv, ensure
that no other traffic will be in-progress when that time arrives. For
either to be used effectively, the application still has to prepare
the frame slightly ahead-of-time and thus must have the same level of
time-awareness. This is, again, largely independent of what kind of
hardware offloading support is provided and is also largely
independent of the network stack itself. Neither queue window
management nor SO_TXTIME help the application present its
time-sensitive traffic at the right time; that's a matter to be worked
out with the application taking advantage of PTP and the OS scheduler.
Whether you rely on managed windows or hardware launch time to provide
the precisely correct amount of delay beyond that is immaterial to the
application. In the absence of SO_TXTIME offloading (or even with it,
and in the presence of sufficient OS scheduling jitter), an additional
layer may need to be provided to ensure different applications' frames
are queued in the correct order for egress during the window. Again,
this could be a purely user-space application multiplexer or a
separate qdisc module.

I wholeheartedly agree with you and Richard that we ought to
eventually provide application-level APIs that don't require users to
have deep knowledge of various 802.1Q intricacies. But I believe that
the hardware offloading capability being provided now, and the variety
of the way things are hooked up in real hardware, suggests that we
ought to also build the support for the underlying protocols in layers
so that we don't create unnecessary mismatches between offloading
capability (which can be essential to overall network performance) and
APIs, such that one configuration of offload support is privileged
above others even when comparable scheduling accuracy could be
provided by either.

In any case, only the cbs qdisc has been included in the post-RFC
patch cover page for its last couple of iterations, so there is plenty
of time to discuss how time-aware shaping, preemption, etc. management
should occur beyond the cbs and SO_TXTIME proposals.


Levi

^ permalink raw reply

* Re: [net-next 00/13][pull request] 100GbE Intel Wired LAN Driver Updates 2017-10-02
From: David Miller @ 2017-10-02 18:59 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, nhorman, sassmann, jogreene
In-Reply-To: <20171002154236.84043-1-jeffrey.t.kirsher@intel.com>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Mon,  2 Oct 2017 08:42:23 -0700

> This series contains updates to fm10k only.
> 
> Jake provides all but one of the changes in this series.  Most are small
> fixes, starting with ensuring prompt transmission of messages queued up
> after each VF message is received and handled.  Fix a possible race
> condition between the watchdog task and the processing of mailbox
> messages by just checking whether the mailbox is still open.  Fix a
> couple of GCC v7 warnings, including misspelled "fall through" comments
> and warnings about possible truncation of calls to snprintf().  Cleaned
> up a convoluted bitshift and read for the PFVFLRE register.  Fixed a
> potential divide by zero when finding the proper r_idx.
> 
> Markus Elfring fixes an issue which was found using Coccinelle, where
> we should have been using seq_putc() instead of seq_puts().
> 
> The following are changes since commit 0929567a7a2dab8455a7313956973ff0d339709a:
>   samples/bpf: fix warnings in xdp_monitor_user
> and are available in the git repository at:
>   git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 100GbE

Pulled, thanks Jeff.

^ permalink raw reply

* Re: [PATCH net-next v11] openvswitch: enable NSH support
From: Jiri Benc @ 2017-10-02 19:13 UTC (permalink / raw)
  To: Yi Yang
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA, e,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q
In-Reply-To: <1506668610-18505-1-git-send-email-yi.y.yang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

On Fri, 29 Sep 2017 15:03:30 +0800, Yi Yang wrote:
> --- a/include/net/nsh.h
> +++ b/include/net/nsh.h
> @@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags,
>  			NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
>  }
>  
> +int skb_push_nsh(struct sk_buff *skb, const struct nshhdr *nh);

[...]
> +int skb_push_nsh(struct sk_buff *skb, const struct nshhdr *src_nsh_hdr)

This is minor but since this patch will need a respin anyway, please
name the variables in the forward declaration and here the same.

> +int skb_pop_nsh(struct sk_buff *skb)
> +{
> +	int err;
> +	struct nshhdr *nsh_hdr = (struct nshhdr *)(skb->data);

Bad name of the variable, clashes with the nsh_hdr function. I pointed
that out already.

> +	size_t length;
> +	__be16 inner_proto;
> +
> +	err = skb_ensure_writable(skb, skb_network_offset(skb) +
> +				       sizeof(struct nshhdr));

You assume that the skb starts at the NSH header, thus the
skb_network_offset is completely unnecessary and introduces just
another assumption on the caller. Also, the sizeof(struct nshhdr) is
wrong: there's no guarantee that the header is not smaller or larger
than that.

More importantly though, why do you need skb_ensure_writable? You don't
write into the header. pskb_may_pull is enough.

	if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
		return -ENOMEM;
	length = nsh_hdr_len(nsh_hdr);
	if (!pskb_may_pull(skb, length))
		return -ENOMEM;

> +static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
> +		   const struct nlattr *a)
> +{
> +	struct nshhdr *nh;
> +	int err;
> +	u8 flags;
> +	u8 ttl;
> +	int i;
> +
> +	struct ovs_key_nsh key;
> +	struct ovs_key_nsh mask;
> +
> +	err = nsh_key_from_nlattr(a, &key, &mask);
> +	if (err)
> +		return err;
> +
> +	err = skb_ensure_writable(skb, skb_network_offset(skb) +
> +				       sizeof(struct nshhdr));

I missed this before: this is wrong, too. You need to use the real
header size, not sizeof(struct nshhdr). It should be computable from
the flow key.

> +		case OVS_ACTION_ATTR_PUSH_NSH: {
> +			u8 buffer[NSH_HDR_MAX_LEN];
> +			struct nshhdr *nh = (struct nshhdr *)buffer;
> +
> +			nsh_hdr_from_nlattr(nla_data(a), nh,
> +					    NSH_HDR_MAX_LEN);
> +			err = push_nsh(skb, key, (const struct nshhdr *)nh);

Is the cast to const really needed? It looks suspicious. If you added it
because a compiler complained, it's even more suspicious.

> +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> +	struct nshhdr *nh;
> +	unsigned int nh_ofs = skb_network_offset(skb);
> +	u8 version, length;
> +	int err;
> +
> +	err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
> +	if (unlikely(err))
> +		return err;
> +
> +	nh = nsh_hdr(skb);
> +	version = nsh_get_ver(nh);
> +	length = nsh_hdr_len(nh);
> +
> +	if (version != 0)
> +		return -EINVAL;
> +
> +	err = check_header(skb, nh_ofs + length);
> +	if (unlikely(err))
> +		return err;
> +
> +	nh = (struct nshhdr *)skb_network_header(skb);

I really really really hate this. This is the third time I'm telling
you to use the nsh_hdr function. Every time, you change only part of
the places. And this one I even explicitly pointed out in the previous
review.

I'm not supposed to look at my previous review to verify that you
addressed everything. That's your responsibility. Yet I need to do it
because every time, some of my comments remain unaddressed.

> +int nsh_hdr_from_nlattr(const struct nlattr *attr,
> +			struct nshhdr *nh, size_t size)
> +{
> +	struct nlattr *a;
> +	int rem;
> +	u8 flags = 0;
> +	u8 ttl = 0;
> +	int mdlen = 0;
> +
> +	/* validate_nsh has check this, so we needn't do duplicate check here
> +	 */
> +	nla_for_each_nested(a, attr, rem) {
> +		int type = nla_type(a);
> +
> +		switch (type) {
> +		case OVS_NSH_KEY_ATTR_BASE: {
> +			const struct ovs_nsh_key_base *base = nla_data(a);
> +
> +			flags = base->flags;
> +			ttl = base->ttl;
> +			nh->np = base->np;
> +			nh->mdtype = base->mdtype;
> +			nh->path_hdr = base->path_hdr;
> +			break;
> +		}
> +		case OVS_NSH_KEY_ATTR_MD1: {
> +			const struct ovs_nsh_key_md1 *md1 = nla_data(a);
> +
> +			mdlen = nla_len(a);
> +			memcpy(&nh->md1, md1, mdlen);
> +			break;

Looks better. Why not simplify it even more?

		case OVS_NSH_KEY_ATTR_MD1:
			mdlen = nla_len(a);
			memcpy(&nh->md1, nla_data(a), mdlen);
			break;

It's still perfectly readable this way and there's no need for the
braces.

> +		}
> +		case OVS_NSH_KEY_ATTR_MD2: {
> +			const struct u8 *md2 = nla_data(a);
> +
> +			mdlen = nla_len(a);
> +			memcpy(&nh->md2, md2, mdlen);

And here, too.

> +int nsh_key_from_nlattr(const struct nlattr *attr,
> +			struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
> +{
> +	struct nlattr *a;
> +	int rem;
> +
> +	/* validate_nsh has check this, so we needn't do duplicate check here
> +	 */
> +	nla_for_each_nested(a, attr, rem) {
> +		int type = nla_type(a);
> +
> +		switch (type) {
> +		case OVS_NSH_KEY_ATTR_BASE: {
> +			const struct ovs_nsh_key_base *base = nla_data(a);
> +			const struct ovs_nsh_key_base *base_mask = base + 1;
> +
> +			nsh->base = *base;
> +			nsh_mask->base = *base_mask;
> +			break;
> +		}
> +		case OVS_NSH_KEY_ATTR_MD1: {
> +			const struct ovs_nsh_key_md1 *md1 =
> +				(struct ovs_nsh_key_md1 *)nla_data(a);

I'm speechless.

Yes, I don't like the line above. For a reason that I already pointed
out.

I expected more of this version.

 Jiri

^ permalink raw reply

* [PATCH net] socket, bpf: fix possible use after free
From: Eric Dumazet @ 2017-10-02 19:20 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Alexei Starovoitov, Daniel Borkmann

From: Eric Dumazet <edumazet@google.com>

Starting from linux-4.4, 3WHS no longer takes the listener lock.

Since this time, we might hit a use-after-free in sk_filter_charge(),
if the filter we got in the memcpy() of the listener content
just happened to be replaced by a thread changing listener BPF filter.

To fix this, we need to make sure the filter refcount is not already
zero before incrementing it again.

Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/core/filter.c |   12 ++++++++----
 net/core/sock.c   |    5 ++++-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 82edad58d066857aeee562661098effa3b3e6961..74b8c91fb5f4461da58c73568976bc9834c4612b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -989,10 +989,14 @@ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 
 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 {
-	bool ret = __sk_filter_charge(sk, fp);
-	if (ret)
-		refcount_inc(&fp->refcnt);
-	return ret;
+	if (!refcount_inc_not_zero(&fp->refcnt))
+		return false;
+
+	if (!__sk_filter_charge(sk, fp)) {
+		sk_filter_release(fp);
+		return false;
+	}
+	return true;
 }
 
 static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
diff --git a/net/core/sock.c b/net/core/sock.c
index 7d55c05f449d306b43fd850a61ce8ffd44174f7f..23953b741a41fbcf4a6ffb0dd5bf05bd5266b99d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1684,13 +1684,16 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
 		sock_reset_flag(newsk, SOCK_DONE);
 
-		filter = rcu_dereference_protected(newsk->sk_filter, 1);
+		rcu_read_lock();
+		filter = rcu_dereference(sk->sk_filter);
 		if (filter != NULL)
 			/* though it's an empty new sock, the charging may fail
 			 * if sysctl_optmem_max was changed between creation of
 			 * original socket and cloning
 			 */
 			is_charged = sk_filter_charge(newsk, filter);
+		RCU_INIT_POINTER(newsk->sk_filter, filter);
+		rcu_read_unlock();
 
 		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
 			/* We need to make sure that we don't uncharge the new

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox