* Re: Confirmation Email
From: Ms Christine. Hodgson @ 2018-05-19 9:39 UTC (permalink / raw)
To: Recipients
An offer with a potential opportunity for both of us.Do revert and treat this message as urgent and important.
MS Christine Hodgson.
^ permalink raw reply
* [PATCH net-next] r8169: fix network error on resume from suspend
From: Heiner Kallweit @ 2018-05-19 8:29 UTC (permalink / raw)
To: David Miller, Realtek linux nic maintainers; +Cc: netdev@vger.kernel.org
This commit removed calls to rtl_set_rx_mode(). This is ok for the
standard path if the link is brought up, however it breaks system
resume from suspend. Link comes up but no network traffic.
Meanwhile common code from rtl_hw_start_8169/8101/8168() was moved
to rtl_hw_start(), therefore re-add the call to rtl_set_rx_mode()
there.
Due to adding this call we have to move definition of rtl_hw_start()
after definition of rtl_set_rx_mode().
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Fixes: 82d3ff6dd199 ("r8169: remove calls to rtl_set_rx_mode")
---
drivers/net/ethernet/realtek/r8169.c | 39 ++++++++++++++--------------
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 2c2f0c5b3..75dfac024 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -5064,25 +5064,6 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
}
-static void rtl_hw_start(struct rtl8169_private *tp)
-{
- RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-
- tp->hw_start(tp);
-
- rtl_set_rx_max_size(tp);
- rtl_set_rx_tx_desc_registers(tp);
- rtl_set_rx_tx_config_registers(tp);
- RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-
- /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
- RTL_R8(tp, IntrMask);
- RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
- /* no early-rx interrupts */
- RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
- rtl_irq_enable_all(tp);
-}
-
static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
{
static const struct rtl_cfg2_info {
@@ -5160,6 +5141,26 @@ static void rtl_set_rx_mode(struct net_device *dev)
RTL_W32(tp, RxConfig, tmp);
}
+static void rtl_hw_start(struct rtl8169_private *tp)
+{
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+
+ tp->hw_start(tp);
+
+ rtl_set_rx_max_size(tp);
+ rtl_set_rx_tx_desc_registers(tp);
+ rtl_set_rx_tx_config_registers(tp);
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+
+ /* Initially a 10 us delay. Turned it into a PCI commit. - FR */
+ RTL_R8(tp, IntrMask);
+ RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
+ rtl_set_rx_mode(tp->dev);
+ /* no early-rx interrupts */
+ RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
+ rtl_irq_enable_all(tp);
+}
+
static void rtl_hw_start_8169(struct rtl8169_private *tp)
{
if (tp->mac_version == RTL_GIGA_MAC_VER_05)
--
2.17.0
^ permalink raw reply related
* WARNING in xfrm6_tunnel_net_exit (2)
From: syzbot @ 2018-05-19 8:08 UTC (permalink / raw)
To: davem, herbert, kuznet, linux-kernel, netdev, steffen.klassert,
syzkaller-bugs, yoshfuji
Hello,
syzbot found the following crash on:
HEAD commit: 2c71d338bef2 Merge tag 'powerpc-4.17-6' of git://git.kerne..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=12a7bd57800000
kernel config: https://syzkaller.appspot.com/x/.config?x=f3b4e30da84ec1ed
dashboard link: https://syzkaller.appspot.com/bug?extid=e9aebef558e3ed673934
compiler: gcc (GCC) 8.0.1 20180413 (experimental)
syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=17409d57800000
IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+e9aebef558e3ed673934@syzkaller.appspotmail.com
bond0: Enslaving bond_slave_1 as an active interface with an up link
IPv6: ADDRCONF(NETDEV_UP): veth0_to_bond: link is not ready
IPv6: ADDRCONF(NETDEV_UP): veth1_to_bond: link is not ready
IPv6: ADDRCONF(NETDEV_UP): veth1_to_bond: link is not ready
IPv6: ADDRCONF(NETDEV_CHANGE): veth1_to_bond: link becomes ready
WARNING: CPU: 1 PID: 6 at net/ipv6/xfrm6_tunnel.c:348
xfrm6_tunnel_net_exit+0x2df/0x510 net/ipv6/xfrm6_tunnel.c:348
IPv6: ADDRCONF(NETDEV_CHANGE): veth0_to_bond: link becomes ready
Kernel panic - not syncing: panic_on_warn set ...
CPU: 1 PID: 6 Comm: kworker/u4:0 Not tainted 4.17.0-rc5+ #57
IPv6: ADDRCONF(NETDEV_CHANGE): veth1_to_bond: link becomes ready
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Workqueue: netns cleanup_net
IPv6: ADDRCONF(NETDEV_CHANGE): veth0_to_bond: link becomes ready
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x1b9/0x294 lib/dump_stack.c:113
panic+0x22f/0x4de kernel/panic.c:184
IPv6: ADDRCONF(NETDEV_CHANGE): veth1_to_bond: link becomes ready
IPv6: ADDRCONF(NETDEV_CHANGE): veth1_to_bond: link becomes ready
__warn.cold.8+0x163/0x1b3 kernel/panic.c:536
report_bug+0x252/0x2d0 lib/bug.c:186
IPv6: ADDRCONF(NETDEV_UP): veth0_to_bond: link is not ready
fixup_bug arch/x86/kernel/traps.c:178 [inline]
do_error_trap+0x1de/0x490 arch/x86/kernel/traps.c:296
do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:992
RIP: 0010:xfrm6_tunnel_net_exit+0x2df/0x510 net/ipv6/xfrm6_tunnel.c:348
RSP: 0018:ffff8801d9a973d8 EFLAGS: 00010293
RAX: ffff8801d9a88180 RBX: ffff8801b6eda2b8 RCX: ffffffff868ff5f5
RDX: 0000000000000000 RSI: ffffffff868ff5ff RDI: 0000000000000007
RBP: ffff8801d9a974f8 R08: ffff8801d9a88180 R09: 0000000000000006
R10: ffff8801d9a88180 R11: 0000000000000000 R12: 00000000000000ff
R13: ffffed003b352e82 R14: ffff8801d9a974d0 R15: ffff8801b32f0700
ops_exit_list.isra.7+0xb0/0x160 net/core/net_namespace.c:152
cleanup_net+0x51d/0xb20 net/core/net_namespace.c:523
process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
kthread+0x345/0x410 kernel/kthread.c:240
ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
Dumping ftrace buffer:
(ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..
---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.
syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with
syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches
^ permalink raw reply
* [PATCH net-next] sctp: add support for SCTP_REUSE_PORT sockopt
From: Xin Long @ 2018-05-19 7:44 UTC (permalink / raw)
To: network dev, linux-sctp; +Cc: Marcelo Ricardo Leitner, Neil Horman, davem
This feature is actually already supported by sk->sk_reuse which can be
set by SO_REUSEADDR. But it's not working exactly as RFC6458 demands in
section 8.1.27, like:
- This option only supports one-to-one style SCTP sockets
- This socket option must not be used after calling bind()
or sctp_bindx().
Besides, SCTP_REUSE_PORT sockopt should be provided for user's programs.
Otherwise, the programs with SCTP_REUSE_PORT from other systems will not
work in linux.
This patch reuses sk->sk_reuse and works pretty much as SO_REUSEADDR,
just with some extra setup limitations that are needed when it is being
enabled.
"It should be noted that the behavior of the socket-level socket option
to reuse ports and/or addresses for SCTP sockets is unspecified", so it
leaves SO_REUSEADDR as is for the compatibility.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
include/uapi/linux/sctp.h | 1 +
net/sctp/socket.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+)
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index b64d583..c02986a 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -100,6 +100,7 @@ typedef __s32 sctp_assoc_t;
#define SCTP_RECVNXTINFO 33
#define SCTP_DEFAULT_SNDINFO 34
#define SCTP_AUTH_DEACTIVATE_KEY 35
+#define SCTP_REUSE_PORT 36
/* Internal Socket Options. Some of the sctp library functions are
* implemented using these socket options.
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1b4593b..8dfcc79 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4170,6 +4170,28 @@ static int sctp_setsockopt_interleaving_supported(struct sock *sk,
return retval;
}
+static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval,
+ unsigned int optlen)
+{
+ int val;
+
+ if (!sctp_style(sk, TCP))
+ return -EOPNOTSUPP;
+
+ if (sctp_sk(sk)->ep->base.bind_addr.port)
+ return -EFAULT;
+
+ if (optlen < sizeof(int))
+ return -EINVAL;
+
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ sk->sk_reuse = val ? SK_CAN_REUSE : SK_NO_REUSE;
+
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4364,6 +4386,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
retval = sctp_setsockopt_interleaving_supported(sk, optval,
optlen);
break;
+ case SCTP_REUSE_PORT:
+ retval = sctp_setsockopt_reuse_port(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -7175,6 +7200,26 @@ static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len,
return retval;
}
+static int sctp_getsockopt_reuse_port(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ int val = 0;
+
+ if (len < sizeof(int))
+ return -EINVAL;
+
+ len = sizeof(int);
+ if (sk->sk_reuse != SK_NO_REUSE)
+ val = 1;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -7370,6 +7415,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
optlen);
break;
+ case SCTP_REUSE_PORT:
+ retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
--
2.1.0
^ permalink raw reply related
* [PATCHv2 net-next] erspan: set bso bit based on mirrored packet's len
From: William Tu @ 2018-05-19 2:41 UTC (permalink / raw)
To: netdev; +Cc: tobin
Before the patch, the erspan BSO bit (Bad/Short/Oversized) is not
handled. BSO has 4 possible values:
00 --> Good frame with no error, or unknown integrity
11 --> Payload is a Bad Frame with CRC or Alignment Error
01 --> Payload is a Short Frame
10 --> Payload is an Oversized Frame
Based on the short/oversized definitions in RFC1757, the patch sets
the bso bit based on the mirrored packet's size.
Reported-by: Xiaoyan Jin <xiaoyanj@vmware.com>
Signed-off-by: William Tu <u9012063@gmail.com>
---
v1->v2
Improve code comments, make enum erspan_bso clearer
---
include/net/erspan.h | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/include/net/erspan.h b/include/net/erspan.h
index d044aa60cc76..b39643ef4c95 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -219,6 +219,33 @@ static inline __be32 erspan_get_timestamp(void)
return htonl((u32)h_usecs);
}
+/* ERSPAN BSO (Bad/Short/Oversized), see RFC1757
+ * 00b --> Good frame with no error, or unknown integrity
+ * 01b --> Payload is a Short Frame
+ * 10b --> Payload is an Oversized Frame
+ * 11b --> Payload is a Bad Frame with CRC or Alignment Error
+ */
+enum erspan_bso {
+ BSO_NOERROR = 0x0,
+ BSO_SHORT = 0x1,
+ BSO_OVERSIZED = 0x2,
+ BSO_BAD = 0x3,
+};
+
+static inline u8 erspan_detect_bso(struct sk_buff *skb)
+{
+ /* BSO_BAD is not handled because the frame CRC
+ * or alignment error information is in FCS.
+ */
+ if (skb->len < ETH_ZLEN)
+ return BSO_SHORT;
+
+ if (skb->len > ETH_FRAME_LEN)
+ return BSO_OVERSIZED;
+
+ return BSO_NOERROR;
+}
+
static inline void erspan_build_header_v2(struct sk_buff *skb,
u32 id, u8 direction, u16 hwid,
bool truncate, bool is_ipv4)
@@ -248,6 +275,7 @@ static inline void erspan_build_header_v2(struct sk_buff *skb,
vlan_tci = ntohs(qp->tci);
}
+ bso = erspan_detect_bso(skb);
skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
ershdr = (struct erspan_base_hdr *)skb->data;
memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: ip6_gre: fix tunnel metadata device sharing.
From: William Tu @ 2018-05-19 2:22 UTC (permalink / raw)
To: netdev; +Cc: petrm
Currently ip6gre and ip6erspan share single metadata mode device,
using 'collect_md_tun'. Thus, when doing:
ip link add dev ip6gre11 type ip6gretap external
ip link add dev ip6erspan12 type ip6erspan external
RTNETLINK answers: File exists
simply fails because the 2nd command tries to create the same collect_md_tun.
The patch fixes it by adding a separate collect md tunnel device
for the ip6erspan, 'collect_md_tun_erspan'. As a result, a couple
of places need to refactor/split up in order to distinguish ip6gre
and ip6erspan.
First, move the collect_md check at ip6gre_tunnel_{unlink,link} and
create separate functions {ip6gre,ip6erspan}_tunnel_{link_md,unlink_md}.
Then before link/unlink, make sure the link_md/unlink_md is called.
Finally, a separate ndo_uninit is created for ip6erspan. Tested it
using the samples/bpf/test_tunnel_bpf.sh.
Fixes: ef7baf5e083c ("ip6_gre: add ip6 erspan collect_md mode")
Signed-off-by: William Tu <u9012063@gmail.com>
---
net/ipv6/ip6_gre.c | 101 +++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 79 insertions(+), 22 deletions(-)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 5162ecc45c20..458de353f5d9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -71,6 +71,7 @@ struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
struct ip6_tnl __rcu *collect_md_tun;
+ struct ip6_tnl __rcu *collect_md_tun_erspan;
struct net_device *fb_tunnel_dev;
};
@@ -233,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand)
return cand;
- t = rcu_dereference(ign->collect_md_tun);
+ if (gre_proto == htons(ETH_P_ERSPAN) ||
+ gre_proto == htons(ETH_P_ERSPAN2))
+ t = rcu_dereference(ign->collect_md_tun_erspan);
+ else
+ t = rcu_dereference(ign->collect_md_tun);
+
if (t && t->dev->flags & IFF_UP)
return t;
@@ -262,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
return &ign->tunnels[prio][h];
}
+static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, t);
+}
+
+static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun_erspan, t);
+}
+
+static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, NULL);
+}
+
+static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign,
+ struct ip6_tnl *t)
+{
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun_erspan, NULL);
+}
+
static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
const struct ip6_tnl *t)
{
@@ -272,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun, t);
-
rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -284,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
- if (t->parms.collect_md)
- rcu_assign_pointer(ign->collect_md_tun, NULL);
-
for (tp = ip6gre_bucket(ign, t);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -375,11 +400,23 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
return NULL;
}
+static void ip6erspan_tunnel_uninit(struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+
+ ip6erspan_tunnel_unlink_md(ign, t);
+ ip6gre_tunnel_unlink(ign, t);
+ dst_cache_reset(&t->dst_cache);
+ dev_put(dev);
+}
+
static void ip6gre_tunnel_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
+ ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
@@ -1806,7 +1843,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
static const struct net_device_ops ip6erspan_netdev_ops = {
.ndo_init = ip6erspan_tap_init,
- .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_uninit = ip6erspan_tunnel_uninit,
.ndo_start_xmit = ip6erspan_tunnel_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
@@ -1875,8 +1912,6 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct ip6_tnl *nt;
- struct net *net = dev_net(dev);
- struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct ip_tunnel_encap ipencap;
int err;
@@ -1889,16 +1924,6 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
return err;
}
- ip6gre_netlink_parms(data, &nt->parms);
-
- if (nt->parms.collect_md) {
- if (rtnl_dereference(ign->collect_md_tun))
- return -EEXIST;
- } else {
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
- }
-
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
@@ -1922,12 +1947,26 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- int err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
struct ip6_tnl *nt = netdev_priv(dev);
struct net *net = dev_net(dev);
+ struct ip6gre_net *ign;
+ int err;
+
+ ip6gre_netlink_parms(data, &nt->parms);
+ ign = net_generic(net, ip6gre_net_id);
+
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
+ err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
if (!err) {
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link_md(ign, nt);
ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
}
return err;
@@ -1979,8 +2018,10 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(t))
return PTR_ERR(t);
+ ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link_md(ign, t);
ip6gre_tunnel_link(ign, t);
return 0;
}
@@ -2134,12 +2175,26 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- int err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
struct ip6_tnl *nt = netdev_priv(dev);
struct net *net = dev_net(dev);
+ struct ip6gre_net *ign;
+ int err;
+
+ ip6gre_netlink_parms(data, &nt->parms);
+ ign = net_generic(net, ip6gre_net_id);
+
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun_erspan))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
+ err = ip6gre_newlink_common(src_net, dev, tb, data, extack);
if (!err) {
ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]);
+ ip6erspan_tunnel_link_md(ign, nt);
ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt);
}
return err;
@@ -2171,8 +2226,10 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(t))
return PTR_ERR(t);
+ ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6erspan_tunnel_link_md(ign, t);
ip6gre_tunnel_link(ign, t);
return 0;
}
--
2.7.4
^ permalink raw reply related
* Re: [patch net-next 0/5] devlink: introduce port flavours and common phys_port_name generation
From: Florian Fainelli @ 2018-05-19 3:34 UTC (permalink / raw)
To: Jiri Pirko, netdev
Cc: davem, idosch, jakub.kicinski, mlxsw, andrew, vivien.didelot,
michael.chan, ganeshgr, saeedm, simon.horman,
pieter.jansenvanvuuren, john.hurley, dirk.vandermerwe,
alexander.h.duyck, ogerlitz, dsahern, vijaya.guvva,
satananda.burla, raghu.vatsavayi, felix.manlunas, gospo,
sathya.perla, vasundhara-v.volam, tariqt, eranbe,
jeffrey.t.kirsher, roopa
In-Reply-To: <20180518072904.29523-1-jiri@resnulli.us>
On 05/18/2018 12:28 AM, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
>
> This patchset resolves 2 issues we have right now:
> 1) There are many netdevices / ports in the system, for port, pf, vf
> representation but the user has no way to see which is which
> 2) The ndo_get_phys_port_name is implemented in each driver separately,
> which may lead to inconsistent names between drivers.
>
> This patchset introduces port flavours which should address the first
> problem. In this initial patchset, I focus on DSA and their port
> flavours. As a follow-up, I plan to add PF and VF representor flavours.
> However, that needs additional dependencies in drivers (nfp, mlx5).
>
> The common phys_port_name generation is used by mlxsw. An example output
> for mlxsw looks like this:
>
> # devlink port
> ...
> pci/0000:03:00.0/59: type eth netdev enp3s0np4 flavour physical number 4
> pci/0000:03:00.0/61: type eth netdev enp3s0np1 flavour physical number 1
> pci/0000:03:00.0/63: type eth netdev enp3s0np2 flavour physical number 2
> pci/0000:03:00.0/49: type eth netdev enp3s0np8s0 flavour physical number 8 split_group 8 subport 0
> pci/0000:03:00.0/50: type eth netdev enp3s0np8s1 flavour physical number 8 split_group 8 subport 1
> pci/0000:03:00.0/51: type eth netdev enp3s0np8s2 flavour physical number 8 split_group 8 subport 2
> pci/0000:03:00.0/52: type eth netdev enp3s0np8s3 flavour physical number 8 split_group 8 subport 3
>
> As you can see, the netdev names are generated according to the flavour
> and port number. In case the port is split, the split subnumber is also
> included.
>
> An example output for dsa_loop testing module looks like this:
> # devlink port
> mdio_bus/fixed-0:1f/0: type eth netdev lan1 flavour physical number 0
> mdio_bus/fixed-0:1f/1: type eth netdev lan2 flavour physical number 1
> mdio_bus/fixed-0:1f/2: type eth netdev lan3 flavour physical number 2
> mdio_bus/fixed-0:1f/3: type eth netdev lan4 flavour physical number 3
> mdio_bus/fixed-0:1f/4: type notset
> mdio_bus/fixed-0:1f/5: type notset flavour cpu number 5
> mdio_bus/fixed-0:1f/6: type notset
> mdio_bus/fixed-0:1f/7: type notset
> mdio_bus/fixed-0:1f/8: type notset
> mdio_bus/fixed-0:1f/9: type notset
> mdio_bus/fixed-0:1f/10: type notset
> mdio_bus/fixed-0:1f/11: type notset
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Thanks!
--
Florian
^ permalink raw reply
* Re: [patch net-next RFC 04/12] dsa: set devlink port attrs for dsa ports
From: Florian Fainelli @ 2018-05-19 3:11 UTC (permalink / raw)
To: Andrew Lunn, Jiri Pirko
Cc: netdev, davem, idosch, jakub.kicinski, mlxsw, vivien.didelot,
michael.chan, ganeshgr, saeedm, simon.horman,
pieter.jansenvanvuuren, john.hurley, dirk.vandermerwe,
alexander.h.duyck, ogerlitz, dsahern, vijaya.guvva,
satananda.burla, raghu.vatsavayi, felix.manlunas, gospo,
sathya.perla, vasundhara-v.volam, tariqt, eranbe,
jeffrey.t.kirsher
In-Reply-To: <20180518134504.GC20662@lunn.ch>
On 05/18/2018 06:45 AM, Andrew Lunn wrote:
>> What benefit does it have to register unused ports? What is a usecase
>> for them. Like Florian, I also think they should not be registered.
>
> Hi Jiri
>
> They physically exist, so we are accurately describing the hardware by
> registering them.
You are right that the driver is advertising a number of ports that does
not match what is being expected. We unfortunately do not have a good
API for specifying e.g: a sparse port allocation.
--
Florian
^ permalink raw reply
* Re: [PATCH] net: sched: don't disable bh when accessing action idr
From: Cong Wang @ 2018-05-19 2:59 UTC (permalink / raw)
To: Vlad Buslov
Cc: Linux Kernel Network Developers, Jamal Hadi Salim, Jiri Pirko,
David Miller, LKML
In-Reply-To: <1526658324-6570-1-git-send-email-vladbu@mellanox.com>
On Fri, May 18, 2018 at 8:45 AM, Vlad Buslov <vladbu@mellanox.com> wrote:
> Underlying implementation of action map has changed and doesn't require
> disabling bh anymore. Replace all action idr spinlock usage with regular
> calls that do not disable bh.
Please explain explicitly why it is not required, don't let people
dig, this would save everyone's time.
Also, this should be targeted for net-next, right?
Thanks.
^ permalink raw reply
* Re: [RFC v4 3/5] virtio_ring: add packed ring support
From: Tiwei Bie @ 2018-05-19 2:29 UTC (permalink / raw)
To: Jason Wang; +Cc: mst, virtualization, linux-kernel, netdev, wexu, jfreimann
In-Reply-To: <1a661df0-8ca9-b31d-9c17-8684d608a33a@redhat.com>
On Sat, May 19, 2018 at 09:12:30AM +0800, Jason Wang wrote:
> On 2018年05月18日 22:33, Tiwei Bie wrote:
> > On Fri, May 18, 2018 at 09:17:05PM +0800, Jason Wang wrote:
> > > On 2018年05月18日 19:29, Tiwei Bie wrote:
> > > > On Thu, May 17, 2018 at 08:01:52PM +0800, Jason Wang wrote:
> > > > > On 2018年05月16日 22:33, Tiwei Bie wrote:
> > > > > > On Wed, May 16, 2018 at 10:05:44PM +0800, Jason Wang wrote:
> > > > > > > On 2018年05月16日 21:45, Tiwei Bie wrote:
> > > > > > > > On Wed, May 16, 2018 at 08:51:43PM +0800, Jason Wang wrote:
> > > > > > > > > On 2018年05月16日 20:39, Tiwei Bie wrote:
> > > > > > > > > > On Wed, May 16, 2018 at 07:50:16PM +0800, Jason Wang wrote:
> > > > > > > > > > > On 2018年05月16日 16:37, Tiwei Bie wrote:
> > > > > > [...]
> > > > > > > > > > > > +static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
> > > > > > > > > > > > + unsigned int id, void **ctx)
> > > > > > > > > > > > +{
> > > > > > > > > > > > + struct vring_packed_desc *desc;
> > > > > > > > > > > > + unsigned int i, j;
> > > > > > > > > > > > +
> > > > > > > > > > > > + /* Clear data ptr. */
> > > > > > > > > > > > + vq->desc_state[id].data = NULL;
> > > > > > > > > > > > +
> > > > > > > > > > > > + i = head;
> > > > > > > > > > > > +
> > > > > > > > > > > > + for (j = 0; j < vq->desc_state[id].num; j++) {
> > > > > > > > > > > > + desc = &vq->vring_packed.desc[i];
> > > > > > > > > > > > + vring_unmap_one_packed(vq, desc);
> > > > > > > > > > > As mentioned in previous discussion, this probably won't work for the case
> > > > > > > > > > > of out of order completion since it depends on the information in the
> > > > > > > > > > > descriptor ring. We probably need to extend ctx to record such information.
> > > > > > > > > > Above code doesn't depend on the information in the descriptor
> > > > > > > > > > ring. The vq->desc_state[] is the extended ctx.
> > > > > > > > > >
> > > > > > > > > > Best regards,
> > > > > > > > > > Tiwei Bie
> > > > > > > > > Yes, but desc is a pointer to descriptor ring I think so
> > > > > > > > > vring_unmap_one_packed() still depends on the content of descriptor ring?
> > > > > > > > >
> > > > > > > > I got your point now. I think it makes sense to reserve
> > > > > > > > the bits of the addr field. Driver shouldn't try to get
> > > > > > > > addrs from the descriptors when cleanup the descriptors
> > > > > > > > no matter whether we support out-of-order or not.
> > > > > > > Maybe I was wrong, but I remember spec mentioned something like this.
> > > > > > You're right. Spec mentioned this. I was just repeating
> > > > > > the spec to emphasize that it does make sense. :)
> > > > > >
> > > > > > > > But combining it with the out-of-order support, it will
> > > > > > > > mean that the driver still needs to maintain a desc/ctx
> > > > > > > > list that is very similar to the desc ring in the split
> > > > > > > > ring. I'm not quite sure whether it's something we want.
> > > > > > > > If it is true, I'll do it. So do you think we also want
> > > > > > > > to maintain such a desc/ctx list for packed ring?
> > > > > > > To make it work for OOO backends I think we need something like this
> > > > > > > (hardware NIC drivers usually have something like this).
> > > > > > Which hardware NIC drivers have this?
> > > > > It's quite common I think, e.g driver track e.g dma addr and page frag
> > > > > somewhere. e.g the ring->rx_info in mlx4 driver.
> > > > It seems that I had a misunderstanding on your
> > > > previous comments. I know it's quite common for
> > > > drivers to track e.g. DMA addrs somewhere (and
> > > > I think one reason behind this is that they want
> > > > to reuse the bits of addr field).
> > > Yes, we may want this for virtio-net as well in the future.
> > >
> > > > But tracking
> > > > addrs somewhere doesn't mean supporting OOO.
> > > > I thought you were saying it's quite common for
> > > > hardware NIC drivers to support OOO (i.e. NICs
> > > > will return the descriptors OOO):
> > > >
> > > > I'm not familiar with mlx4, maybe I'm wrong.
> > > > I just had a quick glance. And I found below
> > > > comments in mlx4_en_process_rx_cq():
> > > >
> > > > ```
> > > > /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
> > > > * descriptor offset can be deduced from the CQE index instead of
> > > > * reading 'cqe->index' */
> > > > index = cq->mcq.cons_index & ring->size_mask;
> > > > cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
> > > > ```
> > > >
> > > > It seems that although they have a completion
> > > > queue, they are still using the ring in order.
> > > I guess so (at least from the above bits). Git grep -i "out of order" in
> > > drivers/net gives some hints. Looks like there are few devices that do this.
> > >
> > > > I guess maybe storage device may want OOO.
> > > Right, some iSCSI did.
> > >
> > > But tracking them elsewhere is not only for OOO.
> > >
> > > Spec said:
> > >
> > > for element address
> > >
> > > "
> > > In a used descriptor, Element Address is unused.
> > > "
> > >
> > > for Next flag:
> > >
> > > "
> > > For example, if descriptors are used in the same order in which they are
> > > made available, this will result in
> > > the used descriptor overwriting the first available descriptor in the list,
> > > the used descriptor for the next list
> > > overwriting the first available descriptor in the next list, etc.
> > > "
> > >
> > > for in order completion:
> > >
> > > "
> > > This will result in the used descriptor overwriting the first available
> > > descriptor in the batch, the used descriptor
> > > for the next batch overwriting the first available descriptor in the next
> > > batch, etc.
> > > "
> > >
> > > So:
> > >
> > > - It's an alignment to the spec
> > > - device may (or should) overwrite the descriptor, which also makes the address
> > > field useless.
> > You didn't get my point...
>
> I don't hope so.
>
> > I agreed driver should track the DMA addrs or some
> > other necessary things from the very beginning. And
> > I also repeated the spec to emphasize that it does
> > make sense. And I'd like to do that.
> >
> > What I was saying is that, to support OOO, we may
> > need to manage these context (which saves DMA addrs
> > etc) via a list which is similar to the desc list
> > maintained via `next` in split ring instead of an
> > array whose elements always can be indexed directly.
>
> My point is these context is a must (not only for OOO).
Yeah, and I have exactly the same point after you
pointed out that I shouldn't get the addrs from descs.
I do think it makes sense. I'll do it in the next
version. I don't have any doubt about it. All my
questions are about the OOO, instead of whether we
should save context or not. It just seems that you
thought I don't want to do it, and were trying to
convince me that I should do it.
>
> >
> > The desc ring in split ring is an array, but its
> > free entries are managed as list via next. I was
> > just wondering, do we want to manage such a list
> > because of OOO. It's just a very simple question
> > that I want to hear your opinion... (It doesn't
> > means anything, e.g. It doesn't mean I don't want
> > to support OOO. It's just a simple question...)
>
> So the answer is yes. But I admit I don't have a better idea other than what
> you propose here (something like split ring which is a little bit sad).
> Maybe Michael had.
Yeah, that's why I asked this question. It will
make the packed ring a bit similar to split ring
at least in the driver part. So I want to draw
your attention on this to make sure that we're
on the same page.
Best regards,
Tiwei Bie
>
> Thanks
>
> >
> > Best regards,
> > Tiwei Bie
> >
> > > Thanks
> > >
> > > > Best regards,
> > > > Tiwei Bie
> > > >
> > > > > Thanks
> > > > >
> > > > > > > Not for the patch, but it looks like having a OUT_OF_ORDER feature bit is
> > > > > > > much simpler to start with.
> > > > > > +1
> > > > > >
> > > > > > Best regards,
> > > > > > Tiwei Bie
>
^ permalink raw reply
* [PATCH bpf-next 5/5] selftests/bpf: Selftest for sys_sendmsg hooks
From: Andrey Ignatov @ 2018-05-19 2:21 UTC (permalink / raw)
To: netdev; +Cc: Andrey Ignatov, davem, ast, daniel, kernel-team
In-Reply-To: <cover.1526694154.git.rdna@fb.com>
Add selftest for BPF_CGROUP_UDP4_SENDMSG and BPF_CGROUP_UDP6_SENDMSG
attach types.
Try to sendmsg(2) to specific IP:port and test that:
* source IP is overridden as expected.
* remote IP:port pair is overridden as expected;
Both UDPv4 and UDPv6 are tested.
Output:
# test_sock_addr.sh 2>/dev/null
Wait for testing IPv4/IPv6 to become available ... OK
... pre-existing test-cases skipped ...
Test case: sendmsg4: load prog with wrong expected attach type .. [PASS]
Test case: sendmsg4: attach prog with wrong attach type .. [PASS]
Test case: sendmsg4: rewrite IP & port (asm) .. [PASS]
Test case: sendmsg4: rewrite IP & port (C) .. [PASS]
Test case: sendmsg4: deny call .. [PASS]
Test case: sendmsg6: load prog with wrong expected attach type .. [PASS]
Test case: sendmsg6: attach prog with wrong attach type .. [PASS]
Test case: sendmsg6: rewrite IP & port (asm) .. [PASS]
Test case: sendmsg6: rewrite IP & port (C) .. [PASS]
Test case: sendmsg6: deny call .. [PASS]
Summary: 26 PASSED, 0 FAILED
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
tools/testing/selftests/bpf/Makefile | 3 +-
tools/testing/selftests/bpf/sendmsg4_prog.c | 49 +++
tools/testing/selftests/bpf/sendmsg6_prog.c | 60 ++++
tools/testing/selftests/bpf/test_sock_addr.c | 481 +++++++++++++++++++++++++++
4 files changed, 592 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/bpf/sendmsg4_prog.c
create mode 100644 tools/testing/selftests/bpf/sendmsg6_prog.c
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1eb0fa2..d87277a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
- test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
+ test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
+ sendmsg4_prog.o sendmsg6_prog.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/sendmsg4_prog.c
new file mode 100644
index 0000000..a91536b
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg4_prog.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
+#define SRC2_IP4 0x00000000U
+#define SRC_REWRITE_IP4 0x7f000004U
+#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */
+#define DST_REWRITE_IP4 0x7f000001U
+#define DST_PORT 4040
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_DGRAM)
+ return 0;
+
+ /* Rewrite source. */
+ if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
+ ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
+ ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
+ } else {
+ /* Unexpected source. Reject sendmsg. */
+ return 0;
+ }
+
+ /* Rewrite destination. */
+ if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
+ ctx->user_port == bpf_htons(DST_PORT)) {
+ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+ } else {
+ /* Unexpected destination. Reject sendmsg. */
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c
new file mode 100644
index 0000000..5aeaa28
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg6_prog.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_DGRAM)
+ return 0;
+
+ /* Rewrite source. */
+ if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
+ ctx->msg_src_ip6[3] == bpf_htonl(0)) {
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+ } else {
+ /* Unexpected source. Reject sendmsg. */
+ return 0;
+ }
+
+ /* Rewrite destination. */
+ if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) &&
+ ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) {
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+ } else {
+ /* Unexpected destination. Reject sendmsg. */
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index ed3e397..05a8f80 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -1,12 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
+#define _GNU_SOURCE
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
+#include <netinet/in.h>
#include <sys/types.h>
+#include <sys/select.h>
#include <sys/socket.h>
#include <linux/filter.h>
@@ -24,15 +28,19 @@
#define CG_PATH "/foo"
#define CONNECT4_PROG_PATH "./connect4_prog.o"
#define CONNECT6_PROG_PATH "./connect6_prog.o"
+#define SENDMSG4_PROG_PATH "./sendmsg4_prog.o"
+#define SENDMSG6_PROG_PATH "./sendmsg6_prog.o"
#define SERV4_IP "192.168.1.254"
#define SERV4_REWRITE_IP "127.0.0.1"
+#define SRC4_IP "172.16.0.1"
#define SRC4_REWRITE_IP "127.0.0.4"
#define SERV4_PORT 4040
#define SERV4_REWRITE_PORT 4444
#define SERV6_IP "face:b00c:1234:5678::abcd"
#define SERV6_REWRITE_IP "::1"
+#define SRC6_IP "::1"
#define SRC6_REWRITE_IP "::6"
#define SERV6_PORT 6060
#define SERV6_REWRITE_PORT 6666
@@ -65,6 +73,7 @@ struct sock_addr_test {
enum {
LOAD_REJECT,
ATTACH_REJECT,
+ SYSCALL_REJECT,
SUCCESS,
} expected_result;
};
@@ -73,6 +82,11 @@ static int bind4_prog_load(const struct sock_addr_test *test);
static int bind6_prog_load(const struct sock_addr_test *test);
static int connect4_prog_load(const struct sock_addr_test *test);
static int connect6_prog_load(const struct sock_addr_test *test);
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
static struct sock_addr_test tests[] = {
/* bind */
@@ -302,6 +316,148 @@ static struct sock_addr_test tests[] = {
SRC6_REWRITE_IP,
SUCCESS,
},
+
+ /* sendmsg */
+ {
+ "sendmsg4: load prog with wrong expected attach type",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "sendmsg4: attach prog with wrong attach type",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "sendmsg4: rewrite IP & port (asm)",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg4: rewrite IP & port (C)",
+ sendmsg4_rw_c_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg4: deny call",
+ sendmsg_deny_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_REJECT,
+ },
+ {
+ "sendmsg6: load prog with wrong expected attach type",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "sendmsg6: attach prog with wrong attach type",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "sendmsg6: rewrite IP & port (asm)",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: rewrite IP & port (C)",
+ sendmsg6_rw_c_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: deny call",
+ sendmsg_deny_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_REJECT,
+ },
};
static int mk_sockaddr(int domain, const char *ip, unsigned short port,
@@ -540,6 +696,130 @@ static int connect6_prog_load(const struct sock_addr_test *test)
return load_path(test, CONNECT6_PROG_PATH);
}
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
+{
+ struct bpf_insn insns[] = {
+ /* return 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+ struct sockaddr_in dst4_rw_addr;
+ struct in_addr src4_rw_ip;
+
+ if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
+ log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+ (struct sockaddr *)&dst4_rw_addr,
+ sizeof(dst4_rw_addr)) == -1)
+ return -1;
+
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
+
+ /* sk.type == SOCK_DGRAM) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
+
+ /* msg_src_ip4 = src4_rw_ip */
+ BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, msg_src_ip4)),
+
+ /* user_ip4 = dst4_rw_addr.sin_addr */
+ BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+
+ /* user_port = dst4_rw_addr.sin_port */
+ BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, SENDMSG4_PROG_PATH);
+}
+
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+ struct sockaddr_in6 dst6_rw_addr;
+ struct in6_addr src6_rw_ip;
+
+ if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
+ log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
+ (struct sockaddr *)&dst6_rw_addr,
+ sizeof(dst6_rw_addr)) == -1)
+ return -1;
+
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET6) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+#define STORE_IPV6_WORD_N(DST, SRC, N) \
+ BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
+ offsetof(struct bpf_sock_addr, DST[N]))
+
+#define STORE_IPV6(DST, SRC) \
+ STORE_IPV6_WORD_N(DST, SRC, 0), \
+ STORE_IPV6_WORD_N(DST, SRC, 1), \
+ STORE_IPV6_WORD_N(DST, SRC, 2), \
+ STORE_IPV6_WORD_N(DST, SRC, 3)
+
+ STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
+ STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
+
+ /* user_port = dst6_rw_addr.sin6_port */
+ BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, SENDMSG6_PROG_PATH);
+}
+
static int cmp_addr(const struct sockaddr_storage *addr1,
const struct sockaddr_storage *addr2, int cmp_port)
{
@@ -656,6 +936,135 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr,
return fd;
}
+int init_pktinfo(int domain, struct cmsghdr *cmsg)
+{
+ struct in6_pktinfo *pktinfo6;
+ struct in_pktinfo *pktinfo4;
+
+ if (domain == AF_INET) {
+ cmsg->cmsg_level = SOL_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+ pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
+ memset(pktinfo4, 0, sizeof(struct in_pktinfo));
+ if (inet_pton(domain, SRC4_IP,
+ (void *)&pktinfo4->ipi_spec_dst) != 1)
+ return -1;
+ } else if (domain == AF_INET6) {
+ cmsg->cmsg_level = SOL_IPV6;
+ cmsg->cmsg_type = IPV6_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+ pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
+ if (inet_pton(domain, SRC6_IP,
+ (void *)&pktinfo6->ipi6_addr) != 1)
+ return -1;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int sendmsg_to_server(const struct sockaddr_storage *addr,
+ socklen_t addr_len, int set_cmsg, int *syscall_err)
+{
+ union {
+ char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
+ struct cmsghdr align;
+ } control6;
+ union {
+ char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
+ struct cmsghdr align;
+ } control4;
+ struct msghdr hdr;
+ struct iovec iov;
+ char data = 'a';
+ int domain;
+ int fd = -1;
+
+ domain = addr->ss_family;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ goto err;
+ }
+
+ fd = socket(domain, SOCK_DGRAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto err;
+ }
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)addr;
+ hdr.msg_namelen = addr_len;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ if (set_cmsg) {
+ if (domain == AF_INET) {
+ hdr.msg_control = &control4;
+ hdr.msg_controllen = sizeof(control4.buf);
+ } else if (domain == AF_INET6) {
+ hdr.msg_control = &control6;
+ hdr.msg_controllen = sizeof(control6.buf);
+ }
+ if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
+ log_err("Fail to init pktinfo");
+ goto err;
+ }
+ }
+
+ if (sendmsg(fd, &hdr, 0) != sizeof(data)) {
+ log_err("Fail to send message to server");
+ *syscall_err = errno;
+ goto err;
+ }
+
+ goto out;
+err:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
+{
+ struct timeval tv;
+ struct msghdr hdr;
+ struct iovec iov;
+ char data[64];
+ fd_set rfds;
+
+ FD_ZERO(&rfds);
+ FD_SET(sockfd, &rfds);
+
+ tv.tv_sec = 2;
+ tv.tv_usec = 0;
+
+ if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
+ !FD_ISSET(sockfd, &rfds))
+ return -1;
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = src_addr;
+ hdr.msg_namelen = sizeof(struct sockaddr_storage);
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ return recvmsg(sockfd, &hdr, 0);
+}
+
static int init_addrs(const struct sock_addr_test *test,
struct sockaddr_storage *requested_addr,
struct sockaddr_storage *expected_addr,
@@ -753,6 +1162,69 @@ static int run_connect_test_case(const struct sock_addr_test *test)
return err;
}
+static int run_sendmsg_test_case(const struct sock_addr_test *test)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage expected_src_addr;
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ struct sockaddr_storage real_src_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int set_cmsg;
+ int err = 0;
+
+ if (test->type != SOCK_DGRAM)
+ goto err;
+
+ if (init_addrs(test, &requested_addr, &expected_addr,
+ &expected_src_addr))
+ goto err;
+
+ /* Prepare server to sendmsg to */
+ servfd = start_server(test->type, &expected_addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ for (set_cmsg = 0; set_cmsg <= 1; ++set_cmsg) {
+ if (clientfd >= 0)
+ close(clientfd);
+
+ clientfd = sendmsg_to_server(&requested_addr, addr_len,
+ set_cmsg, &err);
+ if (err)
+ goto out;
+ else if (clientfd == -1)
+ goto err;
+
+ /* Try to receive message on server instead of using
+ * getpeername(2) on client socket, to check that client's
+ * destination address was rewritten properly, since
+ * getpeername(2) doesn't work with unconnected datagram
+ * sockets.
+ *
+ * Get source address from recvmsg(2) as well to make sure
+ * source was rewritten properly: getsockname(2) can't be used
+ * since socket is unconnected and source defined for one
+ * specific packet may differ from the one used by default and
+ * returned by getsockname(2).
+ */
+ if (recvmsg_from_client(servfd, &real_src_addr) == -1)
+ goto err;
+
+ if (cmp_addr(&real_src_addr, &expected_src_addr, /*cmp_port*/0))
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
+}
+
static int run_test_case(int cgfd, const struct sock_addr_test *test)
{
int progfd = -1;
@@ -784,10 +1256,19 @@ static int run_test_case(int cgfd, const struct sock_addr_test *test)
case BPF_CGROUP_INET6_CONNECT:
err = run_connect_test_case(test);
break;
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
+ err = run_sendmsg_test_case(test);
+ break;
default:
goto err;
}
+ if (test->expected_result == SYSCALL_REJECT && err == EPERM) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ }
+
if (err || test->expected_result != SUCCESS)
goto err;
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 2/5] bpf: Sync bpf.h to tools/
From: Andrey Ignatov @ 2018-05-19 2:21 UTC (permalink / raw)
To: netdev; +Cc: Andrey Ignatov, davem, ast, daniel, kernel-team
In-Reply-To: <cover.1526694154.git.rdna@fb.com>
Sync new `BPF_CGROUP_UDP4_SENDMSG` and `BPF_CGROUP_UDP6_SENDMSG`
attach types to tools/.
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
tools/include/uapi/linux/bpf.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 97446bb..b70ad2c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -158,6 +158,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
__MAX_BPF_ATTACH_TYPE
};
@@ -2247,6 +2249,12 @@ struct bpf_sock_addr {
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
};
/* User bpf_sock_ops struct to access socket values and specify request ops
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 4/5] selftests/bpf: Prepare test_sock_addr for extension
From: Andrey Ignatov @ 2018-05-19 2:21 UTC (permalink / raw)
To: netdev; +Cc: Andrey Ignatov, davem, ast, daniel, kernel-team
In-Reply-To: <cover.1526694154.git.rdna@fb.com>
test_sock_addr was not easy to extend since it was focused on sys_bind
and sys_connect quite a bit.
Reorganized it so that it'll be easier to cover new test-cases for
`BPF_PROG_TYPE_CGROUP_SOCK_ADDR`:
- decouple test-cases so that only one BPF prog is tested at a time;
- check programmatically that local IP:port for sys_bind, source IP and
destination IP:port for sys_connect are rewritten properly by tested
BPF programs.
The output of new version:
# test_sock_addr.sh 2>/dev/null
Wait for testing IPv4/IPv6 to become available ... OK
Test case: bind4: load prog with wrong expected attach type .. [PASS]
Test case: bind4: attach prog with wrong attach type .. [PASS]
Test case: bind4: rewrite IP & TCP port in .. [PASS]
Test case: bind4: rewrite IP & UDP port in .. [PASS]
Test case: bind6: load prog with wrong expected attach type .. [PASS]
Test case: bind6: attach prog with wrong attach type .. [PASS]
Test case: bind6: rewrite IP & TCP port in .. [PASS]
Test case: bind6: rewrite IP & UDP port in .. [PASS]
Test case: connect4: load prog with wrong expected attach type .. [PASS]
Test case: connect4: attach prog with wrong attach type .. [PASS]
Test case: connect4: rewrite IP & TCP port .. [PASS]
Test case: connect4: rewrite IP & UDP port .. [PASS]
Test case: connect6: load prog with wrong expected attach type .. [PASS]
Test case: connect6: attach prog with wrong attach type .. [PASS]
Test case: connect6: rewrite IP & TCP port .. [PASS]
Test case: connect6: rewrite IP & UDP port .. [PASS]
Summary: 16 PASSED, 0 FAILED
(stderr contains errors from libbpf when testing load/attach with
invalid arguments)
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
tools/testing/selftests/bpf/test_sock_addr.c | 655 +++++++++++++++++++--------
1 file changed, 460 insertions(+), 195 deletions(-)
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 2950f80..ed3e397 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -17,34 +17,292 @@
#include "cgroup_helpers.h"
#include "bpf_rlimit.h"
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
#define CG_PATH "/foo"
#define CONNECT4_PROG_PATH "./connect4_prog.o"
#define CONNECT6_PROG_PATH "./connect6_prog.o"
#define SERV4_IP "192.168.1.254"
#define SERV4_REWRITE_IP "127.0.0.1"
+#define SRC4_REWRITE_IP "127.0.0.4"
#define SERV4_PORT 4040
#define SERV4_REWRITE_PORT 4444
#define SERV6_IP "face:b00c:1234:5678::abcd"
#define SERV6_REWRITE_IP "::1"
+#define SRC6_REWRITE_IP "::6"
#define SERV6_PORT 6060
#define SERV6_REWRITE_PORT 6666
#define INET_NTOP_BUF 40
-typedef int (*load_fn)(enum bpf_attach_type, const char *comment);
+struct sock_addr_test;
+
+typedef int (*load_fn)(const struct sock_addr_test *test);
typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
-struct program {
- enum bpf_attach_type type;
- load_fn loadfn;
- int fd;
- const char *name;
- enum bpf_attach_type invalid_type;
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_addr_test {
+ const char *descr;
+ /* BPF prog properties */
+ load_fn loadfn;
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_attach_type attach_type;
+ /* Socket properties */
+ int domain;
+ int type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_ip;
+ unsigned short requested_port;
+ const char *expected_ip;
+ unsigned short expected_port;
+ const char *expected_src_ip;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ SUCCESS,
+ } expected_result;
};
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static int bind4_prog_load(const struct sock_addr_test *test);
+static int bind6_prog_load(const struct sock_addr_test *test);
+static int connect4_prog_load(const struct sock_addr_test *test);
+static int connect6_prog_load(const struct sock_addr_test *test);
+
+static struct sock_addr_test tests[] = {
+ /* bind */
+ {
+ "bind4: load prog with wrong expected attach type",
+ bind4_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "bind4: attach prog with wrong attach type",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "bind4: rewrite IP & TCP port in",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind4: rewrite IP & UDP port in",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind6: load prog with wrong expected attach type",
+ bind6_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "bind6: attach prog with wrong attach type",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "bind6: rewrite IP & TCP port in",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind6: rewrite IP & UDP port in",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+
+ /* connect */
+ {
+ "connect4: load prog with wrong expected attach type",
+ connect4_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "connect4: attach prog with wrong attach type",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "connect4: rewrite IP & TCP port",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect4: rewrite IP & UDP port",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect6: load prog with wrong expected attach type",
+ connect6_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "connect6: attach prog with wrong attach type",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "connect6: rewrite IP & TCP port",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect6: rewrite IP & UDP port",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+};
static int mk_sockaddr(int domain, const char *ip, unsigned short port,
struct sockaddr *addr, socklen_t addr_len)
@@ -84,25 +342,23 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port,
return 0;
}
-static int load_insns(enum bpf_attach_type attach_type,
- const struct bpf_insn *insns, size_t insns_cnt,
- const char *comment)
+static int load_insns(const struct sock_addr_test *test,
+ const struct bpf_insn *insns, size_t insns_cnt)
{
struct bpf_load_program_attr load_attr;
int ret;
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- load_attr.expected_attach_type = attach_type;
+ load_attr.expected_attach_type = test->expected_attach_type;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
load_attr.license = "GPL";
ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
- if (ret < 0 && comment) {
- log_err(">>> Loading %s program error.\n"
- ">>> Output from verifier:\n%s\n-------\n",
- comment, bpf_log_buf);
+ if (ret < 0 && test->expected_result != LOAD_REJECT) {
+ log_err(">>> Loading program error.\n"
+ ">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
}
return ret;
@@ -119,8 +375,7 @@ static int load_insns(enum bpf_attach_type attach_type,
* to count jumps properly.
*/
-static int bind4_prog_load(enum bpf_attach_type attach_type,
- const char *comment)
+static int bind4_prog_load(const struct sock_addr_test *test)
{
union {
uint8_t u4_addr8[4];
@@ -186,12 +441,10 @@ static int bind4_prog_load(enum bpf_attach_type attach_type,
BPF_EXIT_INSN(),
};
- return load_insns(attach_type, insns,
- sizeof(insns) / sizeof(struct bpf_insn), comment);
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
}
-static int bind6_prog_load(enum bpf_attach_type attach_type,
- const char *comment)
+static int bind6_prog_load(const struct sock_addr_test *test)
{
struct sockaddr_in6 addr6_rw;
struct in6_addr ip6;
@@ -254,13 +507,10 @@ static int bind6_prog_load(enum bpf_attach_type attach_type,
BPF_EXIT_INSN(),
};
- return load_insns(attach_type, insns,
- sizeof(insns) / sizeof(struct bpf_insn), comment);
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
}
-static int connect_prog_load_path(const char *path,
- enum bpf_attach_type attach_type,
- const char *comment)
+static int load_path(const struct sock_addr_test *test, const char *path)
{
struct bpf_prog_load_attr attr;
struct bpf_object *obj;
@@ -269,75 +519,83 @@ static int connect_prog_load_path(const char *path,
memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
attr.file = path;
attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- attr.expected_attach_type = attach_type;
+ attr.expected_attach_type = test->expected_attach_type;
if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
- if (comment)
- log_err(">>> Loading %s program at %s error.\n",
- comment, path);
+ if (test->expected_result != LOAD_REJECT)
+ log_err(">>> Loading program (%s) error.\n", path);
return -1;
}
return prog_fd;
}
-static int connect4_prog_load(enum bpf_attach_type attach_type,
- const char *comment)
+static int connect4_prog_load(const struct sock_addr_test *test)
{
- return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment);
+ return load_path(test, CONNECT4_PROG_PATH);
}
-static int connect6_prog_load(enum bpf_attach_type attach_type,
- const char *comment)
+static int connect6_prog_load(const struct sock_addr_test *test)
{
- return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment);
+ return load_path(test, CONNECT6_PROG_PATH);
}
-static void print_ip_port(int sockfd, info_fn fn, const char *fmt)
+static int cmp_addr(const struct sockaddr_storage *addr1,
+ const struct sockaddr_storage *addr2, int cmp_port)
{
- char addr_buf[INET_NTOP_BUF];
- struct sockaddr_storage addr;
- struct sockaddr_in6 *addr6;
- struct sockaddr_in *addr4;
- socklen_t addr_len;
- unsigned short port;
- void *nip;
-
- addr_len = sizeof(struct sockaddr_storage);
- memset(&addr, 0, addr_len);
-
- if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) {
- if (addr.ss_family == AF_INET) {
- addr4 = (struct sockaddr_in *)&addr;
- nip = (void *)&addr4->sin_addr;
- port = ntohs(addr4->sin_port);
- } else if (addr.ss_family == AF_INET6) {
- addr6 = (struct sockaddr_in6 *)&addr;
- nip = (void *)&addr6->sin6_addr;
- port = ntohs(addr6->sin6_port);
- } else {
- return;
- }
- const char *addr_str =
- inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF);
- printf(fmt, addr_str ? addr_str : "??", port);
+ const struct sockaddr_in *four1, *four2;
+ const struct sockaddr_in6 *six1, *six2;
+
+ if (addr1->ss_family != addr2->ss_family)
+ return -1;
+
+ if (addr1->ss_family == AF_INET) {
+ four1 = (const struct sockaddr_in *)addr1;
+ four2 = (const struct sockaddr_in *)addr2;
+ return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+ four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+ } else if (addr1->ss_family == AF_INET6) {
+ six1 = (const struct sockaddr_in6 *)addr1;
+ six2 = (const struct sockaddr_in6 *)addr2;
+ return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+ !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+ sizeof(struct in6_addr)));
}
+
+ return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+ const struct sockaddr_storage *addr2, int cmp_port)
+{
+ struct sockaddr_storage addr1;
+ socklen_t len1 = sizeof(addr1);
+
+ memset(&addr1, 0, len1);
+ if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+ return -1;
+
+ return cmp_addr(&addr1, addr2, cmp_port);
+}
+
+static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
+{
+ return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
}
-static void print_local_ip_port(int sockfd, const char *fmt)
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
{
- print_ip_port(sockfd, getsockname, fmt);
+ return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
}
-static void print_remote_ip_port(int sockfd, const char *fmt)
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
{
- print_ip_port(sockfd, getpeername, fmt);
+ return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
}
static int start_server(int type, const struct sockaddr_storage *addr,
socklen_t addr_len)
{
-
int fd;
fd = socket(addr->ss_family, type, 0);
@@ -358,8 +616,6 @@ static int start_server(int type, const struct sockaddr_storage *addr,
}
}
- print_local_ip_port(fd, "\t Actual: bind(%s, %d)\n");
-
goto out;
close_out:
close(fd);
@@ -372,19 +628,19 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr,
socklen_t addr_len)
{
int domain;
- int fd;
+ int fd = -1;
domain = addr->ss_family;
if (domain != AF_INET && domain != AF_INET6) {
log_err("Unsupported address family");
- return -1;
+ goto err;
}
fd = socket(domain, type, 0);
if (fd == -1) {
- log_err("Failed to creating client socket");
- return -1;
+ log_err("Failed to create client socket");
+ goto err;
}
if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
@@ -392,162 +648,188 @@ static int connect_to_server(int type, const struct sockaddr_storage *addr,
goto err;
}
- print_remote_ip_port(fd, "\t Actual: connect(%s, %d)");
- print_local_ip_port(fd, " from (%s, %d)\n");
-
- return 0;
+ goto out;
err:
close(fd);
- return -1;
+ fd = -1;
+out:
+ return fd;
}
-static void print_test_case_num(int domain, int type)
+static int init_addrs(const struct sock_addr_test *test,
+ struct sockaddr_storage *requested_addr,
+ struct sockaddr_storage *expected_addr,
+ struct sockaddr_storage *expected_src_addr)
{
- static int test_num;
-
- printf("Test case #%d (%s/%s):\n", ++test_num,
- (domain == AF_INET ? "IPv4" :
- domain == AF_INET6 ? "IPv6" :
- "unknown_domain"),
- (type == SOCK_STREAM ? "TCP" :
- type == SOCK_DGRAM ? "UDP" :
- "unknown_type"));
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
+ (struct sockaddr *)expected_addr, addr_len) == -1)
+ goto err;
+
+ if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
+ (struct sockaddr *)requested_addr, addr_len) == -1)
+ goto err;
+
+ if (test->expected_src_ip &&
+ mk_sockaddr(test->domain, test->expected_src_ip, 0,
+ (struct sockaddr *)expected_src_addr, addr_len) == -1)
+ goto err;
+
+ return 0;
+err:
+ return -1;
}
-static int run_test_case(int domain, int type, const char *ip,
- unsigned short port)
+static int run_bind_test_case(const struct sock_addr_test *test)
{
- struct sockaddr_storage addr;
- socklen_t addr_len = sizeof(addr);
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ int clientfd = -1;
int servfd = -1;
int err = 0;
- print_test_case_num(domain, type);
-
- if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr,
- addr_len) == -1)
- return -1;
+ if (init_addrs(test, &requested_addr, &expected_addr, NULL))
+ goto err;
- printf("\tRequested: bind(%s, %d) ..\n", ip, port);
- servfd = start_server(type, &addr, addr_len);
+ servfd = start_server(test->type, &requested_addr, addr_len);
if (servfd == -1)
goto err;
- printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port);
- if (connect_to_server(type, &addr, addr_len))
+ if (cmp_local_addr(servfd, &expected_addr))
+ goto err;
+
+ /* Try to connect to server just in case */
+ clientfd = connect_to_server(test->type, &expected_addr, addr_len);
+ if (clientfd == -1)
goto err;
goto out;
err:
err = -1;
out:
+ close(clientfd);
close(servfd);
return err;
}
-static void close_progs_fds(struct program *progs, size_t prog_cnt)
+static int run_connect_test_case(const struct sock_addr_test *test)
{
- size_t i;
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage expected_src_addr;
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int err = 0;
- for (i = 0; i < prog_cnt; ++i) {
- close(progs[i].fd);
- progs[i].fd = -1;
- }
-}
+ if (init_addrs(test, &requested_addr, &expected_addr,
+ &expected_src_addr))
+ goto err;
-static int load_and_attach_progs(int cgfd, struct program *progs,
- size_t prog_cnt)
-{
- size_t i;
-
- for (i = 0; i < prog_cnt; ++i) {
- printf("Load %s with invalid type (can pollute stderr) ",
- progs[i].name);
- fflush(stdout);
- progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL);
- if (progs[i].fd != -1) {
- log_err("Load with invalid type accepted for %s",
- progs[i].name);
- goto err;
- }
- printf("... REJECTED\n");
+ /* Prepare server to connect to */
+ servfd = start_server(test->type, &expected_addr, addr_len);
+ if (servfd == -1)
+ goto err;
- printf("Load %s with valid type", progs[i].name);
- progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name);
- if (progs[i].fd == -1) {
- log_err("Failed to load program %s", progs[i].name);
- goto err;
- }
- printf(" ... OK\n");
-
- printf("Attach %s with invalid type", progs[i].name);
- if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type,
- BPF_F_ALLOW_OVERRIDE) != -1) {
- log_err("Attach with invalid type accepted for %s",
- progs[i].name);
- goto err;
- }
- printf(" ... REJECTED\n");
+ clientfd = connect_to_server(test->type, &requested_addr, addr_len);
+ if (clientfd == -1)
+ goto err;
- printf("Attach %s with valid type", progs[i].name);
- if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
- BPF_F_ALLOW_OVERRIDE) == -1) {
- log_err("Failed to attach program %s", progs[i].name);
- goto err;
- }
- printf(" ... OK\n");
- }
+ /* Make sure src and dst addrs were overridden properly */
+ if (cmp_peer_addr(clientfd, &expected_addr))
+ goto err;
- return 0;
+ if (cmp_local_ip(clientfd, &expected_src_addr))
+ goto err;
+
+ goto out;
err:
- close_progs_fds(progs, prog_cnt);
- return -1;
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
}
-static int run_domain_test(int domain, int cgfd, struct program *progs,
- size_t prog_cnt, const char *ip, unsigned short port)
+static int run_test_case(int cgfd, const struct sock_addr_test *test)
{
+ int progfd = -1;
int err = 0;
- if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1)
+ printf("Test case: %s .. ", test->descr);
+
+ progfd = test->loadfn(test);
+ if (test->expected_result == LOAD_REJECT && progfd < 0)
+ goto out;
+ else if (test->expected_result == LOAD_REJECT || progfd < 0)
+ goto err;
+
+ err = bpf_prog_attach(progfd, cgfd, test->attach_type,
+ BPF_F_ALLOW_OVERRIDE);
+ if (test->expected_result == ATTACH_REJECT && err) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ } else if (test->expected_result == ATTACH_REJECT || err) {
goto err;
+ }
- if (run_test_case(domain, SOCK_STREAM, ip, port) == -1)
+ switch (test->attach_type) {
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ err = run_bind_test_case(test);
+ break;
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ err = run_connect_test_case(test);
+ break;
+ default:
goto err;
+ }
- if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1)
+ if (err || test->expected_result != SUCCESS)
goto err;
goto out;
err:
err = -1;
out:
- close_progs_fds(progs, prog_cnt);
+ /* Detaching w/o checking return code: best effort attempt. */
+ if (progfd != -1)
+ bpf_prog_detach(cgfd, test->attach_type);
+ close(progfd);
+ printf("[%s]\n", err ? "FAIL" : "PASS");
return err;
}
-static int run_test(void)
+static int run_tests(int cgfd)
+{
+ int passes = 0;
+ int fails = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (run_test_case(cgfd, &tests[i]))
+ ++fails;
+ else
+ ++passes;
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+ return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
{
- size_t inet6_prog_cnt;
- size_t inet_prog_cnt;
int cgfd = -1;
int err = 0;
- struct program inet6_progs[] = {
- {BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6",
- BPF_CGROUP_INET4_BIND},
- {BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6",
- BPF_CGROUP_INET4_CONNECT},
- };
- inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);
-
- struct program inet_progs[] = {
- {BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
- BPF_CGROUP_INET6_BIND},
- {BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4",
- BPF_CGROUP_INET6_CONNECT},
- };
- inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);
+ if (argc < 2) {
+ fprintf(stderr,
+ "%s has to be run via %s.sh. Skip direct run.\n",
+ argv[0], argv[0]);
+ exit(err);
+ }
if (setup_cgroup_environment())
goto err;
@@ -559,12 +841,7 @@ static int run_test(void)
if (join_cgroup(CG_PATH))
goto err;
- if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP,
- SERV4_PORT) == -1)
- goto err;
-
- if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt,
- SERV6_IP, SERV6_PORT) == -1)
+ if (run_tests(cgfd))
goto err;
goto out;
@@ -573,17 +850,5 @@ static int run_test(void)
out:
close(cgfd);
cleanup_cgroup_environment();
- printf(err ? "### FAIL\n" : "### SUCCESS\n");
return err;
}
-
-int main(int argc, char **argv)
-{
- if (argc < 2) {
- fprintf(stderr,
- "%s has to be run via %s.sh. Skip direct run.\n",
- argv[0], argv[0]);
- exit(0);
- }
- return run_test();
-}
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 3/5] libbpf: Support guessing sendmsg{4,6} progs
From: Andrey Ignatov @ 2018-05-19 2:21 UTC (permalink / raw)
To: netdev; +Cc: Andrey Ignatov, davem, ast, daniel, kernel-team
In-Reply-To: <cover.1526694154.git.rdna@fb.com>
libbpf can guess prog type and expected attach type based on section
name. Add hints for "cgroup/sendmsg4" and "cgroup/sendmsg6" section
names.
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
tools/lib/bpf/libbpf.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3dbe217..f5238c5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2042,6 +2042,8 @@ static const struct {
BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
+ BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG),
+ BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
};
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 1/5] bpf: Hooks for sys_sendmsg
From: Andrey Ignatov @ 2018-05-19 2:21 UTC (permalink / raw)
To: netdev; +Cc: Andrey Ignatov, davem, ast, daniel, kernel-team
In-Reply-To: <cover.1526694154.git.rdna@fb.com>
In addition to already existing BPF hooks for sys_bind and sys_connect,
the patch provides new hooks for sys_sendmsg.
It leverages existing BPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR`
that provides access to the socket itself (properties like family, type,
protocol) and user-passed `struct sockaddr *` so that BPF program can
override destination IP and port for system calls such as sendto(2) or
sendmsg(2) and/or assign source IP to the socket.
The hooks are implemented as two new attach types:
`BPF_CGROUP_UDP4_SENDMSG` and `BPF_CGROUP_UDP6_SENDMSG` for UDPv4 and
UDPv6 correspondingly.
UDPv4 and UDPv6 have separate attach types for the same reason as the
sys_bind and sys_connect hooks, i.e. to prevent reading from / writing to
e.g. user_ip6 fields when the user passes sockaddr_in, since that would be
out-of-bounds.
The difference from the already existing hooks is that the sys_sendmsg
hooks are implemented only for unconnected UDP.
For TCP it doesn't make sense to change user-provided `struct sockaddr *`
at sendto(2)/sendmsg(2) time since socket either was already connected
and has source/destination set or wasn't connected and call to
sendto(2)/sendmsg(2) would lead to ENOTCONN anyway.
Connected UDP is already handled by sys_connect hooks that can override
source/destination at connect time and use fast-path later, i.e. these
hooks don't affect UDP fast-path.
Rewriting source IP is implemented differently than that in sys_connect
hooks. When sys_sendmsg is used with unconnected UDP it doesn't work to
just bind socket to desired local IP address since source IP can be set
on per-packet basis by using ancillary data (cmsg(3)). So no matter if
socket is bound or not, source IP has to be rewritten on every call to
sys_sendmsg.
To do so, two new fields are added to UAPI `struct bpf_sock_addr`:
* `msg_src_ip4` to set source IPv4 for UDPv4;
* `msg_src_ip6` to set source IPv6 for UDPv6.
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
include/linux/bpf-cgroup.h | 23 +++++++++++++++++------
include/linux/filter.h | 1 +
include/uapi/linux/bpf.h | 8 ++++++++
kernel/bpf/cgroup.c | 11 ++++++++++-
kernel/bpf/syscall.c | 8 ++++++++
net/core/filter.c | 39 +++++++++++++++++++++++++++++++++++++++
net/ipv4/udp.c | 20 ++++++++++++++++++--
net/ipv6/udp.c | 17 +++++++++++++++++
8 files changed, 118 insertions(+), 9 deletions(-)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 30d15e6..46f01ba 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -66,7 +66,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
- enum bpf_attach_type type);
+ enum bpf_attach_type type,
+ void *t_ctx);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
@@ -120,16 +121,18 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ NULL); \
__ret; \
})
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type) \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \
int __ret = 0; \
if (cgroup_bpf_enabled) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ t_ctx); \
release_sock(sk); \
} \
__ret; \
@@ -151,10 +154,16 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)
+
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)
+
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
@@ -197,6 +206,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d358d18..d90abda 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1010,6 +1010,7 @@ struct bpf_sock_addr_kern {
* only two (src and dst) are available at convert_ctx_access time
*/
u64 tmp_reg;
+ void *t_ctx; /* Attach type specific context. */
};
struct bpf_sock_ops_kern {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97446bb..b70ad2c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -158,6 +158,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
__MAX_BPF_ATTACH_TYPE
};
@@ -2247,6 +2249,12 @@ struct bpf_sock_addr {
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
};
/* User bpf_sock_ops struct to access socket values and specify request ops
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 43171a0..f7c00bd 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -500,6 +500,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
* @sk: sock struct that will use sockaddr
* @uaddr: sockaddr struct provided by user
* @type: The type of program to be exectuted
+ * @t_ctx: Pointer to attach type specific context
*
* socket is expected to be of type INET or INET6.
*
@@ -508,12 +509,15 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
*/
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
- enum bpf_attach_type type)
+ enum bpf_attach_type type,
+ void *t_ctx)
{
struct bpf_sock_addr_kern ctx = {
.sk = sk,
.uaddr = uaddr,
+ .t_ctx = t_ctx,
};
+ struct sockaddr_storage unspec;
struct cgroup *cgrp;
int ret;
@@ -523,6 +527,11 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
return 0;
+ if (!ctx.uaddr) {
+ memset(&unspec, 0, sizeof(unspec));
+ ctx.uaddr = (struct sockaddr *)&unspec;
+ }
+
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bfcde94..11a5a95 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1247,6 +1247,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
return 0;
default:
return -EINVAL;
@@ -1563,6 +1565,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
@@ -1633,6 +1637,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
break;
case BPF_CGROUP_SOCK_OPS:
@@ -1690,6 +1696,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_INET6_POST_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index aec5eba..f696dc9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5010,6 +5010,7 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (prog->expected_attach_type) {
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_UDP4_SENDMSG:
break;
default:
return false;
@@ -5019,6 +5020,24 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (prog->expected_attach_type) {
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UDP6_SENDMSG:
+ break;
+ default:
+ return false;
+ }
+ break;
+ case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_UDP4_SENDMSG:
+ break;
+ default:
+ return false;
+ }
+ break;
+ case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
+ msg_src_ip6[3]):
+ switch (prog->expected_attach_type) {
+ case BPF_CGROUP_UDP6_SENDMSG:
break;
default:
return false;
@@ -5029,6 +5048,9 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+ case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
+ case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
+ msg_src_ip6[3]):
/* Only narrow read access allowed for now. */
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
@@ -5783,6 +5805,23 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
SK_FL_PROTO_SHIFT);
break;
+
+ case offsetof(struct bpf_sock_addr, msg_src_ip4):
+ /* Treat t_ctx as struct in_addr for msg_src_ip4. */
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct in_addr, t_ctx,
+ s_addr, BPF_SIZE(si->code), 0, tmp_reg);
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
+ msg_src_ip6[3]):
+ off = si->off;
+ off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
+ /* Treat t_ctx as struct in6_addr for msg_src_ip6. */
+ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+ struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
+ s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
+ break;
}
return insn - insn_buf;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ff4d4ba..a1f9ba2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -900,6 +900,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct udp_sock *up = udp_sk(sk);
+ DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
struct flowi4 fl4_stack;
struct flowi4 *fl4;
int ulen = len;
@@ -954,8 +955,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/*
* Get and verify the address.
*/
- if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);
+ if (usin) {
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET) {
@@ -1009,6 +1009,22 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rcu_read_unlock();
}
+ if (!connected) {
+ err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
+ (struct sockaddr *)usin, &ipc.addr);
+ if (err)
+ goto out_free;
+ if (usin) {
+ if (usin->sin_port == 0) {
+ /* BPF program set invalid port. Reject it. */
+ err = -EINVAL;
+ goto out_free;
+ }
+ daddr = usin->sin_addr.s_addr;
+ dport = usin->sin_port;
+ }
+ }
+
saddr = ipc.addr;
ipc.addr = faddr = daddr;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2839c1b..6f580ea 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1315,6 +1315,22 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
+ if (!connected) {
+ err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
+ (struct sockaddr *)sin6, &fl6.saddr);
+ if (err)
+ goto out_no_dst;
+ if (sin6) {
+ if (sin6->sin6_port == 0) {
+ /* BPF program set invalid port. Reject it. */
+ err = -EINVAL;
+ goto out_no_dst;
+ }
+ fl6.fl6_dport = sin6->sin6_port;
+ fl6.daddr = sin6->sin6_addr;
+ }
+ }
+
final_p = fl6_update_dst(&fl6, opt, &final);
if (final_p)
connected = false;
@@ -1394,6 +1410,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
out:
dst_release(dst);
+out_no_dst:
fl6_sock_release(flowlabel);
txopt_put(opt_to_free);
if (!err)
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 0/5] bpf: Hooks for sys_sendmsg
From: Andrey Ignatov @ 2018-05-19 2:21 UTC (permalink / raw)
To: netdev; +Cc: Andrey Ignatov, davem, ast, daniel, kernel-team
This patch set adds BPF hooks for sys_sendmsg similar to existing hooks for
sys_bind and sys_connect.
The hooks allow overriding the source IP (including the case when it's set via
cmsg(3)) and destination IP:port for unconnected UDP (slow path). TCP and
connected UDP (fast path) are not affected. This makes UDP support
complete: connected UDP is handled by sys_connect hooks, unconnected by
sys_sendmsg ones.
Similar to sys_connect hooks, sys_sendmsg ones can be used to make system
calls such as sendmsg(2) and sendto(2) return EPERM.
Please see patch 0001 for more details.
Andrey Ignatov (5):
bpf: Hooks for sys_sendmsg
bpf: Sync bpf.h to tools/
libbpf: Support guessing sendmsg{4,6} progs
selftests/bpf: Prepare test_sock_addr for extension
selftests/bpf: Selftest for sys_sendmsg hooks
include/linux/bpf-cgroup.h | 23 +-
include/linux/filter.h | 1 +
include/uapi/linux/bpf.h | 8 +
kernel/bpf/cgroup.c | 11 +-
kernel/bpf/syscall.c | 8 +
net/core/filter.c | 39 +
net/ipv4/udp.c | 20 +-
net/ipv6/udp.c | 17 +
tools/include/uapi/linux/bpf.h | 8 +
tools/lib/bpf/libbpf.c | 2 +
tools/testing/selftests/bpf/Makefile | 3 +-
tools/testing/selftests/bpf/sendmsg4_prog.c | 49 ++
tools/testing/selftests/bpf/sendmsg6_prog.c | 60 ++
tools/testing/selftests/bpf/test_sock_addr.c | 1118 +++++++++++++++++++++-----
14 files changed, 1171 insertions(+), 196 deletions(-)
create mode 100644 tools/testing/selftests/bpf/sendmsg4_prog.c
create mode 100644 tools/testing/selftests/bpf/sendmsg6_prog.c
--
2.9.5
^ permalink raw reply
* Re: [RFC v4 3/5] virtio_ring: add packed ring support
From: Jason Wang @ 2018-05-19 1:12 UTC (permalink / raw)
To: Tiwei Bie; +Cc: mst, virtualization, linux-kernel, netdev, wexu, jfreimann
In-Reply-To: <20180518143334.GA4537@debian>
On 2018年05月18日 22:33, Tiwei Bie wrote:
> On Fri, May 18, 2018 at 09:17:05PM +0800, Jason Wang wrote:
>> On 2018年05月18日 19:29, Tiwei Bie wrote:
>>> On Thu, May 17, 2018 at 08:01:52PM +0800, Jason Wang wrote:
>>>> On 2018年05月16日 22:33, Tiwei Bie wrote:
>>>>> On Wed, May 16, 2018 at 10:05:44PM +0800, Jason Wang wrote:
>>>>>> On 2018年05月16日 21:45, Tiwei Bie wrote:
>>>>>>> On Wed, May 16, 2018 at 08:51:43PM +0800, Jason Wang wrote:
>>>>>>>> On 2018年05月16日 20:39, Tiwei Bie wrote:
>>>>>>>>> On Wed, May 16, 2018 at 07:50:16PM +0800, Jason Wang wrote:
>>>>>>>>>> On 2018年05月16日 16:37, Tiwei Bie wrote:
>>>>> [...]
>>>>>>>>>>> +static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int head,
>>>>>>>>>>> + unsigned int id, void **ctx)
>>>>>>>>>>> +{
>>>>>>>>>>> + struct vring_packed_desc *desc;
>>>>>>>>>>> + unsigned int i, j;
>>>>>>>>>>> +
>>>>>>>>>>> + /* Clear data ptr. */
>>>>>>>>>>> + vq->desc_state[id].data = NULL;
>>>>>>>>>>> +
>>>>>>>>>>> + i = head;
>>>>>>>>>>> +
>>>>>>>>>>> + for (j = 0; j < vq->desc_state[id].num; j++) {
>>>>>>>>>>> + desc = &vq->vring_packed.desc[i];
>>>>>>>>>>> + vring_unmap_one_packed(vq, desc);
>>>>>>>>>> As mentioned in previous discussion, this probably won't work for the case
>>>>>>>>>> of out of order completion since it depends on the information in the
>>>>>>>>>> descriptor ring. We probably need to extend ctx to record such information.
>>>>>>>>> Above code doesn't depend on the information in the descriptor
>>>>>>>>> ring. The vq->desc_state[] is the extended ctx.
>>>>>>>>>
>>>>>>>>> Best regards,
>>>>>>>>> Tiwei Bie
>>>>>>>> Yes, but desc is a pointer to descriptor ring I think so
>>>>>>>> vring_unmap_one_packed() still depends on the content of descriptor ring?
>>>>>>>>
>>>>>>> I got your point now. I think it makes sense to reserve
>>>>>>> the bits of the addr field. Driver shouldn't try to get
>>>>>>> addrs from the descriptors when cleanup the descriptors
>>>>>>> no matter whether we support out-of-order or not.
>>>>>> Maybe I was wrong, but I remember spec mentioned something like this.
>>>>> You're right. Spec mentioned this. I was just repeating
>>>>> the spec to emphasize that it does make sense. :)
>>>>>
>>>>>>> But combining it with the out-of-order support, it will
>>>>>>> mean that the driver still needs to maintain a desc/ctx
>>>>>>> list that is very similar to the desc ring in the split
>>>>>>> ring. I'm not quite sure whether it's something we want.
>>>>>>> If it is true, I'll do it. So do you think we also want
>>>>>>> to maintain such a desc/ctx list for packed ring?
>>>>>> To make it work for OOO backends I think we need something like this
>>>>>> (hardware NIC drivers are usually have something like this).
>>>>> Which hardware NIC drivers have this?
>>>> It's quite common I think, e.g driver track e.g dma addr and page frag
>>>> somewhere. e.g the ring->rx_info in mlx4 driver.
>>> It seems that I had a misunderstanding on your
>>> previous comments. I know it's quite common for
>>> drivers to track e.g. DMA addrs somewhere (and
>>> I think one reason behind this is that they want
>>> to reuse the bits of addr field).
>> Yes, we may want this for virtio-net as well in the future.
>>
>>> But tracking
>>> addrs somewhere doesn't means supporting OOO.
>>> I thought you were saying it's quite common for
>>> hardware NIC drivers to support OOO (i.e. NICs
>>> will return the descriptors OOO):
>>>
>>> I'm not familiar with mlx4, maybe I'm wrong.
>>> I just had a quick glance. And I found below
>>> comments in mlx4_en_process_rx_cq():
>>>
>>> ```
>>> /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
>>> * descriptor offset can be deduced from the CQE index instead of
>>> * reading 'cqe->index' */
>>> index = cq->mcq.cons_index & ring->size_mask;
>>> cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
>>> ```
>>>
>>> It seems that although they have a completion
>>> queue, they are still using the ring in order.
>> I guess so (at least from the above bits). Git grep -i "out of order" in
>> drivers/net gives some hints. Looks like there're few deivces do this.
>>
>>> I guess maybe storage device may want OOO.
>> Right, some iSCSI did.
>>
>> But tracking them elsewhere is not only for OOO.
>>
>> Spec said:
>>
>> for element address
>>
>> "
>> In a used descriptor, Element Address is unused.
>> "
>>
>> for Next flag:
>>
>> "
>> For example, if descriptors are used in the same order in which they are
>> made available, this will result in
>> the used descriptor overwriting the first available descriptor in the list,
>> the used descriptor for the next list
>> overwriting the first available descriptor in the next list, etc.
>> "
>>
>> for in order completion:
>>
>> "
>> This will result in the used descriptor overwriting the first available
>> descriptor in the batch, the used descriptor
>> for the next batch overwriting the first available descriptor in the next
>> batch, etc.
>> "
>>
>> So:
>>
>> - It's an alignment to the spec
>> - device may (or should) overwrite the descriptor make also make address
>> field useless.
> You didn't get my point...
I don't hope so.
> I agreed driver should track the DMA addrs or some
> other necessary things from the very beginning. And
> I also repeated the spec to emphasize that it does
> make sense. And I'd like to do that.
>
> What I was saying is that, to support OOO, we may
> need to manage these context (which saves DMA addrs
> etc) via a list which is similar to the desc list
> maintained via `next` in split ring instead of an
> array whose elements always can be indexed directly.
My point is that this context is a must (not only for OOO).
>
> The desc ring in split ring is an array, but its
> free entries are managed as list via next. I was
> just wondering, do we want to manage such a list
> because of OOO. It's just a very simple question
> that I want to hear your opinion... (It doesn't
> means anything, e.g. It doesn't mean I don't want
> to support OOO. It's just a simple question...)
So the answer is yes. But I admit I don't have a better idea than what you
propose here (something like the split ring, which is a little bit sad).
Maybe Michael has one.
Thanks
>
> Best regards,
> Tiwei Bie
>
>> Thanks
>>
>>> Best regards,
>>> Tiwei Bie
>>>
>>>> Thanks
>>>>
>>>>>> Not for the patch, but it looks like having a OUT_OF_ORDER feature bit is
>>>>>> much more simpler to be started with.
>>>>> +1
>>>>>
>>>>> Best regards,
>>>>> Tiwei Bie
^ permalink raw reply
* Re: [PATCH net] tuntap: raise EPOLLOUT on device up
From: Jason Wang @ 2018-05-19 1:09 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: netdev, linux-kernel, Hannes Frederic Sowa, Eric Dumazet
In-Reply-To: <20180518172932-mutt-send-email-mst@kernel.org>
On 2018年05月18日 22:46, Michael S. Tsirkin wrote:
> On Fri, May 18, 2018 at 10:11:54PM +0800, Jason Wang wrote:
>>
>> On 2018年05月18日 22:06, Michael S. Tsirkin wrote:
>>> On Fri, May 18, 2018 at 10:00:31PM +0800, Jason Wang wrote:
>>>> On 2018年05月18日 21:26, Jason Wang wrote:
>>>>> On 2018年05月18日 21:13, Michael S. Tsirkin wrote:
>>>>>> On Fri, May 18, 2018 at 09:00:43PM +0800, Jason Wang wrote:
>>>>>>> We return -EIO on device down but cannot raise EPOLLOUT after it is
>>>>>>> up again. This may confuse users like vhost, which expects tuntap to raise
>>>>>>> EPOLLOUT to re-enable its TX routine after tuntap is down. This can
>>>>>>> be easily reproduced by transmitting packets from a VM while bringing
>>>>>>> the tap device down and up. Fix this by setting SOCKWQ_ASYNC_NOSPACE on -EIO.
>>>>>>>
>>>>>>> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
>>>>>>> Cc: Eric Dumazet <edumazet@google.com>
>>>>>>> Fixes: 1bd4978a88ac2 ("tun: honor IFF_UP in tun_get_user()")
>>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>>>> ---
>>>>>>> drivers/net/tun.c | 4 +++-
>>>>>>> 1 file changed, 3 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>>>>>> index d45ac37..1b29761 100644
>>>>>>> --- a/drivers/net/tun.c
>>>>>>> +++ b/drivers/net/tun.c
>>>>>>> @@ -1734,8 +1734,10 @@ static ssize_t tun_get_user(struct
>>>>>>> tun_struct *tun, struct tun_file *tfile,
>>>>>>> int skb_xdp = 1;
>>>>>>> bool frags = tun_napi_frags_enabled(tun);
>>>>>>> - if (!(tun->dev->flags & IFF_UP))
>>>>>>> + if (!(tun->dev->flags & IFF_UP)) {
>>>>>> Isn't this racy? What if flag is cleared at this point?
>>>>> I think you mean "set at this point"? Then yes, so we probably need to
>>>>> set the bit during tun_net_close().
>>>>>
>>>>> Thanks
>>>> Looks like there is no need; vhost will poll the socket after it sees EIO. So we are ok here?
>>>>
>>>> Thanks
>>> In fact I don't even understand why this helps any longer.
>>>
>> We disable tx polling and only enable it on demand for a better rx
>> performance. You may want to have a look at :
>>
>> commit feb8892cb441c742d4220cf7ced001e7fa070731
>> Author: Jason Wang <jasowang@redhat.com>
>> Date: Mon Nov 13 11:45:34 2017 +0800
>>
>> vhost_net: conditionally enable tx polling
>>
>> Thanks
>
> Question is, what looks at SOCKWQ_ASYNC_NOSPACE.
> I think it's tested when packet is transmitted,
> but there is no guarantee here any packet will
> ever be transmitted.
>
Well, actually, I do plan to disable vq polling from the beginning. But
looks like you do not want this:
See https://patchwork.kernel.org/patch/10034025/
Thanks
^ permalink raw reply
* Re: [PATCH 1/2] bpf: sockmap, double free in __sock_map_ctx_update_elem()
From: Gustavo A. R. Silva @ 2018-05-19 0:17 UTC (permalink / raw)
To: Dan Carpenter, Daniel Borkmann
Cc: Alexei Starovoitov, John Fastabend, netdev, kernel-janitors
In-Reply-To: <20180518143930.hopqsx3sbrbsxlfp@mwanda>
Hi Dan,
On 05/18/2018 09:39 AM, Dan Carpenter wrote:
> On Fri, May 18, 2018 at 10:27:18AM +0200, Daniel Borkmann wrote:
>>
>> Thanks for the two fixes, appreciate it! There were two similar ones that
>> fix the same issues which were already applied yesterday to bpf-next:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=0e4364560361d57e8cd873a8990327f3471d7d8a
>> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=a78622932c27e8ec33e5ba180f3d2e87fb806b28
>
> Hey Gustavo,
>
> We're sort of duplicating each other's work. Could you CC
> kernel-janitors@vger.kernel.org for static checker fixes so that I can
> see what you're working on?
>
Sure thing.
I've been doing this work for more than a year now and just recently we
are having these issues. I'm a bit curious about it.
> We'll probably still send the occasional duplicate which is fine...
>
Yep. Not a big deal for me.
Have a good one.
^ permalink raw reply
* [PATCH bpf-next 7/7] bpf: btf: Add tests for the btf uapi changes
From: Martin KaFai Lau @ 2018-05-19 0:16 UTC (permalink / raw)
To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>
This patch does the following:
1. Modify libbpf and test_btf to reflect the uapi changes in btf
2. Add test for the btf_header changes
3. Add tests for array->index_type
4. Add err_str check to the tests
5. Fix a 4-byte hole in "struct test #1" by swapping "m" and "n"
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
tools/lib/bpf/bpf.c | 4 +-
tools/lib/bpf/bpf.h | 4 +-
tools/lib/bpf/btf.c | 5 +-
tools/lib/bpf/libbpf.c | 34 +--
tools/lib/bpf/libbpf.h | 4 +-
tools/testing/selftests/bpf/test_btf.c | 528 ++++++++++++++++++++++++++-------
6 files changed, 448 insertions(+), 131 deletions(-)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 6a8a00097fd8..442b4cdfeb71 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -89,8 +89,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
min(name_len, BPF_OBJ_NAME_LEN - 1));
attr.numa_node = create_attr->numa_node;
attr.btf_fd = create_attr->btf_fd;
- attr.btf_key_id = create_attr->btf_key_id;
- attr.btf_value_id = create_attr->btf_value_id;
+ attr.btf_key_type_id = create_attr->btf_key_type_id;
+ attr.btf_value_type_id = create_attr->btf_value_type_id;
attr.map_ifindex = create_attr->map_ifindex;
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 15bff7728cf1..d12344f66d4e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -36,8 +36,8 @@ struct bpf_create_map_attr {
__u32 max_entries;
__u32 numa_node;
__u32 btf_fd;
- __u32 btf_key_id;
- __u32 btf_value_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
__u32 map_ifindex;
};
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2bac710e3194..8c54a4b6f187 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -35,9 +35,8 @@ struct btf {
static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
{
- if (!BTF_STR_TBL_ELF_ID(offset) &&
- BTF_STR_OFFSET(offset) < btf->hdr->str_len)
- return &btf->strings[BTF_STR_OFFSET(offset)];
+ if (offset < btf->hdr->str_len)
+ return &btf->strings[offset];
else
return NULL;
}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3dbe217bf23e..8f1707dbfcfa 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -216,8 +216,8 @@ struct bpf_map {
size_t offset;
int map_ifindex;
struct bpf_map_def def;
- uint32_t btf_key_id;
- uint32_t btf_value_id;
+ uint32_t btf_key_type_id;
+ uint32_t btf_value_type_id;
void *priv;
bpf_map_clear_priv_t clear_priv;
};
@@ -1074,8 +1074,8 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
return -EINVAL;
}
- map->btf_key_id = key_id;
- map->btf_value_id = value_id;
+ map->btf_key_type_id = key_id;
+ map->btf_value_type_id = value_id;
return 0;
}
@@ -1100,24 +1100,24 @@ bpf_object__create_maps(struct bpf_object *obj)
create_attr.value_size = def->value_size;
create_attr.max_entries = def->max_entries;
create_attr.btf_fd = 0;
- create_attr.btf_key_id = 0;
- create_attr.btf_value_id = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
create_attr.btf_fd = btf__fd(obj->btf);
- create_attr.btf_key_id = map->btf_key_id;
- create_attr.btf_value_id = map->btf_value_id;
+ create_attr.btf_key_type_id = map->btf_key_type_id;
+ create_attr.btf_value_type_id = map->btf_value_type_id;
}
*pfd = bpf_create_map_xattr(&create_attr);
- if (*pfd < 0 && create_attr.btf_key_id) {
+ if (*pfd < 0 && create_attr.btf_key_type_id) {
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, strerror(errno), errno);
create_attr.btf_fd = 0;
- create_attr.btf_key_id = 0;
- create_attr.btf_value_id = 0;
- map->btf_key_id = 0;
- map->btf_value_id = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
*pfd = bpf_create_map_xattr(&create_attr);
}
@@ -2085,14 +2085,14 @@ const char *bpf_map__name(struct bpf_map *map)
return map ? map->name : NULL;
}
-uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
+uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map)
{
- return map ? map->btf_key_id : 0;
+ return map ? map->btf_key_type_id : 0;
}
-uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
+uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map)
{
- return map ? map->btf_value_id : 0;
+ return map ? map->btf_value_type_id : 0;
}
int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index cd3fd8d782c7..09976531aa74 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
int bpf_map__fd(struct bpf_map *map);
const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
const char *bpf_map__name(struct bpf_map *map);
-uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
-uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map);
+uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
int bpf_map__set_priv(struct bpf_map *map, void *priv,
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index c8bceae7ec02..4635d5557639 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -113,22 +113,26 @@ static char btf_log_buf[BTF_LOG_BUF_SIZE];
static struct btf_header hdr_tmpl = {
.magic = BTF_MAGIC,
.version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
};
struct btf_raw_test {
const char *descr;
const char *str_sec;
const char *map_name;
+ const char *err_str;
+ int (*special_test)(unsigned int test_num);
__u32 raw_types[MAX_NR_RAW_TYPES];
__u32 str_sec_size;
enum bpf_map_type map_type;
__u32 key_size;
__u32 value_size;
- __u32 key_id;
- __u32 value_id;
+ __u32 key_type_id;
+ __u32 value_type_id;
__u32 max_entries;
bool btf_load_err;
bool map_create_err;
+ int hdr_len_delta;
int type_off_delta;
int str_off_delta;
int str_len_delta;
@@ -141,8 +145,8 @@ static struct btf_raw_test raw_tests[] = {
* };
*
* struct A {
- * int m;
- * unsigned long long n;
+ * unsigned long long m;
+ * int n;
* char o;
* [3 bytes hole]
* int p[8];
@@ -160,21 +164,24 @@ static struct btf_raw_test raw_tests[] = {
/* char */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
/* int[8] */
- BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ BTF_TYPE_ARRAY_ENC(1, 8, 8), /* [4] */
/* struct A { */ /* [5] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
- BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
- BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */
/* } */
/* int[4][8] */
- BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
+ BTF_TYPE_ARRAY_ENC(4, 8, 4), /* [6] */
+ /* enum E */ /* [7] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
BTF_ENUM_ENC(NAME_TBD, 0),
BTF_ENUM_ENC(NAME_TBD, 1),
+ /* unsigned int */ /* [8] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
@@ -183,8 +190,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "struct_test1_map",
.key_size = sizeof(int),
.value_size = 180,
- .key_id = 1,
- .value_id = 5,
+ .key_type_id = 1,
+ .value_type_id = 5,
.max_entries = 4,
},
@@ -207,7 +214,7 @@ static struct btf_raw_test raw_tests[] = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* struct b [4] */ /* [2] */
- BTF_TYPE_ARRAY_ENC(4, 1, 4),
+ BTF_TYPE_ARRAY_ENC(4, 9, 4),
/* struct A { */ /* [3] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
@@ -229,7 +236,9 @@ static struct btf_raw_test raw_tests[] = {
/* const Struct_B */ /* [7] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
/* const Struct_B [4] */ /* [8] */
- BTF_TYPE_ARRAY_ENC(7, 1, 4),
+ BTF_TYPE_ARRAY_ENC(7, 9, 4),
+ /* unsigned int */ /* [9] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
@@ -238,8 +247,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "struct_test2_map",
.key_size = sizeof(int),
.value_size = 68,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
},
@@ -258,7 +267,7 @@ static struct btf_raw_test raw_tests[] = {
/* struct A { */ /* [2] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
- BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
/* } */
BTF_END_RAW,
},
@@ -268,10 +277,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check1_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
},
/* Test member exeeds the size of struct
@@ -287,12 +297,14 @@ static struct btf_raw_test raw_tests[] = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
/* int[2] */ /* [2] */
- BTF_TYPE_ARRAY_ENC(1, 1, 2),
+ BTF_TYPE_ARRAY_ENC(1, 4, 2),
/* struct A { */ /* [3] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
/* } */
+ /* unsigned int */ /* [4] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0A\0m\0n",
@@ -301,11 +313,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check2_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
-
+ .err_str = "Member exceeds struct_size",
},
/* Test member exeeds the size of struct
@@ -335,10 +347,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check3_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
},
/* Test member exceeds the size of struct
@@ -376,10 +389,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check4_map",
.key_size = sizeof(int),
.value_size = 1,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
},
/* typedef const void * const_void_ptr;
@@ -411,8 +425,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test1_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 4,
+ .key_type_id = 1,
+ .value_type_id = 4,
.max_entries = 4,
},
@@ -440,10 +454,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test2_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Invalid member",
},
/* typedef const void * const_void_ptr;
@@ -458,10 +473,12 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
/* const void* */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
- /* typedef const void * const_void_ptr */
+ /* typedef const void * const_void_ptr */ /* [4] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
- /* const_void_ptr[4] */ /* [4] */
- BTF_TYPE_ARRAY_ENC(3, 1, 4),
+ /* const_void_ptr[4] */ /* [5] */
+ BTF_TYPE_ARRAY_ENC(3, 6, 4),
+ /* unsigned int */ /* [6] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0const_void_ptr",
@@ -470,8 +487,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test3_map",
.key_size = sizeof(int),
.value_size = sizeof(void *) * 4,
- .key_id = 1,
- .value_id = 4,
+ .key_type_id = 1,
+ .value_type_id = 4,
.max_entries = 4,
},
@@ -484,7 +501,9 @@ static struct btf_raw_test raw_tests[] = {
/* const void */ /* [2] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
/* const void[4] */ /* [3] */
- BTF_TYPE_ARRAY_ENC(2, 1, 4),
+ BTF_TYPE_ARRAY_ENC(2, 4, 4),
+ /* unsigned int */ /* [4] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0A\0m",
@@ -493,10 +512,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test4_map",
.key_size = sizeof(int),
.value_size = sizeof(void *) * 4,
- .key_id = 1,
- .value_id = 3,
+ .key_type_id = 1,
+ .value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Invalid elem",
},
/* Array_A <------------------+
@@ -512,9 +532,11 @@ static struct btf_raw_test raw_tests[] = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* Array_A */ /* [2] */
- BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ BTF_TYPE_ARRAY_ENC(3, 4, 8),
/* Array_B */ /* [3] */
- BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_TYPE_ARRAY_ENC(2, 4, 8),
+ /* unsigned int */ /* [4] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "",
@@ -523,10 +545,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test1_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* typedef is _before_ the BTF type of Array_A and Array_B
@@ -548,10 +571,11 @@ static struct btf_raw_test raw_tests[] = {
/* typedef Array_B int_array */
BTF_TYPEDEF_ENC(1, 4), /* [2] */
/* Array_A */
- BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 5, 8), /* [3] */
/* Array_B */
- BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
-
+ BTF_TYPE_ARRAY_ENC(3, 5, 8), /* [4] */
+ /* unsigned int */ /* [5] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0int_array\0",
@@ -560,10 +584,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test2_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* Array_A <------------------+
@@ -579,10 +604,11 @@ static struct btf_raw_test raw_tests[] = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* Array_A */ /* [2] */
- BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ BTF_TYPE_ARRAY_ENC(3, 4, 8),
/* Array_B */ /* [3] */
- BTF_TYPE_ARRAY_ENC(2, 1, 8),
-
+ BTF_TYPE_ARRAY_ENC(2, 4, 8),
+ /* unsigned int */ /* [4] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "",
@@ -591,10 +617,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test3_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* typedef is _between_ the BTF type of Array_A and Array_B
@@ -614,11 +641,13 @@ static struct btf_raw_test raw_tests[] = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* Array_A */ /* [2] */
- BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ BTF_TYPE_ARRAY_ENC(3, 5, 8),
/* typedef Array_B int_array */ /* [3] */
BTF_TYPEDEF_ENC(NAME_TBD, 4),
/* Array_B */ /* [4] */
- BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_TYPE_ARRAY_ENC(2, 5, 8),
+ /* unsigned int */ /* [5] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0int_array\0",
@@ -627,10 +656,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test4_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* typedef struct B Struct_B
@@ -668,10 +698,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test5_map",
.key_size = sizeof(int),
.value_size = 8,
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
/* struct A {
@@ -684,11 +715,13 @@ static struct btf_raw_test raw_tests[] = {
.raw_types = {
/* int */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 4, 4), /* [2] */
/* struct A */ /* [3] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */
+ /* unsigned int */ /* [4] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0A\0x\0y",
@@ -697,10 +730,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test6_map",
.key_size = sizeof(int),
.value_size = 8,
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
{
@@ -724,10 +758,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test7_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "Loop detected",
},
{
@@ -759,14 +794,73 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test8_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
- .key_id = 1,
- .value_id = 2,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+{
+ .descr = "string section does not end with null",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int") - 1,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty string section",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = 0,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty type section",
+ .raw_types = {
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
+ .err_str = "No type found",
},
{
- .descr = "type_off == str_off",
+ .descr = "btf_header test #1. Longer hdr_len",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -778,15 +872,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"),
+ .hdr_len_delta = 4,
+ .err_str = "Unsupported btf_header",
},
{
- .descr = "Unaligned type_off",
+ .descr = "btf_header test #2. Gap between hdr and type",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -798,15 +893,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .type_off_delta = 1,
+ .type_off_delta = 4,
+ .err_str = "Unsupported section found",
},
{
- .descr = "str_off beyonds btf size",
+ .descr = "btf_header test #3. Gap between type and str",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -818,15 +914,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_off_delta = sizeof("\0int") + 1,
+ .str_off_delta = 4,
+ .err_str = "Unsupported section found",
},
{
- .descr = "str_len beyonds btf size",
+ .descr = "btf_header test #4. Overlap between type and str",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -838,15 +935,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_len_delta = 1,
+ .str_off_delta = -4,
+ .err_str = "Section overlap found",
},
{
- .descr = "String section does not end with null",
+ .descr = "btf_header test #5. Larger BTF size",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -858,15 +956,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_len_delta = -1,
+ .str_len_delta = -4,
+ .err_str = "Unsupported section found",
},
{
- .descr = "Empty string section",
+ .descr = "btf_header test #6. Smaller BTF size",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@@ -878,11 +977,223 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
- .key_id = 1,
- .value_id = 1,
+ .key_type_id = 1,
+ .value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .str_len_delta = 0 - (int)sizeof("\0int"),
+ .str_len_delta = 4,
+ .err_str = "Total section length too long",
+},
+
+{
+ .descr = "array test #1. index_type \"unsigned int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* unsigned int */ /* [2] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(1, 2, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test #2. index_type \"const unsigned int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* unsigned int */ /* [2] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(1, 4, 16),
+ /* CONST type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test #3. index_type \"int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test #3. index_type \"const int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 3, 16),
+ /* CONST type_id=1 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test #4. index_type \"void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 0, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test #5. index_type \"const void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 3, 16),
+ /* CONST type_id=0 (void) */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test #6. elem_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 5, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* unsigned int */ /* [5] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test #7. index_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 3, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "invalid BTF_INFO",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info",
},
}; /* struct btf_raw_test raw_tests[] */
@@ -951,6 +1262,7 @@ static void *btf_raw_create(const struct btf_header *hdr,
memcpy(raw_btf + offset, str, str_sec_size);
ret_hdr = (struct btf_header *)raw_btf;
+ ret_hdr->type_len = type_sec_size;
ret_hdr->str_off = type_sec_size;
ret_hdr->str_len = str_sec_size;
@@ -981,6 +1293,7 @@ static int do_test_raw(unsigned int test_num)
hdr = raw_btf;
+ hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
hdr->type_off = (int)hdr->type_off + test->type_off_delta;
hdr->str_off = (int)hdr->str_off + test->str_off_delta;
hdr->str_len = (int)hdr->str_len + test->str_len_delta;
@@ -992,8 +1305,13 @@ static int do_test_raw(unsigned int test_num)
free(raw_btf);
err = ((btf_fd == -1) != test->btf_load_err);
- CHECK(err, "btf_fd:%d test->btf_load_err:%u",
- btf_fd, test->btf_load_err);
+ if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+ btf_fd, test->btf_load_err) ||
+ CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
+ "expected err_str:%s", test->err_str)) {
+ err = -1;
+ goto done;
+ }
if (err || btf_fd == -1)
goto done;
@@ -1004,8 +1322,8 @@ static int do_test_raw(unsigned int test_num)
create_attr.value_size = test->value_size;
create_attr.max_entries = test->max_entries;
create_attr.btf_fd = btf_fd;
- create_attr.btf_key_id = test->key_id;
- create_attr.btf_value_id = test->value_id;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
@@ -1267,8 +1585,8 @@ static int test_btf_id(unsigned int test_num)
create_attr.value_size = sizeof(unsigned int);
create_attr.max_entries = 4;
create_attr.btf_fd = btf_fd[0];
- create_attr.btf_key_id = 1;
- create_attr.btf_value_id = 2;
+ create_attr.btf_key_type_id = 1;
+ create_attr.btf_value_type_id = 2;
map_fd = bpf_create_map_xattr(&create_attr);
if (CHECK(map_fd == -1, "errno:%d", errno)) {
@@ -1279,10 +1597,10 @@ static int test_btf_id(unsigned int test_num)
info_len = sizeof(map_info);
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
if (CHECK(err || map_info.btf_id != info[0].id ||
- map_info.btf_key_id != 1 || map_info.btf_value_id != 2,
- "err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u",
- err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id,
- map_info.btf_value_id)) {
+ map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
+ "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
+ err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
+ map_info.btf_value_type_id)) {
err = -1;
goto done;
}
@@ -1542,10 +1860,10 @@ static int do_test_file(unsigned int test_num)
goto done;
}
- err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
+ err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
!= test->btf_kv_notfound;
- if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u",
- bpf_map__btf_key_id(map), bpf_map__btf_value_id(map),
+ if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
+ bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
test->btf_kv_notfound))
goto done;
@@ -1654,8 +1972,8 @@ static struct btf_raw_test pprint_test = {
.map_name = "pprint_test",
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
- .key_id = 3, /* unsigned int */
- .value_id = 16, /* struct pprint_mapv */
+ .key_type_id = 3, /* unsigned int */
+ .value_type_id = 16, /* struct pprint_mapv */
.max_entries = 128 * 1024,
};
@@ -1712,8 +2030,8 @@ static int test_pprint(void)
create_attr.value_size = test->value_size;
create_attr.max_entries = test->max_entries;
create_attr.btf_fd = btf_fd;
- create_attr.btf_key_id = test->key_id;
- create_attr.btf_value_id = test->value_id;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
if (CHECK(map_fd == -1, "errno:%d", errno)) {
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 3/7] bpf: btf: Check array->index_type
From: Martin KaFai Lau @ 2018-05-19 0:16 UTC (permalink / raw)
To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>
Instead of ignoring the array->index_type field, enforce that
it must be an unsigned BTF_KIND_INT.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
kernel/bpf/btf.c | 83 ++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 59 insertions(+), 24 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 536e5981ad8c..b4e48dae2240 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -444,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return btf->types[type_id];
}
+/*
+ * Regular int is not a bit field and it must be either
+ * u8/u16/u32/u64.
+ */
+static bool btf_type_int_is_regular(const struct btf_type *t)
+{
+ u16 nr_bits, nr_bytes;
+ u32 int_data;
+
+ int_data = btf_type_int(t);
+ nr_bits = BTF_INT_BITS(int_data);
+ nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
+ if (BITS_PER_BYTE_MASKED(nr_bits) ||
+ BTF_INT_OFFSET(int_data) ||
+ (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
+ nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
+ return false;
+ }
+
+ return true;
+}
+
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
const char *fmt, ...)
{
@@ -1309,14 +1331,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- /* We are a little forgiving on array->index_type since
- * the kernel is not using it.
- */
- /* Array elem cannot be in type void,
- * so !array->type is not allowed.
+ /* Array elem type and index type cannot be in type void,
+ * so !array->type and !array->index_type are not allowed.
*/
if (!array->type || BTF_TYPE_PARENT(array->type)) {
- btf_verifier_log_type(env, t, "Invalid type_id");
+ btf_verifier_log_type(env, t, "Invalid elem");
+ return -EINVAL;
+ }
+
+ if (!array->index_type || BTF_TYPE_PARENT(array->index_type)) {
+ btf_verifier_log_type(env, t, "Invalid index");
return -EINVAL;
}
@@ -1329,11 +1353,35 @@ static int btf_array_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
const struct btf_array *array = btf_type_array(v->t);
- const struct btf_type *elem_type;
- u32 elem_type_id = array->type;
+ const struct btf_type *elem_type, *index_type;
+ u32 elem_type_id, index_type_id;
struct btf *btf = env->btf;
u32 elem_size;
+ /* Check array->index_type */
+ index_type_id = array->index_type;
+ index_type = btf_type_by_id(btf, index_type_id);
+ if (btf_type_is_void_or_null(index_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid index");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, index_type) &&
+ !env_type_is_resolved(env, index_type_id))
+ return env_stack_push(env, index_type, index_type_id);
+
+ index_type = btf_type_id_size(btf, &index_type_id, NULL);
+ if (!index_type || !btf_type_is_int(index_type) ||
+ /* bit field int is not allowed */
+ !btf_type_int_is_regular(index_type) ||
+ /* unsigned only */
+ BTF_INT_ENCODING(btf_type_int(index_type))) {
+ btf_verifier_log_type(env, v->t, "Invalid index");
+ return -EINVAL;
+ }
+
+ /* Check array->type */
+ elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
if (btf_type_is_void_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
@@ -1351,22 +1399,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_is_int(elem_type)) {
- int int_type_data = btf_type_int(elem_type);
- u16 nr_bits = BTF_INT_BITS(int_type_data);
- u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
-
- /* Put more restriction on array of int. The int cannot
- * be a bit field and it must be either u8/u16/u32/u64.
- */
- if (BITS_PER_BYTE_MASKED(nr_bits) ||
- BTF_INT_OFFSET(int_type_data) ||
- (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
- nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
- btf_verifier_log_type(env, v->t,
- "Invalid array of int");
- return -EINVAL;
- }
+ if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid array of int");
+ return -EINVAL;
}
if (array->nelems && elem_size > U32_MAX / array->nelems) {
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 0/7] BTF uapi cleanup
From: Martin KaFai Lau @ 2018-05-19 0:16 UTC (permalink / raw)
To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
This patch set makes some changes to clean up the unused
bits in BTF uapi. It also makes the btf_header extensible.
Please see individual patches for details.
Martin KaFai Lau (7):
bpf: Expose check_uarg_tail_zero()
bpf: btf: Change how section is supported in btf_header
bpf: btf: Check array->index_type
bpf: btf: Remove unused bits from uapi/linux/btf.h
bpf: btf: Rename btf_key_id and btf_value_id in bpf_map_info
bpf: btf: Sync bpf.h and btf.h to tools/include/uapi/linux/
bpf: btf: Add tests for the btf uapi changes
include/linux/bpf.h | 6 +-
include/uapi/linux/bpf.h | 8 +-
include/uapi/linux/btf.h | 28 +-
kernel/bpf/arraymap.c | 2 +-
kernel/bpf/btf.c | 318 ++++++++++++++------
kernel/bpf/syscall.c | 32 +-
tools/include/uapi/linux/bpf.h | 8 +-
tools/include/uapi/linux/btf.h | 28 +-
tools/lib/bpf/bpf.c | 4 +-
tools/lib/bpf/bpf.h | 4 +-
tools/lib/bpf/btf.c | 5 +-
tools/lib/bpf/libbpf.c | 34 +--
tools/lib/bpf/libbpf.h | 4 +-
tools/testing/selftests/bpf/test_btf.c | 528 ++++++++++++++++++++++++++-------
14 files changed, 724 insertions(+), 285 deletions(-)
--
2.9.5
^ permalink raw reply
* [PATCH bpf-next 1/7] bpf: Expose check_uarg_tail_zero()
From: Martin KaFai Lau @ 2018-05-19 0:16 UTC (permalink / raw)
To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>
This patch exposes check_uarg_tail_zero() which will
be reused by a later BTF patch. Its name is changed to
bpf_check_uarg_tail_zero().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
include/linux/bpf.h | 2 ++
kernel/bpf/syscall.c | 14 +++++++-------
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ed0122b45b63..f6fe3c719ca8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_get_file_flag(int flags);
+int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
+ size_t actual_size);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bfcde949c7f8..2b29ef84ded3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -65,9 +65,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
* copy_from_user() call. However, this is not a concern since this function is
* meant to be a future-proofing of bits.
*/
-static int check_uarg_tail_zero(void __user *uaddr,
- size_t expected_size,
- size_t actual_size)
+int bpf_check_uarg_tail_zero(void __user *uaddr,
+ size_t expected_size,
+ size_t actual_size)
{
unsigned char __user *addr;
unsigned char __user *end;
@@ -1899,7 +1899,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
u32 ulen;
int err;
- err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@@ -1998,7 +1998,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
u32 info_len = attr->info.info_len;
int err;
- err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@@ -2038,7 +2038,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
u32 info_len = attr->info.info_len;
int err;
- err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
+ err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
if (err)
return err;
@@ -2110,7 +2110,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM;
- err = check_uarg_tail_zero(uattr, sizeof(attr), size);
+ err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;
size = min_t(u32, size, sizeof(attr));
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 2/7] bpf: btf: Change how section is supported in btf_header
From: Martin KaFai Lau @ 2018-05-19 0:16 UTC (permalink / raw)
To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>
There are currently unused section descriptions in the btf_header. Those
sections are here to support future BTF use cases. For example, the
func section (func_off) is to support function signature (e.g. the BPF
prog function signature).
Instead of spelling out all potential sections up-front in the btf_header,
this patch makes changes to btf_header such that extending it (e.g. adding
a section) is possible later. The unused ones can be removed for now and
they can be added back later.
This patch:
1. adds a hdr_len to the btf_header. It will allow adding
sections (and other info like parent_label and parent_name)
later. The check is similar to the existing bpf_attr.
If a user passes in a longer hdr_len, the kernel
ensures the extra trailing bytes are 0.
2. allows the section order in the BTF object to be
different from its sec_off order in btf_header.
3. each sec_off is followed by a sec_len. There must be no gaps or
overlaps among sections.
The string section is ensured to be at the end due to the 4 bytes
alignment requirement of the type section.
The above changes will allow enough flexibility to
add new sections (and other info) to the btf_header later.
This patch also removes an unnecessary !err check
at the end of btf_parse().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
include/uapi/linux/btf.h | 8 +-
kernel/bpf/btf.c | 207 +++++++++++++++++++++++++++++++++++------------
2 files changed, 158 insertions(+), 57 deletions(-)
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index bcb56ee47014..4fa479741a02 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -12,15 +12,11 @@ struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
-
- __u32 parent_label;
- __u32 parent_name;
+ __u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
- __u32 label_off; /* offset of label section */
- __u32 object_off; /* offset of data object section*/
- __u32 func_off; /* offset of function section */
__u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ded10ab47b8a..536e5981ad8c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -12,6 +12,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
+#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
@@ -184,15 +185,13 @@ static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
struct btf {
- union {
- struct btf_header *hdr;
- void *data;
- };
+ void *data;
struct btf_type **types;
u32 *resolved_ids;
u32 *resolved_sizes;
const char *strings;
void *nohdr_data;
+ struct btf_header hdr;
u32 nr_types;
u32 types_size;
u32 data_size;
@@ -227,6 +226,12 @@ enum resolve_mode {
};
#define MAX_RESOLVE_DEPTH 32
+#define NR_SECS 2
+
+struct btf_sec_info {
+ u32 off;
+ u32 len;
+};
struct btf_verifier_env {
struct btf *btf;
@@ -418,14 +423,14 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
return !BTF_STR_TBL_ELF_ID(offset) &&
- BTF_STR_OFFSET(offset) < btf->hdr->str_len;
+ BTF_STR_OFFSET(offset) < btf->hdr.str_len;
}
static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
if (!BTF_STR_OFFSET(offset))
return "(anon)";
- else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
+ else if (BTF_STR_OFFSET(offset) < btf->hdr.str_len)
return &btf->strings[BTF_STR_OFFSET(offset)];
else
return "(invalid-name-offset)";
@@ -536,7 +541,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n");
}
-static void btf_verifier_log_hdr(struct btf_verifier_env *env)
+static void btf_verifier_log_hdr(struct btf_verifier_env *env,
+ u32 btf_data_size)
{
struct bpf_verifier_log *log = &env->log;
const struct btf *btf = env->btf;
@@ -545,19 +551,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
if (!bpf_verifier_log_needed(log))
return;
- hdr = btf->hdr;
+ hdr = &btf->hdr;
__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
__btf_verifier_log(log, "version: %u\n", hdr->version);
__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
- __btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
- __btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
- __btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
- __btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
- __btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
+ __btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
+ __btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
- __btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
+ __btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}
static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
@@ -1754,9 +1757,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
struct btf_header *hdr;
void *cur, *end;
- hdr = btf->hdr;
+ hdr = &btf->hdr;
cur = btf->nohdr_data + hdr->type_off;
- end = btf->nohdr_data + hdr->str_off;
+ end = btf->nohdr_data + hdr->type_len;
env->log_type_id = 1;
while (cur < end) {
@@ -1866,8 +1869,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
static int btf_parse_type_sec(struct btf_verifier_env *env)
{
+ const struct btf_header *hdr = &env->btf->hdr;
int err;
+ /* Type section must align to 4 bytes */
+ if (hdr->type_off & (sizeof(u32) - 1)) {
+ btf_verifier_log(env, "Unaligned type_off");
+ return -EINVAL;
+ }
+
+ if (!hdr->type_len) {
+ btf_verifier_log(env, "No type found");
+ return -EINVAL;
+ }
+
err = btf_check_all_metas(env);
if (err)
return err;
@@ -1881,10 +1896,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
struct btf *btf = env->btf;
const char *start, *end;
- hdr = btf->hdr;
+ hdr = &btf->hdr;
start = btf->nohdr_data + hdr->str_off;
end = start + hdr->str_len;
+ if (end != btf->data + btf->data_size) {
+ btf_verifier_log(env, "String section is not at the end");
+ return -EINVAL;
+ }
+
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
start[0] || end[-1]) {
btf_verifier_log(env, "Invalid string section");
@@ -1896,20 +1916,119 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return 0;
}
-static int btf_parse_hdr(struct btf_verifier_env *env)
+static const size_t btf_sec_info_offset[] = {
+ offsetof(struct btf_header, type_off),
+ offsetof(struct btf_header, str_off),
+};
+
+static int btf_sec_info_cmp(const void *a, const void *b)
{
+ const struct btf_sec_info *x = a;
+ const struct btf_sec_info *y = b;
+
+ return (int)(x->off - y->off) ? : (int)(x->len - y->len);
+}
+
+static int btf_check_sec_info(struct btf_verifier_env *env,
+ u32 btf_data_size)
+{
+ struct btf_sec_info secs[NR_SECS];
+ u32 total, expected_total, i;
const struct btf_header *hdr;
- struct btf *btf = env->btf;
- u32 meta_left;
+ const struct btf *btf;
+
+ BUILD_BUG_ON(ARRAY_SIZE(btf_sec_info_offset) != NR_SECS);
+
+ btf = env->btf;
+ hdr = &btf->hdr;
+
+ /* Populate the secs from hdr */
+ for (i = 0; i < NR_SECS; i++)
+ secs[i] = *(struct btf_sec_info *)((void *)hdr +
+ btf_sec_info_offset[i]);
+
+ sort(secs, NR_SECS, sizeof(struct btf_sec_info),
+ btf_sec_info_cmp, NULL);
+
+ /* Check for gaps and overlap among sections */
+ total = 0;
+ expected_total = btf_data_size - hdr->hdr_len;
+ for (i = 0; i < NR_SECS; i++) {
+ if (expected_total < secs[i].off) {
+ btf_verifier_log(env, "Invalid section offset");
+ return -EINVAL;
+ }
+ if (total < secs[i].off) {
+ /* gap */
+ btf_verifier_log(env, "Unsupported section found");
+ return -EINVAL;
+ }
+ if (total > secs[i].off) {
+ btf_verifier_log(env, "Section overlap found");
+ return -EINVAL;
+ }
+ if (expected_total - total < secs[i].len) {
+ btf_verifier_log(env,
+ "Total section length too long");
+ return -EINVAL;
+ }
+ total += secs[i].len;
+ }
+
+ /* There is data other than hdr and known sections */
+ if (expected_total != total) {
+ btf_verifier_log(env, "Unsupported section found");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
+ u32 btf_data_size)
+{
+ const struct btf_header *hdr;
+ u32 hdr_len, hdr_copy;
+ struct btf_min_header {
+ u16 magic;
+ u8 version;
+ u8 flags;
+ u32 hdr_len;
+ } __user *min_hdr;
+ struct btf *btf;
+ int err;
+
+ btf = env->btf;
+ min_hdr = btf_data;
+
+ if (btf_data_size < sizeof(*min_hdr)) {
+ btf_verifier_log(env, "hdr_len not found");
+ return -EINVAL;
+ }
- if (btf->data_size < sizeof(*hdr)) {
+ if (get_user(hdr_len, &min_hdr->hdr_len))
+ return -EFAULT;
+
+ if (btf_data_size < hdr_len) {
btf_verifier_log(env, "btf_header not found");
return -EINVAL;
}
- btf_verifier_log_hdr(env);
+ err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
+ if (err) {
+ if (err == -E2BIG)
+ btf_verifier_log(env, "Unsupported btf_header");
+ return err;
+ }
+
+ hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
+ if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
+ return -EFAULT;
+
+ hdr = &btf->hdr;
+
+ btf_verifier_log_hdr(env, btf_data_size);
- hdr = btf->hdr;
if (hdr->magic != BTF_MAGIC) {
btf_verifier_log(env, "Invalid magic");
return -EINVAL;
@@ -1925,26 +2044,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -ENOTSUPP;
}
- meta_left = btf->data_size - sizeof(*hdr);
- if (!meta_left) {
+ if (btf_data_size == hdr->hdr_len) {
btf_verifier_log(env, "No data");
return -EINVAL;
}
- if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
- /* Type section must align to 4 bytes */
- hdr->type_off & (sizeof(u32) - 1)) {
- btf_verifier_log(env, "Invalid type_off");
- return -EINVAL;
- }
-
- if (meta_left < hdr->str_off ||
- meta_left - hdr->str_off < hdr->str_len) {
- btf_verifier_log(env, "Invalid str_off or str_len");
- return -EINVAL;
- }
-
- btf->nohdr_data = btf->hdr + 1;
+ err = btf_check_sec_info(env, btf_data_size);
+ if (err)
+ return err;
return 0;
}
@@ -1987,6 +2094,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
err = -ENOMEM;
goto errout;
}
+ env->btf = btf;
+
+ err = btf_parse_hdr(env, btf_data, btf_data_size);
+ if (err)
+ goto errout;
data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) {
@@ -1996,18 +2108,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
btf->data = data;
btf->data_size = btf_data_size;
+ btf->nohdr_data = btf->data + btf->hdr.hdr_len;
if (copy_from_user(data, btf_data, btf_data_size)) {
err = -EFAULT;
goto errout;
}
- env->btf = btf;
-
- err = btf_parse_hdr(env);
- if (err)
- goto errout;
-
err = btf_parse_str_sec(env);
if (err)
goto errout;
@@ -2016,16 +2123,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
if (err)
goto errout;
- if (!err && log->level && bpf_verifier_log_full(log)) {
+ if (log->level && bpf_verifier_log_full(log)) {
err = -ENOSPC;
goto errout;
}
- if (!err) {
- btf_verifier_env_free(env);
- refcount_set(&btf->refcnt, 1);
- return btf;
- }
+ btf_verifier_env_free(env);
+ refcount_set(&btf->refcnt, 1);
+ return btf;
errout:
btf_verifier_env_free(env);
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 5/7] bpf: btf: Rename btf_key_id and btf_value_id in bpf_map_info
From: Martin KaFai Lau @ 2018-05-19 0:16 UTC (permalink / raw)
To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20180519001650.4043980-1-kafai@fb.com>
In "struct bpf_map_info", the names "btf_id", "btf_key_id" and "btf_value_id"
could cause confusion because the "id" of "btf_id" means the BPF obj id
given to the BTF object while
"btf_key_id" and "btf_value_id" mean the BTF type id within
that BTF object.
To make it clear, btf_key_id and btf_value_id are
renamed to btf_key_type_id and btf_value_type_id.
Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
include/linux/bpf.h | 4 ++--
include/uapi/linux/bpf.h | 8 ++++----
kernel/bpf/arraymap.c | 2 +-
kernel/bpf/syscall.c | 18 +++++++++---------
4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f6fe3c719ca8..1795eeee846c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -69,8 +69,8 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
- u32 btf_key_id;
- u32 btf_value_id;
+ u32 btf_key_type_id;
+ u32 btf_value_type_id;
struct btf *btf;
bool unpriv_array;
/* 55 bytes hole */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d94d333a8225..123ebe4b3662 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -284,8 +284,8 @@ union bpf_attr {
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */
- __u32 btf_key_id; /* BTF type_id of the key */
- __u32 btf_value_id; /* BTF type_id of the value */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -2211,8 +2211,8 @@ struct bpf_map_info {
__u64 netns_dev;
__u64 netns_ino;
__u32 btf_id;
- __u32 btf_key_id;
- __u32 btf_value_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
} __attribute__((aligned(8)));
struct bpf_btf_info {
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 0fd8d8f1a398..544e58f5f642 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
}
seq_printf(m, "%u: ", *(u32 *)key);
- btf_type_seq_show(map->btf, map->btf_value_id, value, m);
+ btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
rcu_read_unlock();
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2b29ef84ded3..0b4c94551001 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -422,7 +422,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
+#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -457,10 +457,10 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->usercnt, 1);
if (bpf_map_support_seq_show(map) &&
- (attr->btf_key_id || attr->btf_value_id)) {
+ (attr->btf_key_type_id || attr->btf_value_type_id)) {
struct btf *btf;
- if (!attr->btf_key_id || !attr->btf_value_id) {
+ if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
@@ -471,16 +471,16 @@ static int map_create(union bpf_attr *attr)
goto free_map_nouncharge;
}
- err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
- attr->btf_value_id);
+ err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
+ attr->btf_value_type_id);
if (err) {
btf_put(btf);
goto free_map_nouncharge;
}
map->btf = btf;
- map->btf_key_id = attr->btf_key_id;
- map->btf_value_id = attr->btf_value_id;
+ map->btf_key_type_id = attr->btf_key_type_id;
+ map->btf_value_type_id = attr->btf_value_type_id;
}
err = security_bpf_map_alloc(map);
@@ -2013,8 +2013,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
if (map->btf) {
info.btf_id = btf_id(map->btf);
- info.btf_key_id = map->btf_key_id;
- info.btf_value_id = map->btf_value_id;
+ info.btf_key_type_id = map->btf_key_type_id;
+ info.btf_value_type_id = map->btf_value_type_id;
}
if (bpf_map_is_dev_bound(map)) {
--
2.9.5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox