From: Matthew Cover <werekraken@gmail.com>
To: davem@davemloft.net, ast@kernel.org, daniel@iogearbox.net,
kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
nikolay@cumulusnetworks.com, sd@queasysnail.net,
sbrivio@redhat.com, vincent@bernat.ch, kda@linux-powerpc.org,
matthew.cover@stackpath.com, jiri@mellanox.com,
edumazet@google.com, pabeni@redhat.com, idosch@mellanox.com,
petrm@mellanox.com, f.fainelli@gmail.com,
stephen@networkplumber.org, dsahern@gmail.com,
christian@brauner.io, jakub.kicinski@netronome.com,
roopa@cumulusnetworks.com, johannes.berg@intel.com,
mkubecek@suse.cz, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, bpf@vger.kernel.org
Subject: [RFC iproute2-next 2/2] Introduce an eBPF hookpoint for tx queue selection in the XPS (Transmit Packet Steering) code.
Date: Thu, 19 Sep 2019 15:46:05 -0700 [thread overview]
Message-ID: <20190919224605.91550-1-matthew.cover@stackpath.com> (raw)
WORK IN PROGRESS:
* bpf program loading works!
* txq steering via bpf program return code works!
* bpf program unloading not working.
* bpf program attached query not working.
---
include/bpf_api.h | 5 +++
include/uapi/linux/if_link.h | 12 ++++++
ip/Makefile | 2 +-
ip/ip_common.h | 4 ++
ip/iplink.c | 7 ++++
ip/iplink_xps.c | 88 ++++++++++++++++++++++++++++++++++++++++++++
lib/bpf.c | 6 +++
7 files changed, 123 insertions(+), 1 deletion(-)
create mode 100644 ip/iplink_xps.c
diff --git a/include/bpf_api.h b/include/bpf_api.h
index 89d3488..d1a2d90 100644
--- a/include/bpf_api.h
+++ b/include/bpf_api.h
@@ -78,6 +78,11 @@
__section(ELF_SECTION_PROG)
#endif
+#ifndef __section_xps_entry
+# define __section_xps_entry \
+ __section(ELF_SECTION_PROG)
+#endif
+
#ifndef __section_cls_entry
# define __section_cls_entry \
__section(ELF_SECTION_CLASSIFIER)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d36919f..9efd686 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -167,6 +167,7 @@ enum {
IFLA_NEW_IFINDEX,
IFLA_MIN_MTU,
IFLA_MAX_MTU,
+ IFLA_XPS,
__IFLA_MAX
};
@@ -977,6 +978,17 @@ enum {
#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
+/* XPS section */
+
+enum {
+ IFLA_XPS_UNSPEC,
+ IFLA_XPS_FD,
+ IFLA_XPS_ATTACHED,
+ __IFLA_XPS_MAX,
+};
+
+#define IFLA_XPS_MAX (__IFLA_XPS_MAX - 1)
+
enum {
IFLA_EVENT_NONE,
IFLA_EVENT_REBOOT, /* internal reset / reboot */
diff --git a/ip/Makefile b/ip/Makefile
index 5ab78d7..9ad1c53 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -5,7 +5,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o iplink_dummy.o \
iplink_ifb.o iplink_nlmon.o iplink_team.o iplink_vcan.o iplink_vxcan.o \
iplink_vlan.o link_veth.o link_gre.o iplink_can.o iplink_xdp.o \
- iplink_macvlan.o ipl2tp.o link_vti.o link_vti6.o link_xfrm.o \
+ iplink_macvlan.o ipl2tp.o link_vti.o link_vti6.o link_xfrm.o iplink_xps.o \
iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
diff --git a/ip/ip_common.h b/ip/ip_common.h
index cd916ec..805d7d2 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -145,6 +145,10 @@ int xdp_parse(int *argc, char ***argv, struct iplink_req *req, const char *ifnam
bool generic, bool drv, bool offload);
void xdp_dump(FILE *fp, struct rtattr *tb, bool link, bool details);
+/* iplink_xps.c */
+int xps_parse(int *argc, char ***argv, struct iplink_req *req);
+void xps_dump(FILE *fp, struct rtattr *tb);
+
/* iplink_vrf.c */
__u32 ipvrf_get_table(const char *name);
int name_is_vrf(const char *name);
diff --git a/ip/iplink.c b/ip/iplink.c
index 212a088..4d6d557 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -101,6 +101,9 @@ void iplink_usage(void)
" [ { xdp | xdpgeneric | xdpdrv | xdpoffload } { off |\n"
" object FILE [ section NAME ] [ verbose ] |\n"
" pinned FILE } ]\n"
+ " [ xps { off |\n"
+ " object FILE [ section NAME ] [ verbose ] |\n"
+ " pinned FILE } ]\n"
" [ master DEVICE ][ vrf NAME ]\n"
" [ nomaster ]\n"
" [ addrgenmode { eui64 | none | stable_secret | random } ]\n"
@@ -668,6 +671,10 @@ int iplink_parse(int argc, char **argv, struct iplink_req *req, char **type)
if (offload && name == dev)
dev = NULL;
+ } else if (strcmp(*argv, "xps") == 0) {
+ NEXT_ARG();
+ if (xps_parse(&argc, &argv, req))
+ exit(-1);
} else if (strcmp(*argv, "netns") == 0) {
NEXT_ARG();
if (netns != -1)
diff --git a/ip/iplink_xps.c b/ip/iplink_xps.c
new file mode 100644
index 0000000..7e94164
--- /dev/null
+++ b/ip/iplink_xps.c
@@ -0,0 +1,88 @@
+/*
+ * iplink_xps.c XPS program loader
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Matthew Cover <matthew.cover@stackpath.com>
+ *
+ * Based on iplink_xdp.c by Daniel Borkmann <daniel@iogearbox.net>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <linux/bpf.h>
+
+#include "bpf_util.h"
+#include "ip_common.h"
+
+extern int force;
+
+struct xps_req {
+ struct iplink_req *req;
+ __u32 flags;
+};
+
+static void xps_ebpf_cb(void *raw, int fd, const char *annotation)
+{
+ struct xps_req *xps = raw;
+ struct iplink_req *req = xps->req;
+ struct rtattr *xps_attr;
+
+ xps_attr = addattr_nest(&req->n, sizeof(*req), IFLA_XPS);
+ addattr32(&req->n, sizeof(*req), IFLA_XPS_FD, fd);
+ addattr_nest_end(&req->n, xps_attr);
+}
+
+static const struct bpf_cfg_ops bpf_cb_ops = {
+ .ebpf_cb = xps_ebpf_cb,
+};
+
+static int xps_delete(struct iplink_req *req)
+{
+ xps_ebpf_cb(req, -1, NULL);
+ return 0;
+}
+
+int xps_parse(int *argc, char ***argv, struct iplink_req *req)
+{
+
+ struct bpf_cfg_in cfg = {
+ .type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .argc = *argc,
+ .argv = *argv,
+ };
+
+ struct xps_req xps = {
+ .req = req,
+ };
+
+ if (*argc == 1) {
+ if (strcmp(**argv, "none") == 0 ||
+ strcmp(**argv, "off") == 0)
+ return xps_delete(req);
+ }
+
+ if (bpf_parse_and_load_common(&cfg, &bpf_cb_ops, &xps))
+ return -1;
+
+ *argc = cfg.argc;
+ *argv = cfg.argv;
+ return 0;
+}
+
+void xps_dump(FILE *fp, struct rtattr *xps)
+{
+ struct rtattr *tb[IFLA_XPS_MAX + 1];
+
+ parse_rtattr_nested(tb, IFLA_XPS_MAX, xps);
+ if (!tb[IFLA_XPS_ATTACHED] ||
+ !rta_getattr_u8(tb[IFLA_XPS_ATTACHED]))
+ return;
+
+ fprintf(fp, "xps ");
+ /* More to come here in future for 'ip -d link' (digest, etc) ... */
+}
diff --git a/lib/bpf.c b/lib/bpf.c
index 7d2a322..e883afb 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -60,6 +60,7 @@ static const enum bpf_prog_type __bpf_types[] = {
BPF_PROG_TYPE_LWT_IN,
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCKET_FILTER,
};
static const struct bpf_prog_meta __bpf_prog_meta[] = {
@@ -100,6 +101,11 @@ static const struct bpf_prog_meta __bpf_prog_meta[] = {
.subdir = "ip",
.section = ELF_SECTION_PROG,
},
+ [BPF_PROG_TYPE_SOCKET_FILTER] = {
+ .type = "xps",
+ .subdir = "xps",
+ .section = ELF_SECTION_PROG,
+ },
};
static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
--
1.8.3.1
reply other threads:[~2019-09-19 22:46 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190919224605.91550-1-matthew.cover@stackpath.com \
--to=werekraken@gmail.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=christian@brauner.io \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=edumazet@google.com \
--cc=f.fainelli@gmail.com \
--cc=idosch@mellanox.com \
--cc=jakub.kicinski@netronome.com \
--cc=jiri@mellanox.com \
--cc=johannes.berg@intel.com \
--cc=kafai@fb.com \
--cc=kda@linux-powerpc.org \
--cc=linux-kernel@vger.kernel.org \
--cc=matthew.cover@stackpath.com \
--cc=mkubecek@suse.cz \
--cc=netdev@vger.kernel.org \
--cc=nikolay@cumulusnetworks.com \
--cc=pabeni@redhat.com \
--cc=petrm@mellanox.com \
--cc=roopa@cumulusnetworks.com \
--cc=sbrivio@redhat.com \
--cc=sd@queasysnail.net \
--cc=songliubraving@fb.com \
--cc=stephen@networkplumber.org \
--cc=vincent@bernat.ch \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).