* [iproute2 net-next 1/8] lib bpf: Add support for BPF_PROG_ATTACH and BPF_PROG_DETACH
2016-12-10 17:47 [iproute2 net-next 0/8] Add support for vrf helper David Ahern
@ 2016-12-10 17:47 ` David Ahern
2016-12-10 17:47 ` [iproute2 net-next 2/8] bpf: export bpf_prog_load David Ahern
` (5 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-12-10 17:47 UTC (permalink / raw)
To: netdev, stephen; +Cc: David Ahern
For consistency with other bpf commands, the functions are named
bpf_prog_attach and bpf_prog_detach. The existing bpf_prog_attach is
renamed to bpf_prog_load_and_report since it calls bpf_prog_load and
bpf_prog_report.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/bpf_util.h | 3 +++
lib/bpf.c | 31 ++++++++++++++++++++++++++-----
2 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/include/bpf_util.h b/include/bpf_util.h
index 05baeecda57f..49b96bbc208f 100644
--- a/include/bpf_util.h
+++ b/include/bpf_util.h
@@ -75,6 +75,9 @@ int bpf_trace_pipe(void);
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type);
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type);
+
#ifdef HAVE_ELF
int bpf_send_map_fds(const char *path, const char *obj);
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
diff --git a/lib/bpf.c b/lib/bpf.c
index 2a8cd51d4dae..103fc1ef0593 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -850,6 +850,27 @@ int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
return ret;
}
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_bpf_fd = prog_fd,
+ .attach_type = type,
+ };
+
+ return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_type = type,
+ };
+
+ return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
#ifdef HAVE_ELF
struct bpf_elf_prog {
enum bpf_prog_type type;
@@ -1262,9 +1283,9 @@ static void bpf_prog_report(int fd, const char *section,
bpf_dump_error(ctx, "Verifier analysis:\n\n");
}
-static int bpf_prog_attach(const char *section,
- const struct bpf_elf_prog *prog,
- struct bpf_elf_ctx *ctx)
+static int bpf_prog_load_and_report(const char *section,
+ const struct bpf_elf_prog *prog,
+ struct bpf_elf_ctx *ctx)
{
int tries = 0, fd;
retry:
@@ -1656,7 +1677,7 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
prog.size = data.sec_data->d_size;
prog.license = ctx->license;
- fd = bpf_prog_attach(section, &prog, ctx);
+ fd = bpf_prog_load_and_report(section, &prog, ctx);
if (fd < 0)
return fd;
@@ -1755,7 +1776,7 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
prog.size = data_insn.sec_data->d_size;
prog.license = ctx->license;
- fd = bpf_prog_attach(section, &prog, ctx);
+ fd = bpf_prog_load_and_report(section, &prog, ctx);
if (fd < 0) {
*lderr = true;
return fd;
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [iproute2 net-next 2/8] bpf: export bpf_prog_load
2016-12-10 17:47 [iproute2 net-next 0/8] Add support for vrf helper David Ahern
2016-12-10 17:47 ` [iproute2 net-next 1/8] lib bpf: Add support for BPF_PROG_ATTACH and BPF_PROG_DETACH David Ahern
@ 2016-12-10 17:47 ` David Ahern
2016-12-10 17:47 ` [iproute2 net-next 4/8] move cmd_exec to lib utils David Ahern
` (4 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-12-10 17:47 UTC (permalink / raw)
To: netdev, stephen; +Cc: David Ahern
Code move only; no functional change intended.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/bpf_util.h | 3 +++
lib/bpf.c | 40 ++++++++++++++++++++--------------------
2 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/include/bpf_util.h b/include/bpf_util.h
index 49b96bbc208f..dcbdca6978d6 100644
--- a/include/bpf_util.h
+++ b/include/bpf_util.h
@@ -75,6 +75,9 @@ int bpf_trace_pipe(void);
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t size_insns, const char *license, char *log,
+ size_t size_log);
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type);
int bpf_prog_detach(int target_fd, enum bpf_attach_type type);
diff --git a/lib/bpf.c b/lib/bpf.c
index 103fc1ef0593..b04c3a678b9c 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -871,6 +871,26 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
+int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t size_insns, const char *license, char *log,
+ size_t size_log)
+{
+ union bpf_attr attr = {};
+
+ attr.prog_type = type;
+ attr.insns = bpf_ptr_to_u64(insns);
+ attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
+ attr.license = bpf_ptr_to_u64(license);
+
+ if (size_log > 0) {
+ attr.log_buf = bpf_ptr_to_u64(log);
+ attr.log_size = size_log;
+ attr.log_level = 1;
+ }
+
+ return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
#ifdef HAVE_ELF
struct bpf_elf_prog {
enum bpf_prog_type type;
@@ -988,26 +1008,6 @@ static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
-static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t size_insns, const char *license, char *log,
- size_t size_log)
-{
- union bpf_attr attr = {};
-
- attr.prog_type = type;
- attr.insns = bpf_ptr_to_u64(insns);
- attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
- attr.license = bpf_ptr_to_u64(license);
-
- if (size_log > 0) {
- attr.log_buf = bpf_ptr_to_u64(log);
- attr.log_size = size_log;
- attr.log_level = 1;
- }
-
- return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-
static int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr = {};
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [iproute2 net-next 4/8] move cmd_exec to lib utils
2016-12-10 17:47 [iproute2 net-next 0/8] Add support for vrf helper David Ahern
2016-12-10 17:47 ` [iproute2 net-next 1/8] lib bpf: Add support for BPF_PROG_ATTACH and BPF_PROG_DETACH David Ahern
2016-12-10 17:47 ` [iproute2 net-next 2/8] bpf: export bpf_prog_load David Ahern
@ 2016-12-10 17:47 ` David Ahern
2016-12-10 17:47 ` [iproute2 net-next 5/8] Add filesystem APIs to lib David Ahern
` (3 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-12-10 17:47 UTC (permalink / raw)
To: netdev, stephen; +Cc: David Ahern
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/utils.h | 2 ++
ip/ipnetns.c | 34 ----------------------------------
lib/Makefile | 2 +-
lib/exec.c | 41 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 44 insertions(+), 35 deletions(-)
create mode 100644 lib/exec.c
diff --git a/include/utils.h b/include/utils.h
index 26c970daa5d0..ac4517a3bde1 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -256,4 +256,6 @@ char *int_to_str(int val, char *buf);
int get_guid(__u64 *guid, const char *arg);
int get_real_family(int rtm_type, int rtm_family);
+int cmd_exec(const char *cmd, char **argv, bool do_fork);
+
#endif /* __UTILS_H__ */
diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index bd1e9013706c..db9a541769f1 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -357,40 +357,6 @@ static int netns_list(int argc, char **argv)
return 0;
}
-static int cmd_exec(const char *cmd, char **argv, bool do_fork)
-{
- fflush(stdout);
- if (do_fork) {
- int status;
- pid_t pid;
-
- pid = fork();
- if (pid < 0) {
- perror("fork");
- exit(1);
- }
-
- if (pid != 0) {
- /* Parent */
- if (waitpid(pid, &status, 0) < 0) {
- perror("waitpid");
- exit(1);
- }
-
- if (WIFEXITED(status)) {
- return WEXITSTATUS(status);
- }
-
- exit(1);
- }
- }
-
- if (execvp(cmd, argv) < 0)
- fprintf(stderr, "exec of \"%s\" failed: %s\n",
- cmd, strerror(errno));
- _exit(1);
-}
-
static int on_netns_exec(char *nsname, void *arg)
{
char **argv = arg;
diff --git a/lib/Makefile b/lib/Makefile
index 5b7ec169048a..749073261c49 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -fPIC
UTILOBJ = utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o \
inet_proto.o namespace.o json_writer.o \
- names.o color.o bpf.o
+ names.o color.o bpf.o exec.o
NLOBJ=libgenl.o ll_map.o libnetlink.o
diff --git a/lib/exec.c b/lib/exec.c
new file mode 100644
index 000000000000..96edbc422e84
--- /dev/null
+++ b/lib/exec.c
@@ -0,0 +1,41 @@
+#define _ATFILE_SOURCE
+#include <sys/wait.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+int cmd_exec(const char *cmd, char **argv, bool do_fork)
+{
+ fflush(stdout);
+ if (do_fork) {
+ int status;
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (pid != 0) {
+ /* Parent */
+ if (waitpid(pid, &status, 0) < 0) {
+ perror("waitpid");
+ exit(1);
+ }
+
+ if (WIFEXITED(status)) {
+ return WEXITSTATUS(status);
+ }
+
+ exit(1);
+ }
+ }
+
+ if (execvp(cmd, argv) < 0)
+ fprintf(stderr, "exec of \"%s\" failed: %s\n",
+ cmd, strerror(errno));
+ _exit(1);
+}
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [iproute2 net-next 5/8] Add filesystem APIs to lib
2016-12-10 17:47 [iproute2 net-next 0/8] Add support for vrf helper David Ahern
` (2 preceding siblings ...)
2016-12-10 17:47 ` [iproute2 net-next 4/8] move cmd_exec to lib utils David Ahern
@ 2016-12-10 17:47 ` David Ahern
2016-12-10 17:47 ` [iproute2 net-next 6/8] change name_is_vrf to return index David Ahern
` (2 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-12-10 17:47 UTC (permalink / raw)
To: netdev, stephen; +Cc: David Ahern
Add make_path to recursively call mkdir as needed to create a given
path with the given mode.
Add find_cgroup2_mount to lookup path where cgroup2 is mounted. If it
is not already mounted, cgroup2 is mounted under /var/run/cgroup2 for
use by iproute2.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/utils.h | 2 +
lib/Makefile | 2 +-
lib/fs.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 146 insertions(+), 1 deletion(-)
create mode 100644 lib/fs.c
diff --git a/include/utils.h b/include/utils.h
index ac4517a3bde1..dc1d6b9607dd 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -257,5 +257,7 @@ int get_guid(__u64 *guid, const char *arg);
int get_real_family(int rtm_type, int rtm_family);
int cmd_exec(const char *cmd, char **argv, bool do_fork);
+int make_path(const char *path, mode_t mode);
+char *find_cgroup2_mount(void);
#endif /* __UTILS_H__ */
diff --git a/lib/Makefile b/lib/Makefile
index 749073261c49..0c57662b4f8f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -fPIC
UTILOBJ = utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o \
inet_proto.o namespace.o json_writer.o \
- names.o color.o bpf.o exec.o
+ names.o color.o bpf.o exec.o fs.o
NLOBJ=libgenl.o ll_map.o libnetlink.o
diff --git a/lib/fs.c b/lib/fs.c
new file mode 100644
index 000000000000..39cc96dccca9
--- /dev/null
+++ b/lib/fs.c
@@ -0,0 +1,143 @@
+/*
+ * fs.c filesystem APIs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: David Ahern <dsa@cumulusnetworks.com>
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "utils.h"
+
+#define CGROUP2_FS_NAME "cgroup2"
+
+/* if not already mounted cgroup2 is mounted here for iproute2's use */
+#define MNT_CGRP2_PATH "/var/run/cgroup2"
+
+/* return mount path of first occurrence of given fstype */
+static char *find_fs_mount(const char *fs_to_find)
+{
+ char path[4096];
+ char fstype[128]; /* max length of any filesystem name */
+ char *mnt = NULL;
+ FILE *fp;
+
+ fp = fopen("/proc/mounts", "r");
+ if (!fp) {
+ fprintf(stderr,
+ "Failed to open mounts file: %s\n", strerror(errno));
+ return NULL;
+ }
+
+ while (fscanf(fp, "%*s %4096s %127s %*s %*d %*d\n",
+ path, fstype) == 2) {
+ if (strcmp(fstype, fs_to_find) == 0) {
+ mnt = strdup(path);
+ break;
+ }
+ }
+
+ fclose(fp);
+
+ return mnt;
+}
+
+/* caller needs to free string returned */
+char *find_cgroup2_mount(void)
+{
+ char *mnt = find_fs_mount(CGROUP2_FS_NAME);
+
+ if (mnt)
+ return mnt;
+
+ mnt = strdup(MNT_CGRP2_PATH);
+ if (!mnt) {
+ fprintf(stderr, "Failed to allocate memory for cgroup2 path\n");
+ return NULL;
+
+ }
+
+ if (make_path(mnt, 0755)) {
+ fprintf(stderr, "Failed to setup vrf cgroup2 directory\n");
+ free(mnt);
+ return NULL;
+ }
+
+ if (mount("none", mnt, CGROUP2_FS_NAME, 0, NULL)) {
+ /* EBUSY means already mounted */
+ if (errno != EBUSY) {
+ fprintf(stderr,
+ "Failed to mount cgroup2. Are CGROUPS enabled in your kernel?\n");
+ free(mnt);
+ return NULL;
+ }
+ }
+ return mnt;
+}
+
+int make_path(const char *path, mode_t mode)
+{
+ char *dir, *delim;
+ struct stat sbuf;
+ int rc = -1;
+
+ delim = dir = strdup(path);
+ if (dir == NULL) {
+ fprintf(stderr, "strdup failed copying path");
+ return -1;
+ }
+
+ /* skip '/' -- it had better exist */
+ if (*delim == '/')
+ delim++;
+
+ while (1) {
+ delim = strchr(delim, '/');
+ if (delim)
+ *delim = '\0';
+
+ if (stat(dir, &sbuf) != 0) {
+ if (errno != ENOENT) {
+ fprintf(stderr,
+ "stat failed for %s: %s\n",
+ dir, strerror(errno));
+ goto out;
+ }
+
+ if (mkdir(dir, mode) != 0) {
+ fprintf(stderr,
+ "mkdir failed for %s: %s",
+ dir, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (delim == NULL)
+ break;
+
+ *delim = '/';
+ delim++;
+ if (*delim == '\0')
+ break;
+ }
+ rc = 0;
+out:
+ free(dir);
+
+ return rc;
+}
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [iproute2 net-next 6/8] change name_is_vrf to return index
2016-12-10 17:47 [iproute2 net-next 0/8] Add support for vrf helper David Ahern
` (3 preceding siblings ...)
2016-12-10 17:47 ` [iproute2 net-next 5/8] Add filesystem APIs to lib David Ahern
@ 2016-12-10 17:47 ` David Ahern
2016-12-10 17:47 ` [iproute2 net-next 7/8] libnetlink: Add variant of rtnl_talk that does not display RTNETLINK answers error David Ahern
2016-12-10 17:47 ` [iproute2 net-next 8/8] Introduce ip vrf command David Ahern
6 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-12-10 17:47 UTC (permalink / raw)
To: netdev, stephen; +Cc: David Ahern
index of 0 means name is not a valid vrf.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
ip/ip_common.h | 2 +-
ip/iplink_vrf.c | 15 +++++++++------
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 0147f45a7a31..3162f1ca5b2c 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -91,7 +91,7 @@ struct link_util *get_link_kind(const char *kind);
void br_dump_bridge_id(const struct ifla_bridge_id *id, char *buf, size_t len);
__u32 ipvrf_get_table(const char *name);
-bool name_is_vrf(const char *name);
+int name_is_vrf(const char *name);
#ifndef INFINITY_LIFE_TIME
#define INFINITY_LIFE_TIME 0xFFFFFFFFU
diff --git a/ip/iplink_vrf.c b/ip/iplink_vrf.c
index a238b2906805..c101ed770f87 100644
--- a/ip/iplink_vrf.c
+++ b/ip/iplink_vrf.c
@@ -159,7 +159,7 @@ __u32 ipvrf_get_table(const char *name)
return tb_id;
}
-bool name_is_vrf(const char *name)
+int name_is_vrf(const char *name)
{
struct {
struct nlmsghdr n;
@@ -187,24 +187,27 @@ bool name_is_vrf(const char *name)
addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, strlen(name) + 1);
if (rtnl_talk(&rth, &req.n, &answer.n, sizeof(answer)) < 0)
- return false;
+ return 0;
ifi = NLMSG_DATA(&answer.n);
len = answer.n.nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
if (len < 0) {
fprintf(stderr, "BUG: Invalid response to link query.\n");
- return false;
+ return 0;
}
parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
if (!tb[IFLA_LINKINFO])
- return false;
+ return 0;
parse_rtattr_nested(li, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);
if (!li[IFLA_INFO_KIND])
- return false;
+ return 0;
+
+ if (strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf"))
+ return 0;
- return strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf") == 0;
+ return ifi->ifi_index;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [iproute2 net-next 7/8] libnetlink: Add variant of rtnl_talk that does not display RTNETLINK answers error
2016-12-10 17:47 [iproute2 net-next 0/8] Add support for vrf helper David Ahern
` (4 preceding siblings ...)
2016-12-10 17:47 ` [iproute2 net-next 6/8] change name_is_vrf to return index David Ahern
@ 2016-12-10 17:47 ` David Ahern
2016-12-10 17:47 ` [iproute2 net-next 8/8] Introduce ip vrf command David Ahern
6 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-12-10 17:47 UTC (permalink / raw)
To: netdev, stephen; +Cc: David Ahern
iplink_vrf has 2 functions used to validate a user given device name is
a VRF device and to return the table id. If the user string is not a
device name ip commands with a vrf keyword show a confusing error
message: "RTNETLINK answers: No such device".
Add a variant of rtnl_talk that does not display the "RTNETLINK answers"
message and update iplink_vrf to use it.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/libnetlink.h | 3 +++
ip/iplink_vrf.c | 14 +++++++++++---
lib/libnetlink.c | 20 +++++++++++++++++---
3 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/include/libnetlink.h b/include/libnetlink.h
index 751ebf186dd4..bd0267dfcc02 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -81,6 +81,9 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
struct nlmsghdr *answer, size_t len)
__attribute__((warn_unused_result));
+int rtnl_talk_suppress_rtnl_errmsg(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t len)
+ __attribute__((warn_unused_result));
int rtnl_send(struct rtnl_handle *rth, const void *buf, int)
__attribute__((warn_unused_result));
int rtnl_send_check(struct rtnl_handle *rth, const void *buf, int)
diff --git a/ip/iplink_vrf.c b/ip/iplink_vrf.c
index c101ed770f87..917630e85337 100644
--- a/ip/iplink_vrf.c
+++ b/ip/iplink_vrf.c
@@ -13,6 +13,7 @@
#include <string.h>
#include <sys/socket.h>
#include <linux/if_link.h>
+#include <errno.h>
#include "rt_names.h"
#include "utils.h"
@@ -126,8 +127,14 @@ __u32 ipvrf_get_table(const char *name)
addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, strlen(name) + 1);
- if (rtnl_talk(&rth, &req.n, &answer.n, sizeof(answer)) < 0)
- return 0;
+ if (rtnl_talk_suppress_rtnl_errmsg(&rth, &req.n,
+ &answer.n, sizeof(answer)) < 0) {
+ /* special case "default" vrf to be the main table */
+ if (errno == ENODEV && !strcmp(name, "default"))
+ rtnl_rttable_a2n(&tb_id, "main");
+
+ return tb_id;
+ }
ifi = NLMSG_DATA(&answer.n);
len = answer.n.nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
@@ -186,7 +193,8 @@ int name_is_vrf(const char *name)
addattr_l(&req.n, sizeof(req), IFLA_IFNAME, name, strlen(name) + 1);
- if (rtnl_talk(&rth, &req.n, &answer.n, sizeof(answer)) < 0)
+ if (rtnl_talk_suppress_rtnl_errmsg(&rth, &req.n,
+ &answer.n, sizeof(answer)) < 0)
return 0;
ifi = NLMSG_DATA(&answer.n);
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index a5db168e50eb..9d7e89aebbd0 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -12,6 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
@@ -397,8 +398,9 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
return rtnl_dump_filter_l(rth, a);
}
-int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
- struct nlmsghdr *answer, size_t maxlen)
+static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t maxlen,
+ bool show_rtnl_err)
{
int status;
unsigned int seq;
@@ -485,7 +487,7 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
return 0;
}
- if (rtnl->proto != NETLINK_SOCK_DIAG)
+ if (rtnl->proto != NETLINK_SOCK_DIAG && show_rtnl_err)
fprintf(stderr,
"RTNETLINK answers: %s\n",
strerror(-err->error));
@@ -517,6 +519,18 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
}
}
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t maxlen)
+{
+ return __rtnl_talk(rtnl, n, answer, maxlen, true);
+}
+
+int rtnl_talk_suppress_rtnl_errmsg(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t maxlen)
+{
+ return __rtnl_talk(rtnl, n, answer, maxlen, false);
+}
+
int rtnl_listen_all_nsid(struct rtnl_handle *rth)
{
unsigned int on = 1;
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [iproute2 net-next 8/8] Introduce ip vrf command
2016-12-10 17:47 [iproute2 net-next 0/8] Add support for vrf helper David Ahern
` (5 preceding siblings ...)
2016-12-10 17:47 ` [iproute2 net-next 7/8] libnetlink: Add variant of rtnl_talk that does not display RTNETLINK answers error David Ahern
@ 2016-12-10 17:47 ` David Ahern
6 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-12-10 17:47 UTC (permalink / raw)
To: netdev, stephen; +Cc: David Ahern
'ip vrf' follows the user semnatics established by 'ip netns'.
The 'ip vrf' subcommand supports 3 usages:
1. Run a command against a given vrf:
ip vrf exec NAME CMD
Uses the recently committed cgroup/sock BPF option. vrf directory
is added to cgroup2 mount. Individual vrfs are created under it. BPF
filter attached to vrf/NAME cgroup2 to set sk_bound_dev_if to the VRF
device index. From there the current process (ip's pid) is addded to
the cgroups.proc file and the given command is exected. In doing so
all AF_INET/AF_INET6 (ipv4/ipv6) sockets are automatically bound to
the VRF domain.
The association is inherited parent to child allowing the command to
be a shell from which other commands are run relative to the VRF.
2. Show the VRF a process is bound to:
ip vrf id
This command essentially looks at /proc/pid/cgroup for a "::/vrf/"
entry with the VRF name following.
3. Show process ids bound to a VRF
ip vrf pids NAME
This command dumps the file MNT/vrf/NAME/cgroup.procs since that file
shows the process ids in the particular vrf cgroup.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
ip/Makefile | 3 +-
ip/ip.c | 4 +-
ip/ip_common.h | 2 +
ip/ipvrf.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
man/man8/ip-vrf.8 | 88 +++++++++++++++++
5 files changed, 384 insertions(+), 2 deletions(-)
create mode 100644 ip/ipvrf.c
create mode 100644 man/man8/ip-vrf.8
diff --git a/ip/Makefile b/ip/Makefile
index c8e6c6172741..1928489e7f90 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -7,7 +7,8 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
- iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o
+ iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o \
+ ipvrf.o
RTMONOBJ=rtmon.o
diff --git a/ip/ip.c b/ip/ip.c
index cb3adcb3f57d..07050b07592a 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -51,7 +51,8 @@ static void usage(void)
" ip [ -force ] -batch filename\n"
"where OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n"
" tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n"
-" netns | l2tp | fou | macsec | tcp_metrics | token | netconf | ila }\n"
+" netns | l2tp | fou | macsec | tcp_metrics | token | netconf | ila |\n"
+" vrf }\n"
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
" -h[uman-readable] | -iec |\n"
" -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
@@ -99,6 +100,7 @@ static const struct cmd {
{ "mrule", do_multirule },
{ "netns", do_netns },
{ "netconf", do_ipnetconf },
+ { "vrf", do_ipvrf},
{ "help", do_help },
{ 0 }
};
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 3162f1ca5b2c..28763e81e4a4 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -57,6 +57,8 @@ extern int do_ipila(int argc, char **argv);
int do_tcp_metrics(int argc, char **argv);
int do_ipnetconf(int argc, char **argv);
int do_iptoken(int argc, char **argv);
+int do_ipvrf(int argc, char **argv);
+
int iplink_get(unsigned int flags, char *name, __u32 filt_mask);
static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
diff --git a/ip/ipvrf.c b/ip/ipvrf.c
new file mode 100644
index 000000000000..c4f0e53532e2
--- /dev/null
+++ b/ip/ipvrf.c
@@ -0,0 +1,289 @@
+/*
+ * ipvrf.c "ip vrf"
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: David Ahern <dsa@cumulusnetworks.com>
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define CGRP_PROC_FILE "/cgroup.procs"
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: ip vrf exec [NAME] cmd ...\n");
+ fprintf(stderr, " ip vrf identify [PID]\n");
+ fprintf(stderr, " ip vrf pids [NAME]\n");
+
+ exit(-1);
+}
+
+static int ipvrf_identify(int argc, char **argv)
+{
+ char path[PATH_MAX];
+ char buf[4096];
+ char *vrf, *end;
+ int fd, rc = -1;
+ unsigned int pid;
+ ssize_t n;
+
+ if (argc < 1)
+ pid = getpid();
+ else if (argc > 1)
+ invarg("Extra arguments specified\n", argv[1]);
+ else if (get_unsigned(&pid, argv[0], 10))
+ invarg("Invalid pid\n", argv[0]);
+
+ snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr,
+ "Failed to open cgroups file: %s\n", strerror(errno));
+ return -1;
+ }
+
+ n = read(fd, buf, sizeof(buf) - 1);
+ if (n < 0) {
+ fprintf(stderr,
+ "Failed to read cgroups file: %s\n", strerror(errno));
+ goto out;
+ }
+ buf[n] = '\0';
+ vrf = strstr(buf, "::/vrf/");
+ if (vrf) {
+ vrf += 7; /* skip past "::/vrf/" */
+ end = strchr(vrf, '\n');
+ if (end)
+ *end = '\0';
+
+ printf("%s\n", vrf);
+ }
+
+ rc = 0;
+out:
+ close(fd);
+
+ return rc;
+}
+
+static int ipvrf_pids(int argc, char **argv)
+{
+ char path[PATH_MAX];
+ char buf[4096];
+ char *mnt, *vrf;
+ int fd, rc = -1;
+ ssize_t n;
+
+ if (argc != 1) {
+ fprintf(stderr, "Invalid arguments\n");
+ return -1;
+ }
+
+ vrf = argv[0];
+
+ mnt = find_cgroup2_mount();
+ if (!mnt)
+ return -1;
+
+ snprintf(path, sizeof(path), "%s/vrf/%s%s", mnt, vrf, CGRP_PROC_FILE);
+ free(mnt);
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return 0; /* no cgroup file, nothing to show */
+
+ while (1) {
+ n = read(fd, buf, sizeof(buf) - 1);
+ if (n < 0) {
+ fprintf(stderr,
+ "Failed to read cgroups file: %s\n", strerror(errno));
+ break;
+ } else if (n == 0) {
+ rc = 0;
+ break;
+ }
+ printf("%s", buf);
+ }
+
+ close(fd);
+
+ return rc;
+}
+
+/* load BPF program to set sk_bound_dev_if for sockets */
+static char bpf_log_buf[256*1024];
+
+static int prog_load(int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
+ "GPL", bpf_log_buf, sizeof(bpf_log_buf));
+}
+
+static int vrf_configure_cgroup(const char *path, int ifindex)
+{
+ int rc = -1, cg_fd, prog_fd = -1;
+
+ cg_fd = open(path, O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ fprintf(stderr, "Failed to open cgroup path: '%s'\n", strerror(errno));
+ goto out;
+ }
+
+ /*
+ * Load bpf program into kernel and attach to cgroup to affect
+ * socket creates
+ */
+ prog_fd = prog_load(ifindex);
+ if (prog_fd < 0) {
+ printf("Failed to load BPF prog: '%s'\n", strerror(errno));
+ goto out;
+ }
+
+ if (bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) {
+ fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ fprintf(stderr, "Kernel compiled with CGROUP_BPF enabled?\n");
+ goto out;
+ }
+
+ rc = 0;
+out:
+ close(cg_fd);
+ close(prog_fd);
+
+ return rc;
+}
+
+static int vrf_switch(const char *name)
+{
+ char path[PATH_MAX], *mnt, pid[16];
+ int ifindex = name_is_vrf(name);
+ bool default_vrf = false;
+ int rc = -1, len, fd = -1;
+
+ if (!ifindex) {
+ if (strcmp(name, "default")) {
+ fprintf(stderr, "Invalid VRF name\n");
+ return -1;
+ }
+ default_vrf = true;
+ }
+
+ mnt = find_cgroup2_mount();
+ if (!mnt)
+ return -1;
+
+ /* path to cgroup; make sure buffer has room to cat "/cgroup.procs"
+ * to the end of the path
+ */
+ len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE), "%s%s/%s",
+ mnt, default_vrf ? "" : "/vrf", name);
+ if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) {
+ fprintf(stderr, "Invalid path to cgroup2 mount\n");
+ goto out;
+ }
+
+ if (make_path(path, 0755)) {
+ fprintf(stderr, "Failed to setup vrf cgroup2 directory\n");
+ goto out;
+ }
+
+ if (!default_vrf && vrf_configure_cgroup(path, ifindex))
+ goto out;
+
+ /*
+ * write pid to cgroup.procs making process part of cgroup
+ */
+ strcat(path, CGRP_PROC_FILE);
+ fd = open(path, O_RDWR | O_APPEND);
+ if (fd < 0) {
+ fprintf(stderr, "cgroups.procs file does not exist.\n");
+ goto out;
+ }
+
+ snprintf(pid, sizeof(pid), "%d", getpid());
+ if (write(fd, pid, strlen(pid)) < 0) {
+ fprintf(stderr, "Failed to join cgroup\n");
+ goto out;
+ }
+
+ rc = 0;
+out:
+ free(mnt);
+ close(fd);
+
+ return rc;
+}
+
+static int ipvrf_exec(int argc, char **argv)
+{
+ if (argc < 1) {
+ fprintf(stderr, "No VRF name specified\n");
+ return -1;
+ }
+ if (argc < 2) {
+ fprintf(stderr, "No command specified\n");
+ return -1;
+ }
+
+ if (vrf_switch(argv[0]))
+ return -1;
+
+ return -cmd_exec(argv[1], argv + 1, !!batch_mode);
+}
+
+int do_ipvrf(int argc, char **argv)
+{
+ if (argc == 0) {
+ fprintf(stderr, "No command given. Try \"ip vrf help\".\n");
+ exit(-1);
+ }
+
+ if (matches(*argv, "identify") == 0)
+ return ipvrf_identify(argc-1, argv+1);
+
+ if (matches(*argv, "pids") == 0)
+ return ipvrf_pids(argc-1, argv+1);
+
+ if (matches(*argv, "exec") == 0)
+ return ipvrf_exec(argc-1, argv+1);
+
+ if (matches(*argv, "help") == 0)
+ usage();
+
+ fprintf(stderr, "Command \"%s\" is unknown, try \"ip vrf help\".\n",
+ *argv);
+
+ exit(-1);
+}
diff --git a/man/man8/ip-vrf.8 b/man/man8/ip-vrf.8
new file mode 100644
index 000000000000..57a7c7692ce8
--- /dev/null
+++ b/man/man8/ip-vrf.8
@@ -0,0 +1,88 @@
+.TH IP\-VRF 8 "7 Dec 2016" "iproute2" "Linux"
+.SH NAME
+ip-vrf \- run a command against a vrf
+.SH SYNOPSIS
+.sp
+.ad l
+.in +8
+.ti -8
+.B ip
+.B vrf
+.RI " { " COMMAND " | "
+.BR help " }"
+.sp
+
+.ti -8
+.BR "ip vrf identify"
+.RI "[ " PID " ]"
+
+.ti -8
+.BR "ip vrf pids"
+.I NAME
+
+.ti -8
+.BR "ip vrf exec "
+.RI "[ " NAME " ] " command ...
+
+.SH DESCRIPTION
+A VRF provides traffic isolation at layer 3 for routing, similar to how a
+VLAN is used to isolate traffic at layer 2. Fundamentally, a VRF is a separate
+routing table. Network devices are associated with a VRF by enslaving the
+device to the VRF. At that point network addresses assigned to the device are
+local to the VRF with host and connected routes moved to the table associated
+with the VRF.
+
+A process can specify a VRF using several APIs -- binding the socket to the
+VRF device using SO_BINDTODEVICE, setting the VRF association using
+IP_UNICAST_IF or IPV6_UNICAST_IF, or specifying the VRF for a specific message
+using IP_PKTINFO or IPV6_PKTINFO.
+
+By default a process is not bound to any VRF. An association can be set
+explicitly by making the program use one of the APIs mentioned above or
+implicitly using a helper to set SO_BINDTODEVICE for all IPv4 and IPv6
+sockets (AF_INET and AF_INET6) when the socket is created. This ip-vrf command
+is a helper to run a command against a specific VRF with the VRF association
+inherited parent to child.
+
+.TP
+.B ip vrf exec [ NAME ] cmd ... - Run cmd against the named VRF
+.sp
+This command allows applications that are VRF unaware to be run against
+a VRF other than the default VRF (main table). A command can be run against
+the default VRF by passing the "default" as the VRF name. This is useful if
+the current shell is associated with another VRF (e.g, Management VRF).
+
+.TP
+.B ip vrf identify [PID] - Report VRF association for process
+.sp
+This command shows the VRF association of the specified process. If PID is
+not specified then the id of the current process is used.
+
+.TP
+.B ip vrf pids NAME - Report processes associated with the named VRF
+.sp
+This command shows all process ids that are associated with the given
+VRF.
+
+.SH CAVEATS
+This command requires a kernel compiled with CGROUPS and CGROUP_BPF enabled.
+
+The VRF helper *only* affects network layer sockets.
+
+.SH EXAMPLES
+.PP
+ip vrf exec red ssh 10.100.1.254
+.RS
+Executes ssh to 10.100.1.254 against the VRF red table.
+.RE
+
+.SH SEE ALSO
+.br
+.BR ip (8),
+.BR ip-link (8),
+.BR ip-address (8),
+.BR ip-route (8),
+.BR ip-neighbor (8)
+
+.SH AUTHOR
+Original Manpage by David Ahern
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread