Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 2/3 v8] net: arp: Add support for raw IP device
From: Subash Abhinov Kasiviswanathan @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, davem, fengguang.wu, dcbw, jiri, stephen, David.Laight,
	marcel, andrew
  Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1503447610-11409-1-git-send-email-subashab@codeaurora.org>

Define the raw IP type. This is needed for raw IP net devices
like rmnet.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 include/uapi/linux/if_arp.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index cf73510..a2a6356 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -59,6 +59,7 @@
 #define ARPHRD_LAPB	516		/* LAPB				*/
 #define ARPHRD_DDCMP    517		/* Digital's DDCMP protocol     */
 #define ARPHRD_RAWHDLC	518		/* Raw HDLC			*/
+#define ARPHRD_RAWIP    519		/* Raw IP                       */
 
 #define ARPHRD_TUNNEL	768		/* IPIP tunnel			*/
 #define ARPHRD_TUNNEL6	769		/* IP6IP6 tunnel       		*/
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 3/8] bpf: Allow cgroup sock filters to use get_current_uid_gid helper
From: David Ahern @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, daniel, ast, tj, davem; +Cc: David Ahern
In-Reply-To: <1503447621-27997-1-git-send-email-dsahern@gmail.com>

Allow BPF programs that run on socket creation to use the
get_current_uid_gid helper.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/core/filter.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 7ee75a40ff03..6d3f693021f9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3139,6 +3139,17 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
+sock_filter_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_get_current_uid_gid:
+		return &bpf_get_current_uid_gid_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -4227,7 +4238,7 @@ const struct bpf_verifier_ops lwt_xmit_prog_ops = {
 };
 
 const struct bpf_verifier_ops cg_sock_prog_ops = {
-	.get_func_proto		= bpf_base_func_proto,
+	.get_func_proto		= sock_filter_func_proto,
 	.is_valid_access	= sock_filter_is_valid_access,
 	.convert_ctx_access	= sock_filter_convert_ctx_access,
 };
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 4/8] bpf samples: Update sock test to allow setting mark and priority
From: David Ahern @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, daniel, ast, tj, davem; +Cc: David Ahern
In-Reply-To: <1503447621-27997-1-git-send-email-dsahern@gmail.com>

Update sock test to set mark and priority on socket create.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 samples/bpf/test_cgrp2_sock.c  | 129 ++++++++++++++++++++++++++++++++++++-----
 samples/bpf/test_cgrp2_sock.sh |   2 +-
 2 files changed, 116 insertions(+), 15 deletions(-)

diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
index c3cfb23e23b5..c2501c9508a7 100644
--- a/samples/bpf/test_cgrp2_sock.c
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -19,55 +19,156 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <net/if.h>
+#include <inttypes.h>
 #include <linux/bpf.h>
 
 #include "libbpf.h"
 
 char bpf_log_buf[BPF_LOG_BUF_SIZE];
 
-static int prog_load(int idx)
+static int prog_load(__u32 idx, __u32 mark, __u32 prio)
 {
-	struct bpf_insn prog[] = {
+	/* save pointer to context */
+	struct bpf_insn prog_start[] = {
 		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	};
+	struct bpf_insn prog_end[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+		BPF_EXIT_INSN(),
+	};
+
+	/* set sk_bound_dev_if on socket */
+	struct bpf_insn prog_dev[] = {
 		BPF_MOV64_IMM(BPF_REG_3, idx),
 		BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
 		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
-		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
-		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
 
-	return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
+	/* set mark on socket */
+	struct bpf_insn prog_mark[] = {
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_MOV64_IMM(BPF_REG_3, mark),
+		BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)),
+		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)),
+	};
+
+	/* set priority on socket */
+	struct bpf_insn prog_prio[] = {
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		BPF_MOV64_IMM(BPF_REG_3, prio),
+		BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)),
+		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)),
+	};
+
+	struct bpf_insn *prog;
+	size_t insns_cnt;
+	void *p;
+	int ret;
+
+	insns_cnt = sizeof(prog_start) + sizeof(prog_end);
+	if (idx)
+		insns_cnt += sizeof(prog_dev);
+
+	if (mark)
+		insns_cnt += sizeof(prog_mark);
+
+	if (prio)
+		insns_cnt += sizeof(prog_prio);
+
+	p = prog = malloc(insns_cnt);
+	if (!prog) {
+		fprintf(stderr, "Failed to allocate memory for instructions\n");
+		return EXIT_FAILURE;
+	}
+
+	memcpy(p, prog_start, sizeof(prog_start));
+	p += sizeof(prog_start);
+
+	if (idx) {
+		memcpy(p, prog_dev, sizeof(prog_dev));
+		p += sizeof(prog_dev);
+	}
+
+	if (mark) {
+		memcpy(p, prog_mark, sizeof(prog_mark));
+		p += sizeof(prog_mark);
+	}
+
+	if (prio) {
+		memcpy(p, prog_prio, sizeof(prog_prio));
+		p += sizeof(prog_prio);
+	}
+
+	memcpy(p, prog_end, sizeof(prog_end));
+	p += sizeof(prog_end);
+
+	insns_cnt /= sizeof(struct bpf_insn);
+
+	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
 				"GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+	free(prog);
+
+	return ret;
 }
 
 static int usage(const char *argv0)
 {
-	printf("Usage: %s cg-path device-index\n", argv0);
+	printf("Usage: %s -b bind-to-dev -m mark -p prio cg-path\n", argv0);
 	return EXIT_FAILURE;
 }
 
 int main(int argc, char **argv)
 {
+	__u32 idx = 0, mark = 0, prio = 0;
+	const char *cgrp_path = NULL;
 	int cg_fd, prog_fd, ret;
-	unsigned int idx;
+	int rc;
+
+	while ((rc = getopt(argc, argv, "b:m:p:")) != -1) {
+		switch (rc) {
+		case 'b':
+			idx = if_nametoindex(optarg);
+			if (!idx) {
+				idx = strtoumax(optarg, NULL, 0);
+				if (!idx) {
+					printf("Invalid device name\n");
+					return EXIT_FAILURE;
+				}
+			}
+			break;
+		case 'm':
+			mark = strtoumax(optarg, NULL, 0);
+			break;
+		case 'p':
+			prio = strtoumax(optarg, NULL, 0);
+			break;
+		default:
+			return usage(argv[0]);
+		}
+	}
 
-	if (argc < 2)
+	if (optind == argc)
 		return usage(argv[0]);
 
-	idx = if_nametoindex(argv[2]);
-	if (!idx) {
-		printf("Invalid device name\n");
+	cgrp_path = argv[optind];
+	if (!cgrp_path) {
+		fprintf(stderr, "cgroup path not given\n");
+		return EXIT_FAILURE;
+	}
+
+	if (!idx && !mark && !prio) {
+		fprintf(stderr, "One of device, mark or priority must be given\n");
 		return EXIT_FAILURE;
 	}
 
-	cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+	cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY);
 	if (cg_fd < 0) {
 		printf("Failed to open cgroup path: '%s'\n", strerror(errno));
 		return EXIT_FAILURE;
 	}
 
-	prog_fd = prog_load(idx);
+	prog_fd = prog_load(idx, mark, prio);
 	printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
 
 	if (prog_fd < 0) {
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
index 925fd467c7cc..1153c33e8964 100755
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -20,7 +20,7 @@ function attach_bpf {
 	mkdir -p /tmp/cgroupv2
 	mount -t cgroup2 none /tmp/cgroupv2
 	mkdir -p /tmp/cgroupv2/foo
-	test_cgrp2_sock /tmp/cgroupv2/foo foo
+	test_cgrp2_sock -b foo /tmp/cgroupv2/foo
 	echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
 }
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 5/8] bpf/samples: Add detach option to test_cgrp2_sock
From: David Ahern @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, daniel, ast, tj, davem; +Cc: David Ahern
In-Reply-To: <1503447621-27997-1-git-send-email-dsahern@gmail.com>

Add option to detach programs from a cgroup.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 samples/bpf/test_cgrp2_sock.c | 48 ++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
index c2501c9508a7..1bae450294f6 100644
--- a/samples/bpf/test_cgrp2_sock.c
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -114,7 +114,12 @@ static int prog_load(__u32 idx, __u32 mark, __u32 prio)
 
 static int usage(const char *argv0)
 {
-	printf("Usage: %s -b bind-to-dev -m mark -p prio cg-path\n", argv0);
+	printf("Usage:\n");
+	printf("  Attach a program\n");
+	printf("  %s -b bind-to-dev -m mark -p prio cg-path\n", argv0);
+	printf("\n");
+	printf("  Detach a program\n");
+	printf("  %s -d cg-path\n", argv0);
 	return EXIT_FAILURE;
 }
 
@@ -123,10 +128,14 @@ int main(int argc, char **argv)
 	__u32 idx = 0, mark = 0, prio = 0;
 	const char *cgrp_path = NULL;
 	int cg_fd, prog_fd, ret;
+	int do_attach = 1;
 	int rc;
 
-	while ((rc = getopt(argc, argv, "b:m:p:")) != -1) {
+	while ((rc = getopt(argc, argv, "db:m:p:")) != -1) {
 		switch (rc) {
+		case 'd':
+			do_attach = 0;
+			break;
 		case 'b':
 			idx = if_nametoindex(optarg);
 			if (!idx) {
@@ -157,7 +166,7 @@ int main(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 
-	if (!idx && !mark && !prio) {
+	if (do_attach && !idx && !mark && !prio) {
 		fprintf(stderr, "One of device, mark or priority must be given\n");
 		return EXIT_FAILURE;
 	}
@@ -168,20 +177,31 @@ int main(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 
-	prog_fd = prog_load(idx, mark, prio);
-	printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+	if (do_attach) {
+		prog_fd = prog_load(idx, mark, prio);
+		printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
 
-	if (prog_fd < 0) {
-		printf("Failed to load prog: '%s'\n", strerror(errno));
-		return EXIT_FAILURE;
-	}
+		if (prog_fd < 0) {
+			printf("Failed to load prog: '%s'\n", strerror(errno));
+			return EXIT_FAILURE;
+		}
 
-	ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0);
-	if (ret < 0) {
-		printf("Failed to attach prog to cgroup: '%s'\n",
-		       strerror(errno));
-		return EXIT_FAILURE;
+		ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE,
+				      BPF_F_ALLOW_OVERRIDE);
+		if (ret < 0) {
+			printf("Failed to attach prog to cgroup: '%s'\n",
+			       strerror(errno));
+			return EXIT_FAILURE;
+		}
+	} else {
+		ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
+		if (ret < 0) {
+			printf("Failed to detach prog from cgroup: '%s'\n",
+			       strerror(errno));
+			return EXIT_FAILURE;
+		}
 	}
 
+	close(cg_fd);
 	return EXIT_SUCCESS;
 }
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 6/8] samples/bpf: Add option to dump socket settings
From: David Ahern @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, daniel, ast, tj, davem; +Cc: David Ahern
In-Reply-To: <1503447621-27997-1-git-send-email-dsahern@gmail.com>

Add option to dump socket settings. Will be used in the next patch
to verify bpf programs are correctly setting mark, priority and
device based on the cgroup attachment for the program run.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 samples/bpf/test_cgrp2_sock.c | 75 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
index 1bae450294f6..5e3f605ed3b3 100644
--- a/samples/bpf/test_cgrp2_sock.c
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -112,6 +112,70 @@ static int prog_load(__u32 idx, __u32 mark, __u32 prio)
 	return ret;
 }
 
+static int get_bind_to_device(int sd, char *name, size_t len)
+{
+	socklen_t optlen = len;
+	int rc;
+
+	name[0] = '\0';
+	rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
+	if (rc < 0)
+		perror("setsockopt(SO_BINDTODEVICE)");
+
+	return rc;
+}
+
+static unsigned int get_somark(int sd)
+{
+	unsigned int mark = 0;
+	socklen_t optlen = sizeof(mark);
+	int rc;
+
+	rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen);
+	if (rc < 0)
+		perror("getsockopt(SO_MARK)");
+
+	return mark;
+}
+
+static unsigned int get_priority(int sd)
+{
+	unsigned int prio = 0;
+	socklen_t optlen = sizeof(prio);
+	int rc;
+
+	rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen);
+	if (rc < 0)
+		perror("getsockopt(SO_PRIORITY)");
+
+	return prio;
+}
+
+static int show_sockopts(int family)
+{
+	unsigned int mark, prio;
+	char name[16];
+	int sd;
+
+	sd = socket(family, SOCK_DGRAM, 17);
+	if (sd < 0) {
+		perror("socket");
+		return 1;
+	}
+
+	if (get_bind_to_device(sd, name, sizeof(name)) < 0)
+		return 1;
+
+	mark = get_somark(sd);
+	prio = get_priority(sd);
+
+	close(sd);
+
+	printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio);
+
+	return 0;
+}
+
 static int usage(const char *argv0)
 {
 	printf("Usage:\n");
@@ -120,6 +184,9 @@ static int usage(const char *argv0)
 	printf("\n");
 	printf("  Detach a program\n");
 	printf("  %s -d cg-path\n", argv0);
+	printf("\n");
+	printf("  Show inherited socket settings (mark, priority, and device)\n");
+	printf("  %s [-6]\n", argv0);
 	return EXIT_FAILURE;
 }
 
@@ -128,10 +195,11 @@ int main(int argc, char **argv)
 	__u32 idx = 0, mark = 0, prio = 0;
 	const char *cgrp_path = NULL;
 	int cg_fd, prog_fd, ret;
+	int family = PF_INET;
 	int do_attach = 1;
 	int rc;
 
-	while ((rc = getopt(argc, argv, "db:m:p:")) != -1) {
+	while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) {
 		switch (rc) {
 		case 'd':
 			do_attach = 0;
@@ -152,13 +220,16 @@ int main(int argc, char **argv)
 		case 'p':
 			prio = strtoumax(optarg, NULL, 0);
 			break;
+		case '6':
+			family = PF_INET6;
+			break;
 		default:
 			return usage(argv[0]);
 		}
 	}
 
 	if (optind == argc)
-		return usage(argv[0]);
+		return show_sockopts(family);
 
 	cgrp_path = argv[optind];
 	if (!cgrp_path) {
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 7/8] samples/bpf: Add test case for nested socket options
From: David Ahern @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, daniel, ast, tj, davem; +Cc: David Ahern
In-Reply-To: <1503447621-27997-1-git-send-email-dsahern@gmail.com>

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 samples/bpf/test_cgrp2_sock3.sh | 118 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100755 samples/bpf/test_cgrp2_sock3.sh

diff --git a/samples/bpf/test_cgrp2_sock3.sh b/samples/bpf/test_cgrp2_sock3.sh
new file mode 100755
index 000000000000..d371a8740aeb
--- /dev/null
+++ b/samples/bpf/test_cgrp2_sock3.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+
+# Verify socket options inherited by bpf programs attached
+# to a cgroup.
+
+CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock"
+
+################################################################################
+#
+print_result()
+{
+	printf "%50s    [%4s]\n" "$1" "$2"
+}
+
+check_sock()
+{
+	out=$(test_cgrp2_sock)
+	echo $out | grep -q "$1"
+	if [ $? -ne 0 ]; then
+		print_result "IPv4: $2" "FAIL"
+		echo "    expected: $1"
+		echo "        have: $out"
+		rc=1
+	else
+		print_result "IPv4: $2" "OK"
+	fi
+}
+
+check_sock6()
+{
+	out=$(test_cgrp2_sock -6)
+	echo $out | grep -q "$1"
+	if [ $? -ne 0 ]; then
+		print_result "IPv6: $2" "FAIL"
+		echo "    expected: $1"
+		echo "        have: $out"
+		rc=1
+	else
+		print_result "IPv6: $2" "OK"
+	fi
+}
+
+################################################################################
+#
+setup()
+{
+	ip li add cgrp2_sock type dummy
+
+	set -e
+
+	mkdir -p ${CGRP_MNT}
+	mount -t cgroup2 none ${CGRP_MNT}
+
+	mkdir -p ${CGRP_MNT}/cgrp_sock_test/prio/mark/dev
+
+	test_cgrp2_sock -p 123 ${CGRP_MNT}/cgrp_sock_test/prio
+	test_cgrp2_sock -m 666 ${CGRP_MNT}/cgrp_sock_test/prio/mark
+	test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/cgrp_sock_test/prio/mark/dev
+
+	set +e
+}
+
+cleanup()
+{
+	ip li del cgrp2_sock
+
+	echo $$ >> ${CGRP_MNT}/cgroup.procs
+	rmdir ${CGRP_MNT}/cgrp_sock_test/prio/mark/dev
+	rmdir ${CGRP_MNT}/cgrp_sock_test/prio/mark
+	rmdir ${CGRP_MNT}/cgrp_sock_test/prio
+	rmdir ${CGRP_MNT}/cgrp_sock_test
+
+	umount ${CGRP_MNT}
+}
+
+################################################################################
+# main
+
+rc=0
+
+setup
+
+# set pid into first cgroup. socket should show it
+# has a priority but not a mark or device bind
+echo $$ > ${CGRP_MNT}/cgrp_sock_test/prio/cgroup.procs
+check_sock "dev , mark 0, priority 123" "Priority only"
+
+
+# set pid into second group. socket should show it
+# has a priority and mark but not a device bind
+echo $$ > ${CGRP_MNT}/cgrp_sock_test/prio/mark/cgroup.procs
+check_sock "dev , mark 666, priority 123" "Priority + mark"
+
+# set pid into inner group. socket should show it
+# has a priority, mark and a device bind
+echo $$ > ${CGRP_MNT}/cgrp_sock_test/prio/mark/dev/cgroup.procs
+check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority + mark + dev"
+
+echo
+
+# set pid into first cgroup. socket should show it
+# has a priority but not a mark or device bind
+echo $$ > ${CGRP_MNT}/cgrp_sock_test/prio/cgroup.procs
+check_sock6 "dev , mark 0, priority 123" "Priority only"
+
+# set pid into second group. socket should show it
+# has a priority and mark but not a device bind
+echo $$ > ${CGRP_MNT}/cgrp_sock_test/prio/mark/cgroup.procs
+check_sock6 "dev , mark 666, priority 123" "Priority + mark"
+
+# set pid into inner group. socket should show it
+# has a priority, mark and a device bind
+echo $$ > ${CGRP_MNT}/cgrp_sock_test/prio/mark/dev/cgroup.procs
+check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority + mark + dev"
+
+cleanup
+
+exit $rc
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 8/8] samples/bpf: Update cgroup socket examples to use uid gid helper
From: David Ahern @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, daniel, ast, tj, davem; +Cc: David Ahern
In-Reply-To: <1503447621-27997-1-git-send-email-dsahern@gmail.com>

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 samples/bpf/sock_flags_kern.c |  5 +++++
 samples/bpf/test_cgrp2_sock.c | 12 +++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
index 533dd11a6baa..05dcdf8a4baa 100644
--- a/samples/bpf/sock_flags_kern.c
+++ b/samples/bpf/sock_flags_kern.c
@@ -9,8 +9,13 @@ SEC("cgroup/sock1")
 int bpf_prog1(struct bpf_sock *sk)
 {
 	char fmt[] = "socket: family %d type %d protocol %d\n";
+	char fmt2[] = "socket: uid %u gid %u\n";
+	__u64 gid_uid = bpf_get_current_uid_gid();
+	__u32 uid = gid_uid & 0xffffffff;
+	__u32 gid = gid_uid >> 32;
 
 	bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+	bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid);
 
 	/* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
 	 * ie., make ping6 fail
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
index 5e3f605ed3b3..580d4d573826 100644
--- a/samples/bpf/test_cgrp2_sock.c
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -46,8 +46,18 @@ static int prog_load(__u32 idx, __u32 mark, __u32 prio)
 
 	/* set mark on socket */
 	struct bpf_insn prog_mark[] = {
-		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+		/* get uid of process */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_current_uid_gid),
+		BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff),
+
+		/* if uid is 0, use given mark, else use the uid as the mark */
+		BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 		BPF_MOV64_IMM(BPF_REG_3, mark),
+
+		/* set the mark on the new socket */
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
 		BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)),
 		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)),
 	};
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 3/3 v8] drivers: net: ethernet: qualcomm: rmnet: Initial implementation
From: Subash Abhinov Kasiviswanathan @ 2017-08-23  0:20 UTC (permalink / raw)
  To: netdev, davem, fengguang.wu, dcbw, jiri, stephen, David.Laight,
	marcel, andrew
  Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1503447610-11409-1-git-send-email-subashab@codeaurora.org>

The RmNet driver provides transport-agnostic MAP (Multiplexing and
Aggregation Protocol) support in an embedded module. The module provides
virtual network devices which can be attached to any IP-mode
physical device. This will be used to provide all MAP functionality
on future hardware in a single consistent location.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 Documentation/networking/rmnet.txt                 |  82 ++++
 drivers/net/ethernet/qualcomm/Kconfig              |   2 +
 drivers/net/ethernet/qualcomm/Makefile             |   2 +
 drivers/net/ethernet/qualcomm/rmnet/Kconfig        |  12 +
 drivers/net/ethernet/qualcomm/rmnet/Makefile       |  12 +
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 417 +++++++++++++++++++++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h |  54 +++
 .../net/ethernet/qualcomm/rmnet/rmnet_handlers.c   | 271 +++++++++++++
 .../net/ethernet/qualcomm/rmnet/rmnet_handlers.h   |  26 ++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h    |  88 +++++
 .../ethernet/qualcomm/rmnet/rmnet_map_command.c    | 107 ++++++
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   | 105 ++++++
 .../net/ethernet/qualcomm/rmnet/rmnet_private.h    |  45 +++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c    | 237 ++++++++++++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h    |  31 ++
 15 files changed, 1491 insertions(+)
 create mode 100644 Documentation/networking/rmnet.txt
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/Kconfig
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/Makefile
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
 create mode 100644 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h

diff --git a/Documentation/networking/rmnet.txt b/Documentation/networking/rmnet.txt
new file mode 100644
index 0000000..6b341ea
--- /dev/null
+++ b/Documentation/networking/rmnet.txt
@@ -0,0 +1,82 @@
+1. Introduction
+
+The rmnet driver is used for supporting the Multiplexing and Aggregation
+Protocol (MAP). This protocol is used by all recent chipsets using Qualcomm
+Technologies, Inc. modems.
+
+This driver can be used to register onto any physical network device in
+IP mode. Physical transports include USB, HSIC, PCIe and IP accelerator.
+
+Multiplexing allows for creation of logical netdevices (rmnet devices) to
+handle multiple private data networks (PDN) like a default internet, tethering,
+multimedia messaging service (MMS) or IP media subsystem (IMS). Hardware sends
+packets with MAP headers to rmnet. Based on the multiplexer id, rmnet
+routes to the appropriate PDN after removing the MAP header.
+
+Aggregation is required to achieve high data rates. This involves hardware
+sending an aggregated bunch of MAP frames. The rmnet driver will de-aggregate
+these MAP frames and send them to the appropriate PDNs.
+
+2. Packet format
+
+a. MAP packet (data / control)
+
+MAP header has the same endianness as the IP packet.
+
+Packet format -
+
+Bit             0             1           2-7      8 - 15           16 - 31
+Function   Command / Data   Reserved     Pad   Multiplexer ID    Payload length
+Bit            32 - x
+Function     Raw  Bytes
+
+Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
+or data packet. Control packet is used for transport level flow control. Data
+packets are standard IP packets.
+
+Reserved bits are usually zeroed out and are to be ignored by the receiver.
+
+Padding is the number of bytes to be added for 4-byte alignment if required by
+hardware.
+
+Multiplexer ID is to indicate the PDN on which data has to be sent.
+
+Payload length includes the padding length but does not include MAP header
+length.
+
+b. MAP packet (command specific)
+
+Bit             0             1           2-7      8 - 15           16 - 31
+Function   Command         Reserved     Pad   Multiplexer ID    Payload length
+Bit          32 - 39        40 - 45    46 - 47       48 - 63
+Function   Command name    Reserved   Command Type   Reserved
+Bit          64 - 95
+Function   Transaction ID
+Bit          96 - 127
+Function   Command data
+
+Command 1 indicates disabling flow, while 2 indicates enabling flow.
+
+Command types -
+0 for MAP command request
+1 is to acknowledge the receipt of a command
+2 is for unsupported commands
+3 is for error during processing of commands
+
+c. Aggregation
+
+Aggregation is multiple MAP packets (can be data or command) delivered to
+rmnet in a single linear skb. rmnet will process the individual
+packets and either ACK the MAP command or deliver the IP packet to the
+network stack as needed.
+
+MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding....
+MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad...
+
+3. Userspace configuration
+
+rmnet userspace configuration is done through the netlink library librmnetctl
+and the command line utility rmnetcli. The utility is hosted in the codeaurora
+forum git. The driver uses rtnl_link_ops for communication.
+
+https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensource/dataservices/tree/rmnetctl
diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig
index 877675a..f520071 100644
--- a/drivers/net/ethernet/qualcomm/Kconfig
+++ b/drivers/net/ethernet/qualcomm/Kconfig
@@ -59,4 +59,6 @@ config QCOM_EMAC
 	  low power, Receive-Side Scaling (RSS), and IEEE 1588-2008
 	  Precision Clock Synchronization Protocol.
 
+source "drivers/net/ethernet/qualcomm/rmnet/Kconfig"
+
 endif # NET_VENDOR_QUALCOMM
diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile
index 92fa7c4..1847350 100644
--- a/drivers/net/ethernet/qualcomm/Makefile
+++ b/drivers/net/ethernet/qualcomm/Makefile
@@ -9,3 +9,5 @@ obj-$(CONFIG_QCA7000_UART) += qcauart.o
 qcauart-objs := qca_uart.o
 
 obj-y += emac/
+
+obj-$(CONFIG_RMNET) += rmnet/
diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
new file mode 100644
index 0000000..4948f14
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
@@ -0,0 +1,12 @@
+#
+# RMNET MAP driver
+#
+
+menuconfig RMNET
+	depends on NETDEVICES
+	bool "RmNet MAP driver"
+	default n
+	---help---
+	  If you say Y here, then the rmnet module will be statically
+	  compiled into the kernel. The rmnet module provides MAP
+	  functionality for embedded and bridged traffic.
diff --git a/drivers/net/ethernet/qualcomm/rmnet/Makefile b/drivers/net/ethernet/qualcomm/rmnet/Makefile
new file mode 100644
index 0000000..1c43e2f
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the RMNET module
+#
+
+rmnet-y		 := rmnet_config.o
+rmnet-y		 += rmnet_vnd.o
+rmnet-y		 += rmnet_handlers.o
+rmnet-y		 += rmnet_map_data.o
+rmnet-y		 += rmnet_map_command.o
+obj-$(CONFIG_RMNET) += rmnet.o
+
+CFLAGS_rmnet.o := -I$(src)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
new file mode 100644
index 0000000..9200c87b
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -0,0 +1,417 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * RMNET configuration engine
+ *
+ */
+
+#include <net/sock.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include "rmnet_config.h"
+#include "rmnet_handlers.h"
+#include "rmnet_vnd.h"
+#include "rmnet_private.h"
+
+/* Local Definitions and Declarations */
+#define RMNET_LOCAL_LOGICAL_ENDPOINT -1
+
+/* Deferred-teardown request: carries the rmnet device that rmnet_free_later()
+ * delinks once the work item runs.
+ */
+struct rmnet_free_work {
+	struct work_struct work;
+	struct net_device *rmnet_dev;
+};
+
+/* Returns non-zero when rmnet_rx_handler is the rx_handler currently
+ * installed on real_dev, zero otherwise.
+ */
+static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
+{
+	return rcu_dereference(real_dev->rx_handler) == rmnet_rx_handler;
+}
+
+/* Returns the rmnet_real_dev_info stored as rx_handler_data on real_dev, or
+ * NULL when rmnet_rx_handler is not the handler installed on real_dev.
+ */
+static struct rmnet_real_dev_info*
+__rmnet_get_real_dev_info(const struct net_device *real_dev)
+{
+	if (!rmnet_is_real_dev_registered(real_dev))
+		return NULL;
+
+	return (struct rmnet_real_dev_info *)
+		rcu_dereference(real_dev->rx_handler_data);
+}
+
+/* Looks up the endpoint addressed by (dev, config_id).
+ *
+ * A device that is not registered with rmnet is resolved through
+ * rmnet_vnd_get_endpoint(). For a registered real device,
+ * RMNET_LOCAL_LOGICAL_ENDPOINT selects local_ep and any other config_id
+ * indexes muxed_ep[]; config_id is not range-checked here.
+ */
+static struct rmnet_endpoint*
+rmnet_get_endpoint(struct net_device *dev, int config_id)
+{
+	struct rmnet_real_dev_info *info;
+
+	if (!rmnet_is_real_dev_registered(dev))
+		return rmnet_vnd_get_endpoint(dev);
+
+	info = __rmnet_get_real_dev_info(dev);
+	if (!info)
+		return NULL;
+
+	if (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT)
+		return &info->local_ep;
+
+	return &info->muxed_ep[config_id];
+}
+
+/* Detaches rmnet from real_dev: removes the rx handler, frees the per-device
+ * rmnet_real_dev_info and drops the reference taken at registration.
+ *
+ * Must be called with RTNL held. Returns 0 on success, or -EINVAL when
+ * real_dev is not registered with rmnet or still has lower devices linked.
+ */
+static int rmnet_unregister_real_device(struct net_device *real_dev)
+{
+	struct rmnet_real_dev_info *rdinfo;
+	struct list_head *iter;
+
+	ASSERT_RTNL();
+
+	if (!rmnet_is_real_dev_registered(real_dev))
+		return -EINVAL;
+
+	/* netdev_lower_get_next() reads *iter, so the cursor has to start at
+	 * the first entry of the lower adjacency list.
+	 */
+	iter = real_dev->adj_list.lower.next;
+	if (netdev_lower_get_next(real_dev, &iter))
+		return -EINVAL;
+
+	rdinfo = __rmnet_get_real_dev_info(real_dev);
+
+	/* Remove the handler before freeing its data so that a receive path
+	 * still inside rmnet_rx_handler() cannot touch freed memory.
+	 */
+	netdev_rx_handler_unregister(real_dev);
+	kfree(rdinfo);
+
+	/* release reference on real_dev */
+	dev_put(real_dev);
+
+	netdev_dbg(real_dev, "Removed from rmnet\n");
+	return 0;
+}
+
+/* Attaches rmnet to real_dev: allocates a zeroed rmnet_real_dev_info,
+ * installs rmnet_rx_handler with it and takes a reference on real_dev.
+ *
+ * Must be called with RTNL held. Returns 0 on success, -EINVAL when real_dev
+ * is already registered with rmnet, -ENOMEM when the allocation fails and
+ * -EBUSY when the rx handler cannot be installed.
+ */
+static int rmnet_register_real_device(struct net_device *real_dev)
+{
+	struct rmnet_real_dev_info *info;
+
+	ASSERT_RTNL();
+
+	if (rmnet_is_real_dev_registered(real_dev))
+		return -EINVAL;
+
+	info = kzalloc(sizeof(*info), GFP_ATOMIC);
+	if (!info)
+		return -ENOMEM;
+
+	info->dev = real_dev;
+	if (netdev_rx_handler_register(real_dev, rmnet_rx_handler, info)) {
+		kfree(info);
+		return -EBUSY;
+	}
+
+	/* hold on to real dev for MAP data */
+	dev_hold(real_dev);
+
+	netdev_dbg(real_dev, "registered with rmnet\n");
+	return 0;
+}
+
+/* Records idf as the ingress data format of the rmnet-registered device dev.
+ * RTNL must be held. Returns 0, or -EINVAL when dev is not registered with
+ * rmnet.
+ */
+static int rmnet_set_ingress_data_format(struct net_device *dev, u32 idf)
+{
+	struct rmnet_real_dev_info *info;
+
+	ASSERT_RTNL();
+
+	netdev_dbg(dev, "Ingress format 0x%08X\n", idf);
+
+	info = __rmnet_get_real_dev_info(dev);
+	if (info)
+		info->ingress_data_format = idf;
+
+	return info ? 0 : -EINVAL;
+}
+
+/* Records edf as the egress data format of the rmnet-registered device dev.
+ * agg_size and agg_count are only logged. RTNL must be held.
+ * Returns 0, or -EINVAL when dev is not registered with rmnet.
+ */
+static int rmnet_set_egress_data_format(struct net_device *dev, u32 edf,
+					u16 agg_size, u16 agg_count)
+{
+	struct rmnet_real_dev_info *info;
+
+	ASSERT_RTNL();
+
+	netdev_dbg(dev, "Egress format 0x%08X agg size %d cnt %d\n",
+		   edf, agg_size, agg_count);
+
+	info = __rmnet_get_real_dev_info(dev);
+	if (info)
+		info->egress_data_format = edf;
+
+	return info ? 0 : -EINVAL;
+}
+
+/* Copies *ep into the endpoint addressed by (dev, config_id) and then sets
+ * its mux_id: 0 for RMNET_LOCAL_LOGICAL_ENDPOINT, config_id otherwise.
+ * RTNL must be held. Returns 0, or -EINVAL when no endpoint is found.
+ */
+static int __rmnet_set_endpoint_config(struct net_device *dev, int config_id,
+				       struct rmnet_endpoint *ep)
+{
+	struct rmnet_endpoint *target;
+
+	ASSERT_RTNL();
+
+	target = rmnet_get_endpoint(dev, config_id);
+	if (!target)
+		return -EINVAL;
+
+	memcpy(target, ep, sizeof(*target));
+	target->mux_id = (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT) ?
+			 0 : config_id;
+
+	return 0;
+}
+
+/* Zeroes the endpoint addressed by (dev, config_id). RTNL must be held.
+ * Returns 0, or -EINVAL when no endpoint is found.
+ */
+static int __rmnet_unset_endpoint_config(struct net_device *dev, int config_id)
+{
+	struct rmnet_endpoint *target;
+
+	ASSERT_RTNL();
+
+	target = rmnet_get_endpoint(dev, config_id);
+	if (target)
+		memset(target, 0, sizeof(*target));
+
+	return target ? 0 : -EINVAL;
+}
+
+/* Validates config_id and stores an endpoint with the given rmnet_mode and
+ * egress_dev for (dev, config_id).
+ *
+ * Accepted ids are RMNET_LOCAL_LOGICAL_ENDPOINT up to
+ * RMNET_MAX_LOGICAL_EP - 1; anything else yields -EINVAL.
+ */
+static int rmnet_set_endpoint_config(struct net_device *dev,
+				     int config_id, u8 rmnet_mode,
+				     struct net_device *egress_dev)
+{
+	struct rmnet_endpoint new_ep;
+
+	netdev_dbg(dev, "id %d mode %d dev %s\n",
+		   config_id, rmnet_mode, egress_dev->name);
+
+	if (config_id >= RMNET_MAX_LOGICAL_EP ||
+	    config_id < RMNET_LOCAL_LOGICAL_ENDPOINT)
+		return -EINVAL;
+
+	memset(&new_ep, 0, sizeof(new_ep));
+	new_ep.egress_dev = egress_dev;
+	new_ep.rmnet_mode = rmnet_mode;
+
+	return __rmnet_set_endpoint_config(dev, config_id, &new_ep);
+}
+
+/* Validates config_id and clears the endpoint stored for (dev, config_id).
+ * Returns -EINVAL for an id outside RMNET_LOCAL_LOGICAL_ENDPOINT ..
+ * RMNET_MAX_LOGICAL_EP - 1.
+ */
+static int rmnet_unset_endpoint_config(struct net_device *dev, int config_id)
+{
+	netdev_dbg(dev, "id %d\n", config_id);
+
+	if (config_id >= RMNET_MAX_LOGICAL_EP ||
+	    config_id < RMNET_LOCAL_LOGICAL_ENDPOINT)
+		return -EINVAL;
+
+	return __rmnet_unset_endpoint_config(dev, config_id);
+}
+
+/* rtnl_link_ops->newlink: creates the rmnet device dev on top of the real
+ * device named by IFLA_LINK, using the mux id carried in IFLA_VLAN_ID.
+ *
+ * The real device is registered with rmnet when it is not registered yet.
+ * Returns 0 on success, -ENODEV when no real device can be resolved, -EINVAL
+ * when the mux id attribute is missing or the rmnet device cannot be set up,
+ * or the error reported by rmnet_register_real_device().
+ */
+static int rmnet_newlink(struct net *src_net, struct net_device *dev,
+			 struct nlattr *tb[], struct nlattr *data[],
+			 struct netlink_ext_ack *extack)
+{
+	int ingress_format = RMNET_INGRESS_FORMAT_DEMUXING |
+			     RMNET_INGRESS_FORMAT_DEAGGREGATION |
+			     RMNET_INGRESS_FORMAT_MAP;
+	int egress_format = RMNET_EGRESS_FORMAT_MUXING |
+			    RMNET_EGRESS_FORMAT_MAP;
+	struct net_device *real_dev;
+	int mode = RMNET_EPMODE_VND;
+	bool registered_here = false;
+	u16 mux_id;
+	int err;
+
+	/* Without IFLA_LINK there is no real device to attach to, and
+	 * nla_get_u32() must not be handed a NULL attribute.
+	 */
+	if (!dev || !tb[IFLA_LINK])
+		return -ENODEV;
+
+	real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+	if (!real_dev)
+		return -ENODEV;
+
+	if (!data || !data[IFLA_VLAN_ID])
+		return -EINVAL;
+
+	mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+
+	/* Only the first rmnet device on real_dev has to register it. If that
+	 * fails real_dev carries no rmnet state, so nothing below can work.
+	 */
+	if (!rmnet_is_real_dev_registered(real_dev)) {
+		err = rmnet_register_real_device(real_dev);
+		if (err)
+			return err;
+		registered_here = true;
+	}
+
+	if (rmnet_vnd_newlink(real_dev, mux_id, dev)) {
+		/* undo the registration done above */
+		if (registered_here)
+			rmnet_unregister_real_device(real_dev);
+		return -EINVAL;
+	}
+
+	/* NOTE(review): the results of the calls below are not checked;
+	 * unwinding them would also require unregistering dev.
+	 */
+	rmnet_set_egress_data_format(real_dev, egress_format, 0, 0);
+	rmnet_set_ingress_data_format(real_dev, ingress_format);
+	rmnet_set_endpoint_config(real_dev, mux_id, mode, dev);
+	rmnet_set_endpoint_config(dev, mux_id, mode, real_dev);
+	netdev_master_upper_dev_link(dev, real_dev, NULL, NULL);
+	return 0;
+}
+
+/* rtnl_link_ops->dellink: tears down the rmnet device dev.
+ *
+ * When dev still has a master upper device (the real device it was linked to
+ * in rmnet_newlink()), both endpoint configurations are cleared, the link is
+ * removed and unregistration of the real device is attempted. dev itself is
+ * always queued for unregistration on head.
+ */
+static void rmnet_delink(struct net_device *dev, struct list_head *head)
+{
+	struct net_device *real_dev;
+	int mux_id;
+
+	/* NOTE(review): the _rcu lookup is used although rmnet_free_later()
+	 * reaches this with only rtnl_lock() held - confirm that is sufficient.
+	 */
+	real_dev = netdev_master_upper_dev_get_rcu(dev);
+	if (real_dev) {
+		mux_id = rmnet_vnd_get_mux(real_dev, dev);
+
+		/* rmnet_vnd_get_mux() gives mux_id + 1,
+		 * so subtract 1 to get the correct mux_id
+		 */
+		mux_id--;
+		rmnet_unset_endpoint_config(real_dev, mux_id);
+		rmnet_unset_endpoint_config(dev, mux_id);
+		rmnet_vnd_remove_ref_dev(real_dev, mux_id);
+		netdev_upper_dev_unlink(dev, real_dev);
+		/* Returns -EINVAL and does nothing while other devices are
+		 * still linked below real_dev; the result is ignored here.
+		 */
+		rmnet_unregister_real_device(real_dev);
+	}
+
+	unregister_netdevice_queue(dev, head);
+}
+
+/* Work handler scheduled by rmnet_dev_walk(): delinks the queued rmnet
+ * device under RTNL and then frees the request.
+ */
+static void rmnet_free_later(struct work_struct *work)
+{
+	struct rmnet_free_work *req =
+		container_of(work, struct rmnet_free_work, work);
+
+	rtnl_lock();
+	rmnet_delink(req->rmnet_dev, NULL);
+	rtnl_unlock();
+
+	kfree(req);
+}
+
+/* netdev_walk_all_lower_dev() callback: unlinks lower_dev from the real
+ * device passed in data and schedules rmnet_free_later() for lower_dev.
+ *
+ * Returns 0, or -ENOMEM when the work item cannot be allocated; in that case
+ * lower_dev has already been unlinked.
+ */
+static int rmnet_dev_walk(struct net_device *lower_dev, void *data)
+{
+	struct net_device *upper = data;
+	struct rmnet_free_work *req;
+
+	netdev_upper_dev_unlink(lower_dev, upper);
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	req->rmnet_dev = lower_dev;
+	INIT_WORK(&req->work, rmnet_free_later);
+	schedule_work(&req->work);
+
+	return 0;
+}
+
+/* For a device registered with rmnet: runs rmnet_dev_walk() over all of its
+ * lower devices and then unregisters the device from rmnet. Does nothing for
+ * any other device.
+ */
+static void rmnet_force_unassociate_device(struct net_device *dev)
+{
+	if (rmnet_is_real_dev_registered(dev)) {
+		netdev_walk_all_lower_dev(dev, rmnet_dev_walk, dev);
+		rmnet_unregister_real_device(dev);
+	}
+}
+
+/* Netdevice notifier callback: on NETDEV_UNREGISTER and
+ * NETDEV_UNREGISTER_FINAL the affected device is forcibly dissociated from
+ * rmnet. Always returns NOTIFY_DONE.
+ */
+static int rmnet_config_notify_cb(struct notifier_block *nb,
+				  unsigned long event, void *data)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(data);
+
+	if (!dev)
+		return NOTIFY_DONE;
+
+	if (event == NETDEV_UNREGISTER_FINAL || event == NETDEV_UNREGISTER) {
+		netdev_dbg(dev, "Kernel unregister\n");
+		rmnet_force_unassociate_device(dev);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier registered in rmnet_init() and removed in rmnet_exit() */
+static struct notifier_block rmnet_dev_notifier __read_mostly = {
+	.notifier_call = rmnet_config_notify_cb,
+};
+
+/* rtnl_link_ops->validate: requires IFLA_VLAN_ID and accepts mux ids from 1
+ * to RMNET_MAX_LOGICAL_EP - 1.
+ *
+ * Returns 0 when valid, -EINVAL when the attribute is missing and -ERANGE
+ * when the mux id is out of range.
+ */
+static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
+			       struct netlink_ext_ack *extack)
+{
+	u16 mux_id;
+
+	if (!data || !data[IFLA_VLAN_ID])
+		return -EINVAL;
+
+	mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+	if (mux_id == 0 || mux_id >= RMNET_MAX_LOGICAL_EP)
+		return -ERANGE;
+
+	return 0;
+}
+
+/* rtnl_link_ops->get_size: netlink space needed for the one 2-byte
+ * attribute of an rmnet link.
+ */
+static size_t rmnet_get_size(const struct net_device *dev)
+{
+	return nla_total_size(2); /* IFLA_VLAN_ID */
+}
+
+/* rtnetlink operations for links of kind "rmnet". The mux id travels in
+ * IFLA_VLAN_ID, hence the VLAN attribute space in maxtype.
+ */
+struct rtnl_link_ops rmnet_link_ops __read_mostly = {
+	.kind		= "rmnet",
+	.maxtype	= __IFLA_VLAN_MAX,
+	.priv_size	= sizeof(struct rmnet_priv),
+	.setup		= rmnet_vnd_setup,
+	.validate	= rmnet_rtnl_validate,
+	.newlink	= rmnet_newlink,
+	.dellink	= rmnet_delink,
+	.get_size	= rmnet_get_size,
+};
+
+/* Non-static wrapper around __rmnet_get_real_dev_info(): returns the rmnet
+ * state attached to real_dev, or NULL when rmnet_rx_handler is not the
+ * handler installed on it. Callers have to check for NULL.
+ */
+struct rmnet_real_dev_info*
+rmnet_get_real_dev_info(struct net_device *real_dev)
+{
+	return __rmnet_get_real_dev_info(real_dev);
+}
+
+/* Startup/Shutdown */
+
+/* Module init: registers the netdevice notifier, then the "rmnet" link ops.
+ * The notifier is removed again when the link ops cannot be registered.
+ * Returns 0 or the error of the step that failed.
+ */
+static int __init rmnet_init(void)
+{
+	int rc = register_netdevice_notifier(&rmnet_dev_notifier);
+
+	if (rc)
+		return rc;
+
+	rc = rtnl_link_register(&rmnet_link_ops);
+	if (rc)
+		unregister_netdevice_notifier(&rmnet_dev_notifier);
+
+	return rc;
+}
+
+/* Module exit: removes the notifier and the link ops registered by
+ * rmnet_init().
+ */
+static void __exit rmnet_exit(void)
+{
+	unregister_netdevice_notifier(&rmnet_dev_notifier);
+	rtnl_link_unregister(&rmnet_link_ops);
+}
+
+module_init(rmnet_init)
+module_exit(rmnet_exit)
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
new file mode 100644
index 0000000..8f5a073
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data configuration engine
+ *
+ */
+
+#include <linux/skbuff.h>
+
+#ifndef _RMNET_CONFIG_H_
+#define _RMNET_CONFIG_H_
+
+#define RMNET_MAX_LOGICAL_EP 255
+#define RMNET_MAX_VND        32
+
+/* Information about the next device to deliver the packet to.
+ * Exact usage of this parameter depends on the rmnet_mode.
+ *
+ * rmnet_mode is one of the RMNET_EPMODE_* values, mux_id is the id written
+ * into the MAP header on egress and egress_dev is the device a packet is
+ * handed to. An all-zero endpoint is unconfigured.
+ */
+struct rmnet_endpoint {
+	u8 rmnet_mode;
+	u8 mux_id;
+	struct net_device *egress_dev;
+};
+
+/* One instance of this structure is instantiated for each real_dev associated
+ * with rmnet.
+ *
+ * It is stored as the rx_handler_data of dev. muxed_ep[] is indexed by mux
+ * id; local_ep is the endpoint selected by the local logical endpoint id.
+ * The two format fields hold RMNET_INGRESS_* and RMNET_EGRESS_FORMAT_* bits.
+ */
+struct rmnet_real_dev_info {
+	struct net_device *dev;
+	struct rmnet_endpoint local_ep;
+	struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP];
+	u32 ingress_data_format;
+	u32 egress_data_format;
+	struct net_device *rmnet_devices[RMNET_MAX_VND];
+};
+
+extern struct rtnl_link_ops rmnet_link_ops;
+
+/* Private area of an rmnet device (rtnl_link_ops.priv_size); local_ep is the
+ * endpoint rmnet_vnd_start_xmit() transmits through.
+ */
+struct rmnet_priv {
+	struct rmnet_endpoint local_ep;
+};
+
+struct rmnet_real_dev_info*
+rmnet_get_real_dev_info(struct net_device *real_dev);
+
+#endif /* _RMNET_CONFIG_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
new file mode 100644
index 0000000..bef49ed
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -0,0 +1,271 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data ingress/egress handler
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <linux/netdev_features.h>
+#include "rmnet_private.h"
+#include "rmnet_config.h"
+#include "rmnet_vnd.h"
+#include "rmnet_map.h"
+#include "rmnet_handlers.h"
+
+#define RMNET_IP_VERSION_4 0x40
+#define RMNET_IP_VERSION_6 0x60
+
+/* Helper Functions */
+
+/* Sets skb->protocol from the upper nibble of the first payload byte:
+ * ETH_P_IP for 0x40, ETH_P_IPV6 for 0x60 and ETH_P_MAP for anything else.
+ */
+static void rmnet_set_skb_proto(struct sk_buff *skb)
+{
+	u8 version = skb->data[0] & 0xF0;
+
+	if (version == RMNET_IP_VERSION_4)
+		skb->protocol = htons(ETH_P_IP);
+	else if (version == RMNET_IP_VERSION_6)
+		skb->protocol = htons(ETH_P_IPV6);
+	else
+		skb->protocol = htons(ETH_P_MAP);
+}
+
+/* Generic handler */
+
+/* Bridge mode delivery: forwards skb through the egress handler when the
+ * endpoint has an egress device, frees it otherwise. Always reports the skb
+ * as consumed.
+ */
+static rx_handler_result_t
+rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep)
+{
+	if (ep->egress_dev)
+		rmnet_egress_handler(skb, ep);
+	else
+		kfree_skb(skb);
+
+	return RX_HANDLER_CONSUMED;
+}
+
+/* Delivers skb according to the endpoint mode.
+ *
+ * RMNET_EPMODE_NONE leaves skb untouched and returns RX_HANDLER_PASS, so the
+ * caller still owns it. Every other mode consumes skb and returns
+ * RX_HANDLER_CONSUMED.
+ */
+static rx_handler_result_t
+rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep)
+{
+	switch (ep->rmnet_mode) {
+	case RMNET_EPMODE_NONE:
+		return RX_HANDLER_PASS;
+
+	case RMNET_EPMODE_BRIDGE:
+		return rmnet_bridge_handler(skb, ep);
+
+	case RMNET_EPMODE_VND:
+		/* skb->data is the start of the packet at this point; all
+		 * header offsets are set to it before the receive.
+		 */
+		skb_reset_transport_header(skb);
+		skb_reset_network_header(skb);
+		rmnet_vnd_rx_fixup(skb, skb->dev);
+
+		skb->pkt_type = PACKET_HOST;
+		skb_set_mac_header(skb, 0);
+		netif_receive_skb(skb);
+		return RX_HANDLER_CONSUMED;
+
+	default:
+		/* unknown mode: drop */
+		kfree_skb(skb);
+		return RX_HANDLER_CONSUMED;
+	}
+}
+
+/* Delivers a non-MAP frame through the local endpoint of rdinfo, with
+ * skb->dev set to that endpoint's egress device. A NULL rdinfo drops the
+ * frame.
+ */
+static rx_handler_result_t
+rmnet_ingress_deliver_packet(struct sk_buff *skb,
+			     struct rmnet_real_dev_info *rdinfo)
+{
+	if (rdinfo) {
+		skb->dev = rdinfo->local_ep.egress_dev;
+		return rmnet_deliver_skb(skb, &rdinfo->local_ep);
+	}
+
+	kfree_skb(skb);
+	return RX_HANDLER_CONSUMED;
+}
+
+/* MAP handler */
+
+/* Handles one MAP frame whose header sits at skb->data.
+ *
+ * Command frames (CD bit set) go to rmnet_map_command() when MAP commands are
+ * enabled in the ingress format and are dropped otherwise. Data frames are
+ * stripped of the MAP header and padding and delivered through the endpoint
+ * selected by the header's mux id. Returns the rx handler result of the
+ * delivery; RX_HANDLER_PASS means skb was not consumed.
+ */
+static rx_handler_result_t
+__rmnet_map_ingress_handler(struct sk_buff *skb,
+			    struct rmnet_real_dev_info *rdinfo)
+{
+	struct rmnet_endpoint *ep;
+	u8 mux_id;
+	u16 len;
+
+	if (RMNET_MAP_GET_CD_BIT(skb)) {
+		if (rdinfo->ingress_data_format
+		    & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
+			return rmnet_map_command(skb, rdinfo);
+
+		kfree_skb(skb);
+		return RX_HANDLER_CONSUMED;
+	}
+
+	mux_id = RMNET_MAP_GET_MUX_ID(skb);
+	/* NOTE(review): a pad length larger than the packet length wraps len
+	 * around - confirm the hardware never sends such a header.
+	 */
+	len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb);
+
+	if (mux_id >= RMNET_MAX_LOGICAL_EP) {
+		kfree_skb(skb);
+		return RX_HANDLER_CONSUMED;
+	}
+
+	ep = &rdinfo->muxed_ep[mux_id];
+
+	/* NOTE(review): egress_dev is NULL for an endpoint that was never
+	 * configured - confirm skb->dev == NULL is acceptable downstream.
+	 */
+	if (rdinfo->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING)
+		skb->dev = ep->egress_dev;
+
+	/* Subtract MAP header */
+	skb_pull(skb, sizeof(struct rmnet_map_header));
+	skb_trim(skb, len);
+	rmnet_set_skb_proto(skb);
+	return rmnet_deliver_skb(skb, ep);
+}
+
+/* Ingress path for a real device in MAP format.
+ *
+ * With deaggregation enabled, skb is split into one new skb per MAP frame,
+ * each frame is handled on its own and skb is released afterwards; the result
+ * is always RX_HANDLER_CONSUMED. Without deaggregation skb is handled as a
+ * single MAP frame and that frame's result is returned.
+ */
+static rx_handler_result_t
+rmnet_map_ingress_handler(struct sk_buff *skb,
+			  struct rmnet_real_dev_info *rdinfo)
+{
+	struct sk_buff *skbn;
+
+	if (!(rdinfo->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION))
+		return __rmnet_map_ingress_handler(skb, rdinfo);
+
+	while ((skbn = rmnet_map_deaggregate(skb, rdinfo)) != NULL) {
+		/* A frame that was not consumed (RX_HANDLER_PASS) cannot be
+		 * handed back to the stack from here and nothing else refers
+		 * to it, so it has to be freed.
+		 */
+		if (__rmnet_map_ingress_handler(skbn, rdinfo) !=
+		    RX_HANDLER_CONSUMED)
+			kfree_skb(skbn);
+	}
+
+	consume_skb(skb);
+	return RX_HANDLER_CONSUMED;
+}
+
+/* Prepends a MAP header to skb for transmission on the real device.
+ *
+ * Returns RMNET_MAP_SUCCESS when the header was added; skb->protocol is then
+ * ETH_P_MAP and, with muxing enabled, the header carries the endpoint's mux
+ * id. Returns RMNET_MAP_CONSUMED when headroom could not be obtained or the
+ * header could not be added; skb has been freed in that case and the caller
+ * must not touch it again.
+ */
+static int rmnet_map_egress_handler(struct sk_buff *skb,
+				    struct rmnet_real_dev_info *rdinfo,
+				    struct rmnet_endpoint *ep,
+				    struct net_device *orig_dev)
+{
+	int required_headroom, additional_header_len;
+	struct rmnet_map_header *map_header;
+
+	additional_header_len = 0;
+	required_headroom = sizeof(struct rmnet_map_header);
+
+	/* GFP_ATOMIC: reached from ndo_start_xmit and from the rx handler */
+	if (skb_headroom(skb) < required_headroom &&
+	    pskb_expand_head(skb, required_headroom, 0, GFP_ATOMIC))
+		goto drop;
+
+	map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
+	if (!map_header)
+		goto drop;
+
+	if (rdinfo->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) {
+		if (ep->mux_id == 0xff)
+			map_header->mux_id = 0;
+		else
+			map_header->mux_id = ep->mux_id;
+	}
+
+	skb->protocol = htons(ETH_P_MAP);
+
+	return RMNET_MAP_SUCCESS;
+
+drop:
+	/* rmnet_egress_handler() returns without freeing on CONSUMED */
+	kfree_skb(skb);
+	return RMNET_MAP_CONSUMED;
+}
+
+/* Ingress / Egress Entry Points */
+
+/* Processes packet as per ingress data format for receiving device. Logical
+ * endpoint is determined from packet inspection. Packet is then sent to the
+ * egress device listed in the logical endpoint configuration.
+ *
+ * Returns RX_HANDLER_CONSUMED when the skb was taken (delivered or dropped)
+ * and RX_HANDLER_PASS when it is left to the stack.
+ */
+rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
+{
+	struct rmnet_real_dev_info *rdinfo;
+	struct sk_buff *skb = *pskb;
+	struct net_device *dev;
+	int rc;
+
+	if (!skb)
+		return RX_HANDLER_CONSUMED;
+
+	dev = skb->dev;
+	rdinfo = rmnet_get_real_dev_info(dev);
+
+	/* rmnet_get_real_dev_info() yields NULL once rmnet_rx_handler is no
+	 * longer the installed handler. Drop the frame, as
+	 * rmnet_ingress_deliver_packet() does for a missing rdinfo.
+	 */
+	if (!rdinfo) {
+		kfree_skb(skb);
+		return RX_HANDLER_CONSUMED;
+	}
+
+	if (rdinfo->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) {
+		rc = rmnet_map_ingress_handler(skb, rdinfo);
+	} else {
+		switch (ntohs(skb->protocol)) {
+		case ETH_P_MAP:
+			/* raw MAP frames are only forwarded in bridge mode */
+			if (rdinfo->local_ep.rmnet_mode ==
+				RMNET_EPMODE_BRIDGE) {
+				rc = rmnet_ingress_deliver_packet(skb, rdinfo);
+			} else {
+				kfree_skb(skb);
+				rc = RX_HANDLER_CONSUMED;
+			}
+			break;
+
+		case ETH_P_IP:
+		case ETH_P_IPV6:
+			rc = rmnet_ingress_deliver_packet(skb, rdinfo);
+			break;
+
+		default:
+			rc = RX_HANDLER_PASS;
+		}
+	}
+
+	return rc;
+}
+
+/* Egress entry point: retargets skb at ep->egress_dev, lets the MAP egress
+ * handler add its header when that device's egress format asks for MAP and
+ * queues skb for transmission on the egress device. The caller must not use
+ * skb after this call.
+ */
+void rmnet_egress_handler(struct sk_buff *skb,
+			  struct rmnet_endpoint *ep)
+{
+	struct net_device *orig_dev = skb->dev;
+	struct rmnet_real_dev_info *rdinfo;
+	int map_rc;
+
+	skb->dev = ep->egress_dev;
+
+	rdinfo = rmnet_get_real_dev_info(skb->dev);
+	if (!rdinfo)
+		goto drop;
+
+	if (rdinfo->egress_data_format & RMNET_EGRESS_FORMAT_MAP) {
+		map_rc = rmnet_map_egress_handler(skb, rdinfo, ep, orig_dev);
+
+		/* skb is no longer ours once the MAP handler consumed it */
+		if (map_rc == RMNET_MAP_CONSUMED)
+			return;
+
+		if (map_rc != RMNET_MAP_SUCCESS)
+			goto drop;
+	}
+
+	/* tx statistics are accounted on the device skb came from */
+	if (ep->rmnet_mode == RMNET_EPMODE_VND)
+		rmnet_vnd_tx_fixup(skb, orig_dev);
+
+	dev_queue_xmit(skb);
+	return;
+
+drop:
+	kfree_skb(skb);
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
new file mode 100644
index 0000000..f2638cf
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2013, 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data ingress/egress handler
+ *
+ */
+
+#ifndef _RMNET_HANDLERS_H_
+#define _RMNET_HANDLERS_H_
+
+#include "rmnet_config.h"
+
+void rmnet_egress_handler(struct sk_buff *skb,
+			  struct rmnet_endpoint *ep);
+
+rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb);
+
+#endif /* _RMNET_HANDLERS_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
new file mode 100644
index 0000000..2aabad2
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -0,0 +1,88 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _RMNET_MAP_H_
+#define _RMNET_MAP_H_
+
+/* MAP control command; RMNET_MAP_GET_CMD_START() locates it directly behind
+ * the MAP header. command_name is matched against enum rmnet_map_commands and
+ * cmd_type is set to one of the RMNET_MAP_COMMAND_* reply types when the
+ * command is sent back. flow_control_seq_num and qos_id are converted with
+ * ntohs()/ntohl() by the reader, i.e. they are in network byte order.
+ * NOTE(review): the bitfield layout is compiler/endian dependent - confirm
+ * for big-endian targets.
+ */
+struct rmnet_map_control_command {
+	u8  command_name;
+	u8  cmd_type:2;
+	u8  reserved:6;
+	u16 reserved2;
+	u32 transaction_id;
+	union {
+		struct {
+			u16 ip_family:2;
+			u16 reserved:14;
+			u16 flow_control_seq_num;
+			u32 qos_id;
+		} flow_control;
+		u8 data[0];
+	};
+}  __aligned(1);
+
+/* Result codes of the MAP egress path. rmnet_egress_handler() continues on
+ * RMNET_MAP_SUCCESS, returns without touching the skb on RMNET_MAP_CONSUMED
+ * and frees the skb for every other value.
+ */
+enum rmnet_map_results {
+	RMNET_MAP_SUCCESS,
+	RMNET_MAP_CONSUMED,
+	RMNET_MAP_GENERAL_FAILURE,
+	RMNET_MAP_NOT_ENABLED,
+	RMNET_MAP_FAILED_AGGREGATION,
+	RMNET_MAP_FAILED_MUX
+};
+
+/* Values of rmnet_map_control_command.command_name. Only the two flow
+ * control commands are acted upon by rmnet_map_command().
+ */
+enum rmnet_map_commands {
+	RMNET_MAP_COMMAND_NONE,
+	RMNET_MAP_COMMAND_FLOW_DISABLE,
+	RMNET_MAP_COMMAND_FLOW_ENABLE,
+	/* These should always be the last 2 elements */
+	RMNET_MAP_COMMAND_UNKNOWN,
+	RMNET_MAP_COMMAND_ENUM_LENGTH
+};
+
+/* MAP header found at the start of every MAP frame. pkt_len is in network
+ * byte order and counts the bytes after the header, padding included;
+ * pad_len is the number of padding bytes at the end of the frame; cd_bit
+ * marks a command frame.
+ * NOTE(review): the bitfield layout is compiler/endian dependent - confirm
+ * for big-endian targets.
+ */
+struct rmnet_map_header {
+	u8  pad_len:6;
+	u8  reserved_bit:1;
+	u8  cd_bit:1;
+	u8  mux_id;
+	u16 pkt_len;
+}  __aligned(1);
+
+/* Accessors for a MAP frame whose header starts at Y->data, where Y is a
+ * struct sk_buff pointer. None of them checks that the skb actually holds a
+ * complete header. RMNET_MAP_GET_LENGTH() returns pkt_len in host byte order.
+ */
+#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
+				 (Y)->data)->mux_id)
+#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
+				(Y)->data)->cd_bit)
+#define RMNET_MAP_GET_PAD(Y) (((struct rmnet_map_header *) \
+				(Y)->data)->pad_len)
+#define RMNET_MAP_GET_CMD_START(Y) ((struct rmnet_map_control_command *) \
+				    ((Y)->data + \
+				      sizeof(struct rmnet_map_header)))
+#define RMNET_MAP_GET_LENGTH(Y) (ntohs(((struct rmnet_map_header *) \
+					(Y)->data)->pkt_len))
+
+#define RMNET_MAP_COMMAND_REQUEST     0
+#define RMNET_MAP_COMMAND_ACK         1
+#define RMNET_MAP_COMMAND_UNSUPPORTED 2
+#define RMNET_MAP_COMMAND_INVALID     3
+
+#define RMNET_MAP_NO_PAD_BYTES        0
+#define RMNET_MAP_ADD_PAD_BYTES       1
+
+u8 rmnet_map_demultiplex(struct sk_buff *skb);
+struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
+				      struct rmnet_real_dev_info *rdinfo);
+
+struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
+						  int hdrlen, int pad);
+rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
+				      struct rmnet_real_dev_info *rdinfo);
+
+#endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
new file mode 100644
index 0000000..ccded40
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -0,0 +1,107 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/netdevice.h>
+#include "rmnet_config.h"
+#include "rmnet_map.h"
+#include "rmnet_private.h"
+#include "rmnet_vnd.h"
+
+/* Applies a MAP flow control command to the device bound to the mux id found
+ * in the MAP header of skb.
+ *
+ * Returns RMNET_MAP_COMMAND_ACK when flow control was applied; skb is then
+ * left untouched for the caller. In every other case (mux id out of range,
+ * no device configured for it, or the device refusing the request) skb is
+ * freed here and RMNET_MAP_COMMAND_UNSUPPORTED is returned.
+ */
+static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
+				    struct rmnet_real_dev_info *rdinfo,
+				    int enable)
+{
+	struct net_device *vnd;
+	u8 mux_id;
+
+	mux_id = RMNET_MAP_GET_MUX_ID(skb);
+	if (mux_id >= RMNET_MAX_LOGICAL_EP)
+		goto unsupported;
+
+	/* An endpoint that was never configured has no egress device */
+	vnd = rdinfo->muxed_ep[mux_id].egress_dev;
+	if (!vnd)
+		goto unsupported;
+
+	/* The ip family, sequence number and qos id carried by the command
+	 * are ignored. User space does not support creating dedicated flows
+	 * for the 2 protocols
+	 */
+	if (rmnet_vnd_do_flow_control(vnd, enable))
+		goto unsupported;
+
+	return RMNET_MAP_COMMAND_ACK;
+
+unsupported:
+	kfree_skb(skb);
+	return RMNET_MAP_COMMAND_UNSUPPORTED;
+}
+
+/* Turns the received command frame in skb into a reply of the given type and
+ * transmits it on skb->dev. skb is consumed in all cases.
+ */
+static void rmnet_map_send_ack(struct sk_buff *skb,
+			       unsigned char type,
+			       struct rmnet_real_dev_info *rdinfo)
+{
+	struct rmnet_map_control_command *cmd;
+	int xmit_status;
+
+	skb->protocol = htons(ETH_P_MAP);
+
+	cmd = RMNET_MAP_GET_CMD_START(skb);
+	cmd->cmd_type = type & 0x03;
+
+	/* ndo_start_xmit() is called directly, so take the tx lock here */
+	netif_tx_lock(skb->dev);
+	xmit_status = skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev);
+	netif_tx_unlock(skb->dev);
+
+	/* On NETDEV_TX_BUSY the driver did not take the skb; free it here
+	 * because nobody is going to retry the transmission.
+	 */
+	if (xmit_status == NETDEV_TX_BUSY)
+		kfree_skb(skb);
+}
+
+/* Entry point for MAP command frames. The command name selects the handler;
+ * flow enable/disable are the only commands handled, anything else frees the
+ * frame. A handler result of RMNET_MAP_COMMAND_ACK sends the frame back as an
+ * acknowledgment. Always returns RX_HANDLER_CONSUMED.
+ */
+rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
+				      struct rmnet_real_dev_info *rdinfo)
+{
+	struct rmnet_map_control_command *cmd = RMNET_MAP_GET_CMD_START(skb);
+	unsigned char result;
+
+	switch (cmd->command_name) {
+	case RMNET_MAP_COMMAND_FLOW_DISABLE:
+		result = rmnet_map_do_flow_control(skb, rdinfo, 0);
+		break;
+
+	case RMNET_MAP_COMMAND_FLOW_ENABLE:
+		result = rmnet_map_do_flow_control(skb, rdinfo, 1);
+		break;
+
+	default:
+		result = RMNET_MAP_COMMAND_UNSUPPORTED;
+		kfree_skb(skb);
+		break;
+	}
+
+	if (result == RMNET_MAP_COMMAND_ACK)
+		rmnet_map_send_ack(skb, result, rdinfo);
+
+	return RX_HANDLER_CONSUMED;
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
new file mode 100644
index 0000000..a29c476
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -0,0 +1,105 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data MAP protocol
+ *
+ */
+
+#include <linux/netdevice.h>
+#include "rmnet_config.h"
+#include "rmnet_map.h"
+#include "rmnet_private.h"
+
+#define RMNET_MAP_DEAGGR_SPACING  64
+#define RMNET_MAP_DEAGGR_HEADROOM (RMNET_MAP_DEAGGR_SPACING / 2)
+
+/* Pushes a zeroed MAP header in front of skb->data and fills in the length.
+ *
+ * With pad == RMNET_MAP_NO_PAD_BYTES pkt_len is skb->len - hdrlen. Otherwise
+ * the payload is zero-padded at the tail to a multiple of 4 bytes and both
+ * pkt_len and pad_len account for the padding. Mux ID is left at 0.
+ *
+ * Returns the new header, or NULL when skb lacks the headroom for the header
+ * or the tailroom for the padding.
+ */
+struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
+						  int hdrlen, int pad)
+{
+	const size_t hdr_size = sizeof(struct rmnet_map_header);
+	struct rmnet_map_header *hdr;
+	u32 payload_len, pad_bytes;
+
+	if (skb_headroom(skb) < hdr_size)
+		return NULL;
+
+	payload_len = skb->len - hdrlen;
+	hdr = (struct rmnet_map_header *)skb_push(skb, hdr_size);
+	memset(hdr, 0, hdr_size);
+
+	if (pad == RMNET_MAP_NO_PAD_BYTES) {
+		hdr->pkt_len = htons(payload_len);
+		return hdr;
+	}
+
+	pad_bytes = ALIGN(payload_len, 4) - payload_len;
+	if (pad_bytes) {
+		if (skb_tailroom(skb) < pad_bytes)
+			return NULL;
+
+		memset(skb_put(skb, pad_bytes), 0, pad_bytes);
+	}
+
+	hdr->pkt_len = htons(payload_len + pad_bytes);
+	hdr->pad_len = pad_bytes & 0x3F;
+
+	return hdr;
+}
+
+/* Deaggregates a single packet
+ * A whole new buffer is allocated for each portion of an aggregated frame.
+ * Caller should keep calling deaggregate() on the source skb until NULL is
+ * returned, indicating that there are no more packets to deaggregate. Caller
+ * is responsible for freeing the original skb; this function never frees it.
+ *
+ * NULL is also returned when the remaining data is shorter than a MAP header
+ * or than the length announced by the header, when the header announces an
+ * empty frame, and when no buffer can be allocated.
+ */
+struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
+				      struct rmnet_real_dev_info *rdinfo)
+{
+	struct rmnet_map_header *maph;
+	struct sk_buff *skbn;
+	u32 packet_len;
+
+	/* Nothing left, or not even a complete MAP header to read */
+	if (skb->len < sizeof(struct rmnet_map_header))
+		return NULL;
+
+	maph = (struct rmnet_map_header *)skb->data;
+	packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
+
+	if (((int)skb->len - (int)packet_len) < 0)
+		return NULL;
+
+	/* Some hardware can send us empty frames. Catch them before anything
+	 * is allocated; skb stays with the caller, which releases it.
+	 */
+	if (ntohs(maph->pkt_len) == 0)
+		return NULL;
+
+	skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC);
+	if (!skbn)
+		return NULL;
+
+	skbn->dev = skb->dev;
+	skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM);
+	skb_put(skbn, packet_len);
+	memcpy(skbn->data, skb->data, packet_len);
+	skb_pull(skb, packet_len);
+
+	return skbn;
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
new file mode 100644
index 0000000..ed820b5
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -0,0 +1,45 @@
+/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _RMNET_PRIVATE_H_
+#define _RMNET_PRIVATE_H_
+
+#define RMNET_MAX_VND              32
+#define RMNET_MAX_PACKET_SIZE      16384
+#define RMNET_DFLT_PACKET_SIZE     1500
+#define RMNET_NEEDED_HEADROOM      16
+#define RMNET_TX_QUEUE_LEN         1000
+
+/* Constants */
+/* Bit flags kept in rmnet_real_dev_info.egress_data_format */
+#define RMNET_EGRESS_FORMAT__RESERVED__         BIT(0)
+#define RMNET_EGRESS_FORMAT_MAP                 BIT(1)
+#define RMNET_EGRESS_FORMAT_AGGREGATION         BIT(2)
+#define RMNET_EGRESS_FORMAT_MUXING              BIT(3)
+#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3         BIT(4)
+#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4         BIT(5)
+
+/* Bit flags kept in rmnet_real_dev_info.ingress_data_format */
+#define RMNET_INGRESS_FIX_ETHERNET              BIT(0)
+#define RMNET_INGRESS_FORMAT_MAP                BIT(1)
+#define RMNET_INGRESS_FORMAT_DEAGGREGATION      BIT(2)
+#define RMNET_INGRESS_FORMAT_DEMUXING           BIT(3)
+#define RMNET_INGRESS_FORMAT_MAP_COMMANDS       BIT(4)
+#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3        BIT(5)
+#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4        BIT(6)
+
+/* Pass the frame up the stack with no modifications to skb->dev */
+#define RMNET_EPMODE_NONE (0)
+/* Replace skb->dev to a virtual rmnet device and pass up the stack */
+#define RMNET_EPMODE_VND (1)
+/* Pass the frame directly to another device with dev_queue_xmit() */
+#define RMNET_EPMODE_BRIDGE (2)
+
+#endif /* _RMNET_PRIVATE_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
new file mode 100644
index 0000000..094c5df6
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -0,0 +1,237 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ *
+ * RMNET Data virtual network driver
+ *
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <net/pkt_sched.h>
+#include "rmnet_config.h"
+#include "rmnet_handlers.h"
+#include "rmnet_private.h"
+#include "rmnet_map.h"
+#include "rmnet_vnd.h"
+
+/* RX/TX Fixup */
+
+/* Account one received packet of skb->len bytes in the RX counters of dev. */
+void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev)
+{
+	dev->stats.rx_bytes += skb->len;
+	dev->stats.rx_packets++;
+}
+
+/* Account one transmitted packet of skb->len bytes in the TX counters of dev. */
+void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
+{
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+}
+
+/* Network Device Operations */
+
+/* Transmit entry point. Hands skb to rmnet_egress_handler() when the local
+ * endpoint of dev has an egress device; otherwise the packet is counted as
+ * dropped and freed. Always returns NETDEV_TX_OK.
+ */
+static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb,
+					struct net_device *dev)
+{
+	struct rmnet_priv *priv = netdev_priv(dev);
+
+	if (!priv->local_ep.egress_dev) {
+		dev->stats.tx_dropped++;
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	rmnet_egress_handler(skb, &priv->local_ep);
+	return NETDEV_TX_OK;
+}
+
+/* Set the MTU of rmnet_dev. Values from 0 up to RMNET_MAX_PACKET_SIZE are
+ * accepted; anything else is rejected with -EINVAL.
+ */
+static int rmnet_vnd_change_mtu(struct net_device *rmnet_dev, int new_mtu)
+{
+	if (new_mtu >= 0 && new_mtu <= RMNET_MAX_PACKET_SIZE) {
+		rmnet_dev->mtu = new_mtu;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Callbacks installed as netdev_ops by rmnet_vnd_setup(): transmit and MTU
+ * change are the only operations provided here.
+ */
+static const struct net_device_ops rmnet_vnd_ops = {
+	.ndo_start_xmit = rmnet_vnd_start_xmit,
+	.ndo_change_mtu = rmnet_vnd_change_mtu,
+};
+
+/* Called by kernel whenever a new rmnet<n> device is created. Sets MTU,
+ * flags, ARP type, needed headroom, etc...
+ */
+void rmnet_vnd_setup(struct net_device *rmnet_dev)
+{
+	netdev_dbg(rmnet_dev, "Setting up device %s\n", rmnet_dev->name);
+
+	rmnet_dev->netdev_ops = &rmnet_vnd_ops;
+	rmnet_dev->mtu = RMNET_DFLT_PACKET_SIZE;
+	rmnet_dev->needed_headroom = RMNET_NEEDED_HEADROOM;
+	random_ether_addr(rmnet_dev->dev_addr);
+	rmnet_dev->tx_queue_len = RMNET_TX_QUEUE_LEN;
+
+	/* Raw IP mode */
+	rmnet_dev->header_ops = NULL;  /* No header */
+	rmnet_dev->type = ARPHRD_RAWIP;
+	rmnet_dev->hard_header_len = 0;
+	rmnet_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
+
+	/* Have the core free the netdev once it has been unregistered */
+	rmnet_dev->needs_free_netdev = true;
+}
+
+/* Exposed API */
+
+/* Register rmnet_dev with the networking core and record it in slot id of
+ * real_dev's rmnet_devices[] table.
+ *
+ * Returns 0 on success, -EINVAL if id is out of range or the slot is already
+ * taken, or the error returned by register_netdevice().
+ */
+int rmnet_vnd_newlink(struct net_device *real_dev, int id,
+		      struct net_device *rmnet_dev)
+{
+	struct rmnet_real_dev_info *rdinfo;
+	int rc;
+
+	rdinfo = rmnet_get_real_dev_info(real_dev);
+
+	/* Validate id before indexing, as the other rmnet_devices[] users do */
+	if (id < 0 || id >= RMNET_MAX_VND || rdinfo->rmnet_devices[id])
+		return -EINVAL;
+
+	rc = register_netdevice(rmnet_dev);
+	if (!rc) {
+		rdinfo->rmnet_devices[id] = rmnet_dev;
+		rmnet_dev->rtnl_link_ops = &rmnet_link_ops;
+	}
+	return rc;
+}
+
+/* Unregisters the virtual network device in slot id of real_dev and clears
+ * the slot.
+ *
+ * unregister_netdev() takes the rtnl mutex itself, so the caller must not
+ * hold it. The device memory is not freed here: rmnet_vnd_setup() sets
+ * needs_free_netdev, so the core frees the device once unregistration has
+ * completed, and an explicit free_netdev() would free it a second time.
+ *
+ * Returns 0 on success, or -EINVAL if id is out of range, the slot is empty
+ * or rmnet_vnd_get_endpoint() returns an endpoint for the device.
+ */
+int rmnet_vnd_free_dev(struct net_device *real_dev, int id)
+{
+	struct rmnet_real_dev_info *rdinfo;
+	struct net_device *rmnet_dev;
+	struct rmnet_endpoint *ep;
+
+	rdinfo = rmnet_get_real_dev_info(real_dev);
+
+	rtnl_lock();
+	if (id < 0 || id >= RMNET_MAX_VND || !rdinfo->rmnet_devices[id]) {
+		rtnl_unlock();
+		return -EINVAL;
+	}
+
+	/* NOTE(review): rmnet_vnd_get_endpoint() returns &priv->local_ep for
+	 * any non-NULL device, so this test is always true here and the
+	 * function never gets past it. Presumably it was meant to test some
+	 * "endpoint still in use" state instead - confirm the intended
+	 * condition.
+	 */
+	ep = rmnet_vnd_get_endpoint(rdinfo->rmnet_devices[id]);
+	if (ep) {
+		rtnl_unlock();
+		return -EINVAL;
+	}
+
+	rmnet_dev = rdinfo->rmnet_devices[id];
+	rdinfo->rmnet_devices[id] = NULL;
+	rtnl_unlock();
+
+	/* rmnet_dev cannot be NULL: the slot was checked under the lock */
+	unregister_netdev(rmnet_dev);
+	return 0;
+}
+
+/* Forget the virtual device stored in slot id of real_dev without
+ * unregistering or freeing it.
+ *
+ * Returns 0 on success or -EINVAL if id is out of range or the slot is empty.
+ */
+int rmnet_vnd_remove_ref_dev(struct net_device *real_dev, int id)
+{
+	struct rmnet_real_dev_info *rdinfo;
+
+	rdinfo = rmnet_get_real_dev_info(real_dev);
+	if (id < 0 || id >= RMNET_MAX_VND || !rdinfo->rmnet_devices[id])
+		return -EINVAL;
+
+	rdinfo->rmnet_devices[id] = NULL;
+	return 0;
+}
+
+/* Linear search of real_dev's table of RmNet virtual devices for rmnet_dev.
+ * The lookup is O(n) and should not be used in the data path.
+ *
+ * Returns the slot index plus one, or 0 if rmnet_dev is NULL or not found;
+ * subtract 1 from a non-zero result to get the slot index.
+ */
+int rmnet_vnd_get_mux(struct net_device *real_dev,
+		      struct net_device *rmnet_dev)
+{
+	struct rmnet_real_dev_info *rdinfo = rmnet_get_real_dev_info(real_dev);
+	int slot;
+
+	if (!rmnet_dev)
+		return 0;
+
+	/* The table is small and this only runs in a configuration context,
+	 * so a plain scan is good enough.
+	 */
+	for (slot = 0; slot < RMNET_MAX_VND; slot++) {
+		if (rdinfo->rmnet_devices[slot] == rmnet_dev)
+			return slot + 1;
+	}
+
+	return 0;
+}
+
+/* Returns the logical endpoint configuration embedded in the private data
+ * of a RmNet virtual network device, or NULL if rmnet_dev is NULL. The
+ * caller should confirm that the device is a RmNet VND before calling.
+ */
+struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *rmnet_dev)
+{
+	struct rmnet_priv *priv;
+
+	if (rmnet_dev) {
+		priv = netdev_priv(rmnet_dev);
+		return &priv->local_ep;
+	}
+
+	return NULL;
+}
+
+/* Wakes (enable != 0) or stops (enable == 0) the TX queue of rmnet_dev.
+ * Always returns 0.
+ */
+int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable)
+{
+	netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable);
+
+	/* Enable and disable commands are expected in similar numbers, but
+	 * disable is the more latency sensitive of the two, so the enable
+	 * branch is the one marked unlikely.
+	 */
+	if (unlikely(enable)) {
+		netif_wake_queue(rmnet_dev);
+		return 0;
+	}
+
+	netif_stop_queue(rmnet_dev);
+	return 0;
+}
+
+/* Returns the entry stored in slot id of real_dev's rmnet_devices[] table,
+ * or NULL if id is out of range.
+ */
+struct net_device *rmnet_vnd_get_by_id(struct net_device *real_dev, int id)
+{
+	struct rmnet_real_dev_info *rdinfo = rmnet_get_real_dev_info(real_dev);
+
+	if (id >= 0 && id < RMNET_MAX_VND)
+		return rdinfo->rmnet_devices[id];
+
+	return NULL;
+}
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
new file mode 100644
index 0000000..f649bb4
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data Virtual Network Device APIs
+ *
+ */
+
+#ifndef _RMNET_VND_H_
+#define _RMNET_VND_H_
+
+int rmnet_vnd_do_flow_control(struct net_device *dev, int enable);
+struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev);
+int rmnet_vnd_free_dev(struct net_device *real_dev, int id);
+int rmnet_vnd_remove_ref_dev(struct net_device *real_dev, int id);
+void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
+void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
+int rmnet_vnd_get_mux(struct net_device *real_dev,
+		      struct net_device *rmnet_dev);
+struct net_device *rmnet_vnd_get_by_id(struct net_device *real_dev, int id);
+void rmnet_vnd_setup(struct net_device *dev);
+int rmnet_vnd_newlink(struct net_device *real_dev, int id,
+		      struct net_device *new_device);
+#endif /* _RMNET_VND_H_ */
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH] e1000: changed some expensive calls of udelay to usleep_range
From: Jeff Kirsher @ 2017-08-23  0:30 UTC (permalink / raw)
  To: nxf23276
  Cc: michael.kardonik, shannon.nelson, carolyn.wyborny,
	donald.c.skidmore, bruce.w.allan, john.ronciak, mitch.a.williams,
	intel-wired-lan, netdev, linux-kernel
In-Reply-To: <1503435747-29639-1-git-send-email-matthew.tan_1@nxp.com>

[-- Attachment #1: Type: text/plain, Size: 1057 bytes --]

On Tue, 2017-08-22 at 16:02 -0500, nxf23276 wrote:
>     Calls to udelay are not preemtable by userspace so userspace
>     applications experience a large (~200us) latency when running on
> core
>     0. Instead usleep_range can be used to be more friendly to
> userspace
>     since it is preemtable. This is due to udelay using busy-wait
> loops
>     while usleep_rang uses hrtimers instead. It is recommended to use
>     udelay when the delay is <10us since at that precision overhead
> of
>     usleep_range hrtimer setup causes issues. However, the replaced
> calls
>     are for 50us and 100us so this should not be not an issue.
> 
> Signed-off-by: nxf23276 <matthew.tan_1@nxp.com>
> ---
>  drivers/net/ethernet/intel/e1000e/phy.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)

First of all, your name is nxf23276?  Really??

Second, you titled your patch that you were making changes to e1000
driver, yet you are asking to modify e1000e.  Just based on these 2
needed changes, I am dropping your patch.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [RFC PATCH] dt-binding: net: sfp binding documentation
From: Rob Herring @ 2017-08-23  0:33 UTC (permalink / raw)
  To: Baruch Siach
  Cc: Mark Rutland, Andrew Lunn, Florian Fainelli, David S . Miller,
	Russell King, netdev,
	devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20170822044900.debakzpc3qatx4cg-MwjkAAnuF3khR1HGirfZ1z4kX+cae0hd@public.gmane.org>

On Tue, Aug 22, 2017 at 07:49:01AM +0300, Baruch Siach wrote:
> Hi Rob,
> 
> On Mon, Aug 21, 2017 at 02:10:33PM -0500, Rob Herring wrote:
> > On Sun, Aug 20, 2017 at 5:28 AM, Baruch Siach <baruch-NswTu9S1W3P6gbPvEgmw2w@public.gmane.org> wrote:
> > > Add device-tree binding documentation SFP transceivers. Support for SFP
> > > transceivers has been recently introduced (drivers/net/phy/sfp.c).
> > >
> > > Signed-off-by: Baruch Siach <baruch-NswTu9S1W3P6gbPvEgmw2w@public.gmane.org>
> > > ---
> > >
> > > The SFP driver is on net-next.
> > >
> > > Not sure about the rate-select-gpio property name. The SFP+ standard
> > > (not supported yet) uses two signals, RS0 and RS1. RS0 is compatible
> > > with the SFP rate select signal, while RS1 controls the Tx rate.
> > > ---
> > >  Documentation/devicetree/bindings/net/sff-sfp.txt | 24 +++++++++++++++++++++++
> > >  1 file changed, 24 insertions(+)
> > >  create mode 100644 Documentation/devicetree/bindings/net/sff-sfp.txt
> > >
> > > diff --git a/Documentation/devicetree/bindings/net/sff-sfp.txt b/Documentation/devicetree/bindings/net/sff-sfp.txt
> > > new file mode 100644
> > > index 000000000000..f0c27bc3925e
> > > --- /dev/null
> > > +++ b/Documentation/devicetree/bindings/net/sff-sfp.txt
> > > @@ -0,0 +1,24 @@
> > > +Small Form Factor (SFF) Committee Small Form-factor Pluggable (SFP)
> > > +Transceiver
> > > +
> > > +Required properties:
> > > +
> > > +- compatible : must be "sff,sfp"
> > 
> > Need to document "sff" vendor prefix.
> 
> "sff" stands for Small Form Factor Committee, now under Storage Networking 
> Industry Association (SNIA). Not really a vendor, but a standards body. Does 
> that count? I could not find any other example in vendor-prefixes.txt, other 
> than "linux" which is kind of special.

sff is fine. I'd be fine with 'snia' too. Just need to add it to vendor 
prefixes.

One more thing, use "sff,sfp.txt" matching the compatible for the 
filename.

> 
> > Kind of a short name, but I guess it is sufficient. Are there
> > revisions of the standard (not SFP+) or more than one form factor (I
> > don't recall any)?
> 
> I'm not aware of any other revisions.

Okay.

Rob
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: XDP redirect measurements, gotchas and tracepoints
From: Alexander Duyck @ 2017-08-23  1:06 UTC (permalink / raw)
  To: Michael Chan
  Cc: Duyck, Alexander H, john.fastabend@gmail.com, brouer@redhat.com,
	pstaszewski@itcare.pl, netdev@vger.kernel.org,
	xdp-newbies@vger.kernel.org, andy@greyhouse.net,
	borkmann@iogearbox.net
In-Reply-To: <CACKFLimMWLto4FRxNE3tzrO5MU+p67J7wu=n1nmg0t8QGYjgrA@mail.gmail.com>

On Tue, Aug 22, 2017 at 1:04 PM, Michael Chan <michael.chan@broadcom.com> wrote:
> On Tue, Aug 22, 2017 at 11:30 AM, Duyck, Alexander H
> <alexander.h.duyck@intel.com> wrote:
>> On Tue, 2017-08-22 at 11:17 -0700, John Fastabend wrote:
>>> On 08/22/2017 11:02 AM, Michael Chan wrote:
>>> > On Mon, Aug 21, 2017 at 12:25 PM, Jesper Dangaard Brouer
>>> > <brouer@redhat.com> wrote:
>>> > >
>>> > > I'be been playing with the latest XDP_REDIRECT feature, that was
>>> > > accepted in net-next (for ixgbe), see merge commit[1].
>>> > >  [1] https://git.kernel.org/davem/net-next/c/6093ec2dc31
>>> > >
>>> >
>>> > Just catching on XDP_REDIRECT and I have a very basic question.  The
>>> > ingress device passes the XDP buffer to the egress device for XDP
>>> > redirect transmission.  When the egress device has transmitted the
>>> > packet, is it supposed to just free the buffer?  Or is it supposed to
>>> > be recycled?
>>> >
>>> > In XDP_TX, the buffer is recycled back to the rx ring.
>>> >
>>>
>>> With XDP_REDIRECT we must "just free the buffer" in ixgbe this means
>>> page_frag_free() on the data. There is no way to know where the xdp
>>> buffer came from it could be a different NIC for example.
>>>
>>> However with how ixgbe is coded up recycling will work as long as
>>> the memory is free'd before the driver ring tries to use it again. In
>>> normal usage this should be the case. And if we are over-running a device
>>> it doesn't really hurt to slow down the sender a bit.
>>>
>>> I think this is a pretty good model, we could probably provide a set
>>> of APIs for drivers to use so that we get some consistency across
>>> vendors here, ala Jesper's page pool ideas.
>>>
>>> (+Alex, for ixgbe details)
>>>
>>> Thanks,
>>> John
>>
>> I think you pretty much covered the inner workings for the ixgbe bits.
>>
>> The only piece I would add is that the recycling trick normally only
>> works if the same interface/driver is doing both the Tx and the Rx. The
>> redirect code cannot assume that is the case and that is the reason why
>> it must always be freeing the traffic on clean-up.
>>
>
> Right, but it's conceivable to add an API to "return" the buffer to
> the input device, right?

You could, it is just added complexity. "just free the buffer" in
ixgbe usually just amounts to one atomic operation to decrement the
total page count since page recycling is already implemented in the
driver. You still would have to unmap the buffer regardless of if you
were recycling it or not so all you would save is 1.000015259 atomic
operations per packet. The fraction is because once every 64K uses we
have to bulk update the count on the page.

There are still thoughts at some point in the future to consider
changing the layout so that we lay things out linearly instead of
interleaving the page halves. However that is a bit of optimization
and right now I don't really have the spare time to explore it. It
would help the performance by making sure the pages are warm on the
second freeing assuming all the packets in a given flow are received
back to back.

- Alex

^ permalink raw reply

* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2017-08-23  1:31 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Wei Wang,
	Ido Schimmel, Jiri Pirko

Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

  net/ipv6/ip6_fib.c

between commit:

  c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node")

from the net tree and commit:

  a460aa83963b ("ipv6: fib: Add helpers to hold / drop a reference on rt6_info")

from the net-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc net/ipv6/ip6_fib.c
index a5ebf86f6be8,549aacc3cb2c..000000000000
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@@ -160,12 -154,7 +161,12 @@@ static void node_free_rcu(struct rcu_he
  	kmem_cache_free(fib6_node_kmem, fn);
  }

 +static void node_free(struct fib6_node *fn)
 +{
 +	call_rcu(&fn->rcu, node_free_rcu);
 +}
 +
- static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
  {
  	int cpu;

^ permalink raw reply

* Re: [PATCH net-next 1/8] bpf: Recursively apply cgroup sock filters
From: Alexei Starovoitov @ 2017-08-23  1:40 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, tj, davem
In-Reply-To: <1503447621-27997-2-git-send-email-dsahern@gmail.com>

On Tue, Aug 22, 2017 at 05:20:14PM -0700, David Ahern wrote:
> Recursively apply sock filters attached to a cgroup. For now, start
> with the inner cgroup attached to the socket and work back to the
> root. If desired the inverse can be done use an attach flag (start
> with parent cgroup and go in).
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  include/linux/bpf-cgroup.h |  5 +++--
>  kernel/bpf/cgroup.c        |  4 +---
>  kernel/cgroup/cgroup.c     | 18 ++++++++++++++++++
>  3 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index d41d40ac3efd..d95e44ccd549 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -40,8 +40,9 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
>  				struct sk_buff *skb,
>  				enum bpf_attach_type type);
>  
> -int __cgroup_bpf_run_filter_sk(struct sock *sk,
> +int __cgroup_bpf_run_filter_sk(struct cgroup *cgrp, struct sock *sk,
>  			       enum bpf_attach_type type);
> +int cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type);
>  
>  int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
>  				     struct bpf_sock_ops_kern *sock_ops,
> @@ -74,7 +75,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
>  ({									       \
>  	int __ret = 0;							       \
>  	if (cgroup_bpf_enabled && sk) {					       \
> -		__ret = __cgroup_bpf_run_filter_sk(sk,			       \
> +		__ret = cgroup_bpf_run_filter_sk(sk,			       \
>  						 BPF_CGROUP_INET_SOCK_CREATE); \
>  	}								       \
>  	__ret;								       \
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 546113430049..0480610bda83 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -217,14 +217,12 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
>   * This function will return %-EPERM if any if an attached program was found
>   * and if it returned != 1 during execution. In all other cases, 0 is returned.
>   */
> -int __cgroup_bpf_run_filter_sk(struct sock *sk,
> +int __cgroup_bpf_run_filter_sk(struct cgroup *cgrp, struct sock *sk,
>  			       enum bpf_attach_type type)
>  {
> -	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
>  	struct bpf_prog *prog;
>  	int ret = 0;
>  
> -
>  	rcu_read_lock();
>  
>  	prog = rcu_dereference(cgrp->bpf.effective[type]);
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index df2e0f14a95d..7480cebab073 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -5186,4 +5186,22 @@ int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
>  	mutex_unlock(&cgroup_mutex);
>  	return ret;
>  }
> +
> +int cgroup_bpf_run_filter_sk(struct sock *sk,
> +			     enum bpf_attach_type type)
> +{
> +	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> +	int ret = 0;
> +
> +	while (cgrp) {
> +		ret = __cgroup_bpf_run_filter_sk(cgrp, sk, type);
> +		if (ret < 0)
> +			break;
> +
> +		cgrp = cgroup_parent(cgrp);
> +	}

I think this walk changes semantics for existing setups, so we cannot do it
by default and have to add new attach flag.
Also why break on (ret < 0) ?
The caller of this does:
  err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
  if (err) {
          sk_common_release(sk);
so we should probably break out of the loop on if (ret) too.

^ permalink raw reply

* Re: [PATCH net-next 2/8] bpf: Add mark and priority to sock options that can be set
From: Alexei Starovoitov @ 2017-08-23  1:41 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, tj, davem
In-Reply-To: <1503447621-27997-3-git-send-email-dsahern@gmail.com>

On Tue, Aug 22, 2017 at 05:20:15PM -0700, David Ahern wrote:
> Add socket mark and priority to fields that can be set by
> ebpf program when a socket is created.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

lgtm
Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* Re: [PATCH net-next 3/8] bpf: Allow cgroup sock filters to use get_current_uid_gid helper
From: Alexei Starovoitov @ 2017-08-23  1:49 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, daniel, ast, tj, davem
In-Reply-To: <1503447621-27997-4-git-send-email-dsahern@gmail.com>

On Tue, Aug 22, 2017 at 05:20:16PM -0700, David Ahern wrote:
> Allow BPF programs run on sock create to use the get_current_uid_gid
> helper.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

as far as i understand socket creation path the context of
inet_create/inet6_create should always have valid uid/gid,
so i think the patch is fine, but please add a comment to make
sure all future callsites where BPF_CGROUP_RUN_PROG_INET_SOCK()
called will be evaluated for this matter.

^ permalink raw reply

* [PATCH net-next] ipv4: do metrics match when looking up and deleting a route
From: Xin Long @ 2017-08-23  2:07 UTC (permalink / raw)
  To: network dev; +Cc: davem, Hannes Frederic Sowa

When the ipv4 routing code inserts a fib_info, it memcmp()s fib_metrics,
which means a route is also identified by its metrics.

But when removing a route, the lookup does not take the metrics into
account, so the first route that otherwise matches is removed instead
of the one with the requested metrics.

Thomas noticed this issue when doing the testing:

1. add:
   # ip route append 192.168.7.0/24 dev v window 1000
   # ip route append 192.168.7.0/24 dev v window 1001
   # ip route append 192.168.7.0/24 dev v window 1002
   # ip route append 192.168.7.0/24 dev v window 1003
2. delete:
   # ip route delete 192.168.7.0/24 dev v window 1002
3. show:
     192.168.7.0/24 proto boot scope link window 1001
     192.168.7.0/24 proto boot scope link window 1002
     192.168.7.0/24 proto boot scope link window 1003

The one with window 1002 wasn't deleted but the first one was.

This patch is to do metrics match when looking up and deleting
one route.

Reported-by: Thomas Haller <thaller@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 net/ipv4/fib_lookup.h    |  1 +
 net/ipv4/fib_semantics.c | 34 ++++++++++++++++++++++++++++++++++
 net/ipv4/fib_trie.c      |  3 ++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 769ab87..5b2af19 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -32,6 +32,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 				 struct netlink_ext_ack *extack);
 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 		 struct netlink_ext_ack *extack);
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
 		  u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
 		  unsigned int);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 394d800..57a5d48 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -696,6 +696,40 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 	return 0;
 }
 
+/* Check whether every metric attribute supplied in cfg->fc_mx has the same
+ * value as the corresponding entry in fi->fib_metrics.
+ *
+ * Returns true when cfg carries no metrics or all of them match, and false
+ * on the first mismatch or on an attribute type above RTAX_MAX.
+ */
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
+{
+	struct nlattr *nla;
+	int remaining;
+
+	/* No metrics requested: any fib_info matches */
+	if (!cfg->fc_mx)
+		return true;
+
+	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+		int type = nla_type(nla);
+		u32 val;
+
+		/* Attribute type 0 is skipped rather than compared */
+		if (!type)
+			continue;
+		if (type > RTAX_MAX)
+			return false;
+
+		if (type == RTAX_CC_ALGO) {
+			char tmp[TCP_CA_NAME_MAX];
+			bool ecn_ca = false;
+
+			/* The algorithm is passed by name; compare the key
+			 * that tcp_ca_get_key_by_name() returns for it.
+			 */
+			nla_strlcpy(tmp, nla, sizeof(tmp));
+			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+		} else {
+			val = nla_get_u32(nla);
+		}
+
+		/* metrics[] is indexed by attribute type minus one */
+		if (fi->fib_metrics->metrics[type - 1] != val)
+			return false;
+	}
+
+	return true;
+}
+
 
 /*
  * Picture
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1a6ffb0..c636650 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1563,7 +1563,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
 		    (!cfg->fc_protocol ||
 		     fi->fib_protocol == cfg->fc_protocol) &&
-		    fib_nh_match(cfg, fi, extack) == 0) {
+		    fib_nh_match(cfg, fi, extack) == 0 &&
+		    fib_metrics_match(cfg, fi)) {
 			fa_to_delete = fa;
 			break;
 		}
-- 
2.1.0

^ permalink raw reply related

* Re: [PATCH net-next v7 02/10] bpf: Add eBPF program subtype and is_valid_subtype() verifier
From: Alexei Starovoitov @ 2017-08-23  2:44 UTC (permalink / raw)
  To: Mickaël Salaün
  Cc: linux-kernel, Alexei Starovoitov, Andy Lutomirski,
	Arnaldo Carvalho de Melo, Casey Schaufler, Daniel Borkmann,
	David Drysdale, David S . Miller, Eric W . Biederman,
	James Morris, Jann Horn, Jonathan Corbet, Matthew Garrett,
	Michael Kerrisk, Kees Cook, Paul Moore, Sargun Dhillon,
	Serge E . Hallyn, Shuah Khan, Tejun Heo, Thomas Graf <tgr
In-Reply-To: <20170821000933.13024-3-mic@digikod.net>

On Mon, Aug 21, 2017 at 02:09:25AM +0200, Mickaël Salaün wrote:
> The goal of the program subtype is to be able to have different static
> fine-grained verifications for a unique program type.
> 
> The struct bpf_verifier_ops gets a new optional function:
> is_valid_subtype(). This new verifier is called at the beginning of the
> eBPF program verification to check if the (optional) program subtype is
> valid.
> 
> For now, only Landlock eBPF programs are using a program subtype (see
> next commit) but this could be used by other program types in the future.
> 
> Signed-off-by: Mickaël Salaün <mic@digikod.net>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: David S. Miller <davem@davemloft.net>
> Link: https://lkml.kernel.org/r/20160827205559.GA43880@ast-mbp.thefacebook.com
> ---
> 
> Changes since v6:
> * rename Landlock version to ABI to better reflect its purpose
> * fix unsigned integer checks
> * fix pointer cast
> * constify pointers
> * rebase
> 
> Changes since v5:
> * use a prog_subtype pointer and make it future-proof
> * add subtype test
> * constify bpf_load_program()'s subtype argument
> * cleanup subtype initialization
> * rebase
> 
> Changes since v4:
> * replace the "status" field with "version" (more generic)
> * replace the "access" field with "ability" (less confusing)
> 
> Changes since v3:
> * remove the "origin" field
> * add an "option" field
> * cleanup comments
> ---
>  include/linux/bpf.h                         |  7 ++-
>  include/linux/filter.h                      |  2 +
>  include/uapi/linux/bpf.h                    | 11 +++++
>  kernel/bpf/syscall.c                        | 22 ++++++++-
>  kernel/bpf/verifier.c                       | 17 +++++--
>  kernel/trace/bpf_trace.c                    | 15 ++++--
>  net/core/filter.c                           | 71 ++++++++++++++++++-----------
>  samples/bpf/bpf_load.c                      |  3 +-
>  samples/bpf/cookie_uid_helper_example.c     |  2 +-
>  samples/bpf/fds_example.c                   |  2 +-
>  samples/bpf/sock_example.c                  |  3 +-
>  samples/bpf/test_cgrp2_attach.c             |  2 +-
>  samples/bpf/test_cgrp2_attach2.c            |  2 +-
>  samples/bpf/test_cgrp2_sock.c               |  2 +-
>  tools/include/uapi/linux/bpf.h              | 11 +++++
>  tools/lib/bpf/bpf.c                         | 10 +++-
>  tools/lib/bpf/bpf.h                         |  5 +-
>  tools/lib/bpf/libbpf.c                      |  4 +-
>  tools/perf/tests/bpf.c                      |  2 +-
>  tools/testing/selftests/bpf/test_align.c    |  2 +-
>  tools/testing/selftests/bpf/test_tag.c      |  2 +-
>  tools/testing/selftests/bpf/test_verifier.c | 17 ++++++-
>  22 files changed, 158 insertions(+), 56 deletions(-)

...

> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 7015116331af..0c3fadbb5a58 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -464,6 +464,8 @@ struct bpf_prog {
>  	u32			len;		/* Number of filter blocks */
>  	u32			jited_len;	/* Size of jited insns in bytes */
>  	u8			tag[BPF_TAG_SIZE];
> +	u8			has_subtype;
> +	union bpf_prog_subtype	subtype;	/* Fine-grained verifications */

these burn a hole in very performance sensitive structure.
Also there are bits right above. Use them instead of u8 has_subtype?
or can these two fields be part of bpf_prog_aux ?

>  	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
>  	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
>  	unsigned int		(*bpf_func)(const void *ctx,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 843818dff96d..8541ab85e432 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -177,6 +177,15 @@ enum bpf_attach_type {
>  /* Specify numa node during map creation */
>  #define BPF_F_NUMA_NODE		(1U << 2)
>  
> +union bpf_prog_subtype {
> +	struct {
> +		__u32		abi; /* minimal ABI version, cf. user doc */

the concept of abi (version) sounds a bit weird to me.
Why bother with it at all?
Once the first set of patches lands, the kernel as a whole will have the landlock feature
with a set of helpers, actions, event types.
Some future patches will extend the landlock feature step by step.
This abi concept assumes that anyone who adds new helper would need
to keep incrementing this 'abi'. What value does it give to user or to kernel?
The users will already know that landlock is present in kernel 4.14 or whatever
and the kernel 4.18 has more landlock features. Why bother with extra abi number?

> +		__u32		event; /* enum landlock_subtype_event */
> +		__aligned_u64	ability; /* LANDLOCK_SUBTYPE_ABILITY_* */
> +		__aligned_u64	option; /* LANDLOCK_SUBTYPE_OPTION_* */
> +	} landlock_rule;
> +} __attribute__((aligned(8)));
> +
>  union bpf_attr {
>  	struct { /* anonymous struct used by BPF_MAP_CREATE command */
>  		__u32	map_type;	/* one of enum bpf_map_type */
> @@ -212,6 +221,8 @@ union bpf_attr {
>  		__aligned_u64	log_buf;	/* user supplied buffer */
>  		__u32		kern_version;	/* checked when prog_type=kprobe */
>  		__u32		prog_flags;
> +		__aligned_u64	prog_subtype;	/* bpf_prog_subtype address */
> +		__u32		prog_subtype_size;
>  	};

more general question: what is the status of security/ bits?
I'm assuming they still need to be reviewed and explicitly acked by James, right?


^ permalink raw reply

* Re: [PATCH v4 net-next] arm: eBPF JIT compiler
From: Shubham Bansal @ 2017-08-23  3:05 UTC (permalink / raw)
  To: David Miller
  Cc: Russell King - ARM Linux, Network Development, Alexei Starovoitov,
	Daniel Borkmann, linux-arm-kernel, LKML, Kees Cook
In-Reply-To: <20170822.093216.2102213259577450903.davem@davemloft.net>

Hi David,

On Tue, Aug 22, 2017 at 10:02 PM, David Miller <davem@davemloft.net> wrote:
>
> You posted this 4 times. :-(
>
> I hope I applied the right one.

All 4 of these are the same patch. I mistakenly sent it 4 times. My
apologies for that.
>
> Go check net-next and please send me any necessary fix up patches.
I just checked. It's the correct patch.

Thanks a lot David. :)

^ permalink raw reply

* Re: [PATCH v12 7/8] wireless: ipw2200: Replace PCI pool old API
From: Kalle Valo @ 2017-08-23  3:58 UTC (permalink / raw)
  To: Stanislav Yakovlev
  Cc: Romain Perier, Dan Williams, Doug Ledford, Sean Hefty,
	Hal Rosenstock, jeffrey.t.kirsher-ral2JQCrhuEAvxtiuMwx3w,
	David S. Miller, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Greg Kroah-Hartman,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <CA++WF2MAT3aHd3TYfLgym9ssFKBsti6+t9b1yJ_eXHEmRBWwcQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

+ linux-wireless

Stanislav Yakovlev <stas.yakovlev-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> On 22 August 2017 at 07:47, Romain Perier <romain.perier-ZGY8ohtN/8qB+jHODAdFcQ@public.gmane.org> wrote:
>> The PCI pool API is deprecated. This commit replaces the PCI pool old
>> API by the appropriate function with the DMA pool API.
>>
>> Signed-off-by: Romain Perier <romain.perier-ZGY8ohtN/8qB+jHODAdFcQ@public.gmane.org>
>> Reviewed-by: Peter Senna Tschudin <peter.senna-ZGY8ohtN/8qB+jHODAdFcQ@public.gmane.org>
>> ---
>>  drivers/net/wireless/intel/ipw2x00/ipw2200.c | 13 +++++++------
>>  1 file changed, 7 insertions(+), 6 deletions(-)
>>
>
> Acked-by: Stanislav Yakovlev <stas.yakovlev-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>
> Thanks, and sorry for the long review.
>
> Kalle, could you please apply it to the wireless-drivers-next tree?

It was not sent to linux-wireless so patchwork didn't see it and hence
it's not on my queue. Please resend.

[1] https://patchwork.kernel.org/project/linux-wireless/list/

-- 
Kalle Valo
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] drivers: net: wireless: atmel: check memory allocation failure
From: Kalle Valo @ 2017-08-23  4:13 UTC (permalink / raw)
  To: Himanshu Jha
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1503389481-4988-1-git-send-email-himanshujha199640-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Himanshu Jha <himanshujha199640-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> Check memory allocation failure and return -ENOMEM if failure
> occurs.
>
> Signed-off-by: Himanshu Jha <himanshujha199640-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

The title prefix is wrong:

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches#commit_title_is_wrong

-- 
Kalle Valo

^ permalink raw reply

* RE: [PATCH] vsock: only load vmci transport on VMware hypervisor by default
From: Dexuan Cui @ 2017-08-23  4:21 UTC (permalink / raw)
  To: Jorgen S. Hansen, Stefan Hajnoczi
  Cc: davem@davemloft.net, netdev@vger.kernel.org,
	gregkh@linuxfoundation.org, devel@linuxdriverproject.org,
	KY Srinivasan, Haiyang Zhang, Stephen Hemminger, George Zhang,
	Michal Kubecek, Asias He, Vitaly Kuznetsov, Cathy Avery,
	jasowang@redhat.com, Rolf Neugebauer, Dave Scott, Marcelo Cerri,
	apw@canonical.com, olaf@aepfle.de
In-Reply-To: <A6BE88A8-6E79-486E-821F-0D5C8B1B3034@vmware.com>

> From: Jorgen S. Hansen [mailto:jhansen@vmware.com]
> > On Aug 22, 2017, at 11:54 AM, Stefan Hajnoczi <stefanha@redhat.com>
> wrote:
> > ...
> > We *can* by looking at the destination CID.  Please take a look at
> > drivers/misc/vmw_vmci/vmci_route.c:vmci_route() to see how VMCI
> handles
> > nested virt.
> >
> > It boils down to something like this:
> >
> >  static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
> >                                  int addr_len, int flags)
> >  {
> >      ...
> >      if (remote_addr.svm_cid == VMADDR_CID_HOST)
> >          transport = host_transport;
> >      else
> >          transport = guest_transport;
> >
> > It's easy for connect(2) but Jorgen mentioned it's harder for listen(2)
> > because the socket would need to listen on both transports.  We define
> > two new constants VMADDR_CID_LISTEN_FROM_GUEST and
> > VMADDR_CID_LISTEN_FROM_HOST for bind(2) so that applications can
> decide
> > which side to listen on.
> 
> If a socket is bound to VMADDR_CID_HOST, we would consider that socket as
> bound to the host side transport, so that would be the same as
> VMADDR_CID_LISTEN_FROM_GUEST. For the guest, we have
> IOCTL_VM_SOCKETS_GET_LOCAL_CID, so that could be used to get and bind
> a socket to the guest transport (VMCI will always return the guest CID as the
> local one, if the VMCI driver is used in a guest, and it looks like virtio will do
> the same). We could treat VMADDR_CID_ANY as always being the guest
> transport, since that is the use case where you don’t know upfront what
> your CID is, if we don’t want to listen on all transports. So we would use the
> host transport, if a socket is bound to VMADDR_CID_HOST, or if there is no
> guest transport, and in all other cases use the guest transport. However,
> having a couple of symbolic names like you suggest certainly makes it more
> obvious, and could be used in combination with this. It would be a plus if
> existing applications would function as intended in most cases.
> 
> >   Or the listen socket could simply listen to
> > both sides.
> 
> The only problem here would be the potential for a guest and a host app to
> have a conflict wrt port numbers, even though they would be able to
> operate fine, if restricted to their appropriate transport.
> 
> Thanks,
> Jorgen

Hi Jorgen, Stefan,
Thank you for the detailed analysis!
You have a much better understanding than me about the complex
scenarios. Can you please work out a patch? :-)

IMO Linux driver of Hyper-V sockets is the simplest case, as we only have
the "to host" option (the host side driver of Hyper-V sockets runs on 
Windows kernel and I don't think the other hypervisors emulate
the full Hyper-V VMBus 4.0, which is required to support Hyper-V sockets).

-- Dexuan


^ permalink raw reply

* Re: [PATCH net-next 0/2] Two minor BPF cleanups
From: David Miller @ 2017-08-23  4:26 UTC (permalink / raw)
  To: daniel; +Cc: ast, john.fastabend, netdev
In-Reply-To: <cover.1503445395.git.daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 23 Aug 2017 01:47:52 +0200

> Two minor cleanups on devmap and redirect I still had
> in my queue.

Series applied.

^ permalink raw reply

* Re: [trivial] gre: fix goto statement typo
From: David Miller @ 2017-08-23  4:29 UTC (permalink / raw)
  To: u9012063; +Cc: netdev, trivial
In-Reply-To: <1503446645-33652-1-git-send-email-u9012063@gmail.com>

From: William Tu <u9012063@gmail.com>
Date: Tue, 22 Aug 2017 17:04:05 -0700

> Fix typo: pnet_tap_faied.
> 
> Signed-off-by: William Tu <u9012063@gmail.com>

Applied to net-next.

^ permalink raw reply

* Re: [PATCH v2 0/2] enable hires timer to timeout datagram socket
From: David Miller @ 2017-08-23  4:30 UTC (permalink / raw)
  To: vallish
  Cc: shuah, richardcochran, xiyou.wangcong, netdev, linux-kernel,
	eduval, anchalag
In-Reply-To: <1503447027-44399-1-git-send-email-vallish@amazon.com>

From: Vallish Vaidyeshwara <vallish@amazon.com>
Date: Wed, 23 Aug 2017 00:10:25 +0000

> I am submitting 2 patch series to enable hires timer to timeout
> datagram sockets (AF_UNIX & AF_INET domain) and test code to test
> timeout accuracy on these sockets.

This is not reasonable.

If you want high resolution events with real guarantees, please use
the kernel interfaces which provide this as explained to you as
feedback by other reviewers.

I'm not applying this, sorry.

^ permalink raw reply

* RE: [PATCH v2 1/6] fsl/fman: enable FMan Keygen
From: Madalin-cristian Bucur @ 2017-08-23  4:36 UTC (permalink / raw)
  To: David Miller
  Cc: netdev@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20170822.143510.1048652578169181274.davem@davemloft.net>

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> On Behalf Of David Miller
> Sent: Wednesday, August 23, 2017 12:35 AM
> Subject: Re: [PATCH v2 1/6] fsl/fman: enable FMan Keygen
> 
> From: Madalin Bucur <madalin.bucur@nxp.com>
> Date: Tue, 22 Aug 2017 20:31:01 +0300
> 
> >  /**
> > + * fman_get_keygen
> > + *
> > + * @fman:	A Pointer to FMan device
> > + *
> > + * Get the handle to KeyGen module part of FM driver
> > + *
> > + * Return: Handle to KeyGen
> > + */
> > +struct fman_keygen *fman_get_keygen(struct fman *fman)
> > +{
> > +	return fman->keygen;
> > +}
> > +EXPORT_SYMBOL(fman_get_keygen);
> 
> Please don't do this.
> 
> Just directly dereference the pointer in the source code to
> get the keygen.
> 
> Thank you.

Hi,

The struct fman is only visible in the fman file, the fman port module uses struct
fman as an opaque pointer, thus this export.

Madalin

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox