* [net-next PATCH 1/9] bpf: convert sockmap field attach_bpf_fd2 to type
From: John Fastabend @ 2017-08-28 14:10 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
In the initial sockmap API we provided strparser and verdict programs
using a single attach command by extending the attach API with the
attach_bpf_fd2 field.
However, if we add other programs in the future we will be adding a
field for every new possible type, attach_bpf_fd(3,4,..). This
seems a bit clumsy for an API. So let's push the programs using two
new type fields.
BPF_SK_SKB_STREAM_PARSER
BPF_SK_SKB_STREAM_VERDICT
This has the advantage of having a readable name and can easily be
extended in the future.
Updates to samples and sockmap included here also generalize tests
slightly to support upcoming patch for multiple map support.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Suggested-by: Alexei Starovoitov <ast@kernel.org>
---
include/linux/bpf.h | 10 +-
include/uapi/linux/bpf.h | 9 -
kernel/bpf/sockmap.c | 25 ++--
kernel/bpf/syscall.c | 38 ++----
samples/sockmap/sockmap_kern.c | 6 -
samples/sockmap/sockmap_user.c | 12 ++
tools/include/uapi/linux/bpf.h | 9 -
tools/lib/bpf/bpf.c | 14 --
tools/lib/bpf/bpf.h | 4 -
tools/testing/selftests/bpf/bpf_helpers.h | 3
tools/testing/selftests/bpf/sockmap_parse_prog.c | 2
tools/testing/selftests/bpf/sockmap_verdict_prog.c | 2
tools/testing/selftests/bpf/test_maps.c | 133 +++++++++-----------
13 files changed, 116 insertions(+), 151 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 830f472..c2cb1b5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,8 +39,6 @@ struct bpf_map_ops {
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
- int (*map_attach)(struct bpf_map *map,
- struct bpf_prog *p1, struct bpf_prog *p2);
};
struct bpf_map {
@@ -387,11 +385,19 @@ static inline void __dev_map_flush(struct bpf_map *map)
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
+int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
#else
static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
{
return NULL;
}
+
+static inline int sock_map_attach_prog(struct bpf_map *map,
+ struct bpf_prog *prog,
+ u32 type)
+{
+ return -EOPNOTSUPP;
+}
#endif
/* verifier prototypes for helper functions called from eBPF programs */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 843818d..97227be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -136,7 +136,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
- BPF_CGROUP_SMAP_INGRESS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -224,7 +225,6 @@ enum bpf_attach_type {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
- __u32 attach_bpf_fd2;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -580,14 +580,11 @@ enum bpf_attach_type {
* @flags: reserved for future use
* Return: SK_REDIRECT
*
- * int bpf_sock_map_update(skops, map, key, flags, map_flags)
+ * int bpf_sock_map_update(skops, map, key, flags)
* @skops: pointer to bpf_sock_ops
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
- * @map_flags: sock map specific flags
- * bit 1: Enable strparser
- * other bits: reserved
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 617c239..cf570d1 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -723,20 +723,24 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
return err;
}
-static int sock_map_attach_prog(struct bpf_map *map,
- struct bpf_prog *parse,
- struct bpf_prog *verdict)
+int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
- struct bpf_prog *_parse, *_verdict;
+ struct bpf_prog *orig;
- _parse = xchg(&stab->bpf_parse, parse);
- _verdict = xchg(&stab->bpf_verdict, verdict);
+ switch (type) {
+ case BPF_SK_SKB_STREAM_PARSER:
+ orig = xchg(&stab->bpf_parse, prog);
+ break;
+ case BPF_SK_SKB_STREAM_VERDICT:
+ orig = xchg(&stab->bpf_verdict, prog);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
- if (_parse)
- bpf_prog_put(_parse);
- if (_verdict)
- bpf_prog_put(_verdict);
+ if (orig)
+ bpf_prog_put(orig);
return 0;
}
@@ -777,7 +781,6 @@ static int sock_map_update_elem(struct bpf_map *map,
.map_get_next_key = sock_map_get_next_key,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem,
- .map_attach = sock_map_attach_prog,
};
BPF_CALL_5(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9378f3b..021a05d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1093,12 +1093,12 @@ static int bpf_obj_get(const union bpf_attr *attr)
#ifdef CONFIG_CGROUP_BPF
-#define BPF_PROG_ATTACH_LAST_FIELD attach_bpf_fd2
+#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
-static int sockmap_get_from_fd(const union bpf_attr *attr, int ptype)
+static int sockmap_get_from_fd(const union bpf_attr *attr)
{
- struct bpf_prog *prog1, *prog2;
int ufd = attr->target_fd;
+ struct bpf_prog *prog;
struct bpf_map *map;
struct fd f;
int err;
@@ -1108,29 +1108,16 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, int ptype)
if (IS_ERR(map))
return PTR_ERR(map);
- if (!map->ops->map_attach) {
- fdput(f);
- return -EOPNOTSUPP;
- }
-
- prog1 = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
- if (IS_ERR(prog1)) {
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, BPF_PROG_TYPE_SK_SKB);
+ if (IS_ERR(prog)) {
fdput(f);
- return PTR_ERR(prog1);
- }
-
- prog2 = bpf_prog_get_type(attr->attach_bpf_fd2, ptype);
- if (IS_ERR(prog2)) {
- fdput(f);
- bpf_prog_put(prog1);
- return PTR_ERR(prog2);
+ return PTR_ERR(prog);
}
- err = map->ops->map_attach(map, prog1, prog2);
+ err = sock_map_attach_prog(map, prog, attr->attach_type);
if (err) {
fdput(f);
- bpf_prog_put(prog1);
- bpf_prog_put(prog2);
+ bpf_prog_put(prog);
return err;
}
@@ -1165,16 +1152,13 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
- case BPF_CGROUP_SMAP_INGRESS:
- ptype = BPF_PROG_TYPE_SK_SKB;
- break;
+ case BPF_SK_SKB_STREAM_PARSER:
+ case BPF_SK_SKB_STREAM_VERDICT:
+ return sockmap_get_from_fd(attr);
default:
return -EINVAL;
}
- if (attr->attach_type == BPF_CGROUP_SMAP_INGRESS)
- return sockmap_get_from_fd(attr, ptype);
-
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
return PTR_ERR(prog);
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 6ff986f..f9b38ef 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -82,8 +82,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (lport == 10000) {
ret = 1;
err = bpf_sock_map_update(skops, &sock_map, &ret,
- BPF_NOEXIST,
- BPF_SOCKMAP_STRPARSER);
+ BPF_NOEXIST);
bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
lport, bpf_ntohl(rport), err);
}
@@ -95,8 +94,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (bpf_ntohl(rport) == 10001) {
ret = 10;
err = bpf_sock_map_update(skops, &sock_map, &ret,
- BPF_NOEXIST,
- BPF_SOCKMAP_STRPARSER);
+ BPF_NOEXIST);
bpf_printk("active(%i -> %i) map ctx update err: %d\n",
lport, bpf_ntohl(rport), err);
}
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index fb78f5a..7cc9d22 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -256,8 +256,16 @@ int main(int argc, char **argv)
}
/* Attach programs to sockmap */
- err = __bpf_prog_attach(prog_fd[0], prog_fd[1], map_fd[0],
- BPF_CGROUP_SMAP_INGRESS, 0);
+ err = bpf_prog_attach(prog_fd[0], map_fd[0],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_prog_attach(prog_fd[1], map_fd[0],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
err, strerror(errno));
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f8f6377..09ac590 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -136,7 +136,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
- BPF_CGROUP_SMAP_INGRESS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -227,7 +228,6 @@ enum bpf_sockmap_flags {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
- __u32 attach_bpf_fd2;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -572,14 +572,11 @@ enum bpf_sockmap_flags {
* @flags: reserved for future use
* Return: SK_REDIRECT
*
- * int bpf_sock_map_update(skops, map, key, flags, map_flags)
+ * int bpf_sock_map_update(skops, map, key, flags)
* @skops: pointer to bpf_sock_ops
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
- * @map_flags: sock map specific flags
- * bit 1: Enable strparser
- * other bits: reserved
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a071761..1d6907d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -235,28 +235,20 @@ int bpf_obj_get(const char *pathname)
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
}
-int __bpf_prog_attach(int prog_fd1, int prog_fd2, int target_fd,
- enum bpf_attach_type type,
- unsigned int flags)
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+ unsigned int flags)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
attr.target_fd = target_fd;
- attr.attach_bpf_fd = prog_fd1;
- attr.attach_bpf_fd2 = prog_fd2;
+ attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
attr.attach_flags = flags;
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}
-int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
- unsigned int flags)
-{
- return __bpf_prog_attach(prog_fd, 0, target_fd, type, flags);
-}
-
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 90e9d4e..b8ea584 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -56,10 +56,6 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
int bpf_obj_get(const char *pathname);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
unsigned int flags);
-int __bpf_prog_attach(int prog1, int prog2,
- int attachable_fd,
- enum bpf_attach_type type,
- unsigned int flags);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
void *data_out, __u32 *size_out, __u32 *retval,
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 98f3be2..36fb916 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -68,8 +68,7 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
- unsigned long long flags,
- unsigned long long map_lags) =
+ unsigned long long flags) =
(void *) BPF_FUNC_sock_map_update;
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
index 8b54531..710f43f 100644
--- a/tools/testing/selftests/bpf/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -30,7 +30,7 @@ int bpf_prog1(struct __sk_buff *skb)
*/
d[0] = 1;
- bpf_printk("data[0] = (%u): local_port %i remote %i\n",
+ bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n",
d[0], lport, bpf_ntohl(rport));
return skb->len;
}
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index d5f9447..0573c1d 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -40,7 +40,7 @@ int bpf_prog2(struct __sk_buff *skb)
d[6] = 0xe;
d[7] = 0xf;
- bpf_printk("data[0] = (%u): local_port %i remote %i\n",
+ bpf_printk("verdict: data[0] = (%u): local_port %i remote %i redirect 5\n",
d[0], lport, bpf_ntohl(rport));
return bpf_sk_redirect_map(&sock_map, 5, 0);
}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 40b2d1f..6df6e62 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -547,20 +547,26 @@ static void test_sockmap(int task, void *data)
goto out_sockmap;
}
- /* Nothing attached so these should fail */
+ /* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
- if (!err) {
- printf("Failed invalid update sockmap '%i:%i'\n",
+ if (err) {
+ printf("Failed noprog update sockmap '%i:%i'\n",
i, sfd[i]);
goto out_sockmap;
}
}
/* Test attaching bad fds */
- err = __bpf_prog_attach(-1, -2, fd, BPF_CGROUP_SMAP_INGRESS, 0);
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
if (!err) {
- printf("Failed invalid prog attach\n");
+ printf("Failed invalid parser prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid verdict prog attach\n");
goto out_sockmap;
}
@@ -591,14 +597,21 @@ static void test_sockmap(int task, void *data)
goto out_sockmap;
}
- err = __bpf_prog_attach(parse_prog, verdict_prog, map_fd,
- BPF_CGROUP_SMAP_INGRESS, 0);
+ err = bpf_prog_attach(parse_prog, map_fd,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ printf("Failed bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(verdict_prog, map_fd,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
if (err) {
printf("Failed bpf prog attach\n");
goto out_sockmap;
}
- /* Test map update elem */
+ /* Test map update elem afterwards fd lives in fd and map_fd */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_ANY);
if (err) {
@@ -649,96 +662,68 @@ static void test_sockmap(int task, void *data)
goto out_sockmap;
}
- /* Delete the reset of the elems include some NULL elems */
- for (i = 0; i < 6; i++) {
- err = bpf_map_delete_elem(map_fd, &i);
- if (err && (i == 0 || i == 1 || i >= 4)) {
- printf("Failed delete sockmap %i '%i:%i'\n",
- err, i, sfd[i]);
- goto out_sockmap;
- } else if (!err && (i == 2 || i == 3)) {
- printf("Failed null delete sockmap %i '%i:%i'\n",
- err, i, sfd[i]);
- goto out_sockmap;
- }
- }
-
- /* Test having multiple SMAPs open and active on same fds */
- err = __bpf_prog_attach(parse_prog, verdict_prog, fd,
- BPF_CGROUP_SMAP_INGRESS, 0);
- if (err) {
- printf("Failed fd bpf prog attach\n");
- goto out_sockmap;
- }
-
- for (i = 0; i < 6; i++) {
- err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
- if (err) {
- printf("Failed fd update sockmap %i '%i:%i'\n",
- err, i, sfd[i]);
- goto out_sockmap;
- }
- }
-
- /* Test duplicate socket add of NOEXIST, ANY and EXIST */
- i = 0;
+ /* Push fd into same slot */
+ i = 2;
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
if (!err) {
- printf("Failed BPF_NOEXIST create\n");
+ printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
goto out_sockmap;
}
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
if (err) {
- printf("Failed sockmap update BPF_ANY\n");
+ printf("Failed sockmap update new slot BPF_ANY\n");
goto out_sockmap;
}
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
if (err) {
- printf("Failed sockmap update BPF_EXIST\n");
+ printf("Failed sockmap update new slot BPF_EXIST\n");
goto out_sockmap;
}
- /* The above were pushing fd into same slot try different slot now */
- i = 2;
- err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
- if (!err) {
- printf("Failed BPF_NOEXIST create\n");
- goto out_sockmap;
+ /* Delete the elems without programs */
+ for (i = 0; i < 6; i++) {
+ err = bpf_map_delete_elem(fd, &i);
+ if (err) {
+ printf("Failed delete sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ }
}
- err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ /* Test having multiple maps open and set with programs on same fds */
+ err = bpf_prog_attach(parse_prog, fd,
+ BPF_SK_SKB_STREAM_PARSER, 0);
if (err) {
- printf("Failed sockmap update BPF_ANY\n");
+ printf("Failed fd bpf parse prog attach\n");
goto out_sockmap;
}
-
- err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ err = bpf_prog_attach(verdict_prog, fd,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
if (err) {
- printf("Failed sockmap update BPF_EXIST\n");
+ printf("Failed fd bpf verdict prog attach\n");
goto out_sockmap;
}
- /* Try pushing fd into different map, this is not allowed at the
- * moment. Which programs would we use?
- */
- err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_NOEXIST);
- if (!err) {
- printf("Failed BPF_NOEXIST create\n");
- goto out_sockmap;
- }
-
- err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_ANY);
- if (!err) {
- printf("Failed sockmap update BPF_ANY\n");
- goto out_sockmap;
- }
-
- err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_EXIST);
- if (!err) {
- printf("Failed sockmap update BPF_EXIST\n");
- goto out_sockmap;
+ for (i = 4; i < 6; i++) {
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (!err) {
+ printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
}
/* Test map close sockets */
^ permalink raw reply related
* [net-next PATCH 2/9] bpf: sockmap, remove STRPARSER map_flags and add multi-map support
From: John Fastabend @ 2017-08-28 14:10 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
The addition of map_flags BPF_SOCKMAP_STRPARSER flags was to handle a
specific use case where we want to have BPF parse program disabled on
an entry in a sockmap.
However, Alexei found the API a bit cumbersome and I agreed. Let's
remove the STRPARSER flag and support the use case by allowing socks
to be in multiple maps. This allows users to create two maps one with
programs attached and one without. When socks are added to maps they
now inherit any programs attached to the map. This is a nice
generalization and IMO improves the API.
The API rules are less ambiguous and do not need a flag:
- When a sock is added to a sockmap we have two cases,
i. The sock map does not have any attached programs so
we can add sock to map without inheriting bpf programs.
The sock may exist in 0 or more other maps.
ii. The sock map has an attached BPF program. To avoid duplicate
bpf programs we only add the sock entry if it does not have
an existing strparser/verdict attached, returning -EBUSY if
a program is already attached. Otherwise attach the program
and inherit strparser/verdict programs from the sock map.
This allows for socks to be in multiple maps for redirects and
inherit a BPF program from a single map.
Also this patch simplifies the logic around BPF_{EXIST|NOEXIST|ANY}
flags. In the original patch I tried to be extra clever and only
update map entries when necessary. Now I've decided the complexity
is not worth it. If users constantly update an entry with the same
sock for no reason (i.e. update an entry without actually changing
any parameters on map or sock) we still do an alloc/release. Using
this and allowing multiple entries of a sock to exist in a map the
logic becomes much simpler.
Note: Now that multiple maps are supported the "maps" pointer called
when a socket is closed becomes a list of maps to remove the sock from.
To keep the map up to date when a sock is added to the sockmap we must
add the map/elem in the list. Likewise when it is removed we must
remove it from the list. This results in searching the per psock list
on delete operation. On TCP_CLOSE events we walk the list and remove
the psock from all map/entry locations. I don't see any perf
implications in this because at most I have a psock in two maps. If
a psock were to be in many maps it's possible this might be noticeable
on delete but I can't think of a reason to dup a psock in many maps.
The sk_callback_lock is used to protect read/writes to the list. This
was convenient because in all locations we were taking the lock
anyways just after working on the list. Also the lock is per sock so
in normal cases we shouldn't see any contention.
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
include/uapi/linux/bpf.h | 3 -
kernel/bpf/sockmap.c | 269 ++++++++++++++++++++++++++++------------------
2 files changed, 165 insertions(+), 107 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97227be..08c206a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -143,9 +143,6 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
-/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */
-#define BPF_SOCKMAP_STRPARSER (1U << 0)
-
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index cf570d1..a6882e5 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -13,15 +13,16 @@
/* A BPF sock_map is used to store sock objects. This is primarly used
* for doing socket redirect with BPF helper routines.
*
- * A sock map may have two BPF programs attached to it, a program used
- * to parse packets and a program to provide a verdict and redirect
- * decision on the packet. If no BPF parse program is provided it is
- * assumed that every skb is a "message" (skb->len). Otherwise the
- * parse program is attached to strparser and used to build messages
- * that may span multiple skbs. The verdict program will either select
- * a socket to send/receive the skb on or provide the drop code indicating
- * the skb should be dropped. More actions may be added later as needed.
- * The default program will drop packets.
+ * A sock map may have BPF programs attached to it, currently a program
+ * used to parse packets and a program to provide a verdict and redirect
+ * decision on the packet are supported. Any programs attached to a sock
+ * map are inherited by sock objects when they are added to the map. If
+ * no BPF programs are attached the sock object may only be used for sock
+ * redirect.
+ *
+ * A sock object may be in multiple maps, but can only inherit a single
+ * parse or verdict program. If adding a sock object to a map would result
+ * in having multiple parsing programs the update will return an EBUSY error.
*
* For reference this program is similar to devmap used in XDP context
* reviewing these together may be useful. For an example please review
@@ -44,15 +45,21 @@ struct bpf_stab {
struct sock **sock_map;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
- refcount_t refcnt;
};
enum smap_psock_state {
SMAP_TX_RUNNING,
};
+struct smap_psock_map_entry {
+ struct list_head list;
+ struct sock **entry;
+};
+
struct smap_psock {
struct rcu_head rcu;
+ /* refcnt is used inside sk_callback_lock */
+ u32 refcnt;
/* datapath variables */
struct sk_buff_head rxqueue;
@@ -66,10 +73,9 @@ struct smap_psock {
struct strparser strp;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
- struct bpf_stab *stab;
+ struct list_head maps;
/* Back reference used when sock callback trigger sockmap operations */
- int key;
struct sock *sock;
unsigned long state;
@@ -83,7 +89,7 @@ struct smap_psock {
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
- return (struct smap_psock *)rcu_dereference_sk_user_data(sk);
+ return rcu_dereference_sk_user_data(sk);
}
static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
@@ -149,11 +155,12 @@ static void smap_report_sk_error(struct smap_psock *psock, int err)
sk->sk_error_report(sk);
}
-static void smap_release_sock(struct sock *sock);
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
/* Called with lock_sock(sk) held */
static void smap_state_change(struct sock *sk)
{
+ struct smap_psock_map_entry *e, *tmp;
struct smap_psock *psock;
struct sock *osk;
@@ -184,9 +191,15 @@ static void smap_state_change(struct sock *sk)
psock = smap_psock_sk(sk);
if (unlikely(!psock))
break;
- osk = cmpxchg(&psock->stab->sock_map[psock->key], sk, NULL);
- if (osk == sk)
- smap_release_sock(sk);
+ write_lock_bh(&sk->sk_callback_lock);
+ list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ osk = cmpxchg(e->entry, sk, NULL);
+ if (osk == sk) {
+ list_del(&e->list);
+ smap_release_sock(psock, sk);
+ }
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
break;
default:
psock = smap_psock_sk(sk);
@@ -289,9 +302,8 @@ static void smap_write_space(struct sock *sk)
static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
{
- write_lock_bh(&sk->sk_callback_lock);
if (!psock->strp_enabled)
- goto out;
+ return;
sk->sk_data_ready = psock->save_data_ready;
sk->sk_write_space = psock->save_write_space;
sk->sk_state_change = psock->save_state_change;
@@ -300,8 +312,6 @@ static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
psock->save_state_change = NULL;
strp_stop(&psock->strp);
psock->strp_enabled = false;
-out:
- write_unlock_bh(&sk->sk_callback_lock);
}
static void smap_destroy_psock(struct rcu_head *rcu)
@@ -318,9 +328,11 @@ static void smap_destroy_psock(struct rcu_head *rcu)
schedule_work(&psock->gc_work);
}
-static void smap_release_sock(struct sock *sock)
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
{
- struct smap_psock *psock = smap_psock_sk(sock);
+ psock->refcnt--;
+ if (psock->refcnt)
+ return;
smap_stop_sock(psock, sock);
clear_bit(SMAP_TX_RUNNING, &psock->state);
@@ -414,6 +426,7 @@ static void sock_map_remove_complete(struct bpf_stab *stab)
static void smap_gc_work(struct work_struct *w)
{
+ struct smap_psock_map_entry *e, *tmp;
struct smap_psock *psock;
psock = container_of(w, struct smap_psock, gc_work);
@@ -431,8 +444,10 @@ static void smap_gc_work(struct work_struct *w)
if (psock->bpf_verdict)
bpf_prog_put(psock->bpf_verdict);
- if (refcount_dec_and_test(&psock->stab->refcnt))
- sock_map_remove_complete(psock->stab);
+ list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ list_del(&e->list);
+ kfree(e);
+ }
sock_put(psock->sock);
kfree(psock);
@@ -453,6 +468,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
skb_queue_head_init(&psock->rxqueue);
INIT_WORK(&psock->tx_work, smap_tx_work);
INIT_WORK(&psock->gc_work, smap_gc_work);
+ INIT_LIST_HEAD(&psock->maps);
+ psock->refcnt = 1;
rcu_assign_sk_user_data(sock, psock);
sock_hold(sock);
@@ -503,13 +520,24 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
if (!stab->sock_map)
goto free_stab;
- refcount_set(&stab->refcnt, 1);
return &stab->map;
free_stab:
kfree(stab);
return ERR_PTR(err);
}
+static void smap_list_remove(struct smap_psock *psock, struct sock **entry)
+{
+ struct smap_psock_map_entry *e, *tmp;
+
+ list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ if (e->entry == entry) {
+ list_del(&e->list);
+ break;
+ }
+ }
+}
+
static void sock_map_free(struct bpf_map *map)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
@@ -526,13 +554,18 @@ static void sock_map_free(struct bpf_map *map)
*/
rcu_read_lock();
for (i = 0; i < stab->map.max_entries; i++) {
+ struct smap_psock *psock;
struct sock *sock;
sock = xchg(&stab->sock_map[i], NULL);
if (!sock)
continue;
- smap_release_sock(sock);
+ write_lock_bh(&sock->sk_callback_lock);
+ psock = smap_psock_sk(sock);
+ smap_list_remove(psock, &stab->sock_map[i]);
+ smap_release_sock(psock, sock);
+ write_unlock_bh(&sock->sk_callback_lock);
}
rcu_read_unlock();
@@ -541,8 +574,7 @@ static void sock_map_free(struct bpf_map *map)
if (stab->bpf_parse)
bpf_prog_put(stab->bpf_parse);
- if (refcount_dec_and_test(&stab->refcnt))
- sock_map_remove_complete(stab);
+ sock_map_remove_complete(stab);
}
static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -576,6 +608,7 @@ struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
static int sock_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ struct smap_psock *psock;
int k = *(u32 *)key;
struct sock *sock;
@@ -586,7 +619,17 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
if (!sock)
return -EINVAL;
- smap_release_sock(sock);
+ write_lock_bh(&sock->sk_callback_lock);
+ psock = smap_psock_sk(sock);
+ if (!psock)
+ goto out;
+
+ if (psock->bpf_parse)
+ smap_stop_sock(psock, sock);
+ smap_list_remove(psock, &stab->sock_map[k]);
+ smap_release_sock(psock, sock);
+out:
+ write_unlock_bh(&sock->sk_callback_lock);
return 0;
}
@@ -601,29 +644,34 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
* and syncd so we are certain all references from the update/lookup/delete
* operations as well as references in the data path are no longer in use.
*
- * A psock object holds a refcnt on the sockmap it is attached to and this is
- * not decremented until after a RCU grace period and garbage collection occurs.
- * This ensures the map is not free'd until psocks linked to it are removed. The
- * map link is used when the independent sock events trigger map deletion.
+ * Psocks may exist in multiple maps, but only a single set of parse/verdict
+ * programs may be inherited from the maps it belongs to. A reference count
+ * is kept with the total number of references to the psock from all maps. The
+ * psock will not be released until this reaches zero. The psock and sock
+ * user data data use the sk_callback_lock to protect critical data structures
+ * from concurrent access. This allows us to avoid two updates from modifying
+ * the user data in sock and the lock is required anyways for modifying
+ * callbacks, we simply increase its scope slightly.
*
- * Psocks may only participate in one sockmap at a time. Users that try to
- * join a single sock to multiple maps will get an error.
- *
- * Last, but not least, it is possible the socket is closed while running
- * an update on an existing psock. This will release the psock, but again
- * not until the update has completed due to rcu grace period rules.
+ * Rules to follow,
+ * - psock must always be read inside RCU critical section
+ * - sk_user_data must only be modified inside sk_callback_lock and read
+ * inside RCU critical section.
+ * - psock->maps list must only be read & modified inside sk_callback_lock
+ * - sock_map must use READ_ONCE and (cmp)xchg operations
+ * - BPF verdict/parse programs must use READ_ONCE and xchg operations
*/
static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
struct bpf_map *map,
- void *key, u64 flags, u64 map_flags)
+ void *key, u64 flags)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ struct smap_psock_map_entry *e = NULL;
struct bpf_prog *verdict, *parse;
- struct smap_psock *psock = NULL;
- struct sock *old_sock, *sock;
+ struct sock *osock, *sock;
+ struct smap_psock *psock;
u32 i = *(u32 *)key;
- bool update = false;
- int err = 0;
+ int err;
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
@@ -631,35 +679,22 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
if (unlikely(i >= stab->map.max_entries))
return -E2BIG;
- if (unlikely(map_flags > BPF_SOCKMAP_STRPARSER))
- return -EINVAL;
-
- verdict = parse = NULL;
sock = READ_ONCE(stab->sock_map[i]);
-
- if (flags == BPF_EXIST || flags == BPF_ANY) {
- if (!sock && flags == BPF_EXIST) {
- return -ENOENT;
- } else if (sock && sock != skops->sk) {
- return -EINVAL;
- } else if (sock) {
- psock = smap_psock_sk(sock);
- if (unlikely(!psock))
- return -EBUSY;
- update = true;
- }
- } else if (sock && BPF_NOEXIST) {
+ if (flags == BPF_EXIST && !sock)
+ return -ENOENT;
+ else if (flags == BPF_NOEXIST && sock)
return -EEXIST;
- }
- /* reserve BPF programs early so can abort easily on failures */
- if (map_flags & BPF_SOCKMAP_STRPARSER) {
- verdict = READ_ONCE(stab->bpf_verdict);
- parse = READ_ONCE(stab->bpf_parse);
+ sock = skops->sk;
- if (!verdict || !parse)
- return -ENOENT;
+ /* 1. If sock map has BPF programs those will be inherited by the
+ * sock being added. If the sock is already attached to BPF programs
+ * this results in an error.
+ */
+ verdict = READ_ONCE(stab->bpf_verdict);
+ parse = READ_ONCE(stab->bpf_parse);
+ if (parse && verdict) {
/* bpf prog refcnt may be zero if a concurrent attach operation
* removes the program after the above READ_ONCE() but before
* we increment the refcnt. If this is the case abort with an
@@ -676,50 +711,78 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}
}
- if (!psock) {
- sock = skops->sk;
- if (rcu_dereference_sk_user_data(sock))
- return -EEXIST;
+ write_lock_bh(&sock->sk_callback_lock);
+ psock = smap_psock_sk(sock);
+
+ /* 2. Do not allow inheriting programs if psock exists and has
+ * already inherited programs. This would create confusion on
+ * which parser/verdict program is running. If no psock exists
+ * create one. Inside sk_callback_lock to ensure concurrent create
+ * doesn't update user data.
+ */
+ if (psock) {
+ if (READ_ONCE(psock->bpf_parse) && parse) {
+ err = -EBUSY;
+ goto out_progs;
+ }
+ psock->refcnt++;
+ } else {
psock = smap_init_psock(sock, stab);
if (IS_ERR(psock)) {
- if (verdict)
- bpf_prog_put(verdict);
- if (parse)
- bpf_prog_put(parse);
- return PTR_ERR(psock);
+ err = PTR_ERR(psock);
+ goto out_progs;
}
- psock->key = i;
- psock->stab = stab;
- refcount_inc(&stab->refcnt);
+
set_bit(SMAP_TX_RUNNING, &psock->state);
}
- if (map_flags & BPF_SOCKMAP_STRPARSER) {
- write_lock_bh(&sock->sk_callback_lock);
- if (psock->strp_enabled)
- goto start_done;
+ e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_progs;
+ }
+ e->entry = &stab->sock_map[i];
+
+ /* 3. At this point we have a reference to a valid psock that is
+ * running. Attach any BPF programs needed.
+ */
+ if (parse && verdict && !psock->strp_enabled) {
err = smap_init_sock(psock, sock);
if (err)
- goto out;
+ goto out_free;
smap_init_progs(psock, stab, verdict, parse);
smap_start_sock(psock, sock);
-start_done:
- write_unlock_bh(&sock->sk_callback_lock);
- } else if (update) {
- smap_stop_sock(psock, sock);
}
- if (!update) {
- old_sock = xchg(&stab->sock_map[i], skops->sk);
- if (old_sock)
- smap_release_sock(old_sock);
- }
+ /* 4. Place psock in sockmap for use and stop any programs on
+ * the old sock assuming it's not the same sock we are replacing
+ * it with. Because we can only have a single set of programs if
+ * old_sock has a strp we can stop it.
+ */
+ list_add_tail(&e->list, &psock->maps);
+ write_unlock_bh(&sock->sk_callback_lock);
+ osock = xchg(&stab->sock_map[i], sock);
+ if (osock) {
+ struct smap_psock *opsock = smap_psock_sk(osock);
+
+ write_lock_bh(&osock->sk_callback_lock);
+ if (osock != sock && parse)
+ smap_stop_sock(opsock, osock);
+ smap_list_remove(opsock, &stab->sock_map[i]);
+ smap_release_sock(opsock, osock);
+ write_unlock_bh(&osock->sk_callback_lock);
+ }
return 0;
-out:
+out_free:
+ smap_release_sock(psock, sock);
+out_progs:
+ if (verdict)
+ bpf_prog_put(verdict);
+ if (parse)
+ bpf_prog_put(parse);
write_unlock_bh(&sock->sk_callback_lock);
- if (!update)
- smap_release_sock(sock);
+ kfree(e);
return err;
}
@@ -768,8 +831,7 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL;
}
- err = sock_map_ctx_update_elem(&skops, map, key,
- flags, BPF_SOCKMAP_STRPARSER);
+ err = sock_map_ctx_update_elem(&skops, map, key, flags);
fput(socket->file);
return err;
}
@@ -783,11 +845,11 @@ static int sock_map_update_elem(struct bpf_map *map,
.map_delete_elem = sock_map_delete_elem,
};
-BPF_CALL_5(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
- struct bpf_map *, map, void *, key, u64, flags, u64, map_flags)
+BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
+ struct bpf_map *, map, void *, key, u64, flags)
{
WARN_ON_ONCE(!rcu_read_lock_held());
- return sock_map_ctx_update_elem(bpf_sock, map, key, flags, map_flags);
+ return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
}
const struct bpf_func_proto bpf_sock_map_update_proto = {
@@ -799,5 +861,4 @@ static int sock_map_update_elem(struct bpf_map *map,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_PTR_TO_MAP_KEY,
.arg4_type = ARG_ANYTHING,
- .arg5_type = ARG_ANYTHING,
};
^ permalink raw reply related
* [net-next PATCH 3/9] bpf: sockmap add missing rcu_read_(un)lock in smap_data_ready
From: John Fastabend @ 2017-08-28 14:10 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
References to psock must be done inside RCU critical section.
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/sockmap.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index a6882e5..266011c8 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -227,11 +227,14 @@ static void smap_data_ready(struct sock *sk)
{
struct smap_psock *psock;
- write_lock_bh(&sk->sk_callback_lock);
+ rcu_read_lock();
psock = smap_psock_sk(sk);
- if (likely(psock))
+ if (likely(psock)) {
+ write_lock_bh(&sk->sk_callback_lock);
strp_data_ready(&psock->strp);
- write_unlock_bh(&sk->sk_callback_lock);
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
+ rcu_read_unlock();
}
static void smap_tx_work(struct work_struct *w)
^ permalink raw reply related
* [net-next PATCH 4/9] bpf: additional sockmap self tests
From: John Fastabend @ 2017-08-28 14:11 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
Add some more sockmap tests to cover,
- forwarding to NULL entries
- more than two maps to test list ops
- forwarding to different map
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
tools/testing/selftests/bpf/sockmap_parse_prog.c | 6 +
tools/testing/selftests/bpf/sockmap_verdict_prog.c | 23 +++-
tools/testing/selftests/bpf/test_maps.c | 113 +++++++++++++-------
3 files changed, 96 insertions(+), 46 deletions(-)
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
index 710f43f..fae3b96 100644
--- a/tools/testing/selftests/bpf/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -19,16 +19,16 @@ int bpf_prog1(struct __sk_buff *skb)
void *data = (void *)(long) skb->data;
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
- char *d = data;
+ __u8 *d = data;
- if (data + 8 > data_end)
+ if (data + 10 > data_end)
return skb->len;
/* This write/read is a bit pointless but tests the verifier and
* strparser handler for read/write pkt data and access into sk
* fields.
*/
- d[0] = 1;
+ d[7] = 1;
bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n",
d[0], lport, bpf_ntohl(rport));
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index 0573c1d..dada207 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -12,7 +12,14 @@
##__VA_ARGS__); \
})
-struct bpf_map_def SEC("maps") sock_map = {
+struct bpf_map_def SEC("maps") sock_map_rx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_tx = {
.type = BPF_MAP_TYPE_SOCKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
@@ -26,11 +33,15 @@ int bpf_prog2(struct __sk_buff *skb)
void *data = (void *)(long) skb->data;
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
- char *d = data;
+ __u8 *d = data;
+ __u8 sk, map;
if (data + 8 > data_end)
return SK_DROP;
+ map = d[0];
+ sk = d[1];
+
d[0] = 0xd;
d[1] = 0xe;
d[2] = 0xa;
@@ -40,9 +51,11 @@ int bpf_prog2(struct __sk_buff *skb)
d[6] = 0xe;
d[7] = 0xf;
- bpf_printk("verdict: data[0] = (%u): local_port %i remote %i redirect 5\n",
- d[0], lport, bpf_ntohl(rport));
- return bpf_sk_redirect_map(&sock_map, 5, 0);
+ bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
+
+ if (!map)
+ return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
+ return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6df6e62..0a7f457 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -465,10 +465,10 @@ static void test_sockmap(int task, void *data)
{
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, sfd[6] = {0xdeadbeef};
- char buf[] = "hello sockmap user\n";
- int one = 1, map_fd, s, sc, rc;
+ u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
+ int one = 1, map_fd_rx, map_fd_tx, s, sc, rc;
int parse_prog, verdict_prog;
- struct bpf_map *bpf_map;
+ struct bpf_map *bpf_map_rx, *bpf_map_tx;
struct sockaddr_in addr;
struct bpf_object *obj;
struct timeval to;
@@ -585,26 +585,38 @@ static void test_sockmap(int task, void *data)
goto out_sockmap;
}
- bpf_map = bpf_object__find_map_by_name(obj, "sock_map");
- if (IS_ERR(bpf_map)) {
- printf("Failed to load map from verdict prog\n");
+ bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
+ if (IS_ERR(bpf_map_rx)) {
+ printf("Failed to load map rx from verdict prog\n");
goto out_sockmap;
}
- map_fd = bpf_map__fd(bpf_map);
- if (map_fd < 0) {
+ map_fd_rx = bpf_map__fd(bpf_map_rx);
+ if (map_fd_rx < 0) {
printf("Failed to get map fd\n");
goto out_sockmap;
}
- err = bpf_prog_attach(parse_prog, map_fd,
+ bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
+ if (IS_ERR(bpf_map_tx)) {
+ printf("Failed to load map tx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_tx = bpf_map__fd(bpf_map_tx);
+ if (map_fd_tx < 0) {
+ printf("Failed to get map tx fd\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_rx,
BPF_SK_SKB_STREAM_PARSER, 0);
if (err) {
printf("Failed bpf prog attach\n");
goto out_sockmap;
}
- err = bpf_prog_attach(verdict_prog, map_fd,
+ err = bpf_prog_attach(verdict_prog, map_fd_rx,
BPF_SK_SKB_STREAM_VERDICT, 0);
if (err) {
printf("Failed bpf prog attach\n");
@@ -613,9 +625,15 @@ static void test_sockmap(int task, void *data)
/* Test map update elem afterwards fd lives in fd and map_fd */
for (i = 0; i < 6; i++) {
- err = bpf_map_update_elem(map_fd, &i, &sfd[i], BPF_ANY);
+ err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
if (err) {
- printf("Failed map_fd update sockmap %i '%i:%i'\n",
+ printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
@@ -623,42 +641,61 @@ static void test_sockmap(int task, void *data)
/* Test map delete elem and remove send/recv sockets */
for (i = 2; i < 4; i++) {
- err = bpf_map_delete_elem(map_fd, &i);
+ err = bpf_map_delete_elem(map_fd_rx, &i);
+ if (err) {
+ printf("Failed delete sockmap rx %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_delete_elem(map_fd_tx, &i);
if (err) {
- printf("Failed delete sockmap %i '%i:%i'\n",
+ printf("Failed delete sockmap tx %i '%i:%i'\n",
err, i, sfd[i]);
goto out_sockmap;
}
}
/* Test map send/recv */
- sc = send(sfd[2], buf, 10, 0);
- if (sc < 0) {
- printf("Failed sockmap send\n");
- goto out_sockmap;
- }
+ for (i = 0; i < 2; i++) {
+ buf[0] = i;
+ buf[1] = 0x5;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
+ goto out_sockmap;
+ }
- FD_ZERO(&w);
- FD_SET(sfd[3], &w);
- to.tv_sec = 1;
- to.tv_usec = 0;
- s = select(sfd[3] + 1, &w, NULL, NULL, &to);
- if (s == -1) {
- perror("Failed sockmap select()");
- goto out_sockmap;
- } else if (!s) {
- printf("Failed sockmap unexpected timeout\n");
- goto out_sockmap;
- }
+ FD_ZERO(&w);
+ FD_SET(sfd[3], &w);
+ to.tv_sec = 1;
+ to.tv_usec = 0;
+ s = select(sfd[3] + 1, &w, NULL, NULL, &to);
+ if (s == -1) {
+ perror("Failed sockmap select()");
+ goto out_sockmap;
+ } else if (!s) {
+ printf("Failed sockmap unexpected timeout\n");
+ goto out_sockmap;
+ }
- if (!FD_ISSET(sfd[3], &w)) {
- printf("Failed sockmap select/recv\n");
- goto out_sockmap;
+ if (!FD_ISSET(sfd[3], &w)) {
+ printf("Failed sockmap select/recv\n");
+ goto out_sockmap;
+ }
+
+ rc = recv(sfd[3], buf, sizeof(buf), 0);
+ if (rc < 0) {
+ printf("Failed sockmap recv\n");
+ goto out_sockmap;
+ }
}
- rc = recv(sfd[3], buf, sizeof(buf), 0);
- if (rc < 0) {
- printf("Failed sockmap recv\n");
+ /* Negative null entry lookup from datapath should be dropped */
+ buf[0] = 1;
+ buf[1] = 12;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
goto out_sockmap;
}
@@ -730,7 +767,7 @@ static void test_sockmap(int task, void *data)
for (i = 0; i < 6; i++)
close(sfd[i]);
close(fd);
- close(map_fd);
+ close(map_fd_rx);
bpf_object__close(obj);
return;
out:
^ permalink raw reply related
* [net-next PATCH 5/9] bpf: more SK_SKB selftests
From: John Fastabend @ 2017-08-28 14:11 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
Tests packet read/writes and additional skb fields.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
tools/testing/selftests/bpf/test_verifier.c | 98 +++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 353d170..8eb0995 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1119,6 +1119,104 @@ struct test_val {
.prog_type = BPF_PROG_TYPE_SK_SKB,
},
{
+ "invalid access of tc_classid for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->mark is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "check skb->tc_index is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "check skb->priority is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, priority)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet read for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet write for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "overlapping checks for direct packet access SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
"check skb->mark is not writeable by sockets",
.insns = {
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
^ permalink raw reply related
* [net-next PATCH 6/9] bpf: harden sockmap program attach to ensure correct map type
From: John Fastabend @ 2017-08-28 14:11 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
When attaching a program to sockmap we need to check map type
is correct.
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/sockmap.c | 3 ++
tools/testing/selftests/bpf/sockmap_verdict_prog.c | 7 +++++
tools/testing/selftests/bpf/test_maps.c | 27 +++++++++++++++++---
3 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 266011c8..38bf4e4 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -794,6 +794,9 @@ int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_prog *orig;
+ if (unlikely(map->map_type != BPF_MAP_TYPE_SOCKMAP))
+ return -EINVAL;
+
switch (type) {
case BPF_SK_SKB_STREAM_PARSER:
orig = xchg(&stab->bpf_parse, prog);
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index dada207..9b99bd1 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -26,6 +26,13 @@ struct bpf_map_def SEC("maps") sock_map_tx = {
.max_entries = 20,
};
+struct bpf_map_def SEC("maps") sock_map_break = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
SEC("sk_skb2")
int bpf_prog2(struct __sk_buff *skb)
{
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0a7f457..0c4b56d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -463,12 +463,12 @@ static void test_devmap(int task, void *data)
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
static void test_sockmap(int task, void *data)
{
+ int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
+ struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
- int one = 1, map_fd_rx, map_fd_tx, s, sc, rc;
int parse_prog, verdict_prog;
- struct bpf_map *bpf_map_rx, *bpf_map_tx;
struct sockaddr_in addr;
struct bpf_object *obj;
struct timeval to;
@@ -609,17 +609,36 @@ static void test_sockmap(int task, void *data)
goto out_sockmap;
}
+ bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
+ if (IS_ERR(bpf_map_break)) {
+ printf("Failed to load map tx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_break = bpf_map__fd(bpf_map_break);
+ if (map_fd_break < 0) {
+ printf("Failed to get map tx fd\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_break,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!err) {
+ printf("Allowed attaching SK_SKB program to invalid map\n");
+ goto out_sockmap;
+ }
+
err = bpf_prog_attach(parse_prog, map_fd_rx,
BPF_SK_SKB_STREAM_PARSER, 0);
if (err) {
- printf("Failed bpf prog attach\n");
+ printf("Failed stream parser bpf prog attach\n");
goto out_sockmap;
}
err = bpf_prog_attach(verdict_prog, map_fd_rx,
BPF_SK_SKB_STREAM_VERDICT, 0);
if (err) {
- printf("Failed bpf prog attach\n");
+ printf("Failed stream verdict bpf prog attach\n");
goto out_sockmap;
}
^ permalink raw reply related
* [net-next PATCH 7/9] bpf: sockmap indicate sock events to listeners
From: John Fastabend @ 2017-08-28 14:12 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
After userspace pushes sockets into a sockmap it may not be receiving
data (assuming stream_{parser|verdict} programs are attached). But, it
may still want to manage the socks. A common pattern is to poll/select
for a POLLRDHUP event so we can close the sock.
This patch adds the logic to wake up these listeners.
Also add TCP_SYN_SENT to the list of events to handle. We don't want
to break the connection just because we happen to be in this state.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/sockmap.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 38bf4e4..bcc326a 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -162,6 +162,7 @@ static void smap_state_change(struct sock *sk)
{
struct smap_psock_map_entry *e, *tmp;
struct smap_psock *psock;
+ struct socket_wq *wq;
struct sock *osk;
rcu_read_lock();
@@ -171,6 +172,7 @@ static void smap_state_change(struct sock *sk)
* is established.
*/
switch (sk->sk_state) {
+ case TCP_SYN_SENT:
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
break;
@@ -208,6 +210,10 @@ static void smap_state_change(struct sock *sk)
smap_report_sk_error(psock, EPIPE);
break;
}
+
+ wq = rcu_dereference(sk->sk_wq);
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible_all(&wq->wait);
rcu_read_unlock();
}
^ permalink raw reply related
* [net-next PATCH 8/9] bpf: sockmap requires STREAM_PARSER add Kconfig entry
From: John Fastabend @ 2017-08-28 14:12 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
SOCKMAP uses strparser code (compiled with Kconfig option
CONFIG_STREAM_PARSER) to run the parser BPF program. Without this
config option set sockmap won't be compiled. However, at the moment
the only way to pull in the strparser code is to enable KCM.
To resolve this create a BPF specific config option to pull
only the strparser piece in that sockmap needs. This also
allows folks who want to use BPF/syscall/maps but don't need
sockmap to easily opt out.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
net/Kconfig | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/net/Kconfig b/net/Kconfig
index 7d57ef3..17ca213 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -301,6 +301,18 @@ config BPF_JIT
/proc/sys/net/core/bpf_jit_harden (optional)
/proc/sys/net/core/bpf_jit_kallsyms (optional)
+config BPF_STREAM_PARSER
+ bool "enable BPF STREAM_PARSER"
+ depends on BPF_SYSCALL
+ select STREAM_PARSER
+ ---help---
+ Enabling this allows a stream parser to be used with
+ BPF_MAP_TYPE_SOCKMAP.
+
+ BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
+ It can be used to enforce socket policy, implement socket redirects,
+ etc.
+
config NET_FLOW_LIMIT
bool
depends on RPS
^ permalink raw reply related
* [net-next PATCH 9/9] bpf: test_maps add sockmap stress test
From: John Fastabend @ 2017-08-28 14:12 UTC (permalink / raw)
To: ast, daniel, davem; +Cc: netdev, john.fastabend
In-Reply-To: <20170828140850.14143.83953.stgit@john-Precision-Tower-5810>
Sockmap is a bit different than normal stress tests that can run
in parallel as is. We need to reuse the same socket pool and map
pool to get good stress test cases.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
tools/testing/selftests/bpf/test_maps.c | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0c4b56d..7059bb3 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -461,7 +461,7 @@ static void test_devmap(int task, void *data)
#include <linux/err.h>
#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
-static void test_sockmap(int task, void *data)
+static void test_sockmap(int tasks, void *data)
{
int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
@@ -473,6 +473,7 @@ static void test_sockmap(int task, void *data)
struct bpf_object *obj;
struct timeval to;
__u32 key, value;
+ pid_t pid[tasks];
fd_set w;
/* Create some sockets to use with sockmap */
@@ -782,6 +783,32 @@ static void test_sockmap(int task, void *data)
}
}
+ /* Test tasks number of forked operations */
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
+ bpf_map_update_elem(map_fd_tx, &i,
+ &sfd[i], BPF_ANY);
+ bpf_map_update_elem(map_fd_rx, &i,
+ &sfd[i], BPF_ANY);
+ }
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("Couldn't spawn #%d process!\n", i);
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+
/* Test map close sockets */
for (i = 0; i < 6; i++)
close(sfd[i]);
^ permalink raw reply related
* Re: [PATCH net-next] bridge: fdb add and delete tracepoints
From: Roopa Prabhu @ 2017-08-28 14:30 UTC (permalink / raw)
To: Florian Fainelli
Cc: Nikolay Aleksandrov, netdev@vger.kernel.org, bridge,
davem@davemloft.net, Andrew Lunn
In-Reply-To: <592631a3-6a80-407a-f087-37f7f04417d2@gmail.com>
On Sun, Aug 27, 2017 at 7:11 PM, Florian Fainelli <f.fainelli@gmail.com> wrote:
> On 08/27/2017 02:33 PM, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> Tracepoints to trace bridge forwarding database updates.
>
> Thanks for adding this!
>
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
>> include/trace/events/bridge.h | 98 +++++++++++++++++++++++++++++++++++++++++++
>> net/bridge/br_fdb.c | 7 ++++
>> net/core/net-traces.c | 6 +++
>> 3 files changed, 111 insertions(+)
>> create mode 100644 include/trace/events/bridge.h
>>
>> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
>> new file mode 100644
>> index 0000000..e2d52cf
>> --- /dev/null
>> +++ b/include/trace/events/bridge.h
>> @@ -0,0 +1,98 @@
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM bridge
>> +
>> +#if !defined(_TRACE_BRIDGE_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_BRIDGE_H
>> +
>> +#include <linux/netdevice.h>
>> +#include <linux/tracepoint.h>
>> +
>> +#include "../../../net/bridge/br_private.h"
>> +
>> +TRACE_EVENT(br_fdb_add,
>> +
>> + TP_PROTO(struct ndmsg *ndm, struct net_device *dev,
>> + const unsigned char *addr, u16 vid, u16 nlh_flags),
>> +
>> + TP_ARGS(ndm, dev, addr, vid, nlh_flags),
>> +
>> + TP_STRUCT__entry(
>> + __field(u8, ndm_flags)
>> + __string(dev, dev->name)
>> + __array(unsigned char, addr, 6)
>
> Can you use ETH_ALEN instead of 6 here?
>
>> + __field(u16, vid)
>> + __field(u16, nlh_flags)
>> + ),
>> +
>> + TP_fast_assign(
>> + __assign_str(dev, dev->name);
>> + memcpy(__entry->addr, addr, 6);
>
> Likewise
will do.
>
>> + __entry->vid = vid;
>> + __entry->nlh_flags = nlh_flags;
>> + __entry->ndm_flags = ndm->ndm_flags;
>> + ),
>> +
>> + TP_printk("dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u nlh_flags %x ndm_flags = %x",
>
> I wonder if we could make %pM work for TP_printk() as this would
> simplify the argument list a bit.
yeah i struggled with getting %pM to work here.
> Can you use %04x for vid, nlh_flags
> and %02x for ndm_flags?
will do,
>
>> + __get_str(dev), __entry->addr[0], __entry->addr[1],
>> + __entry->addr[2], __entry->addr[3], __entry->addr[4],
>> + __entry->addr[5], __entry->vid,
>> + __entry->nlh_flags, __entry->ndm_flags)
>> +);
>> +
>> +TRACE_EVENT(br_fdb_external_learn_add,
>> +
>> + TP_PROTO(struct net_bridge *br, struct net_bridge_port *p,
>> + const unsigned char *addr, u16 vid),
>> +
>> + TP_ARGS(br, p, addr, vid),
>> +
>> + TP_STRUCT__entry(
>> + __string(br_dev, br->dev->name)
>> + __string(dev, p->dev->name)
>> + __array(unsigned char, addr, 6)
>> + __field(u16, vid)
>> + ),
>> +
>> + TP_fast_assign(
>> + __assign_str(br_dev, br ? br->dev->name : "null");
>> + __assign_str(dev, p ? p->dev->name : "null");
>> + memcpy(__entry->addr, addr, 6);
>> + __entry->vid = vid;
>> + ),
>> +
>> + TP_printk("br_dev %s port %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u",
>> + __get_str(br_dev), __get_str(dev), __entry->addr[0],
>> + __entry->addr[1], __entry->addr[2], __entry->addr[3],
>> + __entry->addr[4], __entry->addr[5], __entry->vid)
>> +);
>> +
>> +TRACE_EVENT(fdb_delete,
>> +
>> + TP_PROTO(struct net_bridge *br, struct net_bridge_fdb_entry *f),
>> +
>> + TP_ARGS(br, f),
>> +
>> + TP_STRUCT__entry(
>> + __string(br_dev, br->dev->name)
>> + __string(dev, f->dst ? f->dst->dev->name : "null")
>> + __array(unsigned char, addr, 6)
>
> Same here, using ETH_ALEN would be clearer.
>
ack, thanks for the review.
^ permalink raw reply
* Re: [PATCH] DSA support for Micrel KSZ8895
From: Maxim Uvarov @ 2017-08-28 14:47 UTC (permalink / raw)
To: Andrew Lunn
Cc: Pavel Machek, Woojung.Huh, nathan.leigh.conrad, Vivien Didelot,
Florian Fainelli, netdev, linux-kernel, Tristram.Ha
In-Reply-To: <20170828140927.GD10418@lunn.ch>
Micrel has some drivers on their web site to support some chips. For
those chips they do virtual MDIO over SPI.
The driver is available on the download page:
http://www.microchip.com/wwwproducts/en/KSZ8895
Documentation->Software library.
Both driver and DSA driver. Driver has to work with some minor fixups
related to your kernel version. But I think they don't care about
upstreaming that code.
So you can take their code as a reference.
2017-08-28 17:09 GMT+03:00 Andrew Lunn <andrew@lunn.ch>:
>> I may be confused here, but AFAICT:
>>
>> 1) Yes, it has standard layout when accessed over MDIO.
>
>
> Section 4.8 of the datasheet says:
>
> All the registers defined in this section can be also accessed
> via the SPI interface.
>
> Meaning all PHY registers can be access via the SPI interface. So you
> should be able to make a standard Linux MDIO bus driver which performs
> SPI reads.
>
> Andrew
Micrel has some drivers on their web site to support some chips. For
those chips they implement a virtual MDIO bus over SPI.
The driver is available on the download page:
http://www.microchip.com/wwwproducts/en/KSZ8895
Documentation->Software library.
It contains both a driver and a DSA driver. The driver should work with
some minor fixups related to your kernel version. But I think they don't
care about upstreaming that code.
So you can take their code as a reference.
--
Best regards,
Maxim Uvarov
^ permalink raw reply
* IPv6 loopback issue report
From: Tariq Toukan @ 2017-08-28 14:48 UTC (permalink / raw)
To: Linux Kernel Network Developers, David Miller, Alexey Kuznetsov,
Hideaki YOSHIFUJI
Cc: ranro, guye, Eran Ben Elisha
Hi all,
We encountered the following issue in our regression tests over net-next
branch.
IPv6 loopback ping fails, while it works for IPv4.
Reproduces with all NICs, doesn't seem to be a driver issue.
Example:
# ifconfig ens8
ens8: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 11.141.16.6 netmask 255.255.0.0 broadcast 11.141.255.255
inet6 fe80::7efe:90ff:fecb:7502 prefixlen 64 scopeid 0x20<link>
ether 7c:fe:90:cb:75:02 txqueuelen 1000 (Ethernet)
RX packets 12 bytes 1164 (1.1 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 30 bytes 2484 (2.4 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
# ping -c 3 11.141.16.6
PING 11.141.16.6 (11.141.16.6) 56(84) bytes of data.
64 bytes from 11.141.16.6: icmp_seq=1 ttl=64 time=0.017 ms
64 bytes from 11.141.16.6: icmp_seq=2 ttl=64 time=0.014 ms
64 bytes from 11.141.16.6: icmp_seq=3 ttl=64 time=0.014 ms
--- 11.141.16.6 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2025ms
rtt min/avg/max/mdev = 0.014/0.015/0.017/0.001 ms
# /bin/ping6 -c 3 fe80::7efe:90ff:fecb:7502%ens8
PING fe80::7efe:90ff:fecb:7502%ens8(fe80::7efe:90ff:fecb:7502) 56 data bytes
--- fe80::7efe:90ff:fecb:7502%ens8 ping statistics ---
3 packets transmitted, 0 received, 100% packet loss, time 2043ms
Regards,
Tariq Toukan
^ permalink raw reply
* [PATCH net-next v3 00/13] net: mvpp2: comphy configuration
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
Hi all,
This series, following up the one on the GoP/MAC configuration, aims at
stopping to depend on the firmware/bootloader configuration when using
the PPv2 engine. With this series the PPv2 driver does not need to rely
on a previous configuration, and dynamic reconfiguration while the
kernel is running can be done (i.e. switch one port from SGMII to 10G,
or the opposite). A port can now be configured in a different mode than
what's done in the firmware/bootloader as well.
The series first contains patches in the generic PHY framework to support
what is called the comphy (common PHYs), which is an h/w block providing
PHYs that can be configured in various modes ranging from SGMII, 10G
to SATA and others. As of now only the SGMII and 10G modes are
supported by the comphy driver.
Then patches are modifying the PPv2 driver to first add the comphy
initialization sequence (i.e. calls to the generic PHY framework) and to
then take advantage of this to allow dynamic reconfiguration (i.e.
configuring the mode of a port given what's connected, between sgmii and
10G). Note the use of the comphy in the PPv2 driver is kept optional
(i.e. if not described in dt the driver still works as before and relies on
the firmware/bootloader configuration).
Finally there are dt/defconfig patches to describe and take advantage of
this.
This was tested on a range of devices: 8040-db, 8040-mcbin and 7040-db.
Thanks!
Antoine
Since v2:
- Kept the link mode enforcement.
- Removed the netif_running() check.
- Reworded the "dynamic reconfiguration of the PHY mode" commit log.
- Added one patch not to force the GMAC autoneg parameters when using
the XLG MAC.
Since v1:
- Updated the mode settings variable name in the comphy driver to
have 'cp110' in it.
- Documented the PHY cell argument in the dt documentation.
- New patch adding comphy phandles for the 7040-db board.
- Checked if the carrier_on/off functions were needed. They are.
- s/PHY/generic PHY/ in commit log of patch 1.
- Rebased on the latest net-next/master.
Antoine Tenart (12):
phy: add sgmii and 10gkr modes to the phy_mode enum
phy: add the mvebu cp110 comphy driver
Documentation/bindings: phy: document the Marvell comphy driver
net: mvpp2: initialize the comphy
net: mvpp2: simplify the link_event function
net: mvpp2: improve the link management function
net: mvpp2: do not set GMAC autoneg when using XLG MAC
net: mvpp2: dynamic reconfiguration of the comphy/GoP/MAC
arm64: dts: marvell: extend the cp110 syscon register area length
arm64: dts: marvell: add comphy nodes on cp110 master and slave
arm64: dts: marvell: mcbin: add comphy references to Ethernet ports
arm64: dts: marvell: 7040-db: add comphy references to Ethernet ports
Miquel Raynal (1):
arm64: defconfig: enable Marvell CP110 comphy
.../devicetree/bindings/phy/phy-mvebu-comphy.txt | 43 ++
arch/arm64/boot/dts/marvell/armada-7040-db.dts | 1 +
arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts | 3 +
.../boot/dts/marvell/armada-cp110-master.dtsi | 40 +-
.../arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 40 +-
arch/arm64/configs/defconfig | 1 +
drivers/net/ethernet/marvell/mvpp2.c | 153 +++--
drivers/phy/marvell/Kconfig | 10 +
drivers/phy/marvell/Makefile | 1 +
drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 656 +++++++++++++++++++++
include/linux/phy/phy.h | 2 +
11 files changed, 915 insertions(+), 35 deletions(-)
create mode 100644 Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt
create mode 100644 drivers/phy/marvell/phy-mvebu-cp110-comphy.c
--
2.13.5
^ permalink raw reply
* [PATCH net-next v3 01/13] phy: add sgmii and 10gkr modes to the phy_mode enum
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
This patch adds more generic PHY modes to the phy_mode enum, to
allow configuring generic PHYs to the SGMII and/or the 10GKR mode
by using the set_mode callback.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
include/linux/phy/phy.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index 78bb0d7f6b11..e694d4008c4a 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -27,6 +27,8 @@ enum phy_mode {
PHY_MODE_USB_HOST,
PHY_MODE_USB_DEVICE,
PHY_MODE_USB_OTG,
+ PHY_MODE_SGMII,
+ PHY_MODE_10GKR,
};
/**
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 02/13] phy: add the mvebu cp110 comphy driver
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
On the CP110 unit, which can be found on various Marvell platforms such
as the 7k and 8k (currently), a comphy (common PHYs) hardware block can
be found. This block provides a number of PHYs which can be used in
various modes by other controllers (network, SATA ...). These common
PHYs must be configured for the controllers using them to work correctly
either at boot time, or when the system runs to switch the mode used.
This patch adds a driver for this comphy hardware block, providing
callbacks for its PHYs so that consumers can configure the modes
used.
As of this commit, two modes are supported by the comphy driver: sgmii
and 10gkr.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
drivers/phy/marvell/Kconfig | 10 +
drivers/phy/marvell/Makefile | 1 +
drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 656 +++++++++++++++++++++++++++
3 files changed, 667 insertions(+)
create mode 100644 drivers/phy/marvell/phy-mvebu-cp110-comphy.c
diff --git a/drivers/phy/marvell/Kconfig b/drivers/phy/marvell/Kconfig
index 048d8893bc2e..26755f3d1a9a 100644
--- a/drivers/phy/marvell/Kconfig
+++ b/drivers/phy/marvell/Kconfig
@@ -21,6 +21,16 @@ config PHY_BERLIN_USB
help
Enable this to support the USB PHY on Marvell Berlin SoCs.
+config PHY_MVEBU_CP110_COMPHY
+ tristate "Marvell CP110 comphy driver"
+ depends on ARCH_MVEBU && OF
+ select GENERIC_PHY
+ help
+ This driver allows to control the comphy, an hardware block providing
+ shared serdes PHYs on Marvell Armada 7k/8k (in the CP110). Its serdes
+ lanes can be used by various controllers (Ethernet, sata, usb,
+ PCIe...).
+
config PHY_MVEBU_SATA
def_bool y
depends on ARCH_DOVE || MACH_DOVE || MACH_KIRKWOOD
diff --git a/drivers/phy/marvell/Makefile b/drivers/phy/marvell/Makefile
index 3fc188f59118..0cf6a7cbaf9f 100644
--- a/drivers/phy/marvell/Makefile
+++ b/drivers/phy/marvell/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_ARMADA375_USBCLUSTER_PHY) += phy-armada375-usb2.o
obj-$(CONFIG_PHY_BERLIN_SATA) += phy-berlin-sata.o
obj-$(CONFIG_PHY_BERLIN_USB) += phy-berlin-usb.o
+obj-$(CONFIG_PHY_MVEBU_CP110_COMPHY) += phy-mvebu-cp110-comphy.o
obj-$(CONFIG_PHY_MVEBU_SATA) += phy-mvebu-sata.o
obj-$(CONFIG_PHY_PXA_28NM_HSIC) += phy-pxa-28nm-hsic.o
obj-$(CONFIG_PHY_PXA_28NM_USB2) += phy-pxa-28nm-usb2.o
diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
new file mode 100644
index 000000000000..41556e790856
--- /dev/null
+++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+/*
+ * Relative to priv->base
+ *
+ * Each register macro takes the lane index n; consecutive lanes are 0x1000
+ * bytes apart. The macros listed under a register are either single-bit
+ * flags (BIT()) or helpers that shift a field value into position.
+ */
+#define MVEBU_COMPHY_SERDES_CFG0(n) (0x0 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_CFG0_PU_PLL BIT(1)
+#define MVEBU_COMPHY_SERDES_CFG0_GEN_RX(n) ((n) << 3)
+#define MVEBU_COMPHY_SERDES_CFG0_GEN_TX(n) ((n) << 7)
+#define MVEBU_COMPHY_SERDES_CFG0_PU_RX BIT(11)
+#define MVEBU_COMPHY_SERDES_CFG0_PU_TX BIT(12)
+#define MVEBU_COMPHY_SERDES_CFG0_HALF_BUS BIT(14)
+#define MVEBU_COMPHY_SERDES_CFG1(n) (0x4 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_CFG1_RESET BIT(3)
+#define MVEBU_COMPHY_SERDES_CFG1_RX_INIT BIT(4)
+#define MVEBU_COMPHY_SERDES_CFG1_CORE_RESET BIT(5)
+#define MVEBU_COMPHY_SERDES_CFG1_RF_RESET BIT(6)
+#define MVEBU_COMPHY_SERDES_CFG2(n) (0x8 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_CFG2_DFE_EN BIT(4)
+#define MVEBU_COMPHY_SERDES_STATUS0(n) (0x18 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY BIT(2)
+#define MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY BIT(3)
+#define MVEBU_COMPHY_SERDES_STATUS0_RX_INIT BIT(4)
+#define MVEBU_COMPHY_PWRPLL_CTRL(n) (0x804 + (n) * 0x1000)
+#define MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(n) ((n) << 0)
+#define MVEBU_COMPHY_PWRPLL_PHY_MODE(n) ((n) << 5)
+#define MVEBU_COMPHY_IMP_CAL(n) (0x80c + (n) * 0x1000)
+#define MVEBU_COMPHY_IMP_CAL_TX_EXT(n) ((n) << 10)
+#define MVEBU_COMPHY_IMP_CAL_TX_EXT_EN BIT(15)
+#define MVEBU_COMPHY_DFE_RES(n) (0x81c + (n) * 0x1000)
+#define MVEBU_COMPHY_DFE_RES_FORCE_GEN_TBL BIT(15)
+#define MVEBU_COMPHY_COEF(n) (0x828 + (n) * 0x1000)
+#define MVEBU_COMPHY_COEF_DFE_EN BIT(14)
+#define MVEBU_COMPHY_COEF_DFE_CTRL BIT(15)
+#define MVEBU_COMPHY_GEN1_S0(n) (0x834 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S0_TX_AMP(n) ((n) << 1)
+#define MVEBU_COMPHY_GEN1_S0_TX_EMPH(n) ((n) << 7)
+#define MVEBU_COMPHY_GEN1_S1(n) (0x838 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(n) ((n) << 0)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(n) ((n) << 3)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_FI(n) ((n) << 6)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(n) ((n) << 8)
+#define MVEBU_COMPHY_GEN1_S1_RX_DFE_EN BIT(10)
+#define MVEBU_COMPHY_GEN1_S1_RX_DIV(n) ((n) << 11)
+#define MVEBU_COMPHY_GEN1_S2(n) (0x8f4 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S2_TX_EMPH(n) ((n) << 0)
+#define MVEBU_COMPHY_GEN1_S2_TX_EMPH_EN BIT(4)
+#define MVEBU_COMPHY_LOOPBACK(n) (0x88c + (n) * 0x1000)
+#define MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(n) ((n) << 1)
+#define MVEBU_COMPHY_VDD_CAL0(n) (0x908 + (n) * 0x1000)
+#define MVEBU_COMPHY_VDD_CAL0_CONT_MODE BIT(15)
+#define MVEBU_COMPHY_EXT_SELV(n) (0x914 + (n) * 0x1000)
+#define MVEBU_COMPHY_EXT_SELV_RX_SAMPL(n) ((n) << 5)
+#define MVEBU_COMPHY_MISC_CTRL0(n) (0x93c + (n) * 0x1000)
+#define MVEBU_COMPHY_MISC_CTRL0_ICP_FORCE BIT(5)
+#define MVEBU_COMPHY_MISC_CTRL0_REFCLK_SEL BIT(10)
+#define MVEBU_COMPHY_RX_CTRL1(n) (0x940 + (n) * 0x1000)
+#define MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL BIT(11)
+#define MVEBU_COMPHY_RX_CTRL1_CLK8T_EN BIT(12)
+#define MVEBU_COMPHY_SPEED_DIV(n) (0x954 + (n) * 0x1000)
+#define MVEBU_COMPHY_SPEED_DIV_TX_FORCE BIT(7)
+#define MVEBU_SP_CALIB(n) (0x96c + (n) * 0x1000)
+#define MVEBU_SP_CALIB_SAMPLER(n) ((n) << 8)
+#define MVEBU_SP_CALIB_SAMPLER_EN BIT(12)
+#define MVEBU_COMPHY_TX_SLEW_RATE(n) (0x974 + (n) * 0x1000)
+#define MVEBU_COMPHY_TX_SLEW_RATE_EMPH(n) ((n) << 5)
+#define MVEBU_COMPHY_TX_SLEW_RATE_SLC(n) ((n) << 10)
+#define MVEBU_COMPHY_DLT_CTRL(n) (0x984 + (n) * 0x1000)
+#define MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN BIT(2)
+#define MVEBU_COMPHY_FRAME_DETECT0(n) (0xa14 + (n) * 0x1000)
+#define MVEBU_COMPHY_FRAME_DETECT0_PATN(n) ((n) << 7)
+#define MVEBU_COMPHY_FRAME_DETECT3(n) (0xa20 + (n) * 0x1000)
+#define MVEBU_COMPHY_FRAME_DETECT3_LOST_TIMEOUT_EN BIT(12)
+#define MVEBU_COMPHY_DME(n) (0xa28 + (n) * 0x1000)
+#define MVEBU_COMPHY_DME_ETH_MODE BIT(7)
+#define MVEBU_COMPHY_TRAINING0(n) (0xa68 + (n) * 0x1000)
+#define MVEBU_COMPHY_TRAINING0_P2P_HOLD BIT(15)
+#define MVEBU_COMPHY_TRAINING5(n) (0xaa4 + (n) * 0x1000)
+#define MVEBU_COMPHY_TRAINING5_RX_TIMER(n) ((n) << 0)
+#define MVEBU_COMPHY_TX_TRAIN_PRESET(n) (0xb1c + (n) * 0x1000)
+#define MVEBU_COMPHY_TX_TRAIN_PRESET_16B_AUTO_EN BIT(8)
+#define MVEBU_COMPHY_TX_TRAIN_PRESET_PRBS11 BIT(9)
+#define MVEBU_COMPHY_GEN1_S3(n) (0xc40 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S3_FBCK_SEL BIT(9)
+#define MVEBU_COMPHY_GEN1_S4(n) (0xc44 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S4_DFE_RES(n) ((n) << 8)
+#define MVEBU_COMPHY_TX_PRESET(n) (0xc68 + (n) * 0x1000)
+#define MVEBU_COMPHY_TX_PRESET_INDEX(n) ((n) << 0)
+#define MVEBU_COMPHY_GEN1_S5(n) (0xd38 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S5_ICP(n) ((n) << 0)
+
+/*
+ * Relative to priv->regmap
+ *
+ * The per-lane CONF registers are 0x28 bytes apart. MVEBU_COMPHY_SELECTOR is
+ * a single register shared by all lanes; MVEBU_COMPHY_SELECTOR_PHY(n) is the
+ * bit offset of lane n's mux field inside it (the driver masks it with 0xf).
+ */
+#define MVEBU_COMPHY_CONF1(n) (0x1000 + (n) * 0x28)
+#define MVEBU_COMPHY_CONF1_PWRUP BIT(1)
+#define MVEBU_COMPHY_CONF1_USB_PCIE BIT(2) /* 0: Ethernet/SATA */
+#define MVEBU_COMPHY_CONF6(n) (0x1014 + (n) * 0x28)
+#define MVEBU_COMPHY_CONF6_40B BIT(18)
+#define MVEBU_COMPHY_SELECTOR 0x1140
+#define MVEBU_COMPHY_SELECTOR_PHY(n) ((n) * 0x4)
+
+/* Upper bounds used to validate the lane 'reg' value and the port cell */
+#define MVEBU_COMPHY_LANES 6
+#define MVEBU_COMPHY_PORTS 3
+
+/*
+ * One supported (lane, port, mode) combination together with the value to
+ * program into that lane's field of MVEBU_COMPHY_SELECTOR to select it.
+ */
+struct mvebu_comhy_conf {
+ enum phy_mode mode;
+ unsigned lane;
+ unsigned port;
+ u32 mux;
+};
+
+/* Initializer for one struct mvebu_comhy_conf entry of the modes table */
+#define MVEBU_COMPHY_CONF(_lane, _port, _mode, _mux) \
+ { \
+ .lane = _lane, \
+ .port = _port, \
+ .mode = _mode, \
+ .mux = _mux, \
+ }
+
+/*
+ * Table of the lane/port/mode combinations this driver can configure, with
+ * the selector mux value for each. Arguments are (lane, port, mode, mux).
+ * mvebu_comphy_get_mux() searches it; combinations not listed are rejected.
+ */
+static const struct mvebu_comhy_conf mvebu_comphy_cp110_modes[] = {
+ /* lane 0 */
+ MVEBU_COMPHY_CONF(0, 1, PHY_MODE_SGMII, 0x1),
+ /* lane 1 */
+ MVEBU_COMPHY_CONF(1, 2, PHY_MODE_SGMII, 0x1),
+ /* lane 2 */
+ MVEBU_COMPHY_CONF(2, 0, PHY_MODE_SGMII, 0x1),
+ MVEBU_COMPHY_CONF(2, 0, PHY_MODE_10GKR, 0x1),
+ /* lane 3 */
+ MVEBU_COMPHY_CONF(3, 1, PHY_MODE_SGMII, 0x2),
+ /* lane 4 */
+ MVEBU_COMPHY_CONF(4, 0, PHY_MODE_SGMII, 0x2),
+ MVEBU_COMPHY_CONF(4, 0, PHY_MODE_10GKR, 0x2),
+ MVEBU_COMPHY_CONF(4, 1, PHY_MODE_SGMII, 0x1),
+ /* lane 5 */
+ MVEBU_COMPHY_CONF(5, 2, PHY_MODE_SGMII, 0x1),
+};
+
+/* Driver-wide state, stored as the platform device's drvdata */
+struct mvebu_comphy_priv {
+ void __iomem *base; /* mapped comphy registers (MEM resource 0) */
+ struct regmap *regmap; /* "marvell,system-controller" syscon regmap */
+ struct device *dev;
+ struct phy *phys[MVEBU_COMPHY_LANES]; /* indexed by lane id; NULL if absent */
+ int modes[MVEBU_COMPHY_LANES]; /* NOTE(review): not accessed in this file */
+};
+
+/* Per-lane state, stored as the drvdata of the lane's struct phy */
+struct mvebu_comphy_lane {
+ struct mvebu_comphy_priv *priv;
+ struct device_node *of_node; /* child DT node this lane was created from */
+ unsigned id; /* lane number, from the child's 'reg' property */
+ enum phy_mode mode; /* PHY_MODE_INVALID until set_mode succeeds */
+ int port; /* -1 until a consumer claims the lane in xlate */
+};
+
+/*
+ * Find the selector mux value for a lane/port/mode combination.
+ *
+ * Returns 0 for PHY_MODE_INVALID (the "unused" mux value), the mux value of
+ * the matching mvebu_comphy_cp110_modes entry otherwise, or -EINVAL when the
+ * combination is not in the table.
+ */
+static int mvebu_comphy_get_mux(int lane, int port, enum phy_mode mode)
+{
+	const struct mvebu_comhy_conf *conf;
+	int idx;
+
+	/* Unused PHY mux value is 0x0 */
+	if (mode == PHY_MODE_INVALID)
+		return 0;
+
+	for (idx = 0; idx < ARRAY_SIZE(mvebu_comphy_cp110_modes); idx++) {
+		conf = &mvebu_comphy_cp110_modes[idx];
+
+		if (conf->lane != lane)
+			continue;
+		if (conf->port != port)
+			continue;
+		if (conf->mode == mode)
+			return conf->mux;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * First stage of lane setup shared by the SGMII and 10GKR paths: power up the
+ * lane in Ethernet/SATA configuration, program the mode-dependent GEN_RX/TX
+ * values, cycle the lane resets, then set refclk, PLL and data bus width.
+ *
+ * Only PHY_MODE_10GKR and PHY_MODE_SGMII get mode-specific values; for any
+ * other mode the GEN_RX/GEN_TX and HALF_BUS fields are left cleared.
+ */
+static void mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane,
+ enum phy_mode mode)
+{
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ /* USB_PCIE cleared selects Ethernet/SATA; PWRUP powers the lane */
+ regmap_read(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), &val);
+ val &= ~MVEBU_COMPHY_CONF1_USB_PCIE;
+ val |= MVEBU_COMPHY_CONF1_PWRUP;
+ regmap_write(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), val);
+
+ /* Select baud rates and PLLs */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+ val &= ~(MVEBU_COMPHY_SERDES_CFG0_PU_PLL |
+ MVEBU_COMPHY_SERDES_CFG0_PU_RX |
+ MVEBU_COMPHY_SERDES_CFG0_PU_TX |
+ MVEBU_COMPHY_SERDES_CFG0_HALF_BUS |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xf) |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xf));
+ if (mode == PHY_MODE_10GKR)
+ val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xe) |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xe);
+ else if (mode == PHY_MODE_SGMII)
+ val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0x6) |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0x6) |
+ MVEBU_COMPHY_SERDES_CFG0_HALF_BUS;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+
+ /* reset */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val &= ~(MVEBU_COMPHY_SERDES_CFG1_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_CORE_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_RF_RESET);
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ /* de-assert reset (RF_RESET stays cleared until the end of power_on) */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG1_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_CORE_RESET;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ /* wait until clocks are ready */
+ mdelay(1);
+
+ /* explicitly disable 40B, the bit isn't cleared on reset */
+ regmap_read(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), &val);
+ val &= ~MVEBU_COMPHY_CONF6_40B;
+ regmap_write(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), val);
+
+ /* refclk selection */
+ val = readl(priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id));
+ val &= ~MVEBU_COMPHY_MISC_CTRL0_REFCLK_SEL;
+ if (mode == PHY_MODE_10GKR)
+ val |= MVEBU_COMPHY_MISC_CTRL0_ICP_FORCE;
+ writel(val, priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id));
+
+ /* power and pll selection */
+ val = readl(priv->base + MVEBU_COMPHY_PWRPLL_CTRL(lane->id));
+ val &= ~(MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(0x1f) |
+ MVEBU_COMPHY_PWRPLL_PHY_MODE(0x7));
+ val |= MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(0x1) |
+ MVEBU_COMPHY_PWRPLL_PHY_MODE(0x4);
+ writel(val, priv->base + MVEBU_COMPHY_PWRPLL_CTRL(lane->id));
+
+ /* data bus width field */
+ val = readl(priv->base + MVEBU_COMPHY_LOOPBACK(lane->id));
+ val &= ~MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(0x7);
+ val |= MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(0x1);
+ writel(val, priv->base + MVEBU_COMPHY_LOOPBACK(lane->id));
+}
+
+/*
+ * Final stage of lane setup: power up the PLL, RX and TX, wait for the PLL
+ * ready status, then trigger RX init and wait for its completion status.
+ *
+ * Returns 0 on success or -ETIMEDOUT when a status bit is not seen in time.
+ * The 'mode' argument is not used by this function.
+ */
+static int mvebu_comphy_init_plls(struct mvebu_comphy_lane *lane,
+ enum phy_mode mode)
+{
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ /* SERDES external config */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG0_PU_PLL |
+ MVEBU_COMPHY_SERDES_CFG0_PU_RX |
+ MVEBU_COMPHY_SERDES_CFG0_PU_TX;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+
+ /*
+ * check rx/tx pll
+ *
+ * The poll's return value is ignored; the last value read is re-tested
+ * below instead.
+ * NOTE(review): the condition is true as soon as either RX_PLL_RDY or
+ * TX_PLL_RDY is set, not both - confirm this is intended.
+ */
+ readl_poll_timeout(priv->base + MVEBU_COMPHY_SERDES_STATUS0(lane->id),
+ val,
+ val & (MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY |
+ MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY),
+ 1000, 150000);
+ if (!(val & (MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY |
+ MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY)))
+ return -ETIMEDOUT;
+
+ /* rx init */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG1_RX_INIT;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ /* check rx */
+ readl_poll_timeout(priv->base + MVEBU_COMPHY_SERDES_STATUS0(lane->id),
+ val, val & MVEBU_COMPHY_SERDES_STATUS0_RX_INIT,
+ 1000, 10000);
+ if (!(val & MVEBU_COMPHY_SERDES_STATUS0_RX_INIT))
+ return -ETIMEDOUT;
+
+ /* rx init done: clear the request bit again */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val &= ~MVEBU_COMPHY_SERDES_CFG1_RX_INIT;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ return 0;
+}
+
+/*
+ * Program a lane for SGMII: run the common Ethernet init/reset stage, apply
+ * the SGMII-specific RX clock, DTL and TX emphasis settings, then bring up
+ * the PLLs. Returns the result of mvebu_comphy_init_plls().
+ */
+static int mvebu_comphy_set_mode_sgmii(struct phy *phy, enum phy_mode mode)
+{
+ struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ mvebu_comphy_ethernet_init_reset(lane, mode);
+
+ val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+ val &= ~MVEBU_COMPHY_RX_CTRL1_CLK8T_EN;
+ val |= MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL;
+ writel(val, priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+ val &= ~MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN;
+ writel(val, priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+
+ /* same CONF1 update as already done in the init/reset stage */
+ regmap_read(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), &val);
+ val &= ~MVEBU_COMPHY_CONF1_USB_PCIE;
+ val |= MVEBU_COMPHY_CONF1_PWRUP;
+ regmap_write(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), val);
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xf);
+ val |= MVEBU_COMPHY_GEN1_S0_TX_EMPH(0x1);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+
+ return mvebu_comphy_init_plls(lane, mode);
+}
+
+/*
+ * Program a lane for 10GKR: run the common Ethernet init/reset stage, apply
+ * the 10GKR-specific analog, DFE, training and calibration register values,
+ * then bring up the PLLs. Returns the result of mvebu_comphy_init_plls().
+ *
+ * Every step is a read-modify-write of one per-lane register, so bits not
+ * named here keep their current value.
+ */
+static int mvebu_comphy_set_mode_10gkr(struct phy *phy, enum phy_mode mode)
+{
+ struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ mvebu_comphy_ethernet_init_reset(lane, mode);
+
+ val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+ val |= MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL |
+ MVEBU_COMPHY_RX_CTRL1_CLK8T_EN;
+ writel(val, priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+ val |= MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN;
+ writel(val, priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+
+ /* Speed divider */
+ val = readl(priv->base + MVEBU_COMPHY_SPEED_DIV(lane->id));
+ val |= MVEBU_COMPHY_SPEED_DIV_TX_FORCE;
+ writel(val, priv->base + MVEBU_COMPHY_SPEED_DIV(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG2(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG2_DFE_EN;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG2(lane->id));
+
+ /* DFE resolution */
+ val = readl(priv->base + MVEBU_COMPHY_DFE_RES(lane->id));
+ val |= MVEBU_COMPHY_DFE_RES_FORCE_GEN_TBL;
+ writel(val, priv->base + MVEBU_COMPHY_DFE_RES(lane->id));
+
+ /* TX amplitude and emphasis */
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+ val &= ~(MVEBU_COMPHY_GEN1_S0_TX_AMP(0x1f) |
+ MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xf));
+ val |= MVEBU_COMPHY_GEN1_S0_TX_AMP(0x1c) |
+ MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xe);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+
+ /* GEN1_S2: TX_EMPH field zeroed, TX_EMPH_EN set */
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S2(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S2_TX_EMPH(0xf);
+ val |= MVEBU_COMPHY_GEN1_S2_TX_EMPH_EN;
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S2(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_TX_SLEW_RATE(lane->id));
+ val |= MVEBU_COMPHY_TX_SLEW_RATE_EMPH(0x3) |
+ MVEBU_COMPHY_TX_SLEW_RATE_SLC(0x3f);
+ writel(val, priv->base + MVEBU_COMPHY_TX_SLEW_RATE(lane->id));
+
+ /* Impedance calibration */
+ val = readl(priv->base + MVEBU_COMPHY_IMP_CAL(lane->id));
+ val &= ~MVEBU_COMPHY_IMP_CAL_TX_EXT(0x1f);
+ val |= MVEBU_COMPHY_IMP_CAL_TX_EXT(0xe) |
+ MVEBU_COMPHY_IMP_CAL_TX_EXT_EN;
+ writel(val, priv->base + MVEBU_COMPHY_IMP_CAL(lane->id));
+
+ /* GEN1_S5: ICP field zeroed */
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S5(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S5_ICP(0xf);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S5(lane->id));
+
+ /* GEN1_S1: RX multipliers, DFE enable and RX divider */
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S1(lane->id));
+ val &= ~(MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(0x7) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(0x7) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_FI(0x3) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(0x3));
+ val |= MVEBU_COMPHY_GEN1_S1_RX_DFE_EN |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(0x2) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(0x2) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(0x1) |
+ MVEBU_COMPHY_GEN1_S1_RX_DIV(0x3);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S1(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_COEF(lane->id));
+ val &= ~(MVEBU_COMPHY_COEF_DFE_EN | MVEBU_COMPHY_COEF_DFE_CTRL);
+ writel(val, priv->base + MVEBU_COMPHY_COEF(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S4(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S4_DFE_RES(0x3);
+ val |= MVEBU_COMPHY_GEN1_S4_DFE_RES(0x1);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S4(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S3(lane->id));
+ val |= MVEBU_COMPHY_GEN1_S3_FBCK_SEL;
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S3(lane->id));
+
+ /* rx training timer */
+ val = readl(priv->base + MVEBU_COMPHY_TRAINING5(lane->id));
+ val &= ~MVEBU_COMPHY_TRAINING5_RX_TIMER(0x3ff);
+ val |= MVEBU_COMPHY_TRAINING5_RX_TIMER(0x13);
+ writel(val, priv->base + MVEBU_COMPHY_TRAINING5(lane->id));
+
+ /* tx train peak to peak hold */
+ val = readl(priv->base + MVEBU_COMPHY_TRAINING0(lane->id));
+ val |= MVEBU_COMPHY_TRAINING0_P2P_HOLD;
+ writel(val, priv->base + MVEBU_COMPHY_TRAINING0(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_TX_PRESET(lane->id));
+ val &= ~MVEBU_COMPHY_TX_PRESET_INDEX(0xf);
+ val |= MVEBU_COMPHY_TX_PRESET_INDEX(0x2); /* preset coeff */
+ writel(val, priv->base + MVEBU_COMPHY_TX_PRESET(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_FRAME_DETECT3(lane->id));
+ val &= ~MVEBU_COMPHY_FRAME_DETECT3_LOST_TIMEOUT_EN;
+ writel(val, priv->base + MVEBU_COMPHY_FRAME_DETECT3(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_TX_TRAIN_PRESET(lane->id));
+ val |= MVEBU_COMPHY_TX_TRAIN_PRESET_16B_AUTO_EN |
+ MVEBU_COMPHY_TX_TRAIN_PRESET_PRBS11;
+ writel(val, priv->base + MVEBU_COMPHY_TX_TRAIN_PRESET(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_FRAME_DETECT0(lane->id));
+ val &= ~MVEBU_COMPHY_FRAME_DETECT0_PATN(0x1ff);
+ val |= MVEBU_COMPHY_FRAME_DETECT0_PATN(0x88);
+ writel(val, priv->base + MVEBU_COMPHY_FRAME_DETECT0(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_DME(lane->id));
+ val |= MVEBU_COMPHY_DME_ETH_MODE;
+ writel(val, priv->base + MVEBU_COMPHY_DME(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_VDD_CAL0(lane->id));
+ val |= MVEBU_COMPHY_VDD_CAL0_CONT_MODE;
+ writel(val, priv->base + MVEBU_COMPHY_VDD_CAL0(lane->id));
+
+ /*
+ * Two writes on purpose: the first sets SAMPLER_EN together with the
+ * sampler value, the second clears SAMPLER_EN again.
+ */
+ val = readl(priv->base + MVEBU_SP_CALIB(lane->id));
+ val &= ~MVEBU_SP_CALIB_SAMPLER(0x3);
+ val |= MVEBU_SP_CALIB_SAMPLER(0x3) |
+ MVEBU_SP_CALIB_SAMPLER_EN;
+ writel(val, priv->base + MVEBU_SP_CALIB(lane->id));
+ val &= ~MVEBU_SP_CALIB_SAMPLER_EN;
+ writel(val, priv->base + MVEBU_SP_CALIB(lane->id));
+
+ /* External rx regulator */
+ val = readl(priv->base + MVEBU_COMPHY_EXT_SELV(lane->id));
+ val &= ~MVEBU_COMPHY_EXT_SELV_RX_SAMPL(0x1f);
+ val |= MVEBU_COMPHY_EXT_SELV_RX_SAMPL(0x1a);
+ writel(val, priv->base + MVEBU_COMPHY_EXT_SELV(lane->id));
+
+ return mvebu_comphy_init_plls(lane, mode);
+}
+
+/*
+ * phy_ops.power_on: route the lane to its port and configure it for the mode
+ * previously recorded by mvebu_comphy_set_mode().
+ *
+ * The lane's 4-bit field in MVEBU_COMPHY_SELECTOR is set to the mux value of
+ * the lane/port/mode combination, then the mode-specific setup runs and the
+ * RF_RESET bit is set (also when the mode setup reported an error).
+ *
+ * Returns 0 on success, -ENOTSUPP when the combination or mode is not
+ * supported, or the error returned by the mode-specific setup.
+ */
+static int mvebu_comphy_power_on(struct phy *phy)
+{
+	struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+	struct mvebu_comphy_priv *priv = lane->priv;
+	int ret, mux;
+	u32 val;
+
+	/*
+	 * mux has to be signed: mvebu_comphy_get_mux() reports an unsupported
+	 * combination as a negative errno, which an unsigned variable would
+	 * turn into a bogus selector value instead of an error.
+	 */
+	mux = mvebu_comphy_get_mux(lane->id, lane->port, lane->mode);
+	if (mux < 0)
+		return -ENOTSUPP;
+
+	regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &val);
+	val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id));
+	val |= mux << MVEBU_COMPHY_SELECTOR_PHY(lane->id);
+	regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, val);
+
+	switch (lane->mode) {
+	case PHY_MODE_SGMII:
+		ret = mvebu_comphy_set_mode_sgmii(phy, lane->mode);
+		break;
+	case PHY_MODE_10GKR:
+		ret = mvebu_comphy_set_mode_10gkr(phy, lane->mode);
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	/* digital reset */
+	val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+	val |= MVEBU_COMPHY_SERDES_CFG1_RF_RESET;
+	writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+	return ret;
+}
+
+/*
+ * phy_ops.set_mode: remember the requested mode for the next power_on.
+ *
+ * No hardware is touched here. Returns -EINVAL, leaving the stored mode
+ * unchanged, when the lane/port/mode combination has no mux value.
+ */
+static int mvebu_comphy_set_mode(struct phy *phy, enum phy_mode mode)
+{
+	struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+	int mux = mvebu_comphy_get_mux(lane->id, lane->port, mode);
+
+	if (mux >= 0) {
+		lane->mode = mode;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * phy_ops.power_off: clear the three lane reset bits in SERDES_CFG1 and zero
+ * the lane's mux field in the selector register. Always returns 0.
+ */
+static int mvebu_comphy_power_off(struct phy *phy)
+{
+	struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+	struct mvebu_comphy_priv *priv = lane->priv;
+	void __iomem *cfg1 = priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id);
+	u32 reg;
+
+	reg = readl(cfg1);
+	reg &= ~MVEBU_COMPHY_SERDES_CFG1_RESET;
+	reg &= ~MVEBU_COMPHY_SERDES_CFG1_CORE_RESET;
+	reg &= ~MVEBU_COMPHY_SERDES_CFG1_RF_RESET;
+	writel(reg, cfg1);
+
+	/* 0x0 in the selector field means the lane is unused */
+	regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &reg);
+	reg &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id));
+	regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, reg);
+
+	return 0;
+}
+
+/*
+ * Generic PHY callbacks for one comphy lane. set_mode only records the mode;
+ * the hardware is programmed in power_on.
+ */
+static const struct phy_ops mvebu_comphy_ops = {
+ .power_on = mvebu_comphy_power_on,
+ .power_off = mvebu_comphy_power_off,
+ .set_mode = mvebu_comphy_set_mode,
+};
+
+/*
+ * Translate a consumer's phandle + port cell into the lane's struct phy.
+ *
+ * args->np identifies the lane's child node and args->args[0] the port that
+ * will use it. A lane can be claimed once: the port is stored in the lane and
+ * later lookups of the same lane fail.
+ *
+ * Returns the phy, or ERR_PTR(-EINVAL) for an out-of-range port,
+ * ERR_PTR(-ENODEV) when no lane matches the node, ERR_PTR(-EBUSY) when the
+ * lane already has a port assigned.
+ */
+static struct phy *mvebu_comphy_xlate(struct device *dev,
+				      struct of_phandle_args *args)
+{
+	struct mvebu_comphy_priv *priv = dev_get_drvdata(dev);
+	struct mvebu_comphy_lane *lane = NULL;
+	struct phy *found = NULL;
+	int idx;
+
+	if (WARN_ON(args->args[0] >= MVEBU_COMPHY_PORTS))
+		return ERR_PTR(-EINVAL);
+
+	for (idx = 0; idx < MVEBU_COMPHY_LANES && !found; idx++) {
+		struct phy *candidate = priv->phys[idx];
+
+		/* lanes without a child node were never created */
+		if (!candidate)
+			continue;
+
+		lane = phy_get_drvdata(candidate);
+		if (args->np == lane->of_node)
+			found = candidate;
+	}
+
+	if (!found)
+		return ERR_PTR(-ENODEV);
+
+	/* port is -1 until the first consumer claims the lane */
+	if (lane->port >= 0)
+		return ERR_PTR(-EBUSY);
+
+	lane->port = args->args[0];
+	return found;
+}
+
+/*
+ * Probe the comphy block: look up the system-controller regmap, map the
+ * comphy registers, create one generic PHY per available child node (lane)
+ * and register the PHY provider.
+ *
+ * Child nodes with a missing or out-of-range 'reg' property are reported and
+ * skipped. Returns 0 on success or a negative errno.
+ */
+static int mvebu_comphy_probe(struct platform_device *pdev)
+{
+	struct mvebu_comphy_priv *priv;
+	struct phy_provider *provider;
+	struct device_node *child;
+	struct resource *res;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = &pdev->dev;
+	priv->regmap =
+		syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+						"marvell,system-controller");
+	if (IS_ERR(priv->regmap))
+		return PTR_ERR(priv->regmap);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	/* devm_ioremap_resource() reports failure as ERR_PTR(), never NULL */
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	for_each_available_child_of_node(pdev->dev.of_node, child) {
+		struct mvebu_comphy_lane *lane;
+		struct phy *phy;
+		int ret;
+		u32 val;
+
+		ret = of_property_read_u32(child, "reg", &val);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "missing 'reg' property (%d)\n",
+				ret);
+			continue;
+		}
+
+		if (val >= MVEBU_COMPHY_LANES) {
+			dev_err(&pdev->dev, "invalid 'reg' property\n");
+			continue;
+		}
+
+		/*
+		 * The iterator holds a reference on 'child'; it has to be
+		 * dropped by hand when leaving the loop early.
+		 */
+		lane = devm_kzalloc(&pdev->dev, sizeof(*lane), GFP_KERNEL);
+		if (!lane) {
+			of_node_put(child);
+			return -ENOMEM;
+		}
+
+		phy = devm_phy_create(&pdev->dev, NULL, &mvebu_comphy_ops);
+		if (IS_ERR(phy)) {
+			of_node_put(child);
+			return PTR_ERR(phy);
+		}
+
+		lane->priv = priv;
+		lane->of_node = child;
+		lane->mode = PHY_MODE_INVALID;
+		lane->id = val;
+		lane->port = -1; /* not yet claimed by a consumer */
+		phy_set_drvdata(phy, lane);
+
+		priv->phys[val] = phy;
+
+		/*
+		 * Once all modes are supported in this driver we should call
+		 * mvebu_comphy_power_off(phy) here to avoid relying on the
+		 * bootloader/firmware configuration.
+		 */
+	}
+
+	/* drvdata must be set before xlate can be called by consumers */
+	dev_set_drvdata(&pdev->dev, priv);
+	provider = devm_of_phy_provider_register(&pdev->dev,
+						 mvebu_comphy_xlate);
+	return PTR_ERR_OR_ZERO(provider);
+}
+
+/* Device tree match table: binds to "marvell,comphy-cp110" nodes */
+static const struct of_device_id mvebu_comphy_of_match_table[] = {
+ { .compatible = "marvell,comphy-cp110" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mvebu_comphy_of_match_table);
+
+/* Platform driver definition; only a probe callback is provided */
+static struct platform_driver mvebu_comphy_driver = {
+ .probe = mvebu_comphy_probe,
+ .driver = {
+ .name = "mvebu-comphy",
+ .of_match_table = mvebu_comphy_of_match_table,
+ },
+};
+module_platform_driver(mvebu_comphy_driver);
+
+MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>");
+MODULE_DESCRIPTION("Common PHY driver for mvebu SoCs");
+MODULE_LICENSE("GPL v2");
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 03/13] Documentation/bindings: phy: document the Marvell comphy driver
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
The Marvell Armada 7K/8K SoCs contain a hardware block called COMPHY
that provides a number of shared PHYs used by various interfaces in the
SoC: network, SATA, PCIe, etc. This Device Tree binding allows describing
this COMPHY hardware block.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
.../devicetree/bindings/phy/phy-mvebu-comphy.txt | 43 ++++++++++++++++++++++
1 file changed, 43 insertions(+)
create mode 100644 Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt
diff --git a/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt b/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt
new file mode 100644
index 000000000000..bfcf80341657
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt
@@ -0,0 +1,43 @@
+mvebu comphy driver
+-------------------
+
+A comphy controller can be found on Marvell Armada 7k/8k on the CP110. It
+provides a number of shared PHYs used by various interfaces (network, sata,
+usb, PCIe...).
+
+Required properties:
+
+- compatible: should be "marvell,comphy-cp110"
+- reg: should contain the comphy register location and length.
+- marvell,system-controller: should contain a phandle to the
+ system controller node.
+- #address-cells: should be 1.
+- #size-cells: should be 0.
+
+A sub-node is required for each comphy lane provided by the comphy.
+
+Required properties (child nodes):
+
+- reg: comphy lane number.
+- #phy-cells : from the generic phy bindings, must be 1. Defines the
+ input port to use for a given comphy lane.
+
+Example:
+
+ cpm_comphy: phy@120000 {
+ compatible = "marvell,comphy-cp110";
+ reg = <0x120000 0x6000>;
+ marvell,system-controller = <&cpm_syscon0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpm_comphy0: phy@0 {
+ reg = <0>;
+ #phy-cells = <1>;
+ };
+
+ cpm_comphy1: phy@1 {
+ reg = <1>;
+ #phy-cells = <1>;
+ };
+ };
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 04/13] net: mvpp2: initialize the comphy
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
On some platforms, the comphy is between the MAC GoP and the PHYs. The
mvpp2 driver currently relies on the firmware/bootloader to configure
the comphy. As a comphy driver was added to the generic PHY framework,
this patch uses it in the mvpp2 driver to configure the comphy at boot
time to avoid relying on the bootloader.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
drivers/net/ethernet/marvell/mvpp2.c | 44 +++++++++++++++++++++++++++++++++++-
1 file changed, 43 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index e312dfc3555b..fab231858a41 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -28,6 +28,7 @@
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/phy.h>
+#include <linux/phy/phy.h>
#include <linux/clk.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
@@ -861,6 +862,7 @@ struct mvpp2_port {
phy_interface_t phy_interface;
struct device_node *phy_node;
+ struct phy *comphy;
unsigned int link;
unsigned int duplex;
unsigned int speed;
@@ -4420,6 +4422,32 @@ static int mvpp22_gop_init(struct mvpp2_port *port)
return -EINVAL;
}
+static int mvpp22_comphy_init(struct mvpp2_port *port)
+{
+ enum phy_mode mode;
+ int ret;
+
+ if (!port->comphy)
+ return 0;
+
+ switch (port->phy_interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ mode = PHY_MODE_SGMII;
+ break;
+ case PHY_INTERFACE_MODE_10GKR:
+ mode = PHY_MODE_10GKR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = phy_set_mode(port->comphy, mode);
+ if (ret)
+ return ret;
+
+ return phy_power_on(port->comphy);
+}
+
static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port)
{
u32 val;
@@ -6404,8 +6432,10 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
/* Enable interrupts on all CPUs */
mvpp2_interrupts_enable(port);
- if (port->priv->hw_version == MVPP22)
+ if (port->priv->hw_version == MVPP22) {
+ mvpp22_comphy_init(port);
mvpp22_gop_init(port);
+ }
mvpp2_port_mii_set(port);
mvpp2_port_enable(port);
@@ -6436,6 +6466,7 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
mvpp2_egress_disable(port);
mvpp2_port_disable(port);
phy_stop(ndev->phydev);
+ phy_power_off(port->comphy);
}
static int mvpp2_check_ringparam_valid(struct net_device *dev,
@@ -7270,6 +7301,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
struct mvpp2 *priv)
{
struct device_node *phy_node;
+ struct phy *comphy;
struct mvpp2_port *port;
struct mvpp2_port_pcpu *port_pcpu;
struct net_device *dev;
@@ -7311,6 +7343,15 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_netdev;
}
+ comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
+ if (IS_ERR(comphy)) {
+ if (PTR_ERR(comphy) == -EPROBE_DEFER) {
+ err = -EPROBE_DEFER;
+ goto err_free_netdev;
+ }
+ comphy = NULL;
+ }
+
if (of_property_read_u32(port_node, "port-id", &id)) {
err = -EINVAL;
dev_err(&pdev->dev, "missing port-id value\n");
@@ -7344,6 +7385,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
port->phy_node = phy_node;
port->phy_interface = phy_mode;
+ port->comphy = comphy;
if (priv->hw_version == MVPP21) {
res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id);
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 05/13] net: mvpp2: simplify the link_event function
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
The link_event function is somewhat complicated. This cosmetic patch
simplifies it.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
drivers/net/ethernet/marvell/mvpp2.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index fab231858a41..9c0c81e68d55 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5740,7 +5740,6 @@ static void mvpp2_link_event(struct net_device *dev)
{
struct mvpp2_port *port = netdev_priv(dev);
struct phy_device *phydev = dev->phydev;
- int status_change = 0;
u32 val;
if (phydev->link) {
@@ -5771,16 +5770,8 @@ static void mvpp2_link_event(struct net_device *dev)
}
if (phydev->link != port->link) {
- if (!phydev->link) {
- port->duplex = -1;
- port->speed = 0;
- }
-
port->link = phydev->link;
- status_change = 1;
- }
- if (status_change) {
if (phydev->link) {
val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
val |= (MVPP2_GMAC_FORCE_LINK_PASS |
@@ -5789,9 +5780,13 @@ static void mvpp2_link_event(struct net_device *dev)
mvpp2_egress_enable(port);
mvpp2_ingress_enable(port);
} else {
+ port->duplex = -1;
+ port->speed = 0;
+
mvpp2_ingress_disable(port);
mvpp2_egress_disable(port);
}
+
phy_print_status(phydev);
}
}
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 06/13] net: mvpp2: improve the link management function
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
When the link status changes, the phylib calls the link_event function
in the mvpp2 driver. Before this patch only the egress/ingress transmit
was enabled/disabled. This patch adds more functionality to the link
status management code by enabling/disabling the port per-cpu
interrupts, and the port itself. The queues are now stopped as well, and
the netif carrier helpers are called.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
drivers/net/ethernet/marvell/mvpp2.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 9c0c81e68d55..09cad32734f3 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5777,14 +5777,25 @@ static void mvpp2_link_event(struct net_device *dev)
val |= (MVPP2_GMAC_FORCE_LINK_PASS |
MVPP2_GMAC_FORCE_LINK_DOWN);
writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+ mvpp2_interrupts_enable(port);
+ mvpp2_port_enable(port);
+
mvpp2_egress_enable(port);
mvpp2_ingress_enable(port);
+ netif_carrier_on(dev);
+ netif_tx_wake_all_queues(dev);
} else {
port->duplex = -1;
port->speed = 0;
+ netif_tx_stop_all_queues(dev);
+ netif_carrier_off(dev);
mvpp2_ingress_disable(port);
mvpp2_egress_disable(port);
+
+ mvpp2_port_disable(port);
+ mvpp2_interrupts_disable(port);
}
phy_print_status(phydev);
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 07/13] net: mvpp2: do not set GMAC autoneg when using XLG MAC
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
When using the XLG MAC, it does not make sense to force the GMAC autoneg
parameters. This patch adds checks to only set the GMAC autoneg
parameters when needed (i.e. when not using the XLG MAC).
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
drivers/net/ethernet/marvell/mvpp2.c | 64 +++++++++++++++++++++++-------------
1 file changed, 42 insertions(+), 22 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 09cad32734f3..2f05a0b0773c 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5735,6 +5735,37 @@ static irqreturn_t mvpp2_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
+static void mvpp2_gmac_set_autoneg(struct mvpp2_port *port,
+ struct phy_device *phydev)
+{
+ u32 val;
+
+ if (port->phy_interface != PHY_INTERFACE_MODE_RGMII &&
+ port->phy_interface != PHY_INTERFACE_MODE_RGMII_ID &&
+ port->phy_interface != PHY_INTERFACE_MODE_RGMII_RXID &&
+ port->phy_interface != PHY_INTERFACE_MODE_RGMII_TXID &&
+ port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+ return;
+
+ val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
+ MVPP2_GMAC_CONFIG_GMII_SPEED |
+ MVPP2_GMAC_CONFIG_FULL_DUPLEX |
+ MVPP2_GMAC_AN_SPEED_EN |
+ MVPP2_GMAC_AN_DUPLEX_EN);
+
+ if (phydev->duplex)
+ val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+
+ if (phydev->speed == SPEED_1000)
+ val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
+ else if (phydev->speed == SPEED_100)
+ val |= MVPP2_GMAC_CONFIG_MII_SPEED;
+
+ writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+}
+
/* Adjust link */
static void mvpp2_link_event(struct net_device *dev)
{
@@ -5745,24 +5776,7 @@ static void mvpp2_link_event(struct net_device *dev)
if (phydev->link) {
if ((port->speed != phydev->speed) ||
(port->duplex != phydev->duplex)) {
- u32 val;
-
- val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
- val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
- MVPP2_GMAC_CONFIG_GMII_SPEED |
- MVPP2_GMAC_CONFIG_FULL_DUPLEX |
- MVPP2_GMAC_AN_SPEED_EN |
- MVPP2_GMAC_AN_DUPLEX_EN);
-
- if (phydev->duplex)
- val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-
- if (phydev->speed == SPEED_1000)
- val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
- else if (phydev->speed == SPEED_100)
- val |= MVPP2_GMAC_CONFIG_MII_SPEED;
-
- writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ mvpp2_gmac_set_autoneg(port, phydev);
port->duplex = phydev->duplex;
port->speed = phydev->speed;
@@ -5773,10 +5787,16 @@ static void mvpp2_link_event(struct net_device *dev)
port->link = phydev->link;
if (phydev->link) {
- val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
- val |= (MVPP2_GMAC_FORCE_LINK_PASS |
- MVPP2_GMAC_FORCE_LINK_DOWN);
- writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ if (port->phy_interface == PHY_INTERFACE_MODE_RGMII ||
+ port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID ||
+ port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID ||
+ port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ val |= (MVPP2_GMAC_FORCE_LINK_PASS |
+ MVPP2_GMAC_FORCE_LINK_DOWN);
+ writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ }
mvpp2_interrupts_enable(port);
mvpp2_port_enable(port);
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 08/13] net: mvpp2: dynamic reconfiguration of the comphy/GoP/MAC
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
This patch adds logic to reconfigure the comphy/GoP/MAC when the link
state is updated at runtime. This is very useful on boards where many
link speeds are supported: depending on what is negotiated, the PPv2
driver will automatically reconfigure the link between the PHY and the
MAC.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
drivers/net/ethernet/marvell/mvpp2.c | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 2f05a0b0773c..9e64b1ba3d43 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5771,9 +5771,28 @@ static void mvpp2_link_event(struct net_device *dev)
{
struct mvpp2_port *port = netdev_priv(dev);
struct phy_device *phydev = dev->phydev;
+ bool link_reconfigured = false;
u32 val;
if (phydev->link) {
+ if (port->phy_interface != phydev->interface && port->comphy) {
+ /* disable current port for reconfiguration */
+ mvpp2_interrupts_disable(port);
+ netif_carrier_off(port->dev);
+ mvpp2_port_disable(port);
+ phy_power_off(port->comphy);
+
+ /* comphy reconfiguration */
+ port->phy_interface = phydev->interface;
+ mvpp22_comphy_init(port);
+
+ /* gop/mac reconfiguration */
+ mvpp22_gop_init(port);
+ mvpp2_port_mii_set(port);
+
+ link_reconfigured = true;
+ }
+
if ((port->speed != phydev->speed) ||
(port->duplex != phydev->duplex)) {
mvpp2_gmac_set_autoneg(port, phydev);
@@ -5783,7 +5802,7 @@ static void mvpp2_link_event(struct net_device *dev)
}
}
- if (phydev->link != port->link) {
+ if (phydev->link != port->link || link_reconfigured) {
port->link = phydev->link;
if (phydev->link) {
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 09/13] arm64: dts: marvell: extend the cp110 syscon register area length
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
This patch extends the system register area length on both cp110s
(master and slave) to include some of the comphy registers as well.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi | 2 +-
arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
index 18299e164cb7..9b2581473183 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
@@ -118,7 +118,7 @@
cpm_syscon0: system-controller@440000 {
compatible = "syscon", "simple-mfd";
- reg = <0x440000 0x1000>;
+ reg = <0x440000 0x2000>;
cpm_clk: clock {
compatible = "marvell,cp110-clock";
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index 5ae8fa575859..d3902f218c46 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -125,7 +125,7 @@
cps_syscon0: system-controller@440000 {
compatible = "syscon", "simple-mfd";
- reg = <0x440000 0x1000>;
+ reg = <0x440000 0x2000>;
cps_clk: clock {
compatible = "marvell,cp110-clock";
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 10/13] arm64: dts: marvell: add comphy nodes on cp110 master and slave
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
Now that the comphy driver is available, this patch adds the
corresponding nodes in the cp110 master and slave device trees.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
.../boot/dts/marvell/armada-cp110-master.dtsi | 38 ++++++++++++++++++++++
.../arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 38 ++++++++++++++++++++++
2 files changed, 76 insertions(+)
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
index 9b2581473183..f2a50552bad4 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
@@ -91,6 +91,44 @@
};
};
+ cpm_comphy: phy@120000 {
+ compatible = "marvell,comphy-cp110";
+ reg = <0x120000 0x6000>;
+ marvell,system-controller = <&cpm_syscon0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpm_comphy0: phy@0 {
+ reg = <0>;
+ #phy-cells = <1>;
+ };
+
+ cpm_comphy1: phy@1 {
+ reg = <1>;
+ #phy-cells = <1>;
+ };
+
+ cpm_comphy2: phy@2 {
+ reg = <2>;
+ #phy-cells = <1>;
+ };
+
+ cpm_comphy3: phy@3 {
+ reg = <3>;
+ #phy-cells = <1>;
+ };
+
+ cpm_comphy4: phy@4 {
+ reg = <4>;
+ #phy-cells = <1>;
+ };
+
+ cpm_comphy5: phy@5 {
+ reg = <5>;
+ #phy-cells = <1>;
+ };
+ };
+
cpm_mdio: mdio@12a200 {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index d3902f218c46..bd7f7d0e6de9 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -98,6 +98,44 @@
};
};
+ cps_comphy: phy@120000 {
+ compatible = "marvell,comphy-cp110";
+ reg = <0x120000 0x6000>;
+ marvell,system-controller = <&cps_syscon0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cps_comphy0: phy@0 {
+ reg = <0>;
+ #phy-cells = <1>;
+ };
+
+ cps_comphy1: phy@1 {
+ reg = <1>;
+ #phy-cells = <1>;
+ };
+
+ cps_comphy2: phy@2 {
+ reg = <2>;
+ #phy-cells = <1>;
+ };
+
+ cps_comphy3: phy@3 {
+ reg = <3>;
+ #phy-cells = <1>;
+ };
+
+ cps_comphy4: phy@4 {
+ reg = <4>;
+ #phy-cells = <1>;
+ };
+
+ cps_comphy5: phy@5 {
+ reg = <5>;
+ #phy-cells = <1>;
+ };
+ };
+
cps_mdio: mdio@12a200 {
#address-cells = <1>;
#size-cells = <0>;
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 11/13] arm64: dts: marvell: mcbin: add comphy references to Ethernet ports
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
This patch adds comphy phandles to the Ethernet ports in the mcbin
device tree. The comphy is used to configure the serdes PHYs used by
these ports.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts
index 9f0a00802452..970081ca197b 100644
--- a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts
+++ b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts
@@ -148,6 +148,7 @@
&cpm_eth0 {
status = "okay";
phy = <&phy0>;
+ phys = <&cpm_comphy4 0>;
phy-mode = "10gbase-kr";
};
@@ -181,6 +182,7 @@
&cps_eth0 {
status = "okay";
phy = <&phy8>;
+ phys = <&cps_comphy4 0>;
phy-mode = "10gbase-kr";
};
@@ -189,6 +191,7 @@
status = "okay";
phy = <&ge_phy>;
phy-mode = "sgmii";
+ phys = <&cps_comphy0 1>;
};
&cps_sata0 {
--
2.13.5
^ permalink raw reply related
* [PATCH net-next v3 12/13] arm64: dts: marvell: 7040-db: add comphy references to Ethernet ports
From: Antoine Tenart @ 2017-08-28 14:57 UTC (permalink / raw)
To: davem, kishon, andrew, jason, sebastian.hesselbarth,
gregory.clement
Cc: Antoine Tenart, thomas.petazzoni, nadavh, linux, linux-kernel, mw,
stefanc, miquel.raynal, netdev
In-Reply-To: <20170828145725.2539-1-antoine.tenart@free-electrons.com>
This patch adds comphy phandles to the Ethernet ports in the 7040-db
device tree. The comphy is used to configure the serdes PHYs used by
these ports.
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
---
arch/arm64/boot/dts/marvell/armada-7040-db.dts | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/boot/dts/marvell/armada-7040-db.dts b/arch/arm64/boot/dts/marvell/armada-7040-db.dts
index 92c761c380d3..03d1c42d7c47 100644
--- a/arch/arm64/boot/dts/marvell/armada-7040-db.dts
+++ b/arch/arm64/boot/dts/marvell/armada-7040-db.dts
@@ -180,6 +180,7 @@
status = "okay";
phy = <&phy0>;
phy-mode = "sgmii";
+ phys = <&cpm_comphy0 1>;
};
&cpm_eth2 {
--
2.13.5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox