* [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs
@ 2018-11-21 0:18 Vlad Dumitrescu
2018-11-21 0:49 ` Eric Dumazet
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Vlad Dumitrescu @ 2018-11-21 0:18 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
Cc: Eric Dumazet, Willem de Bruijn, Vlad Dumitrescu
Allowing tc clsact programs to read and write skb->tstamp makes it possible
to rate limit egress traffic in concert with a qdisc which supports Earliest
Departure Time (EDT), such as FQ.
Signed-off-by: Vlad Dumitrescu <vladum@google.com>
---
include/uapi/linux/bpf.h | 1 +
net/core/filter.c | 26 +++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 1 +
tools/testing/selftests/bpf/test_verifier.c | 4 ++++
4 files changed, 32 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
__u32 data_meta;
struct bpf_flow_keys *flow_keys;
+ __u64 tstamp;
};
struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index f6ca38a7d4332..c45155c8e519c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != sizeof(struct bpf_flow_keys *))
return false;
break;
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ if (size != sizeof(__u64))
+ return false;
+ break;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
case bpf_ctx_range(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
return false;
}
@@ -5624,6 +5629,7 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5665,6 +5671,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
return false;
}
@@ -5874,6 +5881,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, priority):
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
break;
default:
return false;
@@ -6093,6 +6101,7 @@ static bool sk_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
return false;
}
@@ -6179,6 +6188,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
return false;
}
@@ -6488,6 +6498,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
si->src_reg, off);
break;
+
+ case offsetof(struct __sk_buff, tstamp):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_DW,
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff,
+ tstamp, 8,
+ target_size));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_DW,
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff,
+ tstamp, 8,
+ target_size));
}
return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c1554aa074659..23e2031a43d43 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2468,6 +2468,7 @@ struct __sk_buff {
__u32 data_meta;
struct bpf_flow_keys *flow_keys;
+ __u64 tstamp;
};
struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 54d16fbdef8b9..10b04a52904e0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, tc_index)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tstamp)),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tstamp)),
BPF_EXIT_INSN(),
},
.errstr_unpriv = "",
--
2.19.1.1215.g8438c0b245-goog
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs 2018-11-21 0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu @ 2018-11-21 0:49 ` Eric Dumazet 2018-11-21 2:01 ` Willem de Bruijn 2018-11-21 2:40 ` Alexei Starovoitov 2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu 2 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2018-11-21 0:49 UTC (permalink / raw) To: Vlad Dumitrescu, Alexei Starovoitov, Daniel Borkmann, netdev Cc: Eric Dumazet, Willem de Bruijn On 11/20/2018 04:18 PM, Vlad Dumitrescu wrote: > This could be used to rate limit egress traffic in concert with a qdisc > which supports Earliest Departure Time, such as FQ. > > Signed-off-by: Vlad Dumitrescu <vladum@google.com> > --- > include/uapi/linux/bpf.h | 1 + > net/core/filter.c | 26 +++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 1 + > tools/testing/selftests/bpf/test_verifier.c | 4 ++++ > 4 files changed, 32 insertions(+) > Awesome, thanks Vlad Note that this also can be used to implement a delay (a la netem). Acked-by: Eric Dumazet <edumazet@google.com> ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs 2018-11-21 0:49 ` Eric Dumazet @ 2018-11-21 2:01 ` Willem de Bruijn 0 siblings, 0 replies; 10+ messages in thread From: Willem de Bruijn @ 2018-11-21 2:01 UTC (permalink / raw) To: Eric Dumazet Cc: Vlad Dumitrescu, Alexei Starovoitov, Daniel Borkmann, Network Development, Eric Dumazet, Willem de Bruijn On Tue, Nov 20, 2018 at 8:22 PM Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > On 11/20/2018 04:18 PM, Vlad Dumitrescu wrote: > > This could be used to rate limit egress traffic in concert with a qdisc > > which supports Earliest Departure Time, such as FQ. > > > > Signed-off-by: Vlad Dumitrescu <vladum@google.com> > > --- > > include/uapi/linux/bpf.h | 1 + > > net/core/filter.c | 26 +++++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 1 + > > tools/testing/selftests/bpf/test_verifier.c | 4 ++++ > > 4 files changed, 32 insertions(+) > > > > Awesome, thanks Vlad > > Note that this also can be used to implement a delay (a la netem). > > Acked-by: Eric Dumazet <edumazet@google.com> Acked-by: Willem de Bruijn <willemb@google.com> ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs 2018-11-21 0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu 2018-11-21 0:49 ` Eric Dumazet @ 2018-11-21 2:40 ` Alexei Starovoitov 2018-11-21 13:08 ` Eric Dumazet 2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu 2 siblings, 1 reply; 10+ messages in thread From: Alexei Starovoitov @ 2018-11-21 2:40 UTC (permalink / raw) To: Vlad Dumitrescu Cc: Alexei Starovoitov, Daniel Borkmann, netdev, Eric Dumazet, Willem de Bruijn On Tue, Nov 20, 2018 at 07:18:48PM -0500, Vlad Dumitrescu wrote: > This could be used to rate limit egress traffic in concert with a qdisc > which supports Earliest Departure Time, such as FQ. > > Signed-off-by: Vlad Dumitrescu <vladum@google.com> > --- > include/uapi/linux/bpf.h | 1 + > net/core/filter.c | 26 +++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 1 + > tools/testing/selftests/bpf/test_verifier.c | 4 ++++ > 4 files changed, 32 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c1554aa074659..23e2031a43d43 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2468,6 +2468,7 @@ struct __sk_buff { > > __u32 data_meta; > struct bpf_flow_keys *flow_keys; > + __u64 tstamp; > }; > > struct bpf_tunnel_key { > diff --git a/net/core/filter.c b/net/core/filter.c > index f6ca38a7d4332..c45155c8e519c 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type > if (size != sizeof(struct bpf_flow_keys *)) > return false; > break; > + case bpf_ctx_range(struct __sk_buff, tstamp): > + if (size != sizeof(__u64)) > + return false; > + break; > default: > /* Only narrow read access allowed for now. 
*/ > if (type == BPF_WRITE) { > @@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, data_end): > case bpf_ctx_range(struct __sk_buff, flow_keys): > case bpf_ctx_range_till(struct __sk_buff, family, local_port): > + case bpf_ctx_range(struct __sk_buff, tstamp): > return false; > } > > @@ -5624,6 +5629,7 @@ static bool cg_skb_is_valid_access(int off, int size, > case bpf_ctx_range(struct __sk_buff, tc_classid): > case bpf_ctx_range(struct __sk_buff, data_meta): > case bpf_ctx_range(struct __sk_buff, flow_keys): > + case bpf_ctx_range(struct __sk_buff, tstamp): > return false; looks good to me. Any particular reason you decided to disable it for cg_skb ? It seems to me the same EDT approach will work from cgroup-bpf skb hooks just as well and then we can have neat way of controlling traffic per-container instead of tc-clsbpf global. If you're already on cgroup v2 it will save you a lot of classifier cycles, since you'd be able to group apps by cgroup instead of relying on ip only. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs 2018-11-21 2:40 ` Alexei Starovoitov @ 2018-11-21 13:08 ` Eric Dumazet 2018-11-21 18:48 ` Vlad Dumitrescu 0 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2018-11-21 13:08 UTC (permalink / raw) To: Alexei Starovoitov, Vlad Dumitrescu Cc: Alexei Starovoitov, Daniel Borkmann, netdev, Eric Dumazet, Willem de Bruijn On 11/20/2018 06:40 PM, Alexei Starovoitov wrote: > > looks good to me. > > Any particular reason you decided to disable it for cg_skb ? > It seems to me the same EDT approach will work from > cgroup-bpf skb hooks just as well and then we can have neat > way of controlling traffic per-container instead of tc-clsbpf global. > If you're already on cgroup v2 it will save you a lot of classifier > cycles, since you'd be able to group apps by cgroup > instead of relying on ip only. Vlad first wrote a complete version, but we felt explaining the _why_ was probably harder. No particular reason, other than having to write more tests perhaps. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs 2018-11-21 13:08 ` Eric Dumazet @ 2018-11-21 18:48 ` Vlad Dumitrescu 2018-11-21 22:46 ` Alexei Starovoitov 2018-11-21 22:57 ` Daniel Borkmann 0 siblings, 2 replies; 10+ messages in thread From: Vlad Dumitrescu @ 2018-11-21 18:48 UTC (permalink / raw) To: eric.dumazet, alexei.starovoitov Cc: Vlad Dumitrescu, ast, Daniel Borkmann, netdev, edumazet, willemb On Wed, Nov 21, 2018 at 5:08 AM Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > On 11/20/2018 06:40 PM, Alexei Starovoitov wrote: > > > > > looks good to me. > > > > Any particular reason you decided to disable it for cg_skb ? > > It seems to me the same EDT approach will work from > > cgroup-bpf skb hooks just as well and then we can have neat > > way of controlling traffic per-container instead of tc-clsbpf global. > > If you're already on cgroup v2 it will save you a lot of classifier > > cycles, since you'd be able to group apps by cgroup > > instead of relying on ip only. > > Vlad first wrote a complete version, but we felt explaining the _why_ > was probably harder. > > No particular reason, other than having to write more tests perhaps. This sounds reasonable to me. I can prepare a v2. Any concerns regarding capabilities? For example data and data_end are only available to CAP_SYS_ADMIN. Note that enforcement of this would be done by a global component later in the pipeline (e.g., FQ qdisc). Any opinions on sk_filter, lwt, and sk_skb before I send v2? ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs 2018-11-21 18:48 ` Vlad Dumitrescu @ 2018-11-21 22:46 ` Alexei Starovoitov 2018-11-21 22:57 ` Daniel Borkmann 1 sibling, 0 replies; 10+ messages in thread From: Alexei Starovoitov @ 2018-11-21 22:46 UTC (permalink / raw) To: Vlad Dumitrescu Cc: eric.dumazet, Vlad Dumitrescu, ast, Daniel Borkmann, netdev, edumazet, willemb On Wed, Nov 21, 2018 at 10:48:21AM -0800, Vlad Dumitrescu wrote: > On Wed, Nov 21, 2018 at 5:08 AM Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > > > > > On 11/20/2018 06:40 PM, Alexei Starovoitov wrote: > > > > > > > > looks good to me. > > > > > > Any particular reason you decided to disable it for cg_skb ? > > > It seems to me the same EDT approach will work from > > > cgroup-bpf skb hooks just as well and then we can have neat > > > way of controlling traffic per-container instead of tc-clsbpf global. > > > If you're already on cgroup v2 it will save you a lot of classifier > > > cycles, since you'd be able to group apps by cgroup > > > instead of relying on ip only. > > > > Vlad first wrote a complete version, but we felt explaining the _why_ > > was probably harder. > > > > No particular reason, other than having to write more tests perhaps. > > This sounds reasonable to me. I can prepare a v2. thank you > Any concerns regarding capabilities? For example data and data_end are > only available to CAP_SYS_ADMIN. Note that enforcement of this would > be done by a global component later in the pipeline (e.g., FQ qdisc). I'd do cap_sys_admin for now, since i'm not sure whether any tstamp values will be acceptable to fq. > Any opinions on sk_filter, lwt, and sk_skb before I send v2? sk_filter not appealing, since it's too late in the stack. lwt could be interesting, but I'd wait until first user appears. sk_skb - useful, but it requires more work. We'll follow up to that sk_skb with our own patches. Thanks! 
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs 2018-11-21 18:48 ` Vlad Dumitrescu 2018-11-21 22:46 ` Alexei Starovoitov @ 2018-11-21 22:57 ` Daniel Borkmann 1 sibling, 0 replies; 10+ messages in thread From: Daniel Borkmann @ 2018-11-21 22:57 UTC (permalink / raw) To: Vlad Dumitrescu, eric.dumazet, alexei.starovoitov Cc: Vlad Dumitrescu, ast, netdev, edumazet, willemb On 11/21/2018 07:48 PM, Vlad Dumitrescu wrote: > On Wed, Nov 21, 2018 at 5:08 AM Eric Dumazet <eric.dumazet@gmail.com> wrote: >> On 11/20/2018 06:40 PM, Alexei Starovoitov wrote: >>> >>> looks good to me. >>> >>> Any particular reason you decided to disable it for cg_skb ? >>> It seems to me the same EDT approach will work from >>> cgroup-bpf skb hooks just as well and then we can have neat >>> way of controlling traffic per-container instead of tc-clsbpf global. >>> If you're already on cgroup v2 it will save you a lot of classifier >>> cycles, since you'd be able to group apps by cgroup >>> instead of relying on ip only. >> >> Vlad first wrote a complete version, but we felt explaining the _why_ >> was probably harder. >> >> No particular reason, other than having to write more tests perhaps. > > This sounds reasonable to me. I can prepare a v2. > > Any concerns regarding capabilities? For example data and data_end are > only available to CAP_SYS_ADMIN. Note that enforcement of this would > be done by a global component later in the pipeline (e.g., FQ qdisc). cg_skb_is_valid_access() has the CAP_SYS_ADMIN enforcement for direct packet access since cg_skb can also run from unprivileged. Makes sense to do the same for skb->tstamp for the STX_MEM part at least. > Any opinions on sk_filter, lwt, and sk_skb before I send v2? I'd probably leave that out for the time being if there is no concrete use at this point. Thanks, Daniel ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs 2018-11-21 0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu 2018-11-21 0:49 ` Eric Dumazet 2018-11-21 2:40 ` Alexei Starovoitov @ 2018-11-22 19:39 ` Vlad Dumitrescu 2018-11-22 23:49 ` Alexei Starovoitov 2 siblings, 1 reply; 10+ messages in thread From: Vlad Dumitrescu @ 2018-11-22 19:39 UTC (permalink / raw) To: Alexei Starovoitov, Daniel Borkmann, netdev Cc: Eric Dumazet, Willem de Bruijn, Vlad Dumitrescu This could be used to rate limit egress traffic in concert with a qdisc which supports Earliest Departure Time, such as FQ. Write access from cg skb progs only with CAP_SYS_ADMIN, since the value will be used by downstream qdiscs. It might make sense to relax this. Changes v1 -> v2: - allow access from cg skb, write only with CAP_SYS_ADMIN Signed-off-by: Vlad Dumitrescu <vladum@google.com> --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 29 +++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++++ 4 files changed, 60 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c1554aa074659..23e2031a43d43 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { diff --git a/net/core/filter.c b/net/core/filter.c index f6ca38a7d4332..65dc13aeca7c4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != sizeof(struct bpf_flow_keys *)) return false; break; + case bpf_ctx_range(struct __sk_buff, tstamp): + if (size != sizeof(__u64)) + return false; + break; default: /* Only narrow read access allowed for now. 
*/ if (type == BPF_WRITE) { @@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -5638,6 +5643,10 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; + case bpf_ctx_range(struct __sk_buff, tstamp): + if (!capable(CAP_SYS_ADMIN)) + return false; + break; default: return false; } @@ -5665,6 +5674,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -5874,6 +5884,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): + case bpf_ctx_range(struct __sk_buff, tstamp): break; default: return false; @@ -6093,6 +6104,7 @@ static bool sk_skb_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -6179,6 +6191,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, tstamp): return false; } @@ -6488,6 +6501,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, 
si->src_reg, off); break; + + case offsetof(struct __sk_buff, tstamp): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_DW, + si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, + tstamp, 8, + target_size)); + else + *insn++ = BPF_LDX_MEM(BPF_DW, + si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, + tstamp, 8, + target_size)); } return insn - insn_buf; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c1554aa074659..23e2031a43d43 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2468,6 +2468,7 @@ struct __sk_buff { __u32 data_meta; struct bpf_flow_keys *flow_keys; + __u64 tstamp; }; struct bpf_tunnel_key { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 54d16fbdef8b9..537a8f91af02d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, tc_index)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), BPF_EXIT_INSN(), }, .errstr_unpriv = "", @@ -5297,6 +5301,31 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R2 leaks addr into helper function", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, + { + "write tstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=152 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "read tstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, 
BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, { "multiple registers share map_lookup_elem result", .insns = { -- 2.19.1.1215.g8438c0b245-goog ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs 2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu @ 2018-11-22 23:49 ` Alexei Starovoitov 0 siblings, 0 replies; 10+ messages in thread From: Alexei Starovoitov @ 2018-11-22 23:49 UTC (permalink / raw) To: Vlad Dumitrescu Cc: Alexei Starovoitov, Daniel Borkmann, netdev, Eric Dumazet, Willem de Bruijn On Thu, Nov 22, 2018 at 02:39:16PM -0500, Vlad Dumitrescu wrote: > This could be used to rate limit egress traffic in concert with a qdisc > which supports Earliest Departure Time, such as FQ. > > Write access from cg skb progs only with CAP_SYS_ADMIN, since the value > will be used by downstream qdiscs. It might make sense to relax this. > > Changes v1 -> v2: > - allow access from cg skb, write only with CAP_SYS_ADMIN > > Signed-off-by: Vlad Dumitrescu <vladum@google.com> Applied to bpf-next. I copied Eric's and Willem's Acks from v1, since v2 is essentially the same. Thanks everyone! ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads: [~2018-11-23 10:31 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2018-11-21 0:18 [PATCH bpf-next] bpf: add read/write access to skb->tstamp from tc clsact progs Vlad Dumitrescu 2018-11-21 0:49 ` Eric Dumazet 2018-11-21 2:01 ` Willem de Bruijn 2018-11-21 2:40 ` Alexei Starovoitov 2018-11-21 13:08 ` Eric Dumazet 2018-11-21 18:48 ` Vlad Dumitrescu 2018-11-21 22:46 ` Alexei Starovoitov 2018-11-21 22:57 ` Daniel Borkmann 2018-11-22 19:39 ` [PATCH v2 bpf-next] bpf: add skb->tstamp r/w access from tc clsact and cg skb progs Vlad Dumitrescu 2018-11-22 23:49 ` Alexei Starovoitov
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).