* [PATCH net-next V2 0/2] tun: allow to attach eBPF filter
@ 2018-01-05 3:54 Jason Wang
2018-01-05 3:54 ` [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
2018-01-05 3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
0 siblings, 2 replies; 5+ messages in thread
From: Jason Wang @ 2018-01-05 3:54 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang
Hi all:
This series tries to implement an eBPF socket filter for tun. This could
be used for implementing an efficient virtio-net receive filter for
vhost-net.
Thanks
Changes from V1:
- trim more bytes if a vlan tag is present, to make sure the packet
length does not exceed what is allowed by the filter.
Jason Wang (2):
tuntap: rename struct tun_steering_prog to struct tun_prog
tun: allow to attach ebpf socket filter
drivers/net/tun.c | 71 ++++++++++++++++++++++++++++++++-------------
include/uapi/linux/if_tun.h | 1 +
2 files changed, 52 insertions(+), 20 deletions(-)
--
2.7.4
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog
2018-01-05 3:54 [PATCH net-next V2 0/2] tun: allow to attach eBPF filter Jason Wang
@ 2018-01-05 3:54 ` Jason Wang
2018-01-05 3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
1 sibling, 0 replies; 5+ messages in thread
From: Jason Wang @ 2018-01-05 3:54 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang
So that it can be reused by eBPF programs other than queue selection.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/tun.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d631..0853829 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -195,7 +195,7 @@ struct tun_flow_entry {
#define TUN_NUM_FLOW_ENTRIES 1024
-struct tun_steering_prog {
+struct tun_prog {
struct rcu_head rcu;
struct bpf_prog *prog;
};
@@ -237,7 +237,7 @@ struct tun_struct {
u32 rx_batched;
struct tun_pcpu_stats __percpu *pcpu_stats;
struct bpf_prog __rcu *xdp_prog;
- struct tun_steering_prog __rcu *steering_prog;
+ struct tun_prog __rcu *steering_prog;
};
static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -571,7 +571,7 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{
- struct tun_steering_prog *prog;
+ struct tun_prog *prog;
u16 ret = 0;
prog = rcu_dereference(tun->steering_prog);
@@ -2027,19 +2027,18 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
return ret;
}
-static void tun_steering_prog_free(struct rcu_head *rcu)
+static void tun_prog_free(struct rcu_head *rcu)
{
- struct tun_steering_prog *prog = container_of(rcu,
- struct tun_steering_prog, rcu);
+ struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
bpf_prog_destroy(prog->prog);
kfree(prog);
}
-static int __tun_set_steering_ebpf(struct tun_struct *tun,
- struct bpf_prog *prog)
+static int __tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+ struct bpf_prog *prog)
{
- struct tun_steering_prog *old, *new = NULL;
+ struct tun_prog *old, *new = NULL;
if (prog) {
new = kmalloc(sizeof(*new), GFP_KERNEL);
@@ -2049,13 +2048,13 @@ static int __tun_set_steering_ebpf(struct tun_struct *tun,
}
spin_lock_bh(&tun->lock);
- old = rcu_dereference_protected(tun->steering_prog,
+ old = rcu_dereference_protected(*prog_p,
lockdep_is_held(&tun->lock));
- rcu_assign_pointer(tun->steering_prog, new);
+ rcu_assign_pointer(*prog_p, new);
spin_unlock_bh(&tun->lock);
if (old)
- call_rcu(&old->rcu, tun_steering_prog_free);
+ call_rcu(&old->rcu, tun_prog_free);
return 0;
}
@@ -2068,7 +2067,7 @@ static void tun_free_netdev(struct net_device *dev)
free_percpu(tun->pcpu_stats);
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
- __tun_set_steering_ebpf(tun, NULL);
+ __tun_set_ebpf(tun, &tun->steering_prog, NULL);
}
static void tun_setup(struct net_device *dev)
@@ -2550,7 +2549,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
return ret;
}
-static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
+static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+ void __user *data)
{
struct bpf_prog *prog;
int fd;
@@ -2566,7 +2566,7 @@ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
return PTR_ERR(prog);
}
- return __tun_set_steering_ebpf(tun, prog);
+ return __tun_set_ebpf(tun, prog_p, prog);
}
static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
@@ -2846,7 +2846,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
case TUNSETSTEERINGEBPF:
- ret = tun_set_steering_ebpf(tun, argp);
+ ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
break;
default:
--
2.7.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter
2018-01-05 3:54 [PATCH net-next V2 0/2] tun: allow to attach eBPF filter Jason Wang
2018-01-05 3:54 ` [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
@ 2018-01-05 3:54 ` Jason Wang
2018-01-05 16:21 ` Willem de Bruijn
1 sibling, 1 reply; 5+ messages in thread
From: Jason Wang @ 2018-01-05 3:54 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang
This patch allows userspace to attach an eBPF filter to tun. This will
allow VM dataplane filtering to be implemented in a more efficient way
compared to a cBPF filter, by allowing either qemu or libvirt to
attach an eBPF filter to tun.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/tun.c | 39 +++++++++++++++++++++++++++++++++++----
include/uapi/linux/if_tun.h | 1 +
2 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0853829..9fc8b70 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -238,6 +238,12 @@ struct tun_struct {
struct tun_pcpu_stats __percpu *pcpu_stats;
struct bpf_prog __rcu *xdp_prog;
struct tun_prog __rcu *steering_prog;
+ struct tun_prog __rcu *filter_prog;
+};
+
+struct veth {
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
};
static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
#endif
}
+static unsigned int run_ebpf_filter(struct tun_struct *tun,
+ struct sk_buff *skb,
+ int len)
+{
+ struct tun_prog *prog = rcu_dereference(tun->filter_prog);
+
+ if (prog)
+ len = bpf_prog_run_clear_cb(prog->prog, skb);
+
+ return len;
+}
+
/* Net device start xmit */
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
int txq = skb->queue_mapping;
struct tun_file *tfile;
+ int len = skb->len;
rcu_read_lock();
tfile = rcu_dereference(tun->tfiles[txq]);
@@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
sk_filter(tfile->socket.sk, skb))
goto drop;
+ len = run_ebpf_filter(tun, skb, len);
+
+ /* Trim extra bytes since we may inster vlan proto & TCI
+ * in tun_put_user().
+ */
+ if (skb_vlan_tag_present(skb))
+ len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
+ if (len <= 0 || pskb_trim(skb, len))
+ goto drop;
+
if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
goto drop;
@@ -1904,10 +1933,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vlan_hlen) {
int ret;
- struct {
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
- } veth;
+ struct veth veth;
veth.h_vlan_proto = skb->vlan_proto;
veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
@@ -2068,6 +2094,7 @@ static void tun_free_netdev(struct net_device *dev)
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
__tun_set_ebpf(tun, &tun->steering_prog, NULL);
+ __tun_set_ebpf(tun, &tun->filter_prog, NULL);
}
static void tun_setup(struct net_device *dev)
@@ -2849,6 +2876,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
break;
+ case TUNSETFILTEREBPF:
+ ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index fb38c17..ee432cd 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -58,6 +58,7 @@
#define TUNSETVNETBE _IOW('T', 222, int)
#define TUNGETVNETBE _IOR('T', 223, int)
#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
+#define TUNSETFILTEREBPF _IOR('T', 225, int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
--
2.7.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter
2018-01-05 3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
@ 2018-01-05 16:21 ` Willem de Bruijn
2018-01-08 3:55 ` Jason Wang
0 siblings, 1 reply; 5+ messages in thread
From: Willem de Bruijn @ 2018-01-05 16:21 UTC (permalink / raw)
To: Jason Wang
Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn
On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote:
> This patch allows userspace to attach eBPF filter to tun. This will
> allow to implement VM dataplane filtering in a more efficient way
> compared to cBPF filter by allowing either qemu or libvirt to
> attach eBPF filter to tun.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> drivers/net/tun.c | 39 +++++++++++++++++++++++++++++++++++----
> include/uapi/linux/if_tun.h | 1 +
> 2 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 0853829..9fc8b70 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -238,6 +238,12 @@ struct tun_struct {
> struct tun_pcpu_stats __percpu *pcpu_stats;
> struct bpf_prog __rcu *xdp_prog;
> struct tun_prog __rcu *steering_prog;
> + struct tun_prog __rcu *filter_prog;
> +};
> +
> +struct veth {
> + __be16 h_vlan_proto;
> + __be16 h_vlan_TCI;
> };
>
> static int tun_napi_receive(struct napi_struct *napi, int budget)
> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
> #endif
> }
>
> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
> + struct sk_buff *skb,
> + int len)
> +{
> + struct tun_prog *prog = rcu_dereference(tun->filter_prog);
> +
> + if (prog)
> + len = bpf_prog_run_clear_cb(prog->prog, skb);
> +
> + return len;
> +}
> +
> /* Net device start xmit */
> static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> {
> struct tun_struct *tun = netdev_priv(dev);
> int txq = skb->queue_mapping;
> struct tun_file *tfile;
> + int len = skb->len;
>
> rcu_read_lock();
> tfile = rcu_dereference(tun->tfiles[txq]);
> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> sk_filter(tfile->socket.sk, skb))
> goto drop;
>
> + len = run_ebpf_filter(tun, skb, len);
> +
> + /* Trim extra bytes since we may inster vlan proto & TCI
inster -> insert
> + * in tun_put_user().
> + */
> + if (skb_vlan_tag_present(skb))
> + len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
no need for testing skb_vlan_tag_present twice.
more importantly, why trim these bytes unconditionally?
only if the filter trims a packet to a length shorter than the minimum
could this cause problems. sk_filter_trim_cap with a lower bound avoids
that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter
2018-01-05 16:21 ` Willem de Bruijn
@ 2018-01-08 3:55 ` Jason Wang
0 siblings, 0 replies; 5+ messages in thread
From: Jason Wang @ 2018-01-08 3:55 UTC (permalink / raw)
To: Willem de Bruijn
Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn
On 2018年01月06日 00:21, Willem de Bruijn wrote:
> On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote:
>> This patch allows userspace to attach eBPF filter to tun. This will
>> allow to implement VM dataplane filtering in a more efficient way
>> compared to cBPF filter by allowing either qemu or libvirt to
>> attach eBPF filter to tun.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>> drivers/net/tun.c | 39 +++++++++++++++++++++++++++++++++++----
>> include/uapi/linux/if_tun.h | 1 +
>> 2 files changed, 36 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 0853829..9fc8b70 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -238,6 +238,12 @@ struct tun_struct {
>> struct tun_pcpu_stats __percpu *pcpu_stats;
>> struct bpf_prog __rcu *xdp_prog;
>> struct tun_prog __rcu *steering_prog;
>> + struct tun_prog __rcu *filter_prog;
>> +};
>> +
>> +struct veth {
>> + __be16 h_vlan_proto;
>> + __be16 h_vlan_TCI;
>> };
>>
>> static int tun_napi_receive(struct napi_struct *napi, int budget)
>> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>> #endif
>> }
>>
>> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
>> + struct sk_buff *skb,
>> + int len)
>> +{
>> + struct tun_prog *prog = rcu_dereference(tun->filter_prog);
>> +
>> + if (prog)
>> + len = bpf_prog_run_clear_cb(prog->prog, skb);
>> +
>> + return len;
>> +}
>> +
>> /* Net device start xmit */
>> static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>> {
>> struct tun_struct *tun = netdev_priv(dev);
>> int txq = skb->queue_mapping;
>> struct tun_file *tfile;
>> + int len = skb->len;
>>
>> rcu_read_lock();
>> tfile = rcu_dereference(tun->tfiles[txq]);
>> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>> sk_filter(tfile->socket.sk, skb))
>> goto drop;
>>
>> + len = run_ebpf_filter(tun, skb, len);
>> +
>> + /* Trim extra bytes since we may inster vlan proto & TCI
> inster -> insert
Will fix.
>
>> + * in tun_put_user().
>> + */
>> + if (skb_vlan_tag_present(skb))
>> + len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
> no need for testing skb_vlan_tag_present twice.
Right.
> more importantly, why trim these bytes unconditionally?
>
> only if the filter trims a packet to a length shorter than the the minimum
> could this cause problems. sk_filter_trim_cap with a lower bound avoids
> that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;
The problem is, if the filter wants to trim the packet to 50 bytes, we may
get 54 bytes if a vlan tag is present. This seems wrong.
Thanks
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2018-01-08 3:55 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-05 3:54 [PATCH net-next V2 0/2] tun: allow to attach eBPF filter Jason Wang
2018-01-05 3:54 ` [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
2018-01-05 3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
2018-01-05 16:21 ` Willem de Bruijn
2018-01-08 3:55 ` Jason Wang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).