netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next V2 0/2] tun: allow to attahc eBPF filter
@ 2018-01-05  3:54 Jason Wang
  2018-01-05  3:54 ` [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
  2018-01-05  3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
  0 siblings, 2 replies; 5+ messages in thread
From: Jason Wang @ 2018-01-05  3:54 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang

Hi all:

This series tries to implement eBPF socket filter for tun. This could
be used for implementing efficient virtio-net receive filter for
vhost-net.

Thanks

Changes from V1:
- trim more bytes if vlan tag is existed to make sure the packet
  length does not exceed what is allowed by the filter.

Jason Wang (2):
  tuntap: rename struct tun_steering_prog to struct tun_prog
  tun: allow to attach ebpf socket filter

 drivers/net/tun.c           | 71 ++++++++++++++++++++++++++++++++-------------
 include/uapi/linux/if_tun.h |  1 +
 2 files changed, 52 insertions(+), 20 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog
  2018-01-05  3:54 [PATCH net-next V2 0/2] tun: allow to attahc eBPF filter Jason Wang
@ 2018-01-05  3:54 ` Jason Wang
  2018-01-05  3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
  1 sibling, 0 replies; 5+ messages in thread
From: Jason Wang @ 2018-01-05  3:54 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang

To be reused by other eBPF program other than queue selection.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d631..0853829 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -195,7 +195,7 @@ struct tun_flow_entry {
 
 #define TUN_NUM_FLOW_ENTRIES 1024
 
-struct tun_steering_prog {
+struct tun_prog {
 	struct rcu_head rcu;
 	struct bpf_prog *prog;
 };
@@ -237,7 +237,7 @@ struct tun_struct {
 	u32 rx_batched;
 	struct tun_pcpu_stats __percpu *pcpu_stats;
 	struct bpf_prog __rcu *xdp_prog;
-	struct tun_steering_prog __rcu *steering_prog;
+	struct tun_prog __rcu *steering_prog;
 };
 
 static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -571,7 +571,7 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 
 static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 {
-	struct tun_steering_prog *prog;
+	struct tun_prog *prog;
 	u16 ret = 0;
 
 	prog = rcu_dereference(tun->steering_prog);
@@ -2027,19 +2027,18 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return ret;
 }
 
-static void tun_steering_prog_free(struct rcu_head *rcu)
+static void tun_prog_free(struct rcu_head *rcu)
 {
-	struct tun_steering_prog *prog = container_of(rcu,
-					 struct tun_steering_prog, rcu);
+	struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
 
 	bpf_prog_destroy(prog->prog);
 	kfree(prog);
 }
 
-static int __tun_set_steering_ebpf(struct tun_struct *tun,
-				   struct bpf_prog *prog)
+static int __tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+			  struct bpf_prog *prog)
 {
-	struct tun_steering_prog *old, *new = NULL;
+	struct tun_prog *old, *new = NULL;
 
 	if (prog) {
 		new = kmalloc(sizeof(*new), GFP_KERNEL);
@@ -2049,13 +2048,13 @@ static int __tun_set_steering_ebpf(struct tun_struct *tun,
 	}
 
 	spin_lock_bh(&tun->lock);
-	old = rcu_dereference_protected(tun->steering_prog,
+	old = rcu_dereference_protected(*prog_p,
 					lockdep_is_held(&tun->lock));
-	rcu_assign_pointer(tun->steering_prog, new);
+	rcu_assign_pointer(*prog_p, new);
 	spin_unlock_bh(&tun->lock);
 
 	if (old)
-		call_rcu(&old->rcu, tun_steering_prog_free);
+		call_rcu(&old->rcu, tun_prog_free);
 
 	return 0;
 }
@@ -2068,7 +2067,7 @@ static void tun_free_netdev(struct net_device *dev)
 	free_percpu(tun->pcpu_stats);
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
-	__tun_set_steering_ebpf(tun, NULL);
+	__tun_set_ebpf(tun, &tun->steering_prog, NULL);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -2550,7 +2549,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 	return ret;
 }
 
-static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
+static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+			void __user *data)
 {
 	struct bpf_prog *prog;
 	int fd;
@@ -2566,7 +2566,7 @@ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
 			return PTR_ERR(prog);
 	}
 
-	return __tun_set_steering_ebpf(tun, prog);
+	return __tun_set_ebpf(tun, prog_p, prog);
 }
 
 static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
@@ -2846,7 +2846,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		break;
 
 	case TUNSETSTEERINGEBPF:
-		ret = tun_set_steering_ebpf(tun, argp);
+		ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
 		break;
 
 	default:
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter
  2018-01-05  3:54 [PATCH net-next V2 0/2] tun: allow to attahc eBPF filter Jason Wang
  2018-01-05  3:54 ` [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
@ 2018-01-05  3:54 ` Jason Wang
  2018-01-05 16:21   ` Willem de Bruijn
  1 sibling, 1 reply; 5+ messages in thread
From: Jason Wang @ 2018-01-05  3:54 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang

This patch allows userspace to attach eBPF filter to tun. This will
allow to implement VM dataplane filtering in a more efficient way
compared to cBPF filter by allowing either qemu or libvirt to
attach eBPF filter to tun.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c           | 39 +++++++++++++++++++++++++++++++++++----
 include/uapi/linux/if_tun.h |  1 +
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0853829..9fc8b70 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -238,6 +238,12 @@ struct tun_struct {
 	struct tun_pcpu_stats __percpu *pcpu_stats;
 	struct bpf_prog __rcu *xdp_prog;
 	struct tun_prog __rcu *steering_prog;
+	struct tun_prog __rcu *filter_prog;
+};
+
+struct veth {
+	__be16 h_vlan_proto;
+	__be16 h_vlan_TCI;
 };
 
 static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
 #endif
 }
 
+static unsigned int run_ebpf_filter(struct tun_struct *tun,
+				    struct sk_buff *skb,
+				    int len)
+{
+	struct tun_prog *prog = rcu_dereference(tun->filter_prog);
+
+	if (prog)
+		len = bpf_prog_run_clear_cb(prog->prog, skb);
+
+	return len;
+}
+
 /* Net device start xmit */
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	int txq = skb->queue_mapping;
 	struct tun_file *tfile;
+	int len = skb->len;
 
 	rcu_read_lock();
 	tfile = rcu_dereference(tun->tfiles[txq]);
@@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	    sk_filter(tfile->socket.sk, skb))
 		goto drop;
 
+	len = run_ebpf_filter(tun, skb, len);
+
+	/* Trim extra bytes since we may inster vlan proto & TCI
+	 * in tun_put_user().
+	 */
+	if (skb_vlan_tag_present(skb))
+		len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
+	if (len <= 0 || pskb_trim(skb, len))
+		goto drop;
+
 	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		goto drop;
 
@@ -1904,10 +1933,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 
 	if (vlan_hlen) {
 		int ret;
-		struct {
-			__be16 h_vlan_proto;
-			__be16 h_vlan_TCI;
-		} veth;
+		struct veth veth;
 
 		veth.h_vlan_proto = skb->vlan_proto;
 		veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
@@ -2068,6 +2094,7 @@ static void tun_free_netdev(struct net_device *dev)
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
 	__tun_set_ebpf(tun, &tun->steering_prog, NULL);
+	__tun_set_ebpf(tun, &tun->filter_prog, NULL);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -2849,6 +2876,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
 		break;
 
+	case TUNSETFILTEREBPF:
+		ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index fb38c17..ee432cd 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -58,6 +58,7 @@
 #define TUNSETVNETBE _IOW('T', 222, int)
 #define TUNGETVNETBE _IOR('T', 223, int)
 #define TUNSETSTEERINGEBPF _IOR('T', 224, int)
+#define TUNSETFILTEREBPF _IOR('T', 225, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter
  2018-01-05  3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
@ 2018-01-05 16:21   ` Willem de Bruijn
  2018-01-08  3:55     ` Jason Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Willem de Bruijn @ 2018-01-05 16:21 UTC (permalink / raw)
  To: Jason Wang
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn

On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote:
> This patch allows userspace to attach eBPF filter to tun. This will
> allow to implement VM dataplane filtering in a more efficient way
> compared to cBPF filter by allowing either qemu or libvirt to
> attach eBPF filter to tun.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/net/tun.c           | 39 +++++++++++++++++++++++++++++++++++----
>  include/uapi/linux/if_tun.h |  1 +
>  2 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 0853829..9fc8b70 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -238,6 +238,12 @@ struct tun_struct {
>         struct tun_pcpu_stats __percpu *pcpu_stats;
>         struct bpf_prog __rcu *xdp_prog;
>         struct tun_prog __rcu *steering_prog;
> +       struct tun_prog __rcu *filter_prog;
> +};
> +
> +struct veth {
> +       __be16 h_vlan_proto;
> +       __be16 h_vlan_TCI;
>  };
>
>  static int tun_napi_receive(struct napi_struct *napi, int budget)
> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>  #endif
>  }
>
> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
> +                                   struct sk_buff *skb,
> +                                   int len)
> +{
> +       struct tun_prog *prog = rcu_dereference(tun->filter_prog);
> +
> +       if (prog)
> +               len = bpf_prog_run_clear_cb(prog->prog, skb);
> +
> +       return len;
> +}
> +
>  /* Net device start xmit */
>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
>         struct tun_struct *tun = netdev_priv(dev);
>         int txq = skb->queue_mapping;
>         struct tun_file *tfile;
> +       int len = skb->len;
>
>         rcu_read_lock();
>         tfile = rcu_dereference(tun->tfiles[txq]);
> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>             sk_filter(tfile->socket.sk, skb))
>                 goto drop;
>
> +       len = run_ebpf_filter(tun, skb, len);
> +
> +       /* Trim extra bytes since we may inster vlan proto & TCI

inster -> insert

> +        * in tun_put_user().
> +        */
> +       if (skb_vlan_tag_present(skb))
> +               len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;

no need for testing skb_vlan_tag_present twice.

more importantly, why trim these bytes unconditionally?

only if the filter trims a packet to a length shorter than the the minimum
could this cause problems. sk_filter_trim_cap with a lower bound avoids
that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter
  2018-01-05 16:21   ` Willem de Bruijn
@ 2018-01-08  3:55     ` Jason Wang
  0 siblings, 0 replies; 5+ messages in thread
From: Jason Wang @ 2018-01-08  3:55 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn



On 2018年01月06日 00:21, Willem de Bruijn wrote:
> On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote:
>> This patch allows userspace to attach eBPF filter to tun. This will
>> allow to implement VM dataplane filtering in a more efficient way
>> compared to cBPF filter by allowing either qemu or libvirt to
>> attach eBPF filter to tun.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   drivers/net/tun.c           | 39 +++++++++++++++++++++++++++++++++++----
>>   include/uapi/linux/if_tun.h |  1 +
>>   2 files changed, 36 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 0853829..9fc8b70 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -238,6 +238,12 @@ struct tun_struct {
>>          struct tun_pcpu_stats __percpu *pcpu_stats;
>>          struct bpf_prog __rcu *xdp_prog;
>>          struct tun_prog __rcu *steering_prog;
>> +       struct tun_prog __rcu *filter_prog;
>> +};
>> +
>> +struct veth {
>> +       __be16 h_vlan_proto;
>> +       __be16 h_vlan_TCI;
>>   };
>>
>>   static int tun_napi_receive(struct napi_struct *napi, int budget)
>> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>>   #endif
>>   }
>>
>> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
>> +                                   struct sk_buff *skb,
>> +                                   int len)
>> +{
>> +       struct tun_prog *prog = rcu_dereference(tun->filter_prog);
>> +
>> +       if (prog)
>> +               len = bpf_prog_run_clear_cb(prog->prog, skb);
>> +
>> +       return len;
>> +}
>> +
>>   /* Net device start xmit */
>>   static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>>          struct tun_struct *tun = netdev_priv(dev);
>>          int txq = skb->queue_mapping;
>>          struct tun_file *tfile;
>> +       int len = skb->len;
>>
>>          rcu_read_lock();
>>          tfile = rcu_dereference(tun->tfiles[txq]);
>> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>              sk_filter(tfile->socket.sk, skb))
>>                  goto drop;
>>
>> +       len = run_ebpf_filter(tun, skb, len);
>> +
>> +       /* Trim extra bytes since we may inster vlan proto & TCI
> inster -> insert

Will fix.

>
>> +        * in tun_put_user().
>> +        */
>> +       if (skb_vlan_tag_present(skb))
>> +               len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
> no need for testing skb_vlan_tag_present twice.

Right.

> more importantly, why trim these bytes unconditionally?
>
> only if the filter trims a packet to a length shorter than the the minimum
> could this cause problems. sk_filter_trim_cap with a lower bound avoids
> that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;

The problem is, if the filter want to trim to packet to 50 bytes, we may 
get 54 bytes if vlan tag is existed. This seems wrong.

Thanks

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-01-08  3:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-01-05  3:54 [PATCH net-next V2 0/2] tun: allow to attahc eBPF filter Jason Wang
2018-01-05  3:54 ` [PATCH net-next V2 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
2018-01-05  3:54 ` [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter Jason Wang
2018-01-05 16:21   ` Willem de Bruijn
2018-01-08  3:55     ` Jason Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).