[PATCH v2 net-next] net: filter: export pkt_type

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
@ 2014-09-03 21:08 Denis Kirjanov
  2014-09-03 22:01 ` Daniel Borkmann
  0 siblings, 1 reply; 21+ messages in thread
From: Denis Kirjanov @ 2014-09-03 21:08 UTC (permalink / raw)
  To: netdev
  Cc: Denis Kirjanov, Markos Chandras, Martin Schwidefsky,
	Daniel Borkmann, Alexei Starovoitov

Currently we have 2 pkt_type_offset functions doing
the same thing and spread across the architecture files.
Let's use the generic helper routine.

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Daniel Borkmann <dborkman@redhat.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>

V2: Initialize the pkt_type_offset through the pure_initcall()
---
 arch/mips/net/bpf_jit.c      | 27 ++-------------------------
 arch/s390/net/bpf_jit_comp.c | 31 -------------------------------
 include/linux/filter.h       |  2 ++
 net/core/filter.c            | 16 +++++++++++++---
 4 files changed, 17 insertions(+), 59 deletions(-)

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 05a5661..bb9b53f 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
 	return (u64)err << 32 | ntohl(ret);
 }
 
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-static int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	u8 *ct = (u8 *)&skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
-	return -1;
-}
-
 static int build_body(struct jit_ctx *ctx)
 {
 	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
@@ -1332,11 +1311,9 @@ jmp_cmp:
 		case BPF_ANC | SKF_AD_PKTTYPE:
 			ctx->flags |= SEEN_SKB;
 
-			off = pkt_type_offset();
-
-			if (off < 0)
+			if (pkt_type_offset < 0)
 				return -1;
-			emit_load_byte(r_tmp, r_skb, off, ctx);
+			emit_load_byte(r_tmp, r_skb, pkt_type_offset, ctx);
 			/* Keep only the last 3 bits */
 			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
 #ifdef __BIG_ENDIAN_BITFIELD
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 61e45b7..caa0cab 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -223,37 +223,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 	EMIT2(0x07fe);
 }
 
-/* Helper to find the offset of pkt_type in sk_buff
- * Make sure its still a 3bit field starting at the MSBs within a byte.
- */
-#define PKT_TYPE_MAX 0xe0
-static int pkt_type_offset;
-
-static int __init bpf_pkt_type_offset_init(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	char *ct = (char *)&skb_probe;
-	int off;
-
-	pkt_type_offset = -1;
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (!ct[off])
-			continue;
-		if (ct[off] == PKT_TYPE_MAX)
-			pkt_type_offset = off;
-		else {
-			/* Found non matching bit pattern, fix needed. */
-			WARN_ON_ONCE(1);
-			pkt_type_offset = -1;
-			return -1;
-		}
-	}
-	return 0;
-}
-device_initcall(bpf_pkt_type_offset_init);
-
 /*
  * make sure we dont leak kernel information to user
  */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5227ab..6814b54 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -424,6 +424,8 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
 	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
+extern __read_mostly int pkt_type_offset;
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index d814b8a..edd17f1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -96,21 +96,29 @@ EXPORT_SYMBOL(sk_filter);
 #else
 #define PKT_TYPE_MAX	7
 #endif
-static unsigned int pkt_type_offset(void)
+
+__read_mostly int pkt_type_offset;
+
+static int __init init_pkt_type_offset(void)
 {
 	struct sk_buff skb_probe = { .pkt_type = ~0, };
 	u8 *ct = (u8 *) &skb_probe;
 	unsigned int off;
 
+	pkt_type_offset = -1;
 	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
+		if (ct[off] == PKT_TYPE_MAX) {
+			pkt_type_offset = off;
 			return off;
+		}
 	}
 
 	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
 	return -1;
 }
 
+pure_initcall(init_pkt_type_offset);
+
 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
@@ -190,8 +198,10 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
+		if (pkt_type_offset < 0)
+			return false;
 		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
-				    pkt_type_offset());
+				    pkt_type_offset);
 		if (insn->off < 0)
 			return false;
 		insn++;
-- 
2.0.0

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-03 21:08 [PATCH v2 net-next] net: filter: export pkt_type_offset() helper Denis Kirjanov
@ 2014-09-03 22:01 ` Daniel Borkmann
  2014-09-03 23:14   ` Alexei Starovoitov
  0 siblings, 1 reply; 21+ messages in thread
From: Daniel Borkmann @ 2014-09-03 22:01 UTC (permalink / raw)
  To: Denis Kirjanov
  Cc: netdev, Markos Chandras, Martin Schwidefsky, Alexei Starovoitov

On 09/03/2014 11:08 PM, Denis Kirjanov wrote:
> Currently we have 2 pkt_type_offset functions doing
> the same thing and spread across the architecture files.
> Let's use the generic helper routine.
>
> Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
> Cc: Markos Chandras <markos.chandras@imgtec.com>
> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
> Cc: Daniel Borkmann <dborkman@redhat.com>
> Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
>
> V2: Initialize the pkt_type_offset through the pure_initcall()
> ---
>   arch/mips/net/bpf_jit.c      | 27 ++-------------------------
>   arch/s390/net/bpf_jit_comp.c | 31 -------------------------------
>   include/linux/filter.h       |  2 ++
>   net/core/filter.c            | 16 +++++++++++++---
>   4 files changed, 17 insertions(+), 59 deletions(-)
>
> diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
> index 05a5661..bb9b53f 100644
> --- a/arch/mips/net/bpf_jit.c
> +++ b/arch/mips/net/bpf_jit.c
> @@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
>   	return (u64)err << 32 | ntohl(ret);
>   }
>
> -#ifdef __BIG_ENDIAN_BITFIELD
> -#define PKT_TYPE_MAX	(7 << 5)
> -#else
> -#define PKT_TYPE_MAX	7
> -#endif
> -static int pkt_type_offset(void)
> -{
> -	struct sk_buff skb_probe = {
> -		.pkt_type = ~0,
> -	};
> -	u8 *ct = (u8 *)&skb_probe;
> -	unsigned int off;
> -
> -	for (off = 0; off < sizeof(struct sk_buff); off++) {
> -		if (ct[off] == PKT_TYPE_MAX)
> -			return off;
> -	}
> -	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
> -	return -1;
> -}
> -
>   static int build_body(struct jit_ctx *ctx)
>   {
>   	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
> @@ -1332,11 +1311,9 @@ jmp_cmp:
>   		case BPF_ANC | SKF_AD_PKTTYPE:
>   			ctx->flags |= SEEN_SKB;
>
> -			off = pkt_type_offset();
> -
> -			if (off < 0)
> +			if (pkt_type_offset < 0)
>   				return -1;
> -			emit_load_byte(r_tmp, r_skb, off, ctx);
> +			emit_load_byte(r_tmp, r_skb, pkt_type_offset, ctx);
>   			/* Keep only the last 3 bits */
>   			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
>   #ifdef __BIG_ENDIAN_BITFIELD
> diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
> index 61e45b7..caa0cab 100644
> --- a/arch/s390/net/bpf_jit_comp.c
> +++ b/arch/s390/net/bpf_jit_comp.c
> @@ -223,37 +223,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
>   	EMIT2(0x07fe);
>   }
>
> -/* Helper to find the offset of pkt_type in sk_buff
> - * Make sure its still a 3bit field starting at the MSBs within a byte.
> - */
> -#define PKT_TYPE_MAX 0xe0
> -static int pkt_type_offset;
> -
> -static int __init bpf_pkt_type_offset_init(void)
> -{
> -	struct sk_buff skb_probe = {
> -		.pkt_type = ~0,
> -	};
> -	char *ct = (char *)&skb_probe;
> -	int off;
> -
> -	pkt_type_offset = -1;
> -	for (off = 0; off < sizeof(struct sk_buff); off++) {
> -		if (!ct[off])
> -			continue;
> -		if (ct[off] == PKT_TYPE_MAX)
> -			pkt_type_offset = off;
> -		else {
> -			/* Found non matching bit pattern, fix needed. */
> -			WARN_ON_ONCE(1);
> -			pkt_type_offset = -1;
> -			return -1;
> -		}
> -	}
> -	return 0;
> -}
> -device_initcall(bpf_pkt_type_offset_init);
> -
>   /*
>    * make sure we dont leak kernel information to user
>    */
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index a5227ab..6814b54 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -424,6 +424,8 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
>   	return bpf_internal_load_pointer_neg_helper(skb, k, size);
>   }
>
> +extern __read_mostly int pkt_type_offset;
> +

Nit:

extern unsigned int pkt_type_offset;

(Perhaps this should also rather be named bpf_pkt_type_offset to not
  cause any current/future name collisions when it's in the header?)

>   #ifdef CONFIG_BPF_JIT
>   #include <stdarg.h>
>   #include <linux/linkage.h>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index d814b8a..edd17f1 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -96,21 +96,29 @@ EXPORT_SYMBOL(sk_filter);
>   #else
>   #define PKT_TYPE_MAX	7
>   #endif
> -static unsigned int pkt_type_offset(void)
> +
> +__read_mostly int pkt_type_offset;

Nit:

unsigned int pkt_type_offset __read_mostly;

> +static int __init init_pkt_type_offset(void)
>   {
>   	struct sk_buff skb_probe = { .pkt_type = ~0, };
>   	u8 *ct = (u8 *) &skb_probe;
>   	unsigned int off;
>
> +	pkt_type_offset = -1;
>   	for (off = 0; off < sizeof(struct sk_buff); off++) {
> -		if (ct[off] == PKT_TYPE_MAX)
> +		if (ct[off] == PKT_TYPE_MAX) {
> +			pkt_type_offset = off;
>   			return off;
> +		}
>   	}

Why not BUG_ON() when pkt_type_offset could not be found?

That way we would know that something is broken and you can
spare the checks for negative offsets everywhere ...

>   	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
>   	return -1;
>   }
>
> +pure_initcall(init_pkt_type_offset);
> +
>   static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
>   {
>   	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
> @@ -190,8 +198,10 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
>   		break;
>
>   	case SKF_AD_OFF + SKF_AD_PKTTYPE:
> +		if (pkt_type_offset < 0)
> +			return false;
>   		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
> -				    pkt_type_offset());
> +				    pkt_type_offset);
>   		if (insn->off < 0)
>   			return false;
>   		insn++;
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-03 22:01 ` Daniel Borkmann
@ 2014-09-03 23:14   ` Alexei Starovoitov
  2014-09-04  1:08     ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Alexei Starovoitov @ 2014-09-03 23:14 UTC (permalink / raw)
  To: Daniel Borkmann, Eric Dumazet
  Cc: Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

On Wed, Sep 3, 2014 at 3:01 PM, Daniel Borkmann <dborkman@redhat.com> wrote:
> On 09/03/2014 11:08 PM, Denis Kirjanov wrote:
>>
>
>> +static int __init init_pkt_type_offset(void)
>>   {
>>         struct sk_buff skb_probe = { .pkt_type = ~0, };
>>         u8 *ct = (u8 *) &skb_probe;
>>         unsigned int off;
>>
>> +       pkt_type_offset = -1;
>>         for (off = 0; off < sizeof(struct sk_buff); off++) {
>> -               if (ct[off] == PKT_TYPE_MAX)
>> +               if (ct[off] == PKT_TYPE_MAX) {
>> +                       pkt_type_offset = off;
>>                         return off;
>> +               }
>>         }
>
>
> Why not BUG_ON() when pkt_type_offset could not be found?
>
> That way we would know that something is broken and you can
> spare the checks for negative offsets everywhere ...

also such BUG_ON will be right in the face, since the kernel
won't even boot :)
I guess we can only hit it if compiler changes and starts
doing crazing things with bitfields.
Eric, you're the original author of this function, thoughts?

>>         case SKF_AD_OFF + SKF_AD_PKTTYPE:
>> +               if (pkt_type_offset < 0)
>> +                       return false;

with BUG_ON we wouldn't need this check.

>>                 *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
>> -                                   pkt_type_offset());
>> +                                   pkt_type_offset);
>>                 if (insn->off < 0)
>>                         return false;

and this one too.
and other checks in arch/*/jit

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-03 23:14   ` Alexei Starovoitov
@ 2014-09-04  1:08     ` Eric Dumazet
  2014-09-04  1:25       ` Hannes Frederic Sowa
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2014-09-04  1:08 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Daniel Borkmann, Eric Dumazet, Denis Kirjanov,
	netdev@vger.kernel.org, Markos Chandras, Martin Schwidefsky

On Wed, 2014-09-03 at 16:14 -0700, Alexei Starovoitov wrote:
> On Wed, Sep 3, 2014 at 3:01 PM, Daniel Borkmann <dborkman@redhat.com> wrote:
> > On 09/03/2014 11:08 PM, Denis Kirjanov wrote:
> >>
> >
> >> +static int __init init_pkt_type_offset(void)
> >>   {
> >>         struct sk_buff skb_probe = { .pkt_type = ~0, };
> >>         u8 *ct = (u8 *) &skb_probe;
> >>         unsigned int off;
> >>
> >> +       pkt_type_offset = -1;
> >>         for (off = 0; off < sizeof(struct sk_buff); off++) {
> >> -               if (ct[off] == PKT_TYPE_MAX)
> >> +               if (ct[off] == PKT_TYPE_MAX) {
> >> +                       pkt_type_offset = off;
> >>                         return off;
> >> +               }
> >>         }
> >
> >
> > Why not BUG_ON() when pkt_type_offset could not be found?
> >
> > That way we would know that something is broken and you can
> > spare the checks for negative offsets everywhere ...
> 
> also such BUG_ON will be right in the face, since the kernel
> won't even boot :)
> I guess we can only hit it if compiler changes and starts
> doing crazing things with bitfields.
> Eric, you're the original author of this function, thoughts?

At the time it was written, BPF JIT would have failed and we would
revert to interpreter, thus BUG() was not needed : I used one
pr_err_once()

Note that pkt_type could be moved easily in sk_buff definition, so a
BUG() would be OK after this patch, since the check is done as a
pure_initcall() at boot time.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04  1:08     ` Eric Dumazet
@ 2014-09-04  1:25       ` Hannes Frederic Sowa
  2014-09-04  2:05         ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-04  1:25 UTC (permalink / raw)
  To: Eric Dumazet, Alexei Starovoitov
  Cc: Daniel Borkmann, Eric Dumazet, Denis Kirjanov, netdev,
	Markos Chandras, Martin Schwidefsky

On Thu, Sep 4, 2014, at 03:08, Eric Dumazet wrote:
> On Wed, 2014-09-03 at 16:14 -0700, Alexei Starovoitov wrote:
> > On Wed, Sep 3, 2014 at 3:01 PM, Daniel Borkmann <dborkman@redhat.com> wrote:
> > > On 09/03/2014 11:08 PM, Denis Kirjanov wrote:
> > >>
> > >
> > >> +static int __init init_pkt_type_offset(void)
> > >>   {
> > >>         struct sk_buff skb_probe = { .pkt_type = ~0, };
> > >>         u8 *ct = (u8 *) &skb_probe;
> > >>         unsigned int off;
> > >>
> > >> +       pkt_type_offset = -1;
> > >>         for (off = 0; off < sizeof(struct sk_buff); off++) {
> > >> -               if (ct[off] == PKT_TYPE_MAX)
> > >> +               if (ct[off] == PKT_TYPE_MAX) {
> > >> +                       pkt_type_offset = off;
> > >>                         return off;
> > >> +               }
> > >>         }
> > >
> > >
> > > Why not BUG_ON() when pkt_type_offset could not be found?
> > >
> > > That way we would know that something is broken and you can
> > > spare the checks for negative offsets everywhere ...
> > 
> > also such BUG_ON will be right in the face, since the kernel
> > won't even boot :)
> > I guess we can only hit it if compiler changes and starts
> > doing crazing things with bitfields.
> > Eric, you're the original author of this function, thoughts?
> 
> At the time it was written, BPF JIT would have failed and we would
> revert to interpreter, thus BUG() was not needed : I used one
> pr_err_once()
> 
> Note that pkt_type could be moved easily in sk_buff definition, so a
> BUG() would be OK after this patch, since the check is done as a
> pure_initcall() at boot time.

Can't we add an address marker to struct sk_buff?

Several possibilities are available:

ptrdiff_t pkt_type_offset[0]  before the pkt_type flags field

If one wants to make it more expressive:
typedef struct {} mark_struct_offset;
and add
mark_struct_offset pkt_type_offset;
at appropriate places

Or maybe an anonymous union?

pkt_type_offset would become a simple offsetof(struct sk_buff,
pkt_type_offset) then and there is no need for BUG_ON then.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04  1:25       ` Hannes Frederic Sowa
@ 2014-09-04  2:05         ` Eric Dumazet
  2014-09-04  2:43           ` Alexei Starovoitov
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2014-09-04  2:05 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Alexei Starovoitov, Daniel Borkmann, Eric Dumazet, Denis Kirjanov,
	netdev, Markos Chandras, Martin Schwidefsky

On Thu, 2014-09-04 at 03:25 +0200, Hannes Frederic Sowa wrote:

> Can't we add an address marker to struct sk_buff?
> 
> Several possibilities are available:
> 
> ptrdiff_t pkt_type_offset[0]  before the pkt_type flags field
> 
> If one wants to make it more expressive:
> typedef struct {} mark_struct_offset;
> and add
> mark_struct_offset pkt_type_offset;
> at appropriate places
> 
> Or maybe an anonymous union?
> 
> pkt_type_offset would become a simple offsetof(struct sk_buff,
> pkt_type_offset) then and there is no need for BUG_ON then.


You can try, and make sure this works on all gcc compilers, even the one
Andrew Morton uses.

And of course, you need to not introduce holes doing so.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04  2:05         ` Eric Dumazet
@ 2014-09-04  2:43           ` Alexei Starovoitov
       [not found]             ` <CAHj3AVn8q-JS0Eq0QrfpBo-WfJ+-wXumhNoeDAdkPZC7LSeB5g@mail.gmail.com>
  0 siblings, 1 reply; 21+ messages in thread
From: Alexei Starovoitov @ 2014-09-04  2:43 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Hannes Frederic Sowa, Daniel Borkmann, Eric Dumazet,
	Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

On Wed, Sep 3, 2014 at 7:05 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2014-09-04 at 03:25 +0200, Hannes Frederic Sowa wrote:
>
>> Can't we add an address marker to struct sk_buff?
>>
>> Several possibilities are available:
>>
>> ptrdiff_t pkt_type_offset[0]  before the pkt_type flags field
>>
>> If one wants to make it more expressive:
>> typedef struct {} mark_struct_offset;
>> and add
>> mark_struct_offset pkt_type_offset;
>> at appropriate places
>>
>> Or maybe an anonymous union?
>>
>> pkt_type_offset would become a simple offsetof(struct sk_buff,
>> pkt_type_offset) then and there is no need for BUG_ON then.
>
>
> You can try, and make sure this works on all gcc compilers, even the one
> Andrew Morton uses.
>
> And of course, you need to not introduce holes doing so.

good points.
I think Hannes's idea is the best. It will help to get rid of helper altogether.
For my bpf syscall proposal I've used anonymous unions and tested
them with gcc 4.2 - 4.9 on x64/i386/arm32/sparc64 and clang.
All compilers were doing just fine. So it should work.

^ permalink raw reply	[flat|nested] 21+ messages in thread

[parent not found: <CAHj3AVn8q-JS0Eq0QrfpBo-WfJ+-wXumhNoeDAdkPZC7LSeB5g@mail.gmail.com>]

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
       [not found]             ` <CAHj3AVn8q-JS0Eq0QrfpBo-WfJ+-wXumhNoeDAdkPZC7LSeB5g@mail.gmail.com>
@ 2014-09-04 11:50               ` Hannes Frederic Sowa
  2014-09-04 13:09                 ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-04 11:50 UTC (permalink / raw)
  To: Denis Kirjanov
  Cc: Alexei Starovoitov, Eric Dumazet, Daniel Borkmann, Eric Dumazet,
	Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

On Do, 2014-09-04 at 10:09 +0400, Denis Kirjanov wrote:
> On Thursday, September 4, 2014, Alexei Starovoitov <
> alexei.starovoitov@gmail.com> wrote:
> 
> > On Wed, Sep 3, 2014 at 7:05 PM, Eric Dumazet <eric.dumazet@gmail.com
> > <javascript:;>> wrote:
> > > On Thu, 2014-09-04 at 03:25 +0200, Hannes Frederic Sowa wrote:
> > >
> > >> Can't we add an address marker to struct sk_buff?
> > >>
> > >> Several possibilities are available:
> > >>
> > >> ptrdiff_t pkt_type_offset[0]  before the pkt_type flags field
> > >>
> > >> If one wants to make it more expressive:
> > >> typedef struct {} mark_struct_offset;
> > >> and add
> > >> mark_struct_offset pkt_type_offset;
> > >> at appropriate places
> > >>
> > >> Or maybe an anonymous union?
> > >>
> > >> pkt_type_offset would become a simple offsetof(struct sk_buff,
> > >> pkt_type_offset) then and there is no need for BUG_ON then.
> > >
> > >
> > > You can try, and make sure this works on all gcc compilers, even the one
> > > Andrew Morton uses.
> > >
> > > And of course, you need to not introduce holes doing so.

Sure, it will work as expected. kmemleak uses the field[0] technique and
is already used in struct sk_buff.

I checked below patch with pahole and it doesn't change anything in size
or fieldoffsets.

> >
> > good points.
> > I think Hannes's idea is the best.
> 
> Agreed.
> 
> > It will help to get rid of helper altogether.
> > For my bpf syscall proposal I've used anonymous unions and tested
> > them with gcc 4.2 - 4.9 on x64/i386/arm32/sparc64 and clang.
> > All compilers were doing just fine. So it should work.
> 
> Nice. Alexei, care to send a patch then?

Denis, I thought about something like this, you can incorperate this in
your patch if you can give it a test and check for other architectures,
thanks!

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 05a5661..0fd3f95 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb,
unsigned offset)
 	return (u64)err << 32 | ntohl(ret);
 }
 
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-static int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	u8 *ct = (u8 *)&skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be
found\n");
-	return -1;
-}
-
 static int build_body(struct jit_ctx *ctx)
 {
 	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
@@ -1332,11 +1311,7 @@ jmp_cmp:
 		case BPF_ANC | SKF_AD_PKTTYPE:
 			ctx->flags |= SEEN_SKB;
 
-			off = pkt_type_offset();
-
-			if (off < 0)
-				return -1;
-			emit_load_byte(r_tmp, r_skb, off, ctx);
+			emit_load_byte(r_tmp, r_skb, skb_pkt_type_offset(), ctx);
 			/* Keep only the last 3 bits */
 			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
 #ifdef __BIG_ENDIAN_BITFIELD
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 61e45b7..8039bb8 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -223,37 +223,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 	EMIT2(0x07fe);
 }
 
-/* Helper to find the offset of pkt_type in sk_buff
- * Make sure its still a 3bit field starting at the MSBs within a byte.
- */
-#define PKT_TYPE_MAX 0xe0
-static int pkt_type_offset;
-
-static int __init bpf_pkt_type_offset_init(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	char *ct = (char *)&skb_probe;
-	int off;
-
-	pkt_type_offset = -1;
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (!ct[off])
-			continue;
-		if (ct[off] == PKT_TYPE_MAX)
-			pkt_type_offset = off;
-		else {
-			/* Found non matching bit pattern, fix needed. */
-			WARN_ON_ONCE(1);
-			pkt_type_offset = -1;
-			return -1;
-		}
-	}
-	return 0;
-}
-device_initcall(bpf_pkt_type_offset_init);
-
 /*
  * make sure we dont leak kernel information to user
  */
@@ -753,12 +722,10 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		}
 		break;
 	case BPF_ANC | SKF_AD_PKTTYPE:
-		if (pkt_type_offset < 0)
-			goto out;
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* ic %r5,<d(pkt_type_offset)>(%r2) */
-		EMIT4_DISP(0x43502000, pkt_type_offset);
+		EMIT4_DISP(0x43502000, skb_pkt_type_offset());
 		/* srl %r5,5 */
 		EMIT4_DISP(0x88500000, 5);
 		break;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 02529fc..87b86aa 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -548,6 +548,10 @@ struct sk_buff {
 				ip_summed:2,
 				nohdr:1,
 				nfctinfo:3;
+	/* __pkt_type_offset marks the offset of the bitfield pkt_type
+	 * contains, so bpf can relatively address it
+	 */
+	int			__pkt_type_offset[0];
 	__u8			pkt_type:3,
 				fclone:2,
 				ipvs_property:1,
@@ -647,6 +651,17 @@ struct sk_buff {
 #define SKB_ALLOC_FCLONE	0x01
 #define SKB_ALLOC_RX		0x02
 
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX   (7 << 5)
+#else
+#define PKT_TYPE_MAX   7
+#endif
+
+static inline size_t skb_pkt_type_offset(void)
+{
+	return offsetof(struct sk_buff, __pkt_type_offset);
+}
+
 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 {
diff --git a/net/core/filter.c b/net/core/filter.c
index d814b8a..ca64d7a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -87,30 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sk_filter);
 
-/* Helper to find the offset of pkt_type in sk_buff structure. We want
- * to make sure its still a 3bit field starting at a byte boundary;
- * taken from arch/x86/net/bpf_jit_comp.c.
- */
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-static unsigned int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = { .pkt_type = ~0, };
-	u8 *ct = (u8 *) &skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-
-	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n",
__func__);
-	return -1;
-}
-
 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
@@ -191,7 +167,7 @@ static bool convert_bpf_extensions(struct
sock_filter *fp,
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
 		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
-				    pkt_type_offset());
+				    skb_pkt_type_offset());
 		if (insn->off < 0)
 			return false;
 		insn++;

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 11:50               ` Hannes Frederic Sowa
@ 2014-09-04 13:09                 ` Eric Dumazet
  2014-09-04 13:40                   ` Hannes Frederic Sowa
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2014-09-04 13:09 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann, Eric Dumazet,
	Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

On Thu, 2014-09-04 at 13:50 +0200, Hannes Frederic Sowa wrote:

> 
> Denis, I thought about something like this, you can incorperate this in
> your patch if you can give it a test and check for other architectures,
> thanks!
> 


> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 02529fc..87b86aa 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -548,6 +548,10 @@ struct sk_buff {
>  				ip_summed:2,
>  				nohdr:1,
>  				nfctinfo:3;
> +	/* __pkt_type_offset marks the offset of the bitfield pkt_type
> +	 * contains, so bpf can relatively address it
> +	 */
> +	int			__pkt_type_offset[0];

Why is it 'int', while the following uses __u8 ?

This means pkt_type has to stay at this bit offset, and nothing will
detect at compile time or execution time if someone did a reorg or added
a new field (it seems to be the trend lately)

__u8                          bar:4,
                              pkt_type:3,
...




>  	__u8			pkt_type:3,
>  				fclone:2,
>  				ipvs_property:1,
> @@ -647,6 +651,17 @@ struct sk_buff {
>  #define SKB_ALLOC_FCLONE	0x01
>  #define SKB_ALLOC_RX		0x02
>  

> +#ifdef __BIG_ENDIAN_BITFIELD
> +#define PKT_TYPE_MAX   (7 << 5)
> +#else
> +#define PKT_TYPE_MAX   7
> +#endif

It had a sense right before static int pkt_type_offset(void), but here,
a casual reader might be lost.

I am saying this because a reorder of the bit fields is probably needed
to speedup __copy_skb_header() : Grouping together bit fields could
allow optimized word copies instead of bit field manipulations.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 13:09                 ` Eric Dumazet
@ 2014-09-04 13:40                   ` Hannes Frederic Sowa
  2014-09-04 14:11                     ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-04 13:40 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann, Eric Dumazet,
	Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

On Do, 2014-09-04 at 06:09 -0700, Eric Dumazet wrote:
> On Thu, 2014-09-04 at 13:50 +0200, Hannes Frederic Sowa wrote:
> 
> > 
> > Denis, I thought about something like this, you can incorperate this in
> > your patch if you can give it a test and check for other architectures,
> > thanks!
> > 
> 
> 
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 02529fc..87b86aa 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -548,6 +548,10 @@ struct sk_buff {
> >  				ip_summed:2,
> >  				nohdr:1,
> >  				nfctinfo:3;
> > +	/* __pkt_type_offset marks the offset of the bitfield pkt_type
> > +	 * contains, so bpf can relatively address it
> > +	 */
> > +	int			__pkt_type_offset[0];
> 
> Why is it 'int', while the following uses __u8 ?

The type does not matter at all. Actually I wanted to use an empty
struct but was afraid it might not work on older compilers and didn't
want to check that with each version.

> This means pkt_type has to stay at this bit offset, and nothing will
> detect at compile time or execution time if someone did a reorg or added
> a new field (it seems to be the trend lately)
> 
> __u8                          bar:4,
>                               pkt_type:3,
> ...

That would be fine, one just has to adapt the PKT_TYPE_MAX macros in
case on reorders, __pkt_type_offset only needs to stay just in front of
the bitfield pkt_type is located in. I think we should move them up into
struct sk_buff, so they belong together and people see that they need to
change those in case they reorder fields.

I still think about using anonymous unions, they might let us compute
the pkt_type mask at compiletime, I think.

Also, maybe we can compute offset at compiletime if we switch to
anonymous union.

static inline unsigned int skb_pkt_type_offset(void)
{
 unsigned int mask;
 struct sk_buff skb = {.pkt_type = ~0U};
 mask = skb.__flags2;
 BUILD_BUG_ON(!__builtin_constant_p(mask));
 return mask;
}

something like that, haven't checked if that works.

> 
> 
> 
> >  	__u8			pkt_type:3,
> >  				fclone:2,
> >  				ipvs_property:1,
> > @@ -647,6 +651,17 @@ struct sk_buff {
> >  #define SKB_ALLOC_FCLONE	0x01
> >  #define SKB_ALLOC_RX		0x02
> >  
> 
> > +#ifdef __BIG_ENDIAN_BITFIELD
> > +#define PKT_TYPE_MAX   (7 << 5)
> > +#else
> > +#define PKT_TYPE_MAX   7
> > +#endif
> 
> It had a sense right before static int pkt_type_offset(void), but here,
> a casual reader might be lost.

Do you think we should just move PKT_TYPE_MAX macros right to the
definition of __pkt_type_offset?

> I am saying this because a reorder of the bit fields is probably needed
> to speedup __copy_skb_header() : Grouping together bit fields could
> allow optimized word copies instead of bit field manipulations.

Yes, I agree with you. It should be obvious to adapt the macros in case
someones reorders sk_buff. Either BUILD_BUG_ON or very obvious comments
+ declarations.

Thanks,
Hannes

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 13:40                   ` Hannes Frederic Sowa
@ 2014-09-04 14:11                     ` Eric Dumazet
  2014-09-04 14:22                       ` Hannes Frederic Sowa
  2014-09-04 14:23                       ` David Laight
  0 siblings, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2014-09-04 14:11 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann, Eric Dumazet,
	Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

On Thu, 2014-09-04 at 15:40 +0200, Hannes Frederic Sowa wrote:

> The type does not matter at all. Actually I wanted to use an empty
> struct but was afraid it might not work on older compilers and didn't
> want to check that with each version.


It matters. Really.

$ cat try.c
#include <stdio.h>

struct S_int {
	char a;
	int offset[0];
	char b:1;
};

struct S_char {
	char a;
	char offset[0];
	char b:1;
};

int main()
{
	printf("sizeof(S_int) %zu\n", sizeof(struct S_int));
	printf("sizeof(S_char) %zu\n", sizeof(struct S_char));
	return 0;
}
$ gcc -o try try.c && ./try
sizeof(S_int) 8
sizeof(S_char) 2

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 14:11                     ` Eric Dumazet
@ 2014-09-04 14:22                       ` Hannes Frederic Sowa
  2014-09-04 14:23                       ` David Laight
  1 sibling, 0 replies; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-04 14:22 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann, Eric Dumazet,
	Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

On Do, 2014-09-04 at 07:11 -0700, Eric Dumazet wrote:
> On Thu, 2014-09-04 at 15:40 +0200, Hannes Frederic Sowa wrote:
> 
> > The type does not matter at all. Actually I wanted to use an empty
> > struct but was afraid it might not work on older compilers and didn't
> > want to check that with each version.
> 
> 
> It matters. Really.
> 
> $ cat try.c
> #include <stdio.h>
> 
> struct S_int {
> 	char a;
> 	int offset[0];
> 	char b:1;
> };
> 
> struct S_char {
> 	char a;
> 	char offset[0];
> 	char b:1;
> };
> 
> int main()
> {
> 	printf("sizeof(S_int) %zu\n", sizeof(struct S_int));
> 	printf("sizeof(S_char) %zu\n", sizeof(struct S_char));
> 	return 0;
> }
> $ gcc -o try try.c && ./try
> sizeof(S_int) 8
> sizeof(S_char) 2

Sure, compiler must make sure to correctly align structure for all
members (so it must round up). But this is not an issue with struct
sk_buff. I agree that switching to __u8 for offset marker seems to
reduce confusion.

Statically compiling skb_pkt_type_mask as I proposed above didn't work
out.

New patch, what do you think?

diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 05a5661..0fd3f95 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
 	return (u64)err << 32 | ntohl(ret);
 }
 
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-static int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	u8 *ct = (u8 *)&skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
-	return -1;
-}
-
 static int build_body(struct jit_ctx *ctx)
 {
 	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
@@ -1332,11 +1311,7 @@ jmp_cmp:
 		case BPF_ANC | SKF_AD_PKTTYPE:
 			ctx->flags |= SEEN_SKB;
 
-			off = pkt_type_offset();
-
-			if (off < 0)
-				return -1;
-			emit_load_byte(r_tmp, r_skb, off, ctx);
+			emit_load_byte(r_tmp, r_skb, skb_pkt_type_offset(), ctx);
 			/* Keep only the last 3 bits */
 			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
 #ifdef __BIG_ENDIAN_BITFIELD
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 61e45b7..8039bb8 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -223,37 +223,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 	EMIT2(0x07fe);
 }
 
-/* Helper to find the offset of pkt_type in sk_buff
- * Make sure its still a 3bit field starting at the MSBs within a byte.
- */
-#define PKT_TYPE_MAX 0xe0
-static int pkt_type_offset;
-
-static int __init bpf_pkt_type_offset_init(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	char *ct = (char *)&skb_probe;
-	int off;
-
-	pkt_type_offset = -1;
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (!ct[off])
-			continue;
-		if (ct[off] == PKT_TYPE_MAX)
-			pkt_type_offset = off;
-		else {
-			/* Found non matching bit pattern, fix needed. */
-			WARN_ON_ONCE(1);
-			pkt_type_offset = -1;
-			return -1;
-		}
-	}
-	return 0;
-}
-device_initcall(bpf_pkt_type_offset_init);
-
 /*
  * make sure we dont leak kernel information to user
  */
@@ -753,12 +722,10 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		}
 		break;
 	case BPF_ANC | SKF_AD_PKTTYPE:
-		if (pkt_type_offset < 0)
-			goto out;
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* ic %r5,<d(pkt_type_offset)>(%r2) */
-		EMIT4_DISP(0x43502000, pkt_type_offset);
+		EMIT4_DISP(0x43502000, skb_pkt_type_offset());
 		/* srl %r5,5 */
 		EMIT4_DISP(0x88500000, 5);
 		break;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 02529fc..944e931 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -548,11 +548,25 @@ struct sk_buff {
 				ip_summed:2,
 				nohdr:1,
 				nfctinfo:3;
+
+	/* pkt_type bitfield is relatively addressed from bpf engines:
+	 * if you reorder skb contents, ensure to update PKT_TYPE_MAX
+	 * and keep __pkt_type_offset marker right in front of bitfield
+	 * which does contain pkt_type member.
+	 */
+
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_TYPE_MAX   (7 << 5)
+#else
+#define PKT_TYPE_MAX   7
+#endif
+	__u8			__pkt_type_offset[0];
 	__u8			pkt_type:3,
 				fclone:2,
 				ipvs_property:1,
 				peeked:1,
 				nf_trace:1;
+
 	kmemcheck_bitfield_end(flags1);
 	__be16			protocol;
 
@@ -647,6 +661,11 @@ struct sk_buff {
 #define SKB_ALLOC_FCLONE	0x01
 #define SKB_ALLOC_RX		0x02
 
+static inline size_t skb_pkt_type_offset(void)
+{
+	return offsetof(struct sk_buff, __pkt_type_offset);
+}
+
 /* Returns true if the skb was allocated from PFMEMALLOC reserves */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 {
diff --git a/net/core/filter.c b/net/core/filter.c
index d814b8a..ca64d7a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -87,30 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sk_filter);
 
-/* Helper to find the offset of pkt_type in sk_buff structure. We want
- * to make sure its still a 3bit field starting at a byte boundary;
- * taken from arch/x86/net/bpf_jit_comp.c.
- */
-#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX	(7 << 5)
-#else
-#define PKT_TYPE_MAX	7
-#endif
-static unsigned int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = { .pkt_type = ~0, };
-	u8 *ct = (u8 *) &skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-
-	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
-	return -1;
-}
-
 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
@@ -191,7 +167,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
 		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
-				    pkt_type_offset());
+				    skb_pkt_type_offset());
 		if (insn->off < 0)
 			return false;
 		insn++;

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* RE: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 14:11                     ` Eric Dumazet
  2014-09-04 14:22                       ` Hannes Frederic Sowa
@ 2014-09-04 14:23                       ` David Laight
  2014-09-04 14:32                         ` Eric Dumazet
  1 sibling, 1 reply; 21+ messages in thread
From: David Laight @ 2014-09-04 14:23 UTC (permalink / raw)
  To: 'Eric Dumazet', Hannes Frederic Sowa
  Cc: Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann, Eric Dumazet,
	Denis Kirjanov, netdev@vger.kernel.org, Markos Chandras,
	Martin Schwidefsky

From: Eric Dumazet
> On Thu, 2014-09-04 at 15:40 +0200, Hannes Frederic Sowa wrote:
> 
> > The type does not matter at all. Actually I wanted to use an empty
> > struct but was afraid it might not work on older compilers and didn't
> > want to check that with each version.
> 
> 
> It matters. Really.
> 
> $ cat try.c
> #include <stdio.h>
> 
> struct S_int {
> 	char a;
> 	int offset[0];
> 	char b:1;
> };
> 
> struct S_char {
> 	char a;
> 	char offset[0];
> 	char b:1;
> };

Those are also both illegal C, zero sized arrays aren't allowed.

	David


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 14:23                       ` David Laight
@ 2014-09-04 14:32                         ` Eric Dumazet
  2014-09-04 14:33                           ` Hannes Frederic Sowa
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2014-09-04 14:32 UTC (permalink / raw)
  To: David Laight
  Cc: Hannes Frederic Sowa, Denis Kirjanov, Alexei Starovoitov,
	Daniel Borkmann, Eric Dumazet, Denis Kirjanov,
	netdev@vger.kernel.org, Markos Chandras, Martin Schwidefsky

On Thu, 2014-09-04 at 14:23 +0000, David Laight wrote:
> From: Eric Dumazet
> > On Thu, 2014-09-04 at 15:40 +0200, Hannes Frederic Sowa wrote:
> > 
> > > The type does not matter at all. Actually I wanted to use an empty
> > > struct but was afraid it might not work on older compilers and didn't
> > > want to check that with each version.
> > 
> > 
> > It matters. Really.
> > 
> > $ cat try.c
> > #include <stdio.h>
> > 
> > struct S_int {
> > 	char a;
> > 	int offset[0];
> > 	char b:1;
> > };
> > 
> > struct S_char {
> > 	char a;
> > 	char offset[0];
> > 	char b:1;
> > };
> 
> Those are also both illegal C, zero sized arrays aren't allowed.
> 
> 	David


This does not matter, we already use such constructs in the kernel.

Take a look at kmemcheck_bitfield_begin() for example.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 14:32                         ` Eric Dumazet
@ 2014-09-04 14:33                           ` Hannes Frederic Sowa
  2014-09-04 14:35                             ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-04 14:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Laight, Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann,
	Eric Dumazet, Denis Kirjanov, netdev@vger.kernel.org,
	Markos Chandras, Martin Schwidefsky

On Do, 2014-09-04 at 07:32 -0700, Eric Dumazet wrote:
> On Thu, 2014-09-04 at 14:23 +0000, David Laight wrote:
> > From: Eric Dumazet
> > > On Thu, 2014-09-04 at 15:40 +0200, Hannes Frederic Sowa wrote:
> > > 
> > > > The type does not matter at all. Actually I wanted to use an empty
> > > > struct but was afraid it might not work on older compilers and didn't
> > > > want to check that with each version.
> > > 
> > > 
> > > It matters. Really.
> > > 
> > > $ cat try.c
> > > #include <stdio.h>
> > > 
> > > struct S_int {
> > > 	char a;
> > > 	int offset[0];
> > > 	char b:1;
> > > };
> > > 
> > > struct S_char {
> > > 	char a;
> > > 	char offset[0];
> > > 	char b:1;
> > > };
> > 
> > Those are also both illegal C, zero sized arrays aren't allowed.
> > 
> > 	David
> 
> 
> This does not matter, we already use such constructs in the kernel.
> 
> Take a look at kmemcheck_bitfield_begin() for example.

Which btw. also uses int, which might change alignment of structures.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 14:33                           ` Hannes Frederic Sowa
@ 2014-09-04 14:35                             ` Eric Dumazet
  2014-09-04 14:41                               ` Hannes Frederic Sowa
  0 siblings, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2014-09-04 14:35 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Laight, Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann,
	Eric Dumazet, Denis Kirjanov, netdev@vger.kernel.org,
	Markos Chandras, Martin Schwidefsky

On Thu, 2014-09-04 at 16:33 +0200, Hannes Frederic Sowa wrote:

> Which btw. also uses int, which might change alignment of structures.

You missed the point .

kmemcheck wants to make sure the whole word is set, or else you could
get false positives.

kmemcheck needs are quite different.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 14:35                             ` Eric Dumazet
@ 2014-09-04 14:41                               ` Hannes Frederic Sowa
  2014-09-04 20:51                                 ` Alexei Starovoitov
  0 siblings, 1 reply; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-04 14:41 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Laight, Denis Kirjanov, Alexei Starovoitov, Daniel Borkmann,
	Eric Dumazet, Denis Kirjanov, netdev@vger.kernel.org,
	Markos Chandras, Martin Schwidefsky

On Do, 2014-09-04 at 07:35 -0700, Eric Dumazet wrote:
> On Thu, 2014-09-04 at 16:33 +0200, Hannes Frederic Sowa wrote:
> 
> > Which btw. also uses int, which might change alignment of structures.
> 
> You missed the point .
> 
> kmemcheck wants to make sure the whole word is set, or else you could
> get false positives.
> 
> kmemcheck needs are quite different.

Now that you said it, I understand. :)

You were right with the int vs. u8 thing all along. gcc aligns the
datatype on the next struct field and not on the whole field, as I
expected. So excuse my error above.

I think the latest proposals looks good?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 14:41                               ` Hannes Frederic Sowa
@ 2014-09-04 20:51                                 ` Alexei Starovoitov
  2014-09-04 22:45                                   ` Hannes Frederic Sowa
  0 siblings, 1 reply; 21+ messages in thread
From: Alexei Starovoitov @ 2014-09-04 20:51 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Eric Dumazet, David Laight, Denis Kirjanov, Daniel Borkmann,
	Eric Dumazet, Denis Kirjanov, netdev@vger.kernel.org,
	Markos Chandras, Martin Schwidefsky

On Thu, Sep 4, 2014 at 7:41 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> On Do, 2014-09-04 at 07:35 -0700, Eric Dumazet wrote:
>> On Thu, 2014-09-04 at 16:33 +0200, Hannes Frederic Sowa wrote:
>>
>> > Which btw. also uses int, which might change alignment of structures.
>>
>> You missed the point .
>>
>> kmemcheck wants to make sure the whole word is set, or else you could
>> get false positives.
>>
>> kmemcheck needs are quite different.
>
> Now that you said it, I understand. :)
>
> You were right with the int vs. u8 thing all along. gcc aligns the
> datatype on the next struct field and not on the whole field, as I
> expected. So excuse my error above.
>
> I think the latest proposals looks good?

to me: yes
and I think your latest half-patch with:
+       __u8                    __pkt_type_offset[0];
also looks good.

Are you going to take it over from Denis here?

while at it would you fix sparc jit as well that has comment:
#if 0
                                /* GCC won't let us take the address of
                                 * a bit field even though we very much
                                 * know what we are doing here.
                                 */
                        case BPF_ANC | SKF_AD_PKTTYPE:
                                __emit_skb_load8(pkt_type, r_A);
                                emit_alu_K(SRL, 5);
                                break;
#endif
should be able to replace 'pkt_type' above with __pkt_type_offset...

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 20:51                                 ` Alexei Starovoitov
@ 2014-09-04 22:45                                   ` Hannes Frederic Sowa
  2014-09-12  4:01                                     ` Alexei Starovoitov
  0 siblings, 1 reply; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-04 22:45 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Eric Dumazet, David Laight, Denis Kirjanov, Daniel Borkmann,
	Eric Dumazet, Denis Kirjanov, netdev, Markos Chandras,
	Martin Schwidefsky

Hi,

On Thu, Sep 4, 2014, at 22:51, Alexei Starovoitov wrote:
> On Thu, Sep 4, 2014 at 7:41 AM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > On Do, 2014-09-04 at 07:35 -0700, Eric Dumazet wrote:
> >> On Thu, 2014-09-04 at 16:33 +0200, Hannes Frederic Sowa wrote:
> >>
> >> > Which btw. also uses int, which might change alignment of structures.
> >>
> >> You missed the point .
> >>
> >> kmemcheck wants to make sure the whole word is set, or else you could
> >> get false positives.
> >>
> >> kmemcheck needs are quite different.
> >
> > Now that you said it, I understand. :)
> >
> > You were right with the int vs. u8 thing all along. gcc aligns the
> > datatype on the next struct field and not on the whole field, as I
> > expected. So excuse my error above.
> >
> > I think the latest proposals looks good?
> 
> to me: yes
> and I think your latest half-patch with:
> +       __u8                    __pkt_type_offset[0];
> also looks good.
> 
> Are you going to take it over from Denis here?

I don't know. Denis, do you want to incorperate my changes or should I
take over here?

> while at it would you fix sparc jit as well that has comment:
> #if 0
>                                 /* GCC won't let us take the address of
>                                  * a bit field even though we very much
>                                  * know what we are doing here.
>                                  */
>                         case BPF_ANC | SKF_AD_PKTTYPE:
>                                 __emit_skb_load8(pkt_type, r_A);
>                                 emit_alu_K(SRL, 5);
>                                 break;
> #endif
> should be able to replace 'pkt_type' above with __pkt_type_offset...

I'll have a look then but cannot test sparc easily.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
  2014-09-04 22:45                                   ` Hannes Frederic Sowa
@ 2014-09-12  4:01                                     ` Alexei Starovoitov
       [not found]                                       ` <CAOJe8K2eQ9ejhBTk7MzJJK5EB4D62U4uKDiFFQFh+T16E7=S3A@mail.gmail.com>
  0 siblings, 1 reply; 21+ messages in thread
From: Alexei Starovoitov @ 2014-09-12  4:01 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Eric Dumazet, David Laight, Denis Kirjanov, Daniel Borkmann,
	Eric Dumazet, Denis Kirjanov, netdev@vger.kernel.org,
	Markos Chandras, Martin Schwidefsky

On Thu, Sep 4, 2014 at 3:45 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
>> >
>> > I think the latest proposals looks good?
>>
>> to me: yes
>> and I think your latest half-patch with:
>> +       __u8                    __pkt_type_offset[0];
>> also looks good.
>>
>> Are you going to take it over from Denis here?
>
> I don't know. Denis, do you want to incorperate my changes or should I
> take over here?

Hannes, Denis,
anyone of you going to continue on that?
imo it's a good cleanup.

^ permalink raw reply	[flat|nested] 21+ messages in thread

[parent not found: <CAOJe8K2eQ9ejhBTk7MzJJK5EB4D62U4uKDiFFQFh+T16E7=S3A@mail.gmail.com>]

* Re: [PATCH v2 net-next] net: filter: export pkt_type_offset() helper
       [not found]                                       ` <CAOJe8K2eQ9ejhBTk7MzJJK5EB4D62U4uKDiFFQFh+T16E7=S3A@mail.gmail.com>
@ 2014-09-12  8:25                                         ` Hannes Frederic Sowa
  0 siblings, 0 replies; 21+ messages in thread
From: Hannes Frederic Sowa @ 2014-09-12  8:25 UTC (permalink / raw)
  To: Denis Kirjanov, Alexei Starovoitov
  Cc: Eric Dumazet, David Laight, Denis Kirjanov, Daniel Borkmann,
	Eric Dumazet, netdev, Markos Chandras, Martin Schwidefsky

Hi,

On Fri, Sep 12, 2014, at 09:09, Denis Kirjanov wrote:
> Hi guys, sorry, I was on vacation and didn't notice you. I'll continue to
> work on that.

I have the patch still in my pipeline but just wanted to hear from you
again (or you time out), if you would still like to work on it.
Otherwise will send it out later today.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2014-09-12  8:25 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-03 21:08 [PATCH v2 net-next] net: filter: export pkt_type_offset() helper Denis Kirjanov
2014-09-03 22:01 ` Daniel Borkmann
2014-09-03 23:14   ` Alexei Starovoitov
2014-09-04  1:08     ` Eric Dumazet
2014-09-04  1:25       ` Hannes Frederic Sowa
2014-09-04  2:05         ` Eric Dumazet
2014-09-04  2:43           ` Alexei Starovoitov
     [not found]             ` <CAHj3AVn8q-JS0Eq0QrfpBo-WfJ+-wXumhNoeDAdkPZC7LSeB5g@mail.gmail.com>
2014-09-04 11:50               ` Hannes Frederic Sowa
2014-09-04 13:09                 ` Eric Dumazet
2014-09-04 13:40                   ` Hannes Frederic Sowa
2014-09-04 14:11                     ` Eric Dumazet
2014-09-04 14:22                       ` Hannes Frederic Sowa
2014-09-04 14:23                       ` David Laight
2014-09-04 14:32                         ` Eric Dumazet
2014-09-04 14:33                           ` Hannes Frederic Sowa
2014-09-04 14:35                             ` Eric Dumazet
2014-09-04 14:41                               ` Hannes Frederic Sowa
2014-09-04 20:51                                 ` Alexei Starovoitov
2014-09-04 22:45                                   ` Hannes Frederic Sowa
2014-09-12  4:01                                     ` Alexei Starovoitov
     [not found]                                       ` <CAOJe8K2eQ9ejhBTk7MzJJK5EB4D62U4uKDiFFQFh+T16E7=S3A@mail.gmail.com>
2014-09-12  8:25                                         ` Hannes Frederic Sowa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).