* [PATCH] net: reduce the lines of code
From: Changli Gao @ 2010-11-19 2:04 UTC
To: David S. Miller; +Cc: netdev, Changli Gao
Use macros to generate the repetitive lines.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
net/core/filter.c | 96 +++-----------------------------------------------
net/core/filter_def.h | 55 ++++++++++++++++++++++++++++
2 files changed, 61 insertions(+), 90 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d..2a8841d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,51 +39,9 @@
#include <linux/filter.h>
enum {
- BPF_S_RET_K = 0,
- BPF_S_RET_A,
- BPF_S_ALU_ADD_K,
- BPF_S_ALU_ADD_X,
- BPF_S_ALU_SUB_K,
- BPF_S_ALU_SUB_X,
- BPF_S_ALU_MUL_K,
- BPF_S_ALU_MUL_X,
- BPF_S_ALU_DIV_X,
- BPF_S_ALU_AND_K,
- BPF_S_ALU_AND_X,
- BPF_S_ALU_OR_K,
- BPF_S_ALU_OR_X,
- BPF_S_ALU_LSH_K,
- BPF_S_ALU_LSH_X,
- BPF_S_ALU_RSH_K,
- BPF_S_ALU_RSH_X,
- BPF_S_ALU_NEG,
- BPF_S_LD_W_ABS,
- BPF_S_LD_H_ABS,
- BPF_S_LD_B_ABS,
- BPF_S_LD_W_LEN,
- BPF_S_LD_W_IND,
- BPF_S_LD_H_IND,
- BPF_S_LD_B_IND,
- BPF_S_LD_IMM,
- BPF_S_LDX_W_LEN,
- BPF_S_LDX_B_MSH,
- BPF_S_LDX_IMM,
- BPF_S_MISC_TAX,
- BPF_S_MISC_TXA,
- BPF_S_ALU_DIV_K,
- BPF_S_LD_MEM,
- BPF_S_LDX_MEM,
- BPF_S_ST,
- BPF_S_STX,
- BPF_S_JMP_JA,
- BPF_S_JMP_JEQ_K,
- BPF_S_JMP_JEQ_X,
- BPF_S_JMP_JGE_K,
- BPF_S_JMP_JGE_X,
- BPF_S_JMP_JGT_K,
- BPF_S_JMP_JGT_X,
- BPF_S_JMP_JSET_K,
- BPF_S_JMP_JSET_X,
+#define BPF_DEFINE(user, kernel) kernel
+#include "filter_def.h"
+#undef BPF_DEFINE
};
/* No hurry in this branch */
@@ -436,51 +394,9 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
* Invalid instructions are initialized to 0.
*/
static const u8 codes[] = {
- [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K + 1,
- [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X + 1,
- [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K + 1,
- [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X + 1,
- [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K + 1,
- [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X + 1,
- [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X + 1,
- [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K + 1,
- [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X + 1,
- [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K + 1,
- [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X + 1,
- [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K + 1,
- [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X + 1,
- [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K + 1,
- [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X + 1,
- [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG + 1,
- [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS + 1,
- [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS + 1,
- [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS + 1,
- [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN + 1,
- [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND + 1,
- [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND + 1,
- [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND + 1,
- [BPF_LD|BPF_IMM] = BPF_S_LD_IMM + 1,
- [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN + 1,
- [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH + 1,
- [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM + 1,
- [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX + 1,
- [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA + 1,
- [BPF_RET|BPF_K] = BPF_S_RET_K + 1,
- [BPF_RET|BPF_A] = BPF_S_RET_A + 1,
- [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K + 1,
- [BPF_LD|BPF_MEM] = BPF_S_LD_MEM + 1,
- [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM + 1,
- [BPF_ST] = BPF_S_ST + 1,
- [BPF_STX] = BPF_S_STX + 1,
- [BPF_JMP|BPF_JA] = BPF_S_JMP_JA + 1,
- [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K + 1,
- [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X + 1,
- [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K + 1,
- [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X + 1,
- [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K + 1,
- [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X + 1,
- [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K + 1,
- [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X + 1,
+#define BPF_DEFINE(user, kernel) [user] = kernel + 1
+#include "filter_def.h"
+#undef BPF_DEFINE
};
int pc;
diff --git a/net/core/filter_def.h b/net/core/filter_def.h
new file mode 100644
index 0000000..51bd662
--- /dev/null
+++ b/net/core/filter_def.h
@@ -0,0 +1,55 @@
+#define BPF_A1(arg1) BPF_DEFINE(BPF_##arg1, BPF_S_##arg1)
+#define BPF_A2(arg1, arg2) \
+ BPF_DEFINE(BPF_##arg1|BPF_##arg2, BPF_S_##arg1##_##arg2)
+#define BPF_A3(arg1, arg2, arg3) \
+ BPF_DEFINE(BPF_##arg1|BPF_##arg2|BPF_##arg3, \
+ BPF_S_##arg1##_##arg2##_##arg3)
+
+BPF_A2(RET, K),
+BPF_A2(RET, A),
+BPF_A3(ALU, ADD, K),
+BPF_A3(ALU, ADD, X),
+BPF_A3(ALU, SUB, K),
+BPF_A3(ALU, SUB, X),
+BPF_A3(ALU, MUL, K),
+BPF_A3(ALU, MUL, X),
+BPF_A3(ALU, DIV, X),
+BPF_A3(ALU, AND, K),
+BPF_A3(ALU, AND, X),
+BPF_A3(ALU, OR, K),
+BPF_A3(ALU, OR, X),
+BPF_A3(ALU, LSH, K),
+BPF_A3(ALU, LSH, X),
+BPF_A3(ALU, RSH, K),
+BPF_A3(ALU, RSH, X),
+BPF_A2(ALU, NEG),
+BPF_A3(LD, W, ABS),
+BPF_A3(LD, H, ABS),
+BPF_A3(LD, B, ABS),
+BPF_A3(LD, W, LEN),
+BPF_A3(LD, W, IND),
+BPF_A3(LD, H, IND),
+BPF_A3(LD, B, IND),
+BPF_A2(LD, IMM),
+BPF_A3(LDX, W, LEN),
+BPF_A3(LDX, B, MSH),
+BPF_A2(LDX, IMM),
+BPF_A2(MISC, TAX),
+BPF_A2(MISC, TXA),
+BPF_A3(ALU, DIV, K),
+BPF_A2(LD, MEM),
+BPF_A2(LDX, MEM),
+BPF_A1(ST),
+BPF_A1(STX),
+BPF_A2(JMP, JA),
+BPF_A3(JMP, JEQ, K),
+BPF_A3(JMP, JEQ, X),
+BPF_A3(JMP, JGE, K),
+BPF_A3(JMP, JGE, X),
+BPF_A3(JMP, JGT, K),
+BPF_A3(JMP, JGT, X),
+BPF_A3(JMP, JSET, K),
+BPF_A3(JMP, JSET, X),
+#undef BPF_A1
+#undef BPF_A2
+#undef BPF_A3
* Re: [PATCH] net: reduce the lines of code
From: Hagen Paul Pfeifer @ 2010-11-19 6:35 UTC
To: Changli Gao; +Cc: David S. Miller, netdev
On Fri, 19 Nov 2010 10:04:44 +0800, Changli Gao <xiaosuo@gmail.com> wrote:
> Use macros to generate the repetitive lines.
This is completely awkward and does not fix anything - on the contrary,
it makes the code harder to understand and no advantage is gained.
Hagen
* Re: [PATCH] net: reduce the lines of code
From: Changli Gao @ 2010-11-19 7:17 UTC
To: Hagen Paul Pfeifer; +Cc: David S. Miller, netdev
On Fri, Nov 19, 2010 at 2:35 PM, Hagen Paul Pfeifer <hagen@jauu.net> wrote:
>
> On Fri, 19 Nov 2010 10:04:44 +0800, Changli Gao <xiaosuo@gmail.com> wrote:
>> Use macros to generate the repetitive lines.
>
> This is completely awkward and does not fix anything - on the contrary,
> it makes the code harder to understand and no advantage is gained.
>
Although it doesn't fix anything, it simplifies adding new BPF
instructions: one place in filter_def.h instead of two in filter.c. I
think that if some code can be generated automatically, we'd better not
write it by hand, as generated code leaves less room for error.
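
As a minimal standalone sketch of the idea (with illustrative names,
not the kernel's), one X-macro list can feed several expansion sites:

#include <stdio.h>

/* One master list; each expansion site redefines OP() as needed. */
#define OP_LIST \
	OP(ADD) \
	OP(SUB) \
	OP(MUL)

/* Expansion 1: an enum of internal codes. */
enum {
#define OP(name) MY_S_##name,
OP_LIST
#undef OP
};

/* Expansion 2: a name table that cannot fall out of sync with the enum. */
static const char *op_names[] = {
#define OP(name) [MY_S_##name] = #name,
OP_LIST
#undef OP
};

int main(void)
{
	printf("%zu ops, first is %s\n",
	       sizeof(op_names) / sizeof(op_names[0]), op_names[MY_S_ADD]);
	return 0;
}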
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH] net: reduce the lines of code
From: Eric Dumazet @ 2010-11-19 7:51 UTC
To: Changli Gao; +Cc: Hagen Paul Pfeifer, David S. Miller, netdev
On Friday, 19 November 2010 at 15:17 +0800, Changli Gao wrote:
> On Fri, Nov 19, 2010 at 2:35 PM, Hagen Paul Pfeifer <hagen@jauu.net> wrote:
> >
> > On Fri, 19 Nov 2010 10:04:44 +0800, Changli Gao <xiaosuo@gmail.com> wrote:
>> Use macros to generate the repetitive lines.
> >
> > This is completely awkward and does not fix anything - on the contrary,
> > it makes the code harder to understand and no advantage is gained.
> >
>
> Although it doesn't fix anything, it simplifies adding new BPF
> instructions: one place in filter_def.h instead of two in filter.c. I
> think that if some code can be generated automatically, we'd better not
> write it by hand, as generated code leaves less room for error.
>
The idea is good, but the way you did it is not.
I have two patches in testing. I'll post them today.
One to remove all the "+ 1" we do in the codes[] init.
One, at check time, to replace the divide-by-a-constant instruction
(DIV_K) with a reciprocal one (a multiply at exec time).
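
For readers unfamiliar with the trick, here is a userspace sketch of
reciprocal division. The helper names are mine, not the kernel's
lib/reciprocal_div.c ones, and it assumes the divisor is > 1:

#include <stdint.h>
#include <stdio.h>

/* Precompute R = ceil(2^32 / d) once, at filter-check time. */
static uint32_t recip_value(uint32_t d)
{
	return (uint32_t)(((1ULL << 32) + d - 1) / d);	/* assumes d > 1 */
}

/* At exec time, one 64-bit multiply and a shift replace the divide. */
static uint32_t recip_divide(uint32_t a, uint32_t r)
{
	return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
	uint32_t d = 7, r = recip_value(d), a;

	/* Spot-check exactness over small operands. */
	for (a = 0; a < 100000; a++)
		if (recip_divide(a, r) != a / d)
			printf("mismatch at %u\n", a);
	return 0;
}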
* [PATCH net-next-2.6] filter: cleanup codes[] init
From: Eric Dumazet @ 2010-11-19 7:56 UTC
To: David S. Miller; +Cc: Hagen Paul Pfeifer, netdev, Changli Gao
Starting the translated instructions at 1 instead of 0 allows us to
remove one decrement at check time and makes the codes[] array init
cleaner.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
net/core/filter.c | 95 +++++++++++++++++++++-----------------------
1 file changed, 47 insertions(+), 48 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d..a1edb5d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,7 +39,7 @@
#include <linux/filter.h>
enum {
- BPF_S_RET_K = 0,
+ BPF_S_RET_K = 1,
BPF_S_RET_A,
BPF_S_ALU_ADD_K,
BPF_S_ALU_ADD_X,
@@ -436,51 +436,51 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
* Invalid instructions are initialized to 0.
*/
static const u8 codes[] = {
- [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K + 1,
- [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X + 1,
- [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K + 1,
- [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X + 1,
- [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K + 1,
- [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X + 1,
- [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X + 1,
- [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K + 1,
- [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X + 1,
- [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K + 1,
- [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X + 1,
- [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K + 1,
- [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X + 1,
- [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K + 1,
- [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X + 1,
- [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG + 1,
- [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS + 1,
- [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS + 1,
- [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS + 1,
- [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN + 1,
- [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND + 1,
- [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND + 1,
- [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND + 1,
- [BPF_LD|BPF_IMM] = BPF_S_LD_IMM + 1,
- [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN + 1,
- [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH + 1,
- [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM + 1,
- [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX + 1,
- [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA + 1,
- [BPF_RET|BPF_K] = BPF_S_RET_K + 1,
- [BPF_RET|BPF_A] = BPF_S_RET_A + 1,
- [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K + 1,
- [BPF_LD|BPF_MEM] = BPF_S_LD_MEM + 1,
- [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM + 1,
- [BPF_ST] = BPF_S_ST + 1,
- [BPF_STX] = BPF_S_STX + 1,
- [BPF_JMP|BPF_JA] = BPF_S_JMP_JA + 1,
- [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K + 1,
- [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X + 1,
- [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K + 1,
- [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X + 1,
- [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K + 1,
- [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X + 1,
- [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K + 1,
- [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X + 1,
+ [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
+ [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
+ [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
+ [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
+ [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
+ [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
+ [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
+ [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
+ [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
+ [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
+ [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
+ [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
+ [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
+ [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
+ [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
+ [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
+ [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
+ [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
+ [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
+ [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
+ [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
+ [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
+ [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
+ [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
+ [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
+ [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
+ [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
+ [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
+ [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
+ [BPF_RET|BPF_K] = BPF_S_RET_K,
+ [BPF_RET|BPF_A] = BPF_S_RET_A,
+ [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
+ [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
+ [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
+ [BPF_ST] = BPF_S_ST,
+ [BPF_STX] = BPF_S_STX,
+ [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
+ [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
+ [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
+ [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
+ [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
+ [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
+ [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
+ [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
+ [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
};
int pc;
@@ -495,8 +495,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
if (code >= ARRAY_SIZE(codes))
return -EINVAL;
code = codes[code];
- /* Undo the '+ 1' in codes[] after validation. */
- if (!code--)
+ if (!code)
return -EINVAL;
/* Some instructions need special checks */
switch (code) {
* [PATCH net-next-2.6] filter: use reciprocal divide
From: Eric Dumazet @ 2010-11-19 8:04 UTC
To: David S. Miller; +Cc: Hagen Paul Pfeifer, netdev, Changli Gao
At compile time, we can replace the DIV_K instruction (divide by a
constant value) by a reciprocal divide.
At exec time, the expensive divide is replaced by a multiply, a less
expensive operation on most processors.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
net/core/filter.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index a1edb5d..13853c7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -37,6 +37,7 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
+#include <linux/reciprocal_div.h>
enum {
BPF_S_RET_K = 1,
@@ -202,7 +203,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= f_k;
+ A = reciprocal_divide(A, f_k);
continue;
case BPF_S_ALU_AND_X:
A &= X;
@@ -503,6 +504,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
/* check for division by zero */
if (ftest->k == 0)
return -EINVAL;
+ ftest->k = reciprocal_value(ftest->k);
break;
case BPF_S_LD_MEM:
case BPF_S_LDX_MEM:
* Re: [PATCH net-next-2.6] filter: use reciprocal divide
From: Changli Gao @ 2010-11-19 8:18 UTC
To: Eric Dumazet; +Cc: David S. Miller, Hagen Paul Pfeifer, netdev
On Fri, Nov 19, 2010 at 4:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> At compile time, we can replace the DIV_K instruction (divide by a
> constant value) by a reciprocal divide.
>
> At exec time, the expensive divide is replaced by a multiply, a less
> expensive operation on most processors.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH net-next-2.6] filter: use reciprocal divide
From: David Miller @ 2010-11-19 18:07 UTC
To: xiaosuo; +Cc: eric.dumazet, hagen, netdev
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 19 Nov 2010 16:18:05 +0800
> On Fri, Nov 19, 2010 at 4:04 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> At compile time, we can replace the DIV_K instruction (divide by a
>> constant value) by a reciprocal divide.
>>
>> At exec time, the expensive divide is replaced by a multiply, a less
>> expensive operation on most processors.
>>
>> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
>
> Acked-by: Changli Gao <xiaosuo@gmail.com>
Applied.
* Re: [PATCH net-next-2.6] filter: cleanup codes[] init
From: Changli Gao @ 2010-11-19 8:38 UTC
To: Eric Dumazet; +Cc: David S. Miller, Hagen Paul Pfeifer, netdev
On Fri, Nov 19, 2010 at 3:56 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Starting the translated instructions at 1 instead of 0 allows us to
> remove one decrement at check time and makes the codes[] array init
> cleaner.
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
> net/core/filter.c | 95 +++++++++++++++++++++-----------------------
> 1 file changed, 47 insertions(+), 48 deletions(-)
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 15a545d..a1edb5d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -39,7 +39,7 @@
> #include <linux/filter.h>
>
> enum {
> - BPF_S_RET_K = 0,
> + BPF_S_RET_K = 1,
> BPF_S_RET_A,
> BPF_S_ALU_ADD_K,
> BPF_S_ALU_ADD_X,
> @@ -436,51 +436,51 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
> * Invalid instructions are initialized to 0.
> */
> static const u8 codes[] = {
> - [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K + 1,
> - [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X + 1,
> - [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K + 1,
> - [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X + 1,
> - [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K + 1,
> - [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X + 1,
> - [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X + 1,
> - [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K + 1,
> - [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X + 1,
> - [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K + 1,
> - [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X + 1,
> - [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K + 1,
> - [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X + 1,
> - [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K + 1,
> - [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X + 1,
> - [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG + 1,
> - [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS + 1,
> - [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS + 1,
> - [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS + 1,
> - [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN + 1,
> - [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND + 1,
> - [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND + 1,
> - [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND + 1,
> - [BPF_LD|BPF_IMM] = BPF_S_LD_IMM + 1,
> - [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN + 1,
> - [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH + 1,
> - [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM + 1,
> - [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX + 1,
> - [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA + 1,
> - [BPF_RET|BPF_K] = BPF_S_RET_K + 1,
> - [BPF_RET|BPF_A] = BPF_S_RET_A + 1,
> - [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K + 1,
> - [BPF_LD|BPF_MEM] = BPF_S_LD_MEM + 1,
> - [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM + 1,
> - [BPF_ST] = BPF_S_ST + 1,
> - [BPF_STX] = BPF_S_STX + 1,
> - [BPF_JMP|BPF_JA] = BPF_S_JMP_JA + 1,
> - [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K + 1,
> - [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X + 1,
> - [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K + 1,
> - [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X + 1,
> - [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K + 1,
> - [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X + 1,
> - [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K + 1,
> - [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X + 1,
> + [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
> + [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
> + [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
> + [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
> + [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
> + [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
> + [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
> + [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
> + [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
> + [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
> + [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
> + [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
> + [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
> + [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
> + [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
> + [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
> + [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
> + [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
> + [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
> + [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
> + [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
> + [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
> + [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
> + [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
> + [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
> + [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
> + [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
> + [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
> + [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
> + [BPF_RET|BPF_K] = BPF_S_RET_K,
> + [BPF_RET|BPF_A] = BPF_S_RET_A,
> + [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
> + [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
> + [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
> + [BPF_ST] = BPF_S_ST,
> + [BPF_STX] = BPF_S_STX,
> + [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
> + [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
> + [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
> + [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
> + [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
> + [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
> + [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
> + [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
> + [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
> };
> int pc;
>
> @@ -495,8 +495,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
> if (code >= ARRAY_SIZE(codes))
> return -EINVAL;
> code = codes[code];
> - /* Undo the '+ 1' in codes[] after validation. */
> - if (!code--)
> + if (!code)
> return -EINVAL;
> /* Some instructions need special checks */
> switch (code) {
>
>
>
I compared the asm code of sk_run_filter.
Here is the original:
switch (fentry->code) {
ffffffff8138c70c: 66 83 38 2c cmpw $0x2c,(%rax)
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
const struct sock_filter *fentry = &filter[pc];
u32 f_k = fentry->k;
ffffffff8138c710: 44 8b 78 04 mov 0x4(%rax),%r15d
switch (fentry->code) {
ffffffff8138c714: 0f 87 53 02 00 00 ja
ffffffff8138c96d <sk_run_filter+0x2a8>
ffffffff8138c71a: 0f b7 10 movzwl (%rax),%edx
ffffffff8138c71d: ff 24 d5 00 00 00 00 jmpq *0x0(,%rdx,8)
And here is the patched:
switch (fentry->code) {
ffffffff8138c708: 8b 10 mov (%rax),%edx
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
const struct sock_filter *fentry = &filter[pc];
u32 f_k = fentry->k;
ffffffff8138c70a: 44 8b 78 04 mov 0x4(%rax),%r15d
switch (fentry->code) {
ffffffff8138c70e: ff ca dec %edx
ffffffff8138c710: 66 83 fa 2c cmp $0x2c,%dx
ffffffff8138c714: 0f 87 53 02 00 00 ja
ffffffff8138c96d <sk_run_filter+0x2ac>
ffffffff8138c71a: 0f b7 d2 movzwl %dx,%edx
ffffffff8138c71d: ff 24 d5 00 00 00 00 jmpq *0x0(,%rdx,8)
As you can see, an additional 'dec %edx' instruction is inserted.
sk_chk_filter() only runs once, so I think we can afford the 'dec
instruction' and 'dirty' code there, but sk_run_filter() runs much more
often, and this additional dec instruction isn't affordable.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH net-next-2.6] filter: cleanup codes[] init
From: Eric Dumazet @ 2010-11-19 9:54 UTC
To: Changli Gao; +Cc: David S. Miller, Hagen Paul Pfeifer, netdev
On Friday, 19 November 2010 at 16:38 +0800, Changli Gao wrote:
> I compared the asm code of sk_run_filter.
> As you can see, an additional 'dec %edx' instruction is inserted.
> sk_chk_filter() only runs once, so I think we can afford the 'dec
> instruction' and 'dirty' code there, but sk_run_filter() runs much more
> often, and this additional dec instruction isn't affordable.
>
Maybe on your setup. By the way, the
u32 f_k = fentry->k;
that David added in commit 57fe93b374a6b871
was much more of a problem on arches without enough registers.
On x86_32, for example, the compiler uses a register (%esi on my
gcc-4.5.1) to store f_k and, more important, the A register is now
stored on the stack instead of in a cpu register.
On my compilers
gcc version 4.4.5 (Ubuntu/Linaro 4.4.4-14ubuntu5) 64bit
gcc-4.5.1 (self compiled) 32bit
the resulting code was the same before and after the patch.
Most probably you have "CONFIG_CC_OPTIMIZE_FOR_SIZE=y", which
unfortunately is known to generate poor-looking code.
39b: 49 8d 14 c6 lea (%r14,%rax,8),%rdx
39f: 66 83 3a 2d cmpw $0x2d,(%rdx)
3a3: 8b 42 04 mov 0x4(%rdx),%eax // f_k = fentry->k;
3a6: 76 28 jbe 3d0 <sk_run_filter+0x70>
3d0: 0f b7 0a movzwl (%rdx),%ecx
3d3: ff 24 cd 00 00 00 00 jmpq *0x0(,%rcx,8)
32bit code:
2e0: 8d 04 df lea (%edi,%ebx,8),%eax
2e3: 66 83 38 2d cmpw $0x2d,(%eax)
2e7: 8b 70 04 mov 0x4(%eax),%esi // f_k = fentry->k;
2ea: 76 1c jbe 308 <sk_run_filter+0x58>
308: 0f b7 10 movzwl (%eax),%edx
30b: ff 24 95 38 00 00 00 jmp *0x38(,%edx,4)
DIV_X instruction :
480: 8b 45 a4 mov -0x5c(%ebp),%eax
483: 85 c0 test %eax,%eax
485: 0f 84 9d fe ff ff je 328 <sk_run_filter+0x78>
48b: 8b 45 ac mov -0x54(%ebp),%eax // A
48e: 31 d2 xor %edx,%edx
490: f7 75 a4 divl -0x5c(%ebp)
493: 89 45 ac mov %eax,-0x54(%ebp) // A
496: e9 85 fe ff ff jmp 320 <sk_run_filter+0x70>
I believe we should revert the u32 f_k = fentry->k; part.
fentry->k is as fast as f_k if f_k is stored on the stack, and it
avoids one instruction when fentry->k is not needed.
* [PATCH net-next-2.6] filter: optimize sk_run_filter
From: Eric Dumazet @ 2010-11-19 11:17 UTC
To: David Miller; +Cc: Hagen Paul Pfeifer, netdev, Changli Gao
On Friday, 19 November 2010 at 10:54 +0100, Eric Dumazet wrote:
> I believe we should revert the u32 f_k = fentry->k; part.
>
> fentry->k is as fast as f_k if f_k is stored on the stack, and it
> avoids one instruction when fentry->k is not needed.
>
>
A revert is not good on arches with a decent number of registers
(x86_64, for example).
Maybe some CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS is needed (or does one
already exist?).
Here is the patch to save 400 bytes on x86_32, and really speed up the
damn thing on all arches.
Thanks
[PATCH net-next-2.6] filter: optimize sk_run_filter
remove pc variable to avoid arithmetic to compute fentry at each filter
instruction. Jumps directly manipulate fentry pointer.
As the last instruction of filter[] is guaranteed to be a RETURN, and
all jumps are before the last instruction, we dont need to check filter
bounds (number of instructions in filter array) at each iteration, so we
remove it from sk_run_filter() params.
On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
(filter: make sure filters dont read uninitialized memory)
Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
avoid too many ifdefs in this code.
This helps compiler to use cpu registers to hold fentry and A
accumulator.
On x86_32, this saves 401 bytes, and more important, sk_run_filter()
runs much faster because less register pressure (One less conditional
branch per BPF instruction)
# size net/core/filter.o net/core/filter_pre.o
text data bss dec hex filename
2948 0 0 2948 b84 net/core/filter.o
3349 0 0 3349 d15 net/core/filter_pre.o
on x86_64 :
# size net/core/filter.o net/core/filter_pre.o
text data bss dec hex filename
5173 0 0 5173 1435 net/core/filter.o
5224 0 0 5224 1468 net/core/filter_pre.o
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Changli Gao <xiaosuo@gmail.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
---
include/linux/filter.h | 2
net/core/filter.c | 93 +++++++++++++++++++-------------------
net/core/timestamping.c | 2
net/packet/af_packet.c | 2
4 files changed, 51 insertions(+), 48 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d7..447a775 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -147,7 +147,7 @@ struct sock;
extern int sk_filter(struct sock *sk, struct sk_buff *skb);
extern unsigned int sk_run_filter(struct sk_buff *skb,
- struct sock_filter *filter, int flen);
+ const struct sock_filter *filter);
extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
extern int sk_detach_filter(struct sock *sk);
extern int sk_chk_filter(struct sock_filter *filter, int flen);
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d..9e77b3c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
- * @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
*/
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
{
void *ptr;
u32 A = 0; /* Accumulator */
@@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
unsigned long memvalid = 0;
u32 tmp;
int k;
- int pc;
BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
- for (pc = 0; pc < flen; pc++) {
- const struct sock_filter *fentry = &filter[pc];
- u32 f_k = fentry->k;
+ for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define K (fentry->k)
+#else
+ const u32 K = fentry->k;
+#endif
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += f_k;
+ A += K;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= f_k;
+ A -= K;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= f_k;
+ A *= K;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= f_k;
+ A /= K;
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= f_k;
+ A &= K;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= f_k;
+ A |= K;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= f_k;
+ A <<= K;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= f_k;
+ A >>= K;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += f_k;
+ fentry += K;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > f_k) ? fentry->jt : fentry->jf;
+ fentry += (A > K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= f_k) ? fentry->jt : fentry->jf;
+ fentry += (A >= K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == f_k) ? fentry->jt : fentry->jf;
+ fentry += (A == K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & f_k) ? fentry->jt : fentry->jf;
+ fentry += (A & K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
- pc += (A > X) ? fentry->jt : fentry->jf;
+ fentry += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_X:
- pc += (A >= X) ? fentry->jt : fentry->jf;
+ fentry += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_X:
- pc += (A == X) ? fentry->jt : fentry->jf;
+ fentry += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_X:
- pc += (A & X) ? fentry->jt : fentry->jf;
+ fentry += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = f_k;
+ k = K;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
@@ -268,7 +271,7 @@ load_w:
}
break;
case BPF_S_LD_H_ABS:
- k = f_k;
+ k = K;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
@@ -277,7 +280,7 @@ load_h:
}
break;
case BPF_S_LD_B_ABS:
- k = f_k;
+ k = K;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
@@ -292,34 +295,34 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + f_k;
+ k = X + K;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + f_k;
+ k = X + K;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + f_k;
+ k = X + K;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, f_k, 1, &tmp);
+ ptr = load_pointer(skb, K, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = f_k;
+ A = K;
continue;
case BPF_S_LDX_IMM:
- X = f_k;
+ X = K;
continue;
case BPF_S_LD_MEM:
- A = (memvalid & (1UL << f_k)) ?
- mem[f_k] : 0;
+ A = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_LDX_MEM:
- X = (memvalid & (1UL << f_k)) ?
- mem[f_k] : 0;
+ X = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -328,16 +331,16 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return f_k;
+ return K;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- memvalid |= 1UL << f_k;
- mem[f_k] = A;
+ memvalid |= 1UL << K;
+ mem[K] = A;
continue;
case BPF_S_STX:
- memvalid |= 1UL << f_k;
- mem[f_k] = X;
+ memvalid |= 1UL << K;
+ mem[K] = X;
continue;
default:
WARN_ON(1);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22..dac7ed6 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
if (likely(skb->dev &&
skb->dev->phydev &&
skb->dev->phydev->drv))
- return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+ return sk_run_filter(skb, ptp_filter);
else
return PTP_CLASS_NONE;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2096456..b6372dd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter != NULL)
- res = sk_run_filter(skb, filter->insns, filter->len);
+ res = sk_run_filter(skb, filter->insns);
rcu_read_unlock_bh();
return res;
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
From: Changli Gao @ 2010-11-19 12:32 UTC
To: Eric Dumazet; +Cc: David Miller, Hagen Paul Pfeifer, netdev
On Fri, Nov 19, 2010 at 7:17 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Friday, 19 November 2010 at 10:54 +0100, Eric Dumazet wrote:
>
>> I believe we should revert the u32 f_k = fentry->k; part.
>>
>> fentry->k is as fast as f_k if f_k is stored on the stack, and it
>> avoids one instruction when fentry->k is not needed.
>>
>>
>
> A revert is not good on arches with a decent number of registers
> (x86_64, for example).
>
> Maybe some CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS is needed (or does one
> already exist?).
>
> Here is the patch to save 400 bytes on x86_32, and really speed up the
> damn thing on all arches.
>
> Thanks
>
> [PATCH net-next-2.6] filter: optimize sk_run_filter
>
> remove pc variable to avoid arithmetic to compute fentry at each filter
> instruction. Jumps directly manipulate fentry pointer.
>
> As the last instruction of filter[] is guaranteed to be a RETURN, and
> all jumps are before the last instruction, we dont need to check filter
> bounds (number of instructions in filter array) at each iteration, so we
> remove it from sk_run_filter() params.
>
> On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
> (filter: make sure filters dont read uninitialized memory)
>
> Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
> avoid too many ifdefs in this code.
>
> This helps compiler to use cpu registers to hold fentry and A
> accumulator.
>
> On x86_32, this saves 401 bytes, and more important, sk_run_filter()
> runs much faster because less register pressure (One less conditional
> branch per BPF instruction)
>
> # size net/core/filter.o net/core/filter_pre.o
> text data bss dec hex filename
> 2948 0 0 2948 b84 net/core/filter.o
> 3349 0 0 3349 d15 net/core/filter_pre.o
>
> on x86_64 :
> # size net/core/filter.o net/core/filter_pre.o
> text data bss dec hex filename
> 5173 0 0 5173 1435 net/core/filter.o
> 5224 0 0 5224 1468 net/core/filter_pre.o
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Changli Gao <xiaosuo@gmail.com>
> Cc: Hagen Paul Pfeifer <hagen@jauu.net>
> ---
> include/linux/filter.h | 2
> net/core/filter.c | 93 +++++++++++++++++++-------------------
> net/core/timestamping.c | 2
> net/packet/af_packet.c | 2
> 4 files changed, 51 insertions(+), 48 deletions(-)
>
you missed the users of sk_run_filter in directory dev/.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
From: Changli Gao @ 2010-11-19 12:57 UTC
To: Eric Dumazet; +Cc: David Miller, Hagen Paul Pfeifer, netdev
On Fri, Nov 19, 2010 at 8:32 PM, Changli Gao <xiaosuo@gmail.com> wrote:
>
> you missed the users of sk_run_filter in directory dev/.
>
Sorry, the directory is drivers/.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* [PATCH net-next-2.6 v2] filter: optimize sk_run_filter
From: Eric Dumazet @ 2010-11-19 13:16 UTC
To: Changli Gao; +Cc: David Miller, Hagen Paul Pfeifer, netdev
On Friday, 19 November 2010 at 20:57 +0800, Changli Gao wrote:
> On Fri, Nov 19, 2010 at 8:32 PM, Changli Gao <xiaosuo@gmail.com> wrote:
> >
> > you missed the users of sk_run_filter in directory dev/.
> >
>
> Sorry, the directory is drivers/.
>
>
You're right, thanks!
[PATCH net-next-2.6 v2] filter: optimize sk_run_filter
Remove pc variable to avoid arithmetic to compute fentry at each filter
instruction. Jumps directly manipulate fentry pointer.
As the last instruction of filter[] is guaranteed to be a RETURN, and
all jumps are before the last instruction, we dont need to check filter
bounds (number of instructions in filter array) at each iteration, so we
remove it from sk_run_filter() params.
On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
(filter: make sure filters dont read uninitialized memory)
Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
avoid too many ifdefs in this code.
This helps compiler to use cpu registers to hold fentry and A
accumulator.
On x86_32, this saves 401 bytes, and more important, sk_run_filter()
runs much faster because less register pressure (One less conditional
branch per BPF instruction)
# size net/core/filter.o net/core/filter_pre.o
text data bss dec hex filename
2948 0 0 2948 b84 net/core/filter.o
3349 0 0 3349 d15 net/core/filter_pre.o
on x86_64 :
# size net/core/filter.o net/core/filter_pre.o
text data bss dec hex filename
5173 0 0 5173 1435 net/core/filter.o
5224 0 0 5224 1468 net/core/filter_pre.o
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Changli Gao <xiaosuo@gmail.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
---
V2: add missing changes for drivers/isdn/i4l/isdn_ppp.c &
drivers/net/ppp_generic.c
drivers/isdn/i4l/isdn_ppp.c | 14 ++---
drivers/net/ppp_generic.c | 12 +---
include/linux/filter.h | 2
net/core/filter.c | 93 +++++++++++++++++-----------------
net/core/timestamping.c | 2
net/packet/af_packet.c | 2
6 files changed, 61 insertions(+), 64 deletions(-)
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 97c5cc2..9e8162c 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
}
if (is->pass_filter
- && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) {
+ && sk_run_filter(skb, is->pass_filter) == 0) {
if (is->debug & 0x2)
printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
kfree_skb(skb);
return;
}
if (!(is->active_filter
- && sk_run_filter(skb, is->active_filter,
- is->active_len) == 0)) {
+ && sk_run_filter(skb, is->active_filter) == 0)) {
if (is->debug & 0x2)
printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
lp->huptimer = 0;
@@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
}
if (ipt->pass_filter
- && sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) {
+ && sk_run_filter(skb, ipt->pass_filter) == 0) {
if (ipt->debug & 0x4)
printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
kfree_skb(skb);
goto unlock;
}
if (!(ipt->active_filter
- && sk_run_filter(skb, ipt->active_filter,
- ipt->active_len) == 0)) {
+ && sk_run_filter(skb, ipt->active_filter) == 0)) {
if (ipt->debug & 0x4)
printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
lp->huptimer = 0;
@@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
}
drop |= is->pass_filter
- && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0;
+ && sk_run_filter(skb, is->pass_filter) == 0;
drop |= is->active_filter
- && sk_run_filter(skb, is->active_filter, is->active_len) == 0;
+ && sk_run_filter(skb, is->active_filter) == 0;
skb_push(skb, IPPP_MAX_HEADER - 4);
return drop;
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 09cf56d..0c91598a 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
a four-byte PPP header on each packet */
*skb_push(skb, 2) = 1;
if (ppp->pass_filter &&
- sk_run_filter(skb, ppp->pass_filter,
- ppp->pass_len) == 0) {
+ sk_run_filter(skb, ppp->pass_filter) == 0) {
if (ppp->debug & 1)
printk(KERN_DEBUG "PPP: outbound frame not passed\n");
kfree_skb(skb);
@@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
}
/* if this packet passes the active filter, record the time */
if (!(ppp->active_filter &&
- sk_run_filter(skb, ppp->active_filter,
- ppp->active_len) == 0))
+ sk_run_filter(skb, ppp->active_filter) == 0))
ppp->last_xmit = jiffies;
skb_pull(skb, 2);
#else
@@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
*skb_push(skb, 2) = 0;
if (ppp->pass_filter &&
- sk_run_filter(skb, ppp->pass_filter,
- ppp->pass_len) == 0) {
+ sk_run_filter(skb, ppp->pass_filter) == 0) {
if (ppp->debug & 1)
printk(KERN_DEBUG "PPP: inbound frame "
"not passed\n");
@@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
return;
}
if (!(ppp->active_filter &&
- sk_run_filter(skb, ppp->active_filter,
- ppp->active_len) == 0))
+ sk_run_filter(skb, ppp->active_filter) == 0))
ppp->last_recv = jiffies;
__skb_pull(skb, 2);
} else
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d7..447a775 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -147,7 +147,7 @@ struct sock;
extern int sk_filter(struct sock *sk, struct sk_buff *skb);
extern unsigned int sk_run_filter(struct sk_buff *skb,
- struct sock_filter *filter, int flen);
+ const struct sock_filter *filter);
extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
extern int sk_detach_filter(struct sock *sk);
extern int sk_chk_filter(struct sock_filter *filter, int flen);
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d..9e77b3c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
* sk_run_filter - run a filter on a socket
* @skb: buffer to run the filter on
* @filter: filter to apply
- * @flen: length of filter
*
* Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
*/
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
{
void *ptr;
u32 A = 0; /* Accumulator */
@@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
unsigned long memvalid = 0;
u32 tmp;
int k;
- int pc;
BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
- for (pc = 0; pc < flen; pc++) {
- const struct sock_filter *fentry = &filter[pc];
- u32 f_k = fentry->k;
+ for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define K (fentry->k)
+#else
+ const u32 K = fentry->k;
+#endif
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += f_k;
+ A += K;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= f_k;
+ A -= K;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= f_k;
+ A *= K;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= f_k;
+ A /= K;
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= f_k;
+ A &= K;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= f_k;
+ A |= K;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= f_k;
+ A <<= K;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= f_k;
+ A >>= K;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += f_k;
+ fentry += K;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > f_k) ? fentry->jt : fentry->jf;
+ fentry += (A > K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= f_k) ? fentry->jt : fentry->jf;
+ fentry += (A >= K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == f_k) ? fentry->jt : fentry->jf;
+ fentry += (A == K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & f_k) ? fentry->jt : fentry->jf;
+ fentry += (A & K) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
- pc += (A > X) ? fentry->jt : fentry->jf;
+ fentry += (A > X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_X:
- pc += (A >= X) ? fentry->jt : fentry->jf;
+ fentry += (A >= X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_X:
- pc += (A == X) ? fentry->jt : fentry->jf;
+ fentry += (A == X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_X:
- pc += (A & X) ? fentry->jt : fentry->jf;
+ fentry += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = f_k;
+ k = K;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
@@ -268,7 +271,7 @@ load_w:
}
break;
case BPF_S_LD_H_ABS:
- k = f_k;
+ k = K;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
@@ -277,7 +280,7 @@ load_h:
}
break;
case BPF_S_LD_B_ABS:
- k = f_k;
+ k = K;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
@@ -292,34 +295,34 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + f_k;
+ k = X + K;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + f_k;
+ k = X + K;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + f_k;
+ k = X + K;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, f_k, 1, &tmp);
+ ptr = load_pointer(skb, K, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = f_k;
+ A = K;
continue;
case BPF_S_LDX_IMM:
- X = f_k;
+ X = K;
continue;
case BPF_S_LD_MEM:
- A = (memvalid & (1UL << f_k)) ?
- mem[f_k] : 0;
+ A = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_LDX_MEM:
- X = (memvalid & (1UL << f_k)) ?
- mem[f_k] : 0;
+ X = (memvalid & (1UL << K)) ?
+ mem[K] : 0;
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -328,16 +331,16 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return f_k;
+ return K;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- memvalid |= 1UL << f_k;
- mem[f_k] = A;
+ memvalid |= 1UL << K;
+ mem[K] = A;
continue;
case BPF_S_STX:
- memvalid |= 1UL << f_k;
- mem[f_k] = X;
+ memvalid |= 1UL << K;
+ mem[K] = X;
continue;
default:
WARN_ON(1);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22..dac7ed6 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
if (likely(skb->dev &&
skb->dev->phydev &&
skb->dev->phydev->drv))
- return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+ return sk_run_filter(skb, ptp_filter);
else
return PTP_CLASS_NONE;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2096456..b6372dd 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter != NULL)
- res = sk_run_filter(skb, filter->insns, filter->len);
+ res = sk_run_filter(skb, filter->insns);
rcu_read_unlock_bh();
return res;
* Re: [PATCH net-next-2.6 v2] filter: optimize sk_run_filter
From: Changli Gao @ 2010-11-19 14:13 UTC
To: Eric Dumazet; +Cc: David Miller, Hagen Paul Pfeifer, netdev
On Fri, Nov 19, 2010 at 9:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Friday, 19 November 2010 at 20:57 +0800, Changli Gao wrote:
>> On Fri, Nov 19, 2010 at 8:32 PM, Changli Gao <xiaosuo@gmail.com> wrote:
>> >
>> > you missed the users of sk_run_filter in directory dev/.
>> >
>>
>> Sorry, the directory is drivers/.
>>
>>
>
> You're right, thanks!
>
> [PATCH net-next-2.6 v2] filter: optimize sk_run_filter
>
> Remove pc variable to avoid arithmetic to compute fentry at each filter
> instruction. Jumps directly manipulate fentry pointer.
>
> As the last instruction of filter[] is guaranteed to be a RETURN, and
> all jumps are before the last instruction, we dont need to check filter
> bounds (number of instructions in filter array) at each iteration, so we
> remove it from sk_run_filter() params.
>
> On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
> (filter: make sure filters dont read uninitialized memory)
>
> Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
> avoid too many ifdefs in this code.
>
> This helps compiler to use cpu registers to hold fentry and A
> accumulator.
>
> On x86_32, this saves 401 bytes, and more important, sk_run_filter()
> runs much faster because less register pressure (One less conditional
> branch per BPF instruction)
>
> # size net/core/filter.o net/core/filter_pre.o
> text data bss dec hex filename
> 2948 0 0 2948 b84 net/core/filter.o
> 3349 0 0 3349 d15 net/core/filter_pre.o
>
> on x86_64 :
> # size net/core/filter.o net/core/filter_pre.o
> text data bss dec hex filename
> 5173 0 0 5173 1435 net/core/filter.o
> 5224 0 0 5224 1468 net/core/filter_pre.o
>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> Cc: Changli Gao <xiaosuo@gmail.com>
> Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Acked-by: Changli Gao <xiaosuo@gmail.com>
--
Regards,
Changli Gao(xiaosuo@gmail.com)
* Re: [PATCH net-next-2.6 v2] filter: optimize sk_run_filter
From: David Miller @ 2010-11-19 17:52 UTC
To: xiaosuo; +Cc: eric.dumazet, hagen, netdev
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 19 Nov 2010 22:13:07 +0800
> On Fri, Nov 19, 2010 at 9:16 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> [PATCH net-next-2.6 v2] filter: optimize sk_run_filter
>>
>> Remove pc variable to avoid arithmetic to compute fentry at each filter
>> instruction. Jumps directly manipulate fentry pointer.
>>
>> As the last instruction of filter[] is guaranteed to be a RETURN, and
>> all jumps are before the last instruction, we dont need to check filter
>> bounds (number of instructions in filter array) at each iteration, so we
>> remove it from sk_run_filter() params.
>>
>> On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
>> (filter: make sure filters dont read uninitialized memory)
>>
>> Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
>> avoid too many ifdefs in this code.
>>
>> This helps compiler to use cpu registers to hold fentry and A
>> accumulator.
>>
>> On x86_32, this saves 401 bytes, and more important, sk_run_filter()
>> runs much faster because less register pressure (One less conditional
>> branch per BPF instruction)
>>
>> # size net/core/filter.o net/core/filter_pre.o
>> text data bss dec hex filename
>> 2948 0 0 2948 b84 net/core/filter.o
>> 3349 0 0 3349 d15 net/core/filter_pre.o
>>
>> on x86_64 :
>> # size net/core/filter.o net/core/filter_pre.o
>> text data bss dec hex filename
>> 5173 0 0 5173 1435 net/core/filter.o
>> 5224 0 0 5224 1468 net/core/filter_pre.o
>>
>> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
>> Cc: Changli Gao <xiaosuo@gmail.com>
>> Cc: Hagen Paul Pfeifer <hagen@jauu.net>
>
> Acked-by: Changli Gao <xiaosuo@gmail.com>
Ok, I'm applying this to net-next-2.6 for now. It keeps the
"f_k" situation optimal for all cases; I've taken a look at the asm
output on every platform (sparc64, x86-32, x86-64).
I can't currently think of a way to get rid of that ifdef,
so for now it's a small price to pay to get this optimal.
Thanks Eric!
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6 v2] filter: optimize sk_run_filter
2010-11-19 13:16 ` [PATCH net-next-2.6 v2] " Eric Dumazet
2010-11-19 14:13 ` Changli Gao
@ 2010-11-19 14:17 ` Tetsuo Handa
2010-11-19 14:35 ` Eric Dumazet
1 sibling, 1 reply; 27+ messages in thread
From: Tetsuo Handa @ 2010-11-19 14:17 UTC (permalink / raw)
To: eric.dumazet, xiaosuo; +Cc: davem, hagen, netdev
Just my thought again...
Eric Dumazet wrote:
> @@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
> unsigned long memvalid = 0;
> u32 tmp;
> int k;
Is this 'k' useful?
> - int pc;
>
> BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
> /*
> * Process array of filter instructions.
> */
> - for (pc = 0; pc < flen; pc++) {
> - const struct sock_filter *fentry = &filter[pc];
> - u32 f_k = fentry->k;
> + for (;; fentry++) {
> +#if defined(CONFIG_X86_32)
> +#define K (fentry->k)
> +#else
> + const u32 K = fentry->k;
What happens if we use
u32 f_k = fentry->k;
and
> case BPF_S_LD_W_ABS:
> - k = f_k;
> + k = K;
remove this assignment and
> load_w:
> ptr = load_pointer(skb, k, 4, &tmp);
change to
ptr = load_pointer(skb, (int) f_k, 4, &tmp);
and
> case BPF_S_LD_W_IND:
> - k = X + f_k;
> + k = X + K;
change to
f_k += X;
?
> goto load_w;
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6 v2] filter: optimize sk_run_filter
2010-11-19 14:17 ` Tetsuo Handa
@ 2010-11-19 14:35 ` Eric Dumazet
0 siblings, 0 replies; 27+ messages in thread
From: Eric Dumazet @ 2010-11-19 14:35 UTC (permalink / raw)
To: Tetsuo Handa; +Cc: xiaosuo, davem, hagen, netdev
On Friday 19 November 2010 at 23:17 +0900, Tetsuo Handa wrote:
> Just my thought again...
>
> Eric Dumazet wrote:
> > @@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
> > unsigned long memvalid = 0;
> > u32 tmp;
> > int k;
>
> Is this 'k' useful?
>
Yes, this is useful; read on, later we have:
k = X + K;
goto load_w;
K is read-only; k is a read/write temp variable (on the stack).
> > - int pc;
> >
> > BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
> > /*
> > * Process array of filter instructions.
> > */
> > - for (pc = 0; pc < flen; pc++) {
> > - const struct sock_filter *fentry = &filter[pc];
> > - u32 f_k = fentry->k;
> > + for (;; fentry++) {
> > +#if defined(CONFIG_X86_32)
> > +#define K (fentry->k)
> > +#else
> > + const u32 K = fentry->k;
>
> What happens if we use
>
> u32 f_k = fentry->k;
>
> and
>
> > case BPF_S_LD_W_ABS:
> > - k = f_k;
> > + k = K;
>
> remove this assignment and
>
> > load_w:
> > ptr = load_pointer(skb, k, 4, &tmp);
>
> change to
>
> ptr = load_pointer(skb, (int) f_k, 4, &tmp);
>
> and
>
> > case BPF_S_LD_W_IND:
> > - k = X + f_k;
> > + k = X + K;
>
> change to
>
> f_k += X;
>
> ?
> > goto load_w;
This won't work: K is really a constant, part of the filter program,
while k is the (existing) temp variable.
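Putting the two quoted hunks together, the roles of K and k in the v2
patch look like this (simplified; only the two word-load opcodes shown):

	for (;; fentry++) {
#if defined(CONFIG_X86_32)
#define K (fentry->k)		/* x86_32: read the constant in place, no register burned */
#else
		const u32 K = fentry->k; /* elsewhere: let the compiler register-allocate K */
#endif
		switch (fentry->code) {
		case BPF_S_LD_W_ABS:
			k = K;		/* k is the mutable scratch variable */
			goto load_w;
		case BPF_S_LD_W_IND:
			k = X + K;	/* mutating K instead would corrupt the filter program */
			goto load_w;
		/* ... */
		}
	}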
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
2010-11-19 11:17 ` [PATCH net-next-2.6] filter: optimize sk_run_filter Eric Dumazet
2010-11-19 12:32 ` Changli Gao
@ 2010-11-19 16:21 ` David Miller
2010-11-19 16:55 ` Eric Dumazet
1 sibling, 1 reply; 27+ messages in thread
From: David Miller @ 2010-11-19 16:21 UTC (permalink / raw)
To: eric.dumazet; +Cc: hagen, netdev, xiaosuo
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Nov 2010 12:17:52 +0100
> On Friday 19 November 2010 at 10:54 +0100, Eric Dumazet wrote:
>
>> I believe we should revert the u32 f_k = fentry->k; part
>>
>> fentry->k is as fast as f_k if stored on the stack, and avoids one
>> instruction if fentry->k is not needed.
>>
>>
>
> A revert is not good on arches with a decent number of registers (x86_64,
> for example).
-EFIX_THE_DAMN_COMPILER
We never make calls out of this function or touch volatile memory or
create a full memory barrier between the assignment of f_k and its
uses.
Therefore if common sub-expression elimination is working the compiler
will be able to decide properly whether to access things via memory or
use a register for the value.
Remember this is why we have that ACCESS_ONCE() thing.
We can't have it both ways, either ACCESS_ONCE() should be removed or
we should never make changes like yours and instead should submit
compiler bug reports :-)
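For reference, ACCESS_ONCE() was defined in include/linux/compiler.h at the
time as below; the sample use of it is illustrative, not something either
patch adds:

/* include/linux/compiler.h */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/* Illustrative: force exactly one load of fentry->k at this point,
 * opting this access out of common sub-expression elimination.
 */
u32 k_once = ACCESS_ONCE(fentry->k);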
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
2010-11-19 16:21 ` [PATCH net-next-2.6] " David Miller
@ 2010-11-19 16:55 ` Eric Dumazet
2010-11-19 17:05 ` David Miller
0 siblings, 1 reply; 27+ messages in thread
From: Eric Dumazet @ 2010-11-19 16:55 UTC (permalink / raw)
To: David Miller; +Cc: hagen, netdev, xiaosuo
On Friday 19 November 2010 at 08:21 -0800, David Miller wrote:
> -EFIX_THE_DAMN_COMPILER
>
> We never make calls out of this function or touch volatile memory or
> create a full memory barrier between the assignment of f_k and its
> uses.
>
> Therefore if common sub-expression elimination is working the compiler
> will be able to decide properly whether to access things via memory or
> use a register for the value.
>
> Remember this is why we have that ACCESS_ONCE() thing.
>
> We can't have it both ways, either ACCESS_ONCE() should be removed or
> we should never make changes like yours and instead should submit
> compiler bug reports :-)
The compiler is OK IMHO in this case. It does exactly what is required.
The compiler cannot load fentry->k before the switch() if some expression
doesn't use it, as doing so could trigger a fault.
After the "f_k = fentry->k;" commit, it was required to do so.
Unfortunately, on x86_32 it also decided that f_k was more valuable in a cpu
register, and the accumulator A lost its register and got a stack slot
instead.
Not many BPF instructions use K, and when it is used, it's used _once_ per BPF
instruction. There is no real gain in putting it in a register, apart from code
size if (and only if) it is held in a cpu register, because each
assembler instruction using a register instead of the stack is a bit
shorter.
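As an illustrative x86_32 encoding comparison (not from the thread), the
same add is two bytes with a register operand and four with a stack-slot
operand, objdump-style:

	01 d8			add    %ebx,%eax	# register operand: 2 bytes
	03 44 24 08		add    0x8(%esp),%eax	# stack slot: 4 bytes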
In the end, I believe the "f_k = fentry->k;" change was a good-looking idea,
and good for some arches, but we forgot that x86_32 (and probably some
others) have few available registers to play with.
Have a good weekend!
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
2010-11-19 16:55 ` Eric Dumazet
@ 2010-11-19 17:05 ` David Miller
2010-11-19 17:15 ` Stephen Hemminger
2010-11-19 17:16 ` Eric Dumazet
0 siblings, 2 replies; 27+ messages in thread
From: David Miller @ 2010-11-19 17:05 UTC (permalink / raw)
To: eric.dumazet; +Cc: hagen, netdev, xiaosuo
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Nov 2010 17:55:59 +0100
> Unfortunately, on x86_32 it also decided that f_k was more valuable in a cpu
> register, and the accumulator A lost its register and got a stack slot
> instead.
Ok that tradeoff is terrible, but it depends upon knowledge we haven't
given to the compiler (yet).
Let me think about this a bit...
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
2010-11-19 17:05 ` David Miller
@ 2010-11-19 17:15 ` Stephen Hemminger
2010-11-19 17:21 ` David Miller
2010-11-19 17:16 ` Eric Dumazet
1 sibling, 1 reply; 27+ messages in thread
From: Stephen Hemminger @ 2010-11-19 17:15 UTC (permalink / raw)
To: David Miller; +Cc: eric.dumazet, hagen, netdev, xiaosuo
On Fri, 19 Nov 2010 09:05:16 -0800 (PST)
David Miller <davem@davemloft.net> wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 19 Nov 2010 17:55:59 +0100
>
> > Unfortunately, on x86_32 it also decided that f_k was more valuable in a cpu
> > register, and the accumulator A lost its register and got a stack slot
> > instead.
>
> Ok that tradeoff is terrible, but it depends upon knowledge we haven't
> given to the compiler (yet).
>
> Let me think about this a bit...
Are 'register' modifiers a no-op on current GCC?
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
2010-11-19 17:15 ` Stephen Hemminger
@ 2010-11-19 17:21 ` David Miller
0 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2010-11-19 17:21 UTC (permalink / raw)
To: shemminger; +Cc: eric.dumazet, hagen, netdev, xiaosuo
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 19 Nov 2010 09:15:37 -0800
> On Fri, 19 Nov 2010 09:05:16 -0800 (PST)
> David Miller <davem@davemloft.net> wrote:
>
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Fri, 19 Nov 2010 17:55:59 +0100
>>
>> > Unfortunately, on x86_32 it also decided that f_k was more valuable in a cpu
>> > register, and the accumulator A lost its register and got a stack slot
>> > instead.
>>
>> Ok that tradeoff is terrible, but it depends upon knowledge we haven't
>> given to the compiler (yet).
>>
>> Let me think about this a bit...
>
> Are 'register' modifiers a no-op on current GCC?
Yes, they are.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
2010-11-19 17:05 ` David Miller
2010-11-19 17:15 ` Stephen Hemminger
@ 2010-11-19 17:16 ` Eric Dumazet
2010-11-19 17:25 ` David Miller
1 sibling, 1 reply; 27+ messages in thread
From: Eric Dumazet @ 2010-11-19 17:16 UTC (permalink / raw)
To: David Miller; +Cc: hagen, netdev, xiaosuo
On Friday 19 November 2010 at 09:05 -0800, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 19 Nov 2010 17:55:59 +0100
>
> > Unfortunately, on x86_32 it also decided that f_k was more valuable in a cpu
> > register, and the accumulator A lost its register and got a stack slot
> > instead.
>
> Ok that tradeoff is terrible, but it depends upon knowledge we haven't
> given to the compiler (yet).
>
> Let me think about this a bit...
By the way, I tried the 'register' keyword, and I knew it was a stupid
idea before even trying, since the compiler ignored me: "What the hell
makes you think you can tell me how to optimize this code?"
(I am quite sure my laptop even smiled and sent its neighbours a
broadcast saying "Hey buddies, this dumb guy tried the register C
keyword, isn't it funny?")
Oh well...
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: optimize sk_run_filter
2010-11-19 17:16 ` Eric Dumazet
@ 2010-11-19 17:25 ` David Miller
0 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2010-11-19 17:25 UTC (permalink / raw)
To: eric.dumazet; +Cc: hagen, netdev, xiaosuo
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Nov 2010 18:16:24 +0100
> On Friday 19 November 2010 at 09:05 -0800, David Miller wrote:
>> From: Eric Dumazet <eric.dumazet@gmail.com>
>> Date: Fri, 19 Nov 2010 17:55:59 +0100
>>
>> > Unfortunately, on x86_32 it also decided that f_k was more valuable in a cpu
>> > register, and the accumulator A lost its register and got a stack slot
>> > instead.
>>
>> Ok that tradeoff is terrible, but it depends upon knowledge we haven't
>> given to the compiler (yet).
>>
>> Let me think about this a bit...
>
> By the way, I tried the 'register' keyword, and I knew it was a stupid
> idea before even trying, since the compiler ignored me: "What the hell
> makes you think you can tell me how to optimize this code?"
>
> (I am quite sure my laptop even smiled and sent its neighbours a
> broadcast saying "Hey buddies, this dumb guy tried the register C
> keyword, isn't it funny?")
:-)
I wonder though, since "fentry" is const in your recent patch,
you might not even need that X86_32 ifdef thing. GCC should
actually now be able to see that fentry->k cannot change, even
when we make external function calls via load_pointer() and such.
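One caveat worth noting here (illustrative; the thread leaves the question
open): const on the pointer only forbids writes through fentry inside
sk_run_filter() itself, it does not make the pointed-to program immutable,
so the compiler still has to rule out aliasing writes across calls like
load_pointer() before it may cache fentry->k:

/* v2 signature: pointer to const */
unsigned int sk_run_filter(struct sk_buff *skb,
			   const struct sock_filter *fentry);

/* "const" constrains writes through this pointer, not the object:
 * after an opaque external call, the compiler cannot in general
 * assume fentry->k is unchanged.
 */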
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH net-next-2.6] filter: cleanup codes[] init
2010-11-19 9:54 ` Eric Dumazet
2010-11-19 11:17 ` [PATCH net-next-2.6] filter: optimize sk_run_filter Eric Dumazet
@ 2010-11-19 12:21 ` Changli Gao
2010-11-19 18:07 ` David Miller
1 sibling, 1 reply; 27+ messages in thread
From: Changli Gao @ 2010-11-19 12:21 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David S. Miller, Hagen Paul Pfeifer, netdev
On Fri, Nov 19, 2010 at 5:54 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> Most probably you have "CONFIG_CC_OPTIMIZE_FOR_SIZE=y" which
> unfortunately is known to generate poor-looking code.
Yes. So
Acked-by: Changli Gao <xiaosuo@gmail.com>
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads: [~2010-11-19 18:07 UTC | newest]
Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-19 2:04 [PATCH] net: reduce the lines of code Changli Gao
2010-11-19 6:35 ` Hagen Paul Pfeifer
2010-11-19 7:17 ` Changli Gao
2010-11-19 7:51 ` Eric Dumazet
2010-11-19 7:56 ` [PATCH net-next-2.6] filter: cleanup codes[] init Eric Dumazet
2010-11-19 8:04 ` [PATCH net-next-2.6] filter: use reciprocal divide Eric Dumazet
2010-11-19 8:18 ` Changli Gao
2010-11-19 18:07 ` David Miller
2010-11-19 8:38 ` [PATCH net-next-2.6] filter: cleanup codes[] init Changli Gao
2010-11-19 9:54 ` Eric Dumazet
2010-11-19 11:17 ` [PATCH net-next-2.6] filter: optimize sk_run_filter Eric Dumazet
2010-11-19 12:32 ` Changli Gao
2010-11-19 12:57 ` Changli Gao
2010-11-19 13:16 ` [PATCH net-next-2.6 v2] " Eric Dumazet
2010-11-19 14:13 ` Changli Gao
2010-11-19 17:52 ` David Miller
2010-11-19 14:17 ` Tetsuo Handa
2010-11-19 14:35 ` Eric Dumazet
2010-11-19 16:21 ` [PATCH net-next-2.6] " David Miller
2010-11-19 16:55 ` Eric Dumazet
2010-11-19 17:05 ` David Miller
2010-11-19 17:15 ` Stephen Hemminger
2010-11-19 17:21 ` David Miller
2010-11-19 17:16 ` Eric Dumazet
2010-11-19 17:25 ` David Miller
2010-11-19 12:21 ` [PATCH net-next-2.6] filter: cleanup codes[] init Changli Gao
2010-11-19 18:07 ` David Miller