* [PATCH 5/8] bnx2x: don't reset device while reading its configuration.
From: Dmitry Kravkov @ 2011-08-30 10:08 UTC (permalink / raw)
To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1314698926-24525-1-git-send-email-dmitry@broadcom.com>
Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
drivers/net/bnx2x/bnx2x_main.c | 24 +++++++++++++++---------
drivers/net/bnx2x/bnx2x_reg.h | 2 +-
2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 9633e9b..00dc8f0 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -5822,7 +5822,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
* take the UNDI lock to protect undi_unload flow from accessing
* registers while we're resetting the chip
*/
- bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+ bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
bnx2x_reset_common(bp);
REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff);
@@ -5834,7 +5834,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
}
REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, val);
- bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+ bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
bnx2x_init_block(bp, BLOCK_MISC, PHASE_COMMON);
@@ -8570,10 +8570,12 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
/* Check if there is any driver already loaded */
val = REG_RD(bp, MISC_REG_UNPREPARED);
if (val == 0x1) {
- /* Check if it is the UNDI driver
+
+ bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+ /*
+ * Check if it is the UNDI driver
* UNDI driver initializes CID offset for normal bell to 0x7
*/
- bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
val = REG_RD(bp, DORQ_REG_NORM_CID_OFST);
if (val == 0x7) {
u32 reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS;
@@ -8611,9 +8613,6 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
bnx2x_fw_command(bp, reset_code, 0);
}
- /* now it's safe to release the lock */
- bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
-
bnx2x_undi_int_disable(bp);
port = BP_PORT(bp);
@@ -8663,8 +8662,10 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
bp->fw_seq =
(SHMEM_RD(bp, func_mb[bp->pf_num].drv_mb_header) &
DRV_MSG_SEQ_NUMBER_MASK);
- } else
- bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+ }
+
+ /* now it's safe to release the lock */
+ bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
}
}
@@ -9440,6 +9441,10 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
bp->igu_base_sb = 0;
} else {
bp->common.int_block = INT_BLOCK_IGU;
+
+ /* do not allow device reset during IGU info processing */
+ bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+
val = REG_RD(bp, IGU_REG_BLOCK_CONFIGURATION);
if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) {
@@ -9471,6 +9476,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
bnx2x_get_igu_cam_info(bp);
+ bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
}
/*
diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h
index 40266c1..dac217d 100644
--- a/drivers/net/bnx2x/bnx2x_reg.h
+++ b/drivers/net/bnx2x/bnx2x_reg.h
@@ -5766,7 +5766,7 @@
#define HW_LOCK_RESOURCE_RECOVERY_LEADER_0 8
#define HW_LOCK_RESOURCE_RECOVERY_LEADER_1 9
#define HW_LOCK_RESOURCE_SPIO 2
-#define HW_LOCK_RESOURCE_UNDI 5
+#define HW_LOCK_RESOURCE_RESET 5
#define AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT (0x1<<4)
#define AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR (0x1<<5)
#define AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR (0x1<<18)
--
1.7.2.2
^ permalink raw reply related
* [PATCH 4/8] bnx2x: fix MF for 4-port devices
From: Dmitry Kravkov @ 2011-08-30 10:08 UTC (permalink / raw)
To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1314698926-24525-1-git-send-email-dmitry@broadcom.com>
Number of VNs for 4-port devices is 2 instead of 4
Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
drivers/net/bnx2x/bnx2x.h | 15 +++++++------
drivers/net/bnx2x/bnx2x_main.c | 43 +++++++++++++++++++++++---------------
drivers/net/bnx2x/bnx2x_stats.c | 4 +-
3 files changed, 36 insertions(+), 26 deletions(-)
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 8529732..2621a1c 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1166,11 +1166,12 @@ struct bnx2x {
#define BP_PORT(bp) (bp->pfid & 1)
#define BP_FUNC(bp) (bp->pfid)
#define BP_ABS_FUNC(bp) (bp->pf_num)
-#define BP_E1HVN(bp) (bp->pfid >> 1)
-#define BP_VN(bp) (BP_E1HVN(bp)) /*remove when approved*/
-#define BP_L_ID(bp) (BP_E1HVN(bp) << 2)
-#define BP_FW_MB_IDX(bp) (BP_PORT(bp) +\
- BP_VN(bp) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2 : 1))
+#define BP_VN(bp) ((bp)->pfid >> 1)
+#define BP_MAX_VN_NUM(bp) (CHIP_MODE_IS_4_PORT(bp) ? 2 : 4)
+#define BP_L_ID(bp) (BP_VN(bp) << 2)
+#define BP_FW_MB_IDX_VN(bp, vn) (BP_PORT(bp) +\
+ (vn) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2 : 1))
+#define BP_FW_MB_IDX(bp) BP_FW_MB_IDX_VN(bp, BP_VN(bp))
struct net_device *dev;
struct pci_dev *pdev;
@@ -1833,7 +1834,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
#define MAX_DMAE_C_PER_PORT 8
#define INIT_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
- BP_E1HVN(bp))
+ BP_VN(bp))
#define PMF_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
E1HVN_MAX)
@@ -1859,7 +1860,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
/* must be used on a CID before placing it on a HW ring */
#define HW_CID(bp, x) ((BP_PORT(bp) << 23) | \
- (BP_E1HVN(bp) << BNX2X_SWCID_SHIFT) | \
+ (BP_VN(bp) << BNX2X_SWCID_SHIFT) | \
(x))
#define SP_DESC_CNT (BCM_PAGE_SIZE / sizeof(struct eth_spe))
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 3f93e86..9633e9b 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -407,8 +407,8 @@ u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type,
opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET);
opcode |= (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0);
- opcode |= ((BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT) |
- (BP_E1HVN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
+ opcode |= ((BP_VN(bp) << DMAE_CMD_E1HVN_SHIFT) |
+ (BP_VN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT);
#ifdef __BIG_ENDIAN
@@ -1419,7 +1419,7 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp)
if (!CHIP_IS_E1(bp)) {
/* init leading/trailing edge */
if (IS_MF(bp)) {
- val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+ val = (0xee0f | (1 << (BP_VN(bp) + 4)));
if (bp->port.pmf)
/* enable nig and gpio3 attention */
val |= 0x1100;
@@ -1471,7 +1471,7 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp)
/* init leading/trailing edge */
if (IS_MF(bp)) {
- val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+ val = (0xee0f | (1 << (BP_VN(bp) + 4)));
if (bp->port.pmf)
/* enable nig and gpio3 attention */
val |= 0x1100;
@@ -2287,7 +2287,7 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
int vn;
bp->vn_weight_sum = 0;
- for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+ for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
u32 vn_cfg = bp->mf_config[vn];
u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
@@ -2320,12 +2320,18 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
}
+/* returns func by VN for current port */
+static inline int func_by_vn(struct bnx2x *bp, int vn)
+{
+ return 2 * vn + BP_PORT(bp);
+}
+
static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
{
struct rate_shaping_vars_per_vn m_rs_vn;
struct fairness_vars_per_vn m_fair_vn;
u32 vn_cfg = bp->mf_config[vn];
- int func = 2*vn + BP_PORT(bp);
+ int func = func_by_vn(bp, vn);
u16 vn_min_rate, vn_max_rate;
int i;
@@ -2422,7 +2428,7 @@ void bnx2x_read_mf_cfg(struct bnx2x *bp)
*
* and there are 2 functions per port
*/
- for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+ for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
int /*abs*/func = n * (2 * vn + BP_PORT(bp)) + BP_PATH(bp);
if (func >= E1H_FUNC_MAX)
@@ -2454,7 +2460,7 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
/* calculate and set min-max rate for each vn */
if (bp->port.pmf)
- for (vn = VN_0; vn < E1HVN_MAX; vn++)
+ for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++)
bnx2x_init_vn_minmax(bp, vn);
/* always enable rate shaping and fairness */
@@ -2473,16 +2479,15 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
static inline void bnx2x_link_sync_notify(struct bnx2x *bp)
{
- int port = BP_PORT(bp);
int func;
int vn;
/* Set the attention towards other drivers on the same port */
- for (vn = VN_0; vn < E1HVN_MAX; vn++) {
- if (vn == BP_E1HVN(bp))
+ for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
+ if (vn == BP_VN(bp))
continue;
- func = ((vn << 1) | port);
+ func = func_by_vn(bp, vn);
REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
(LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
}
@@ -2577,7 +2582,7 @@ static void bnx2x_pmf_update(struct bnx2x *bp)
bnx2x_dcbx_pmf_update(bp);
/* enable nig attention */
- val = (0xff0f | (1 << (BP_E1HVN(bp) + 4)));
+ val = (0xff0f | (1 << (BP_VN(bp) + 4)));
if (bp->common.int_block == INT_BLOCK_HC) {
REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
@@ -6686,12 +6691,16 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
if (CHIP_MODE_IS_4_PORT(bp))
dsb_idx = BP_FUNC(bp);
else
- dsb_idx = BP_E1HVN(bp);
+ dsb_idx = BP_VN(bp);
prod_offset = (CHIP_INT_MODE_IS_BC(bp) ?
IGU_BC_BASE_DSB_PROD + dsb_idx :
IGU_NORM_BASE_DSB_PROD + dsb_idx);
+ /*
+ * igu prods come in chunks of E1HVN_MAX (4) -
+ * it does not matter what the current chip mode is
+ */
for (i = 0; i < (num_segs * E1HVN_MAX);
i += E1HVN_MAX) {
addr = IGU_REG_PROD_CONS_MEMORY +
@@ -7585,7 +7594,7 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode)
u32 val;
/* The mac address is written to entries 1-4 to
preserve entry 0 which is used by the PMF */
- u8 entry = (BP_E1HVN(bp) + 1)*8;
+ u8 entry = (BP_VN(bp) + 1)*8;
val = (mac_addr[0] << 8) | mac_addr[1];
EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry, val);
@@ -8792,13 +8801,13 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
static void __devinit bnx2x_get_igu_cam_info(struct bnx2x *bp)
{
int pfid = BP_FUNC(bp);
- int vn = BP_E1HVN(bp);
int igu_sb_id;
u32 val;
u8 fid, igu_sb_cnt = 0;
bp->igu_base_sb = 0xff;
if (CHIP_INT_MODE_IS_BC(bp)) {
+ int vn = BP_VN(bp);
igu_sb_cnt = bp->igu_sb_cnt;
bp->igu_base_sb = (CHIP_MODE_IS_4_PORT(bp) ? pfid : vn) *
FP_SB_MAX_E1x;
@@ -9488,7 +9497,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
bp->mf_ov = 0;
bp->mf_mode = 0;
- vn = BP_E1HVN(bp);
+ vn = BP_VN(bp);
if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) {
BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n",
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index f5d9b42..9908f2b 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -1392,7 +1392,7 @@ static void bnx2x_port_stats_base_init(struct bnx2x *bp)
static void bnx2x_func_stats_base_init(struct bnx2x *bp)
{
- int vn, vn_max = IS_MF(bp) ? E1HVN_MAX : E1VN_MAX;
+ int vn, vn_max = IS_MF(bp) ? BP_MAX_VN_NUM(bp) : E1VN_MAX;
u32 func_stx;
/* sanity */
@@ -1405,7 +1405,7 @@ static void bnx2x_func_stats_base_init(struct bnx2x *bp)
func_stx = bp->func_stx;
for (vn = VN_0; vn < vn_max; vn++) {
- int mb_idx = CHIP_IS_E1x(bp) ? 2*vn + BP_PORT(bp) : vn;
+ int mb_idx = BP_FW_MB_IDX_VN(bp, vn);
bp->func_stx = SHMEM_RD(bp, func_mb[mb_idx].fw_mb_param);
bnx2x_func_stats_init(bp);
--
1.7.2.2
^ permalink raw reply related
* Your Winning Coupon No.PBL2348974321
From: POWERBALL @ 2011-08-30 9:49 UTC (permalink / raw)
for more infomation regarding the sum of £980,000:00 Pound
^ permalink raw reply
* Re: cls_rsvp.h fix
From: Eric Dumazet @ 2011-08-30 10:20 UTC (permalink / raw)
To: igorm; +Cc: netdev
In-Reply-To: <258b72c5817cd9a8437533d7908b546e.squirrel@kondor.etf.bg.ac.rs>
Le mardi 30 août 2011 à 11:33 +0200, "Igor Maravić" a écrit :
> File cls_rsvp.h was outdated. I'm sending you fix for this file.
>
> Best regards
> Igor Maravić
Hi Igor
Please read Documentation/SubmittingPatches to properly format and sign
your patch.
^ permalink raw reply
* Re: [PATCH 10/14] intel: convert to SKB paged frag API.
From: Jeff Kirsher @ 2011-08-30 10:52 UTC (permalink / raw)
To: Ian Campbell
Cc: netdev@vger.kernel.org, Brandeburg, Jesse, Allan, Bruce W,
Wyborny, Carolyn, Skidmore, Donald C, Rose, Gregory V
In-Reply-To: <1314695910-22344-10-git-send-email-ian.campbell@citrix.com>
[-- Attachment #1: Type: text/plain, Size: 1265 bytes --]
On Tue, 2011-08-30 at 02:18 -0700, Ian Campbell wrote:
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
> Cc: Bruce Allan <bruce.w.allan@intel.com>
> Cc: Carolyn Wyborny <carolyn.wyborny@intel.com>
> Cc: Don Skidmore <donald.c.skidmore@intel.com>
> Cc: Greg Rose <gregory.v.rose@intel.com>
> Cc: PJ Waskiewicz <peter.p.waskiewicz.jr@intel.com>
> Cc: Alex Duyck <alexander.h.duyck@intel.com>
> Cc: John Ronciak <john.ronciak@intel.com>
> Cc: e1000-devel@lists.sourceforge.net
> Cc: netdev@vger.kernel.org
> ---
> drivers/net/ethernet/intel/e1000/e1000_main.c | 16
> +++++++++-------
> drivers/net/ethernet/intel/e1000e/netdev.c | 7 +++----
> drivers/net/ethernet/intel/igb/igb_main.c | 5 +----
> drivers/net/ethernet/intel/igbvf/netdev.c | 5 +----
> drivers/net/ethernet/intel/ixgb/ixgb_main.c | 6 +++---
> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +--
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 10 ++++------
> 7 files changed, 22 insertions(+), 30 deletions(-)
The changes look fine to me.
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 490 bytes --]
^ permalink raw reply
* AWARD
From: BALL @ 2011-08-30 10:23 UTC (permalink / raw)
for more information regarding the sum of £980,000:00 Pound
^ permalink raw reply
* [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 10:57 UTC (permalink / raw)
To: netfilter-devel; +Cc: netdev, Florian Westphal
When invoking iptables hooks from bridge netfilter, the assumption
that non-confirmed skb->nfct is never shared no longer holds,
as bridge code clones skbs when e.g. forwarding packets to multiple
bridge ports.
When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
invoked simultaneously for the same conntrack:
[ 3196.798768] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:300!
[..]
[ 3196.798768] [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
[ 3196.798768] [<ffffffffa03207e4>] ? br_handle_frame_finish+0x0/0x13b [bridge]
[ 3196.798768] [<ffffffffa02a61a5>] ? alloc_null_binding+0x47/0x4c [iptable_nat]
[ 3196.798768] [<ffffffffa02a64eb>] ? nf_nat_fn+0x193/0x1fb [iptable_nat]
[ 3196.798768] [<ffffffff8120d4c5>] ? nf_iterate+0x40/0x9f
[ 3196.798768] [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
[ 3196.798768] [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
[ 3196.798768] [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
[ 3196.798768] [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
[ 3196.798768] [<ffffffff8121369c>] ? ip_rcv_finish+0x0/0x340
[ 3196.798768] [<ffffffff81213ed7>] ? ip_local_deliver+0x52/0x6c
[ 3196.798768] [<ffffffff812139c2>] ? ip_rcv_finish+0x326/0x340
[ 3196.798768] [<ffffffff81213c4f>] ? ip_rcv+0x273/0x2b8
[ 3196.798768] [<ffffffff811f1384>] ? process_backlog+0x8d/0xc6
[ 3196.798768] [<ffffffff811f2f85>] ? net_rx_action+0xa2/0x1cf
[ 3196.798768] [<ffffffff8103d3c2>] ? __do_softirq+0x8b/0x10b
[ 3196.798768] [<ffffffff8100c9dc>] ? call_softirq+0x1c/0x28
[ 3196.798768] [<ffffffff8100dd15>] ? do_softirq+0x31/0x66
[ 3196.798768] [<ffffffff8103d267>] ? irq_exit+0x36/0x78
[ 3196.798768] [<ffffffff8100d41a>] ? do_IRQ+0xa0/0xb6
[ 3196.798768] [<ffffffff8100c253>] ? ret_from_intr+0x0/0xa
[..]
[ 3196.798768] Code: be 2b 01 00 00 48 c7 c7 e8 cd 29 a0 e8 e8 d7 d9 e0 45 85 ff 49 8b 45 78 75 06 48 c1 e8 07 eb 04 48 c1 e8 08 83 e0 01 85 c0 74 04 <0f> 0b eb fe 49 8d 75 50 48 8d bc 24 80 00 00 00 e8 83 38 f7 ff
[ 3196.798768] RIP [<ffffffffa029b68f>] nf_nat_setup_info+0x8a/0x564 [nf_nat]
[ 3196.798768] RSP <ffff880001603bf0>
Fix this by changing ->nfct of all clones to untracked.
This should be OK, because if we do a full copy of ->nfct we'd
end up trying to confirm the same tuples multiple times, which results in
NF_DROP for the cloned skbs.
Also, we only need to do this if the conntrack is unconfirmed.
Signed-off-by: Florian Westphal <fw@strlen.de>
---
net/bridge/br_netfilter.c | 34 ++++++++++++++++++++++++++++++++++
1 files changed, 34 insertions(+), 0 deletions(-)
I have one alternate patch that changes nf_nat_setup_info
to detect conflicts by forcing serialization via ct->lock spinlock.
But it is silly to do this for the sake of bridge netfilter only...
Any other ideas?
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3fa1231..7d47f34 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -42,6 +42,10 @@
#include <linux/sysctl.h>
#endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#define skb_origaddr(skb) (((struct bridge_skb_cb *) \
(skb->nf_bridge->data))->daddr.ipv4)
#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr)
@@ -158,10 +162,40 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
return skb->nf_bridge;
}
+
+/* conntrack assumes exclusive ownership of skb->nfct
+ * if conntrack has not yet been confirmed.
+ *
+ * Without this, we may BUG because we might try to set up
+ * NAT bindings for the same conntrack struct simultaneously.
+ *
+ * Work around this by forcing untracked state.
+ */
+static inline void nf_bridge_unshare_nfct(struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ struct nf_conn *ct, *ct_orig = (void *) skb->nfct;
+
+ if (!ct_orig || nf_ct_is_untracked(ct_orig))
+ return;
+
+ if (likely(nf_ct_is_confirmed(ct_orig)) ||
+ atomic_read(&ct_orig->ct_general.use) == 1)
+ return;
+
+ ct = nf_ct_untracked_get();
+ atomic_inc(&ct->ct_general.use);
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &ct->ct_general;
+#endif
+}
+
static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ nf_bridge_unshare_nfct(skb);
+
if (atomic_read(&nf_bridge->use) > 1) {
struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
--
1.7.3.4
^ permalink raw reply related
* Re: [PATCH 05/24] batman-adv: Remove unnecessary OOM logging messages
From: Marek Lindner @ 2011-08-30 10:58 UTC (permalink / raw)
To: b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r
Cc: Joe Perches, netdev-u79uwXL29TY76Z2rM5mHXA, David S. Miller,
Simon Wunderlich, linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <c469b9fb57fb75547e3e9092ebfdb3a79480b28f.1314650069.git.joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
On Monday, August 29, 2011 23:17:24 Joe Perches wrote:
> Removing unnecessary messages saves code and text.
>
> Site specific OOM messages are duplications of a generic MM
> out of memory message and aren't really useful, so just
> delete them.
Applied in our tree.
Thanks,
Marek
^ permalink raw reply
* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 11:11 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel
File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.
Patch was done against Linux 2.6.38-8
Signed-off-by: Igor Maravić <igorm@etf.rs>
--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig 2011-08-30 12:46:42.663443918
+0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h 2011-08-30 10:06:01.000000000 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
struct rsvp_filter *f, **fp;
struct rsvp_session *s, **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS-1];
+ struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
struct tcf_exts e;
unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
if (err < 0)
return err;
- err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
if (err < 0)
return err;
@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto
if (f->handle != handle && handle)
goto errout2;
- if (tb[TCA_RSVP_CLASSID-1]) {
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
err = -EINVAL;
if (handle)
goto errout2;
- if (tb[TCA_RSVP_DST-1] == NULL)
+ if (tb[TCA_RSVP_DST] == NULL)
goto errout2;
err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
goto errout2;
h2 = 16;
- if (tb[TCA_RSVP_SRC-1]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+ if (tb[TCA_RSVP_SRC]) {
+ memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
h2 = hash_src(f->src);
}
- if (tb[TCA_RSVP_PINFO-1]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+ if (tb[TCA_RSVP_PINFO]) {
+ pinfo = nla_data(tb[TCA_RSVP_PINFO]);
f->spi = pinfo->spi;
f->tunnelhdr = pinfo->tunnelhdr;
}
- if (tb[TCA_RSVP_CLASSID-1])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID])
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- dst = nla_data(tb[TCA_RSVP_DST-1]);
+ dst = nla_data(tb[TCA_RSVP_DST]);
h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid
: 0);
err = -ENOMEM;
@@ -642,7 +642,7 @@ nla_put_failure:
}
static struct tcf_proto_ops RSVP_OPS = {
- .next = NULL,
+ //.next = NULL,
.kind = RSVP_ID,
.classify = rsvp_classify,
.init = rsvp_init,
^ permalink raw reply
* Re: [PATCH] MAINTAINERS: Update ATLX driver maintainers
From: Ian Campbell @ 2011-08-30 11:31 UTC (permalink / raw)
To: Franco Fichtner
Cc: netdev@vger.kernel.org, Jay Cliburn, Chris Snook, Jie Yang,
Andrew Morton, Joe Perches
In-Reply-To: <4E5CC8C5.2080705@lastsummer.de>
On Tue, 2011-08-30 at 12:25 +0100, Franco Fichtner wrote:
> Hi Ian,
>
> On 08/30/2011 11:34 AM, Ian Campbell wrote:
> > jie.yang@atheros.com bounces and I get a 550 "Unknown address error". Perhaps
> > they have moved on?
>
> Atheros is now part of Qualcomm. There is a patch by Luis floating
> around fixing the MAINTAINERS file properly, but it hasn't been applied
> yet. No need to apply this one here.
OK thanks Franco.
Ian.
^ permalink raw reply
* Re: [PATCH] cls_rsvp.h was outdated
From: Eric Dumazet @ 2011-08-30 11:38 UTC (permalink / raw)
To: igorm; +Cc: netdev, linux-kernel
In-Reply-To: <0f79c88be72b75d0526d3f3c2ebf826b.squirrel@kondor.etf.bg.ac.rs>
Le mardi 30 août 2011 à 13:11 +0200, "Igor Maravić" a écrit :
> File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
> file.
> Patch was done against Linux 2.6.38-8
>
> Signed-off-by: Igor Maravić <igorm@etf.rs>
>
> --- linux-2.6.38.8/net/sched/cls_rsvp.h.orig 2011-08-30 12:46:42.663443918
> +0200
Two last points :
1) Your mail client added line wraps, please take a look at
Documentation/email-clients.txt
>
> static struct tcf_proto_ops RSVP_OPS = {
> - .next = NULL,
> + //.next = NULL,
> .kind = RSVP_ID,
> .classify = rsvp_classify,
> .init = rsvp_init,
>
2) Don't add // comments; just remove the line.
You also could add __read_mostly here :
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
Thanks
^ permalink raw reply
* Re: [PATCH] MAINTAINERS: Update ATLX driver maintainers
From: Franco Fichtner @ 2011-08-30 11:25 UTC (permalink / raw)
To: Ian Campbell
Cc: netdev, Jay Cliburn, Chris Snook, Jie Yang, Andrew Morton,
Joe Perches
In-Reply-To: <1314696887-22518-1-git-send-email-ian.campbell@citrix.com>
Hi Ian,
On 08/30/2011 11:34 AM, Ian Campbell wrote:
> jie.yang@atheros.com bounces and I get a 550 "Unknown address error". Perhaps
> they have moved on?
Atheros is now part of Qualcomm. There is a patch by Luis floating
around fixing the MAINTAINERS file properly, but it hasn't been applied
yet. No need to apply this one here.
Franco
^ permalink raw reply
* RE: [PATCH] MAINTAINERS: Update Cisco VIC driver maintainers
From: David Wang (dwang2) @ 2011-08-30 11:42 UTC (permalink / raw)
To: Ian Campbell, netdev
Cc: Christian Benvenuti (benve), Roopa Prabhu (roprabhu),
Andrew Morton, Joe Perches
In-Reply-To: <1314697269-22594-1-git-send-email-ian.campbell@citrix.com>
Ian,
Vasanthy is no longer with Cisco; we will be removing her from any
future submissions.
Regards,
- Dave
> -----Original Message-----
> From: Ian Campbell [mailto:ian.campbell@citrix.com]
> Sent: Tuesday, August 30, 2011 2:41 AM
> To: netdev@vger.kernel.org
> Cc: Ian Campbell; Christian Benvenuti (benve); Roopa Prabhu
> (roprabhu); David Wang (dwang2); Andrew Morton; Joe Perches
> Subject: [PATCH] MAINTAINERS: Update Cisco VIC driver maintainers
>
> vkolluri@cisco.com bounces and I get "Unknown address error 550".
>
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Cc: Christian Benvenuti <benve@cisco.com>
> Cc: Roopa Prabhu <roprabhu@cisco.com>
> Cc: David Wang <dwang2@cisco.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Joe Perches <joe@perches.com>
> Cc: netdev@vger.kernel.org
> ---
> MAINTAINERS | 1 -
> 1 files changed, 0 insertions(+), 1 deletions(-)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index cb6ad5f..a5e0b11 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1757,7 +1757,6 @@ F: Documentation/zh_CN/
>
> CISCO VIC ETHERNET NIC DRIVER
> M: Christian Benvenuti <benve@cisco.com>
> -M: Vasanthy Kolluri <vkolluri@cisco.com>
> M: Roopa Prabhu <roprabhu@cisco.com>
> M: David Wang <dwang2@cisco.com>
> S: Supported
> --
> 1.7.2.5
>
>
^ permalink raw reply
* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 12:12 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel, eric.dumazet
File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.
Patch was done against Linux 2.6.38-8
Signed-off-by: Igor Maravić <igorm@etf.rs>
---
--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig 2011-08-30 12:46:42.663443918
+0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h 2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
struct rsvp_filter *f, **fp;
struct rsvp_session *s, **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS-1];
+ struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
struct tcf_exts e;
unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
if (err < 0)
return err;
- err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
if (err < 0)
return err;
@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto
if (f->handle != handle && handle)
goto errout2;
- if (tb[TCA_RSVP_CLASSID-1]) {
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
err = -EINVAL;
if (handle)
goto errout2;
- if (tb[TCA_RSVP_DST-1] == NULL)
+ if (tb[TCA_RSVP_DST] == NULL)
goto errout2;
err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
goto errout2;
h2 = 16;
- if (tb[TCA_RSVP_SRC-1]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+ if (tb[TCA_RSVP_SRC]) {
+ memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
h2 = hash_src(f->src);
}
- if (tb[TCA_RSVP_PINFO-1]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+ if (tb[TCA_RSVP_PINFO]) {
+ pinfo = nla_data(tb[TCA_RSVP_PINFO]);
f->spi = pinfo->spi;
f->tunnelhdr = pinfo->tunnelhdr;
}
- if (tb[TCA_RSVP_CLASSID-1])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID])
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- dst = nla_data(tb[TCA_RSVP_DST-1]);
+ dst = nla_data(tb[TCA_RSVP_DST]);
h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid
: 0);
err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
return -1;
}
-static struct tcf_proto_ops RSVP_OPS = {
- .next = NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
.classify = rsvp_classify,
.init = rsvp_init,
^ permalink raw reply
* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Patrick McHardy @ 2011-08-30 12:43 UTC (permalink / raw)
To: Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <1314701827-21702-1-git-send-email-fw@strlen.de>
On 30.08.2011 12:57, Florian Westphal wrote:
> When invoking iptables hooks from bridge netfilter, the assumption
> that non-confirmed skb->nfct is never shared no longer holds,
> as bridge code clones skbs when e.g. forwarding packets to multiple
> bridge ports.
>
> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> invoked simultaneously for the same conntrack:
I'm wondering how this can happen, when flooding packets to multiple
ports, they are still processed by the same CPU one after another,
so for the second and further packets, nf_nat should notice that
the mappings are already set up.
> [ 3196.798768] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:300!
> [..]
> [ 3196.798768] [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768] [<ffffffffa03207e4>] ? br_handle_frame_finish+0x0/0x13b [bridge]
> [ 3196.798768] [<ffffffffa02a61a5>] ? alloc_null_binding+0x47/0x4c [iptable_nat]
> [ 3196.798768] [<ffffffffa02a64eb>] ? nf_nat_fn+0x193/0x1fb [iptable_nat]
> [ 3196.798768] [<ffffffff8120d4c5>] ? nf_iterate+0x40/0x9f
> [ 3196.798768] [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768] [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
> [ 3196.798768] [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
> [ 3196.798768] [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768] [<ffffffff8121369c>] ? ip_rcv_finish+0x0/0x340
> [ 3196.798768] [<ffffffff81213ed7>] ? ip_local_deliver+0x52/0x6c
> [ 3196.798768] [<ffffffff812139c2>] ? ip_rcv_finish+0x326/0x340
> [ 3196.798768] [<ffffffff81213c4f>] ? ip_rcv+0x273/0x2b8
> [ 3196.798768] [<ffffffff811f1384>] ? process_backlog+0x8d/0xc6
> [ 3196.798768] [<ffffffff811f2f85>] ? net_rx_action+0xa2/0x1cf
> [ 3196.798768] [<ffffffff8103d3c2>] ? __do_softirq+0x8b/0x10b
> [ 3196.798768] [<ffffffff8100c9dc>] ? call_softirq+0x1c/0x28
> [ 3196.798768] [<ffffffff8100dd15>] ? do_softirq+0x31/0x66
> [ 3196.798768] [<ffffffff8103d267>] ? irq_exit+0x36/0x78
> [ 3196.798768] [<ffffffff8100d41a>] ? do_IRQ+0xa0/0xb6
> [ 3196.798768] [<ffffffff8100c253>] ? ret_from_intr+0x0/0xa
> [..]
> [ 3196.798768] Code: be 2b 01 00 00 48 c7 c7 e8 cd 29 a0 e8 e8 d7 d9 e0 45 85 ff 49 8b 45 78 75 06 48 c1 e8 07 eb 04 48 c1 e8 08 83 e0 01 85 c0 74 04 <0f> 0b eb fe 49 8d 75 50 48 8d bc 24 80 00 00 00 e8 83 38 f7 ff
> [ 3196.798768] RIP [<ffffffffa029b68f>] nf_nat_setup_info+0x8a/0x564 [nf_nat]
> [ 3196.798768] RSP <ffff880001603bf0>
>
> Fix this by changing ->nfct of all clones to untracked.
>
> This should be OK, because if we do a full copy of ->nfct we'd
> end up trying to confirm the same tuples multiple times, which results in
> NF_DROP for the cloned skbs.
>
> Also, we only need to do this if the conntrack is unconfirmed.
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
> net/bridge/br_netfilter.c | 34 ++++++++++++++++++++++++++++++++++
> 1 files changed, 34 insertions(+), 0 deletions(-)
>
> I have one alternate patch that changes nf_nat_setup_info
> to detect conflicts by forcing serialization via ct->lock spinlock.
>
> But it is silly to do this for the sake of bridge netfilter only...
>
> Any other ideas?
> diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
> index 3fa1231..7d47f34 100644
> --- a/net/bridge/br_netfilter.c
> +++ b/net/bridge/br_netfilter.c
> @@ -42,6 +42,10 @@
> #include <linux/sysctl.h>
> #endif
>
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> +#include <net/netfilter/nf_conntrack.h>
> +#endif
> +
> #define skb_origaddr(skb) (((struct bridge_skb_cb *) \
> (skb->nf_bridge->data))->daddr.ipv4)
> #define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr)
> @@ -158,10 +162,40 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
> return skb->nf_bridge;
> }
>
> +
> +/* conntrack assumes exclusive ownership of skb->nfct
> + * if conntrack has not yet been confirmed.
> + *
> + * Without this, we may BUG because we might try to set up
> + * NAT bindings for the same conntrack struct simultaneously.
> + *
> + * Work around this by forcing untracked state.
> + */
> +static inline void nf_bridge_unshare_nfct(struct sk_buff *skb)
> +{
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> + struct nf_conn *ct, *ct_orig = (void *) skb->nfct;
> +
> + if (!ct_orig || nf_ct_is_untracked(ct_orig))
> + return;
> +
> + if (likely(nf_ct_is_confirmed(ct_orig)) ||
> + atomic_read(&ct_orig->ct_general.use) == 1)
> + return;
> +
> + ct = nf_ct_untracked_get();
This will introduce a module dependency on nf_conntrack, which we really
shouldn't be doing.
> + atomic_inc(&ct->ct_general.use);
> + nf_conntrack_put(skb->nfct);
> + skb->nfct = &ct->ct_general;
> +#endif
^ permalink raw reply
* Re: [PATCH 06/24] netfilter: Remove unnecessary OOM logging messages
From: Patrick McHardy @ 2011-08-30 12:46 UTC (permalink / raw)
To: Joe Perches
Cc: Bart De Schuymer, Wensong Zhang, Simon Horman, Julian Anastasov,
Stephen Hemminger, David S. Miller, Alexey Kuznetsov,
James Morris, Hideaki YOSHIFUJI, netfilter-devel, netfilter,
coreteam, bridge, netdev, linux-kernel, lvs-devel
In-Reply-To: <13c1c12486cae409dfa5254b1435e660f2b17e05.1314650069.git.joe@perches.com>
On 29.08.2011 23:17, Joe Perches wrote:
> Removing unnecessary messages saves code and text.
>
> Site specific OOM messages are duplications of a generic MM
> out of memory message and aren't really useful, so just
> delete them.
Looks good to me. Do you want me to apply this patch or are you
intending to have the entire series go through Dave?
^ permalink raw reply
* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 12:54 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Florian Westphal, netfilter-devel, netdev
In-Reply-To: <4E5CDADC.7000902@trash.net>
Patrick McHardy <kaber@trash.net> wrote:
> On 30.08.2011 12:57, Florian Westphal wrote:
> > When invoking iptables hooks from bridge netfilter, the assumption
> > that non-confirmed skb->nfct is never shared no longer holds,
> > as bridge code clones skbs when e.g. forwarding packets to multiple
> > bridge ports.
> >
> > When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> > invoked simultaneously for the same conntrack:
>
> I'm wondering how this can happen, when flooding packets to multiple
> ports, they are still processed by the same CPU one after another,
> so for the second and further packets, nf_nat should notice that
> the mappings are already set up.
Main problem is that we end up with same ->nfct in both
INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).
It's extremely unlikely but reproducible with something like
hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255
(assuming bridge interface has an address within that network).
Also, with recent change nf_reinject can be run in parallel.
(the original problem was observed on 2.6.32.24, but i can
reproduce it with nf-next, too).
^ permalink raw reply
* 802.1Q VLAN random tag injected when vlan configured on forcedeth interface
From: Ruslan N. Marchenko @ 2011-08-30 12:51 UTC (permalink / raw)
To: netdev
Hi guys,
I've run into strange behaviour of the 8021q driver: when enabling a vlan subinterface on an eth interface I'm getting ~50% packet loss because packets are marked with incorrect tags (and eventually dropped by the kernel since no vlans are configured for such IDs).
Scenario:
[ 0.476950] cpufreq-nforce2: No nForce2 chipset.
[ 1.519133] forcedeth: Reverse Engineered nForce ethernet driver. Version 0.64.
[ 1.519991] forcedeth 0000:00:0a.0: PCI INT A -> Link[LMAC] -> GSI 22 (level, low) -> IRQ 22
[ 1.520037] forcedeth 0000:00:0a.0: setting latency timer to 64
[ 1.586526] forcedeth 0000:00:0a.0: ifname eth0, PHY OUI 0x732 @ 3, addr 00:26:18:40:21:61
[ 1.586542] forcedeth 0000:00:0a.0: highdma csum pwrctl gbit lnktim msi desc-v3
modprobe 8021q
- network still works properly, packets are coming in not marked at all.
ip li add link eth0 name vl6 type vlan id 6
- from this moment massive packet drop starts to happen, almost half of the *incoming* packets are shown in tcpdump as
14:15:52.859296 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 64, p 3, ethertype IPv4, [|ip]
14:15:56.869572 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 7, ethertype IPv4, [|ip]
mostly only these two tags appear (64 & 2112). Moreover, this happens both at the native vlan level (pure ethernet) and on the tagged subinterface (as if qinq double tagging) for incoming packets properly tagged with ID 6.
I've tried disabling all offloads:
Offload parameters for eth0:
rx-checksumming: off
tx-checksumming: off
scatter-gather: off
tcp-segmentation-offload: off
udp-fragmentation-offload: off
generic-segmentation-offload: off
generic-receive-offload: off
large-receive-offload: off
rx-vlan-offload: off
tx-vlan-offload: off
ntuple-filters: off
receive-hashing: off
- doesn't have any effect.
Once executing
ip li del vl6 type vlan
mysterious tags disappear and everything works smoothly. Don't know who injects that garbage into frames - 8021q or forcedeth driver :(
Any ideas or suggestions to narrow the problem down?
Additional data.
Link level data dump example for broken frame:
12:35:32.175523 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 2, ethertype IPv4, [|ip]
0x0000: 0026 1840 2161 0013 f71e fee4 8100 4840
0x0010: 0800 4500 0054 7a12 0000 4001 eb0f
0x0C-0D - TPID: ethertype 802.1Q (0x8100)
0x0E-0F - TCI (0100100001000000) PCP 010, CFI 0, VID 100001000000/0x840/2112
0x10-11 - ethertype IPv4
normal ping reply follows, which appears untagged in 50% cases with vlan configured and 100% cases without.
Interface is plugged into openwrt box into non-switched (wan) gigabit port with vid 6 subinterface configured.
Regards,
Ruslan
^ permalink raw reply
* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Patrick McHardy @ 2011-08-30 13:08 UTC (permalink / raw)
To: Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <20110830125453.GC7548@Chamillionaire.breakpoint.cc>
On 30.08.2011 14:54, Florian Westphal wrote:
> Patrick McHardy <kaber@trash.net> wrote:
>> On 30.08.2011 12:57, Florian Westphal wrote:
>>> When invoking iptables hooks from bridge netfilter, the assumption
>>> that non-confirmed skb->nfct is never shared no longer holds,
>>> as bridge code clones skbs when e.g. forwarding packets to multiple
>>> bridge ports.
>>>
>>> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
>>> invoked simultaneously for the same conntrack:
>>
>> I'm wondering how this can happen, when flooding packets to multiple
>> ports, they are still processed by the same CPU one after another,
>> so for the second and further packets, nf_nat should notice that
>> the mappings are already set up.
>
> Main problem is that we end up with same ->nfct in both
> INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).
>
> It's extremely unlikely but reproducible with something like
> hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255
>
> (assuming bridge interface has an address within that network).
>
> Also, with recent change nf_reinject can be run in parallel.
> (the original problem was observed on 2.6.32.24, but i can
> reproduce it with nf-next, too).
I see. We still need to avoid the module dependency on nf_conntrack
though, so I think this will have to be fixed in nf_nat_fn().
^ permalink raw reply
* [PATCH] net: sh_eth: remove duplicated #include
From: Huang Weiyi @ 2011-08-30 13:09 UTC (permalink / raw)
To: davem; +Cc: netdev, Huang Weiyi
Remove duplicated #include('s) in
drivers/net/sh_eth.c
Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
---
drivers/net/sh_eth.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index 1c1666e..190f619 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -31,7 +31,6 @@
#include <linux/phy.h>
#include <linux/cache.h>
#include <linux/io.h>
-#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/ethtool.h>
--
1.6.1.3
^ permalink raw reply related
* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 13:10 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel
File cls_rsvp.h in /net/sched was outdated. I'm sending you a patch for this
file.
Sorry for the word-wrap in previous messages
Signed-off-by: Igor Maravić <igorm@etf.rs>
---
--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig 2011-08-30 12:46:42.663443918 +0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h 2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
struct rsvp_filter *f, **fp;
struct rsvp_session *s, **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS-1];
+ struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
struct tcf_exts e;
unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
if (err < 0)
return err;
- err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
if (err < 0)
return err;
@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto
if (f->handle != handle && handle)
goto errout2;
- if (tb[TCA_RSVP_CLASSID-1]) {
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
err = -EINVAL;
if (handle)
goto errout2;
- if (tb[TCA_RSVP_DST-1] == NULL)
+ if (tb[TCA_RSVP_DST] == NULL)
goto errout2;
err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
goto errout2;
h2 = 16;
- if (tb[TCA_RSVP_SRC-1]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+ if (tb[TCA_RSVP_SRC]) {
+ memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
h2 = hash_src(f->src);
}
- if (tb[TCA_RSVP_PINFO-1]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+ if (tb[TCA_RSVP_PINFO]) {
+ pinfo = nla_data(tb[TCA_RSVP_PINFO]);
f->spi = pinfo->spi;
f->tunnelhdr = pinfo->tunnelhdr;
}
- if (tb[TCA_RSVP_CLASSID-1])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID])
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- dst = nla_data(tb[TCA_RSVP_DST-1]);
+ dst = nla_data(tb[TCA_RSVP_DST]);
h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
return -1;
}
-static struct tcf_proto_ops RSVP_OPS = {
- .next = NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
.classify = rsvp_classify,
.init = rsvp_init,
^ permalink raw reply
* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 13:12 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel
File cls_rsvp.h in /net/sched was outdated. I'm sending you a patch for this
file.
Sorry for the word-wrap in previous messages
Signed-off-by: Igor Maravić <igorm@etf.rs>
---
--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig 2011-08-30 12:46:42.663443918 +0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h 2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
struct rsvp_filter *f, **fp;
struct rsvp_session *s, **sp;
struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS-1];
+ struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_RSVP_MAX + 1];
struct tcf_exts e;
unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
if (err < 0)
return err;
- err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
if (err < 0)
return err;
@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto
if (f->handle != handle && handle)
goto errout2;
- if (tb[TCA_RSVP_CLASSID-1]) {
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
err = -EINVAL;
if (handle)
goto errout2;
- if (tb[TCA_RSVP_DST-1] == NULL)
+ if (tb[TCA_RSVP_DST] == NULL)
goto errout2;
err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
goto errout2;
h2 = 16;
- if (tb[TCA_RSVP_SRC-1]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+ if (tb[TCA_RSVP_SRC]) {
+ memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
h2 = hash_src(f->src);
}
- if (tb[TCA_RSVP_PINFO-1]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+ if (tb[TCA_RSVP_PINFO]) {
+ pinfo = nla_data(tb[TCA_RSVP_PINFO]);
f->spi = pinfo->spi;
f->tunnelhdr = pinfo->tunnelhdr;
}
- if (tb[TCA_RSVP_CLASSID-1])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ if (tb[TCA_RSVP_CLASSID])
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- dst = nla_data(tb[TCA_RSVP_DST-1]);
+ dst = nla_data(tb[TCA_RSVP_DST]);
h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
return -1;
}
-static struct tcf_proto_ops RSVP_OPS = {
- .next = NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
.classify = rsvp_classify,
.init = rsvp_init,
^ permalink raw reply
* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 13:19 UTC (permalink / raw)
To: Patrick McHardy; +Cc: Florian Westphal, netfilter-devel, netdev
In-Reply-To: <4E5CE0BD.7040103@trash.net>
Patrick McHardy <kaber@trash.net> wrote:
> On 30.08.2011 14:54, Florian Westphal wrote:
> > Patrick McHardy <kaber@trash.net> wrote:
> >> On 30.08.2011 12:57, Florian Westphal wrote:
> >>> When invoking iptables hooks from bridge netfilter, the assumption
> >>> that non-confirmed skb->nfct is never shared no longer holds,
> >>> as bridge code clones skbs when e.g. forwarding packets to multiple
> >>> bridge ports.
> >>>
> >>> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> >>> invoked simultaneously for the same conntrack:
> >>
> >> I'm wondering how this can happen, when flooding packets to multiple
> >> ports, they are still processed by the same CPU one after another,
> >> so for the second and further packets, nf_nat should notice that
> >> the mappings are already set up.
> >
> > Main problem is that we end up with same ->nfct in both
> > INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).
> >
> > It's extremely unlikely but reproducible with something like
> > hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255
> >
> > (assuming bridge interface has an address within that network).
> >
> > Also, with recent change nf_reinject can be run in parallel.
> > (the original problem was observed on 2.6.32.24, but i can
> > reproduce it with nf-next, too).
>
> I see. We still need to avoid the module dependency on nf_conntrack
> though, so I think this will have to be fixed in nf_nat_fn().
Right, I failed to spot the call to the destroy hook 8-/
I'll submit an alternate patch shortly.
^ permalink raw reply
* [PATCH v2] tcp: Change possible SYN flooding messages
From: Eric Dumazet @ 2011-08-30 13:21 UTC (permalink / raw)
To: Tom Herbert, David Miller; +Cc: netdev
In-Reply-To: <alpine.DEB.2.00.1108102229130.5341@pokey.mtv.corp.google.com>
"Possible SYN flooding on port xxxx " messages can fill logs on servers.
Change logic to log the message only once per listener, and add two new
SNMP counters to track :
TCPReqQFullDoCookies : number of times a SYNCOOKIE was replied to client
TCPReqQFullDrop : number of times a SYN request was dropped because
syncookies were not enabled.
Based on a prior patch from Tom Herbert, and suggestions from David.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
---
include/linux/snmp.h | 2 +
include/net/request_sock.h | 3 +-
include/net/tcp.h | 3 ++
net/ipv4/proc.c | 2 +
net/ipv4/tcp_ipv4.c | 49 ++++++++++++++++++++---------------
net/ipv6/tcp_ipv6.c | 31 ++--------------------
6 files changed, 40 insertions(+), 50 deletions(-)
diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 12b2b18..e16557a 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -231,6 +231,8 @@ enum
LINUX_MIB_TCPDEFERACCEPTDROP,
LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */
+ LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */
+ LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */
__LINUX_MIB_MAX
};
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 99e6e19..4c0766e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog;
*/
struct listen_sock {
u8 max_qlen_log;
- /* 3 bytes hole, try to use */
+ u8 synflood_warned;
+ /* 2 bytes hole, try to use */
int qlen;
int qlen_young;
int clock_hand;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 149a415..e9b48b0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -460,6 +460,9 @@ extern int tcp_write_wakeup(struct sock *);
extern void tcp_send_fin(struct sock *sk);
extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
extern int tcp_send_synack(struct sock *);
+extern int tcp_syn_flood_action(struct sock *sk,
+ const struct sk_buff *skb,
+ const char *proto);
extern void tcp_push_one(struct sock *, unsigned int mss_now);
extern void tcp_send_ack(struct sock *sk);
extern void tcp_send_delayed_ack(struct sock *sk);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b14ec7d..4bfad5d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
+ SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
+ SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3f2611..c29912c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -808,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
kfree(inet_rsk(req)->opt);
}
-static void syn_flood_warning(const struct sk_buff *skb)
+/*
+ * Return 1 if a syncookie should be sent
+ */
+int tcp_syn_flood_action(struct sock *sk,
+ const struct sk_buff *skb,
+ const char *proto)
{
- const char *msg;
+ const char *msg = "Dropping request";
+ int want_cookie = 0;
+ struct listen_sock *lopt;
+
+
#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
+ if (sysctl_tcp_syncookies) {
msg = "Sending cookies";
- else
+ want_cookie = 1;
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+ } else
#endif
- msg = "Dropping request";
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
- ntohs(tcp_hdr(skb)->dest), msg);
+ lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+ if (!lopt->synflood_warned) {
+ lopt->synflood_warned = 1;
+ pr_info("%s: Possible SYN flooding on port %d. %s. "
+ " Check SNMP counters.\n",
+ proto, ntohs(tcp_hdr(skb)->dest), msg);
+ }
+ return want_cookie;
}
+EXPORT_SYMBOL(tcp_syn_flood_action);
/*
* Save and compile IPv4 options into the request_sock if needed.
@@ -1235,11 +1253,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
-#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
-#else
-#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
-#endif
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1250,14 +1264,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* evidently real one.
*/
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
- if (net_ratelimit())
- syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies) {
- want_cookie = 1;
- } else
-#endif
- goto drop;
+ want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
+ if (!want_cookie)
+ goto drop;
}
/* Accept backlog is full. If we have already queued enough
@@ -1303,9 +1312,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
while (l-- > 0)
*c++ ^= *hash_location++;
-#ifdef CONFIG_SYN_COOKIES
want_cookie = 0; /* not our kind of cookie */
-#endif
tmp_ext.cookie_out_never = 0; /* false */
tmp_ext.cookie_plus = tmp_opt.cookie_plus;
} else if (!tp->rx_opt.cookie_in_always) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 44a5859..12bdb9a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -531,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
return tcp_v6_send_synack(sk, req, rvp);
}
-static inline void syn_flood_warning(struct sk_buff *skb)
-{
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
- printk(KERN_INFO
- "TCPv6: Possible SYN flooding on port %d. "
- "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
- else
-#endif
- printk(KERN_INFO
- "TCPv6: Possible SYN flooding on port %d. "
- "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
-}
-
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
kfree_skb(inet6_rsk(req)->pktopts);
@@ -1179,11 +1165,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = NULL;
-#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
-#else
-#define want_cookie 0
-#endif
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb);
@@ -1192,14 +1174,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop;
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
- if (net_ratelimit())
- syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
- if (sysctl_tcp_syncookies)
- want_cookie = 1;
- else
-#endif
- goto drop;
+ want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+ if (!want_cookie)
+ goto drop;
}
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
@@ -1249,9 +1226,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
while (l-- > 0)
*c++ ^= *hash_location++;
-#ifdef CONFIG_SYN_COOKIES
want_cookie = 0; /* not our kind of cookie */
-#endif
tmp_ext.cookie_out_never = 0; /* false */
tmp_ext.cookie_plus = tmp_opt.cookie_plus;
} else if (!tp->rx_opt.cookie_in_always) {
^ permalink raw reply related
* Re: 802.1Q VLAN random tag injected when vlan configured on forcedeth interface
From: Eric Dumazet @ 2011-08-30 13:23 UTC (permalink / raw)
To: Ruslan N. Marchenko; +Cc: netdev
In-Reply-To: <20110830125111.GA28341@ruff.mobi>
Le mardi 30 août 2011 à 14:51 +0200, Ruslan N. Marchenko a écrit :
> Hi guys,
> I've run into strange behaviour of the 8021q driver: when enabling a vlan subinterface on an eth interface I'm getting ~50% packet loss because packets are marked with incorrect tags (and eventually dropped by the kernel since no vlans are configured for such IDs).
> Scenario:
> [ 0.476950] cpufreq-nforce2: No nForce2 chipset.
> [ 1.519133] forcedeth: Reverse Engineered nForce ethernet driver. Version 0.64.
> [ 1.519991] forcedeth 0000:00:0a.0: PCI INT A -> Link[LMAC] -> GSI 22 (level, low) -> IRQ 22
> [ 1.520037] forcedeth 0000:00:0a.0: setting latency timer to 64
> [ 1.586526] forcedeth 0000:00:0a.0: ifname eth0, PHY OUI 0x732 @ 3, addr 00:26:18:40:21:61
> [ 1.586542] forcedeth 0000:00:0a.0: highdma csum pwrctl gbit lnktim msi desc-v3
>
> modprobe 8021q
>
> - network still works properly, packets are coming in not marked at all.
>
> ip li add link eth0 name vl6 type vlan id 6
>
> - from this moment massive packet drop starts to happen, almost half of the *incoming* packets are shown in tcpdump as
> 14:15:52.859296 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 64, p 3, ethertype IPv4, [|ip]
> 14:15:56.869572 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 7, ethertype IPv4, [|ip]
>
> mostly only these two tags appear (64 & 2112). Moreover, this happens both at the native vlan level (pure ethernet) and on the tagged subinterface (as if qinq double tagging) for incoming packets properly tagged with ID 6.
>
> I've tried disabling all offloads:
>
> Offload parameters for eth0:
> rx-checksumming: off
> tx-checksumming: off
> scatter-gather: off
> tcp-segmentation-offload: off
> udp-fragmentation-offload: off
> generic-segmentation-offload: off
> generic-receive-offload: off
> large-receive-offload: off
> rx-vlan-offload: off
> tx-vlan-offload: off
> ntuple-filters: off
> receive-hashing: off
>
> - doesn't have any effect.
> Once executing
> ip li del vl6 type vlan
> mysterious tags disappear and everything works smoothly. Don't know who injects that garbage into frames - 8021q or forcedeth driver :(
> Any ideas or suggestions to narrow the problem down?
>
> Additional data.
> Link level data dump example for broken frame:
> 12:35:32.175523 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 2, ethertype IPv4, [|ip]
> 0x0000: 0026 1840 2161 0013 f71e fee4 8100 4840
> 0x0010: 0800 4500 0054 7a12 0000 4001 eb0f
> 0x0C-0D - TPID: ethertype 802.1Q (0x8100)
> 0x0E-0F - TCI (0100100001000000) PCP 010, CFI 0, VID 100001000000/0x840/2112
> 0x10-11 - ethertype IPv4
> normal ping reply follows, which appears untagged in 50% cases with vlan configured and 100% cases without.
>
> Interface is plugged into openwrt box into non-switched (wan) gigabit port with vid 6 subinterface configured.
>
What kernel version are you using ?
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox