Netdev List
 help / color / mirror / Atom feed
* [PATCH 5/8] bnx2x: don't reset device while reading its configuration.
From: Dmitry Kravkov @ 2011-08-30 10:08 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1314698926-24525-1-git-send-email-dmitry@broadcom.com>

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x_main.c |   24 +++++++++++++++---------
 drivers/net/bnx2x/bnx2x_reg.h  |    2 +-
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 9633e9b..00dc8f0 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -5822,7 +5822,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
 	 * take the UNDI lock to protect undi_unload flow from accessing
 	 * registers while we're resetting the chip
 	 */
-	bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+	bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 
 	bnx2x_reset_common(bp);
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff);
@@ -5834,7 +5834,7 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
 	}
 	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, val);
 
-	bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+	bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 
 	bnx2x_init_block(bp, BLOCK_MISC, PHASE_COMMON);
 
@@ -8570,10 +8570,12 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
 	/* Check if there is any driver already loaded */
 	val = REG_RD(bp, MISC_REG_UNPREPARED);
 	if (val == 0x1) {
-		/* Check if it is the UNDI driver
+
+		bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+		/*
+		 * Check if it is the UNDI driver
 		 * UNDI driver initializes CID offset for normal bell to 0x7
 		 */
-		bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
 		val = REG_RD(bp, DORQ_REG_NORM_CID_OFST);
 		if (val == 0x7) {
 			u32 reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS;
@@ -8611,9 +8613,6 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
 				bnx2x_fw_command(bp, reset_code, 0);
 			}
 
-			/* now it's safe to release the lock */
-			bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
-
 			bnx2x_undi_int_disable(bp);
 			port = BP_PORT(bp);
 
@@ -8663,8 +8662,10 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
 			bp->fw_seq =
 			      (SHMEM_RD(bp, func_mb[bp->pf_num].drv_mb_header) &
 				DRV_MSG_SEQ_NUMBER_MASK);
-		} else
-			bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+		}
+
+		/* now it's safe to release the lock */
+		bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 	}
 }
 
@@ -9440,6 +9441,10 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 		bp->igu_base_sb = 0;
 	} else {
 		bp->common.int_block = INT_BLOCK_IGU;
+
+		/* do not allow device reset during IGU info preocessing */
+		bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+
 		val = REG_RD(bp, IGU_REG_BLOCK_CONFIGURATION);
 
 		if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) {
@@ -9471,6 +9476,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 
 		bnx2x_get_igu_cam_info(bp);
 
+		bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
 	}
 
 	/*
diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h
index 40266c1..dac217d 100644
--- a/drivers/net/bnx2x/bnx2x_reg.h
+++ b/drivers/net/bnx2x/bnx2x_reg.h
@@ -5766,7 +5766,7 @@
 #define HW_LOCK_RESOURCE_RECOVERY_LEADER_0			 8
 #define HW_LOCK_RESOURCE_RECOVERY_LEADER_1			 9
 #define HW_LOCK_RESOURCE_SPIO					 2
-#define HW_LOCK_RESOURCE_UNDI					 5
+#define HW_LOCK_RESOURCE_RESET					 5
 #define AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT			 (0x1<<4)
 #define AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR			 (0x1<<5)
 #define AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR			 (0x1<<18)
-- 
1.7.2.2

^ permalink raw reply related

* [PATCH 4/8] bnx2x: fix MF for 4-port devices
From: Dmitry Kravkov @ 2011-08-30 10:08 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1314698926-24525-1-git-send-email-dmitry@broadcom.com>

Number of VNs for 4-port devices is 2 instead of 4

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/bnx2x/bnx2x.h       |   15 +++++++------
 drivers/net/bnx2x/bnx2x_main.c  |   43 +++++++++++++++++++++++---------------
 drivers/net/bnx2x/bnx2x_stats.c |    4 +-
 3 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 8529732..2621a1c 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -1166,11 +1166,12 @@ struct bnx2x {
 #define BP_PORT(bp)			(bp->pfid & 1)
 #define BP_FUNC(bp)			(bp->pfid)
 #define BP_ABS_FUNC(bp)			(bp->pf_num)
-#define BP_E1HVN(bp)			(bp->pfid >> 1)
-#define BP_VN(bp)			(BP_E1HVN(bp)) /*remove when approved*/
-#define BP_L_ID(bp)			(BP_E1HVN(bp) << 2)
-#define BP_FW_MB_IDX(bp)		(BP_PORT(bp) +\
-	  BP_VN(bp) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2  : 1))
+#define BP_VN(bp)			((bp)->pfid >> 1)
+#define BP_MAX_VN_NUM(bp)		(CHIP_MODE_IS_4_PORT(bp) ? 2 : 4)
+#define BP_L_ID(bp)			(BP_VN(bp) << 2)
+#define BP_FW_MB_IDX_VN(bp, vn)		(BP_PORT(bp) +\
+	  (vn) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2  : 1))
+#define BP_FW_MB_IDX(bp)		BP_FW_MB_IDX_VN(bp, BP_VN(bp))
 
 	struct net_device	*dev;
 	struct pci_dev		*pdev;
@@ -1833,7 +1834,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 
 #define MAX_DMAE_C_PER_PORT		8
 #define INIT_DMAE_C(bp)			(BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
-					 BP_E1HVN(bp))
+					 BP_VN(bp))
 #define PMF_DMAE_C(bp)			(BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
 					 E1HVN_MAX)
 
@@ -1859,7 +1860,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
 
 /* must be used on a CID before placing it on a HW ring */
 #define HW_CID(bp, x)			((BP_PORT(bp) << 23) | \
-					 (BP_E1HVN(bp) << BNX2X_SWCID_SHIFT) | \
+					 (BP_VN(bp) << BNX2X_SWCID_SHIFT) | \
 					 (x))
 
 #define SP_DESC_CNT		(BCM_PAGE_SIZE / sizeof(struct eth_spe))
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 3f93e86..9633e9b 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -407,8 +407,8 @@ u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type,
 	opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET);
 
 	opcode |= (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0);
-	opcode |= ((BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT) |
-		   (BP_E1HVN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
+	opcode |= ((BP_VN(bp) << DMAE_CMD_E1HVN_SHIFT) |
+		   (BP_VN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
 	opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT);
 
 #ifdef __BIG_ENDIAN
@@ -1419,7 +1419,7 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp)
 	if (!CHIP_IS_E1(bp)) {
 		/* init leading/trailing edge */
 		if (IS_MF(bp)) {
-			val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+			val = (0xee0f | (1 << (BP_VN(bp) + 4)));
 			if (bp->port.pmf)
 				/* enable nig and gpio3 attention */
 				val |= 0x1100;
@@ -1471,7 +1471,7 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp)
 
 	/* init leading/trailing edge */
 	if (IS_MF(bp)) {
-		val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+		val = (0xee0f | (1 << (BP_VN(bp) + 4)));
 		if (bp->port.pmf)
 			/* enable nig and gpio3 attention */
 			val |= 0x1100;
@@ -2287,7 +2287,7 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
 	int vn;
 
 	bp->vn_weight_sum = 0;
-	for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
 		u32 vn_cfg = bp->mf_config[vn];
 		u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
 				   FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
@@ -2320,12 +2320,18 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
 					CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
 }
 
+/* returns func by VN for current port */
+static inline int func_by_vn(struct bnx2x *bp, int vn)
+{
+	return 2 * vn + BP_PORT(bp);
+}
+
 static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
 {
 	struct rate_shaping_vars_per_vn m_rs_vn;
 	struct fairness_vars_per_vn m_fair_vn;
 	u32 vn_cfg = bp->mf_config[vn];
-	int func = 2*vn + BP_PORT(bp);
+	int func = func_by_vn(bp, vn);
 	u16 vn_min_rate, vn_max_rate;
 	int i;
 
@@ -2422,7 +2428,7 @@ void bnx2x_read_mf_cfg(struct bnx2x *bp)
 	 *
 	 *      and there are 2 functions per port
 	 */
-	for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
 		int /*abs*/func = n * (2 * vn + BP_PORT(bp)) + BP_PATH(bp);
 
 		if (func >= E1H_FUNC_MAX)
@@ -2454,7 +2460,7 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
 
 		/* calculate and set min-max rate for each vn */
 		if (bp->port.pmf)
-			for (vn = VN_0; vn < E1HVN_MAX; vn++)
+			for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++)
 				bnx2x_init_vn_minmax(bp, vn);
 
 		/* always enable rate shaping and fairness */
@@ -2473,16 +2479,15 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
 
 static inline void bnx2x_link_sync_notify(struct bnx2x *bp)
 {
-	int port = BP_PORT(bp);
 	int func;
 	int vn;
 
 	/* Set the attention towards other drivers on the same port */
-	for (vn = VN_0; vn < E1HVN_MAX; vn++) {
-		if (vn == BP_E1HVN(bp))
+	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
+		if (vn == BP_VN(bp))
 			continue;
 
-		func = ((vn << 1) | port);
+		func = func_by_vn(bp, vn);
 		REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
 		       (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
 	}
@@ -2577,7 +2582,7 @@ static void bnx2x_pmf_update(struct bnx2x *bp)
 	bnx2x_dcbx_pmf_update(bp);
 
 	/* enable nig attention */
-	val = (0xff0f | (1 << (BP_E1HVN(bp) + 4)));
+	val = (0xff0f | (1 << (BP_VN(bp) + 4)));
 	if (bp->common.int_block == INT_BLOCK_HC) {
 		REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
 		REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
@@ -6686,12 +6691,16 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
 			if (CHIP_MODE_IS_4_PORT(bp))
 				dsb_idx = BP_FUNC(bp);
 			else
-				dsb_idx = BP_E1HVN(bp);
+				dsb_idx = BP_VN(bp);
 
 			prod_offset = (CHIP_INT_MODE_IS_BC(bp) ?
 				       IGU_BC_BASE_DSB_PROD + dsb_idx :
 				       IGU_NORM_BASE_DSB_PROD + dsb_idx);
 
+			/*
+			 * igu prods come in chunks of E1HVN_MAX (4) -
+			 * does not matters what is the current chip mode
+			 */
 			for (i = 0; i < (num_segs * E1HVN_MAX);
 			     i += E1HVN_MAX) {
 				addr = IGU_REG_PROD_CONS_MEMORY +
@@ -7585,7 +7594,7 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode)
 		u32 val;
 		/* The mac address is written to entries 1-4 to
 		   preserve entry 0 which is used by the PMF */
-		u8 entry = (BP_E1HVN(bp) + 1)*8;
+		u8 entry = (BP_VN(bp) + 1)*8;
 
 		val = (mac_addr[0] << 8) | mac_addr[1];
 		EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry, val);
@@ -8792,13 +8801,13 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
 static void __devinit bnx2x_get_igu_cam_info(struct bnx2x *bp)
 {
 	int pfid = BP_FUNC(bp);
-	int vn = BP_E1HVN(bp);
 	int igu_sb_id;
 	u32 val;
 	u8 fid, igu_sb_cnt = 0;
 
 	bp->igu_base_sb = 0xff;
 	if (CHIP_INT_MODE_IS_BC(bp)) {
+		int vn = BP_VN(bp);
 		igu_sb_cnt = bp->igu_sb_cnt;
 		bp->igu_base_sb = (CHIP_MODE_IS_4_PORT(bp) ? pfid : vn) *
 			FP_SB_MAX_E1x;
@@ -9488,7 +9497,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
 
 	bp->mf_ov = 0;
 	bp->mf_mode = 0;
-	vn = BP_E1HVN(bp);
+	vn = BP_VN(bp);
 
 	if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) {
 		BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n",
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index f5d9b42..9908f2b 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -1392,7 +1392,7 @@ static void bnx2x_port_stats_base_init(struct bnx2x *bp)
 
 static void bnx2x_func_stats_base_init(struct bnx2x *bp)
 {
-	int vn, vn_max = IS_MF(bp) ? E1HVN_MAX : E1VN_MAX;
+	int vn, vn_max = IS_MF(bp) ? BP_MAX_VN_NUM(bp) : E1VN_MAX;
 	u32 func_stx;
 
 	/* sanity */
@@ -1405,7 +1405,7 @@ static void bnx2x_func_stats_base_init(struct bnx2x *bp)
 	func_stx = bp->func_stx;
 
 	for (vn = VN_0; vn < vn_max; vn++) {
-		int mb_idx = CHIP_IS_E1x(bp) ? 2*vn + BP_PORT(bp) : vn;
+		int mb_idx = BP_FW_MB_IDX_VN(bp, vn);
 
 		bp->func_stx = SHMEM_RD(bp, func_mb[mb_idx].fw_mb_param);
 		bnx2x_func_stats_init(bp);
-- 
1.7.2.2

^ permalink raw reply related

* Your Winning Coupon No.PBL2348974321
From: POWERBALL @ 2011-08-30  9:49 UTC (permalink / raw)


for more infomation regarding the sum of £980,000:00 Pound

^ permalink raw reply

* Re: cls_rsvp.h fix
From: Eric Dumazet @ 2011-08-30 10:20 UTC (permalink / raw)
  To: igorm; +Cc: netdev
In-Reply-To: <258b72c5817cd9a8437533d7908b546e.squirrel@kondor.etf.bg.ac.rs>

Le mardi 30 août 2011 à 11:33 +0200, "Igor Maravić" a écrit :
> File cls_rsvp.h was outdated. I'm sending you fix for this file.
> 
> Best regards
> Igor Maravić

Hi Igor

Please read Documentation/SubmittingPatches to properly format and sign
your patch.

^ permalink raw reply

* Re: [PATCH 10/14] intel: convert to SKB paged frag API.
From: Jeff Kirsher @ 2011-08-30 10:52 UTC (permalink / raw)
  To: Ian Campbell
  Cc: netdev@vger.kernel.org, Brandeburg, Jesse, Allan, Bruce W,
	Wyborny, Carolyn, Skidmore, Donald C, Rose, Gregory V
In-Reply-To: <1314695910-22344-10-git-send-email-ian.campbell@citrix.com>

[-- Attachment #1: Type: text/plain, Size: 1265 bytes --]

On Tue, 2011-08-30 at 02:18 -0700, Ian Campbell wrote:
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
> Cc: Bruce Allan <bruce.w.allan@intel.com>
> Cc: Carolyn Wyborny <carolyn.wyborny@intel.com>
> Cc: Don Skidmore <donald.c.skidmore@intel.com>
> Cc: Greg Rose <gregory.v.rose@intel.com>
> Cc: PJ Waskiewicz <peter.p.waskiewicz.jr@intel.com>
> Cc: Alex Duyck <alexander.h.duyck@intel.com>
> Cc: John Ronciak <john.ronciak@intel.com>
> Cc: e1000-devel@lists.sourceforge.net
> Cc: netdev@vger.kernel.org
> ---
>  drivers/net/ethernet/intel/e1000/e1000_main.c     |   16
> +++++++++-------
>  drivers/net/ethernet/intel/e1000e/netdev.c        |    7 +++----
>  drivers/net/ethernet/intel/igb/igb_main.c         |    5 +----
>  drivers/net/ethernet/intel/igbvf/netdev.c         |    5 +----
>  drivers/net/ethernet/intel/ixgb/ixgb_main.c       |    6 +++---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |    3 +--
>  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |   10 ++++------
>  7 files changed, 22 insertions(+), 30 deletions(-) 

The changes look fine to me.

Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* AWARD
From: BALL @ 2011-08-30 10:23 UTC (permalink / raw)


for more information regarding the sum of £980,000:00 Pound

^ permalink raw reply

* [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 10:57 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, Florian Westphal

When incoking iptables hooks from bridge netfilter, the assumption
that non-confirmed skb->nfct is never shared does no longer hold,
as bridge code clones skbs when e.g. forwarding packets to multiple
bridge ports.

When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
invoked simultaneously for the same conntrack:

[ 3196.798768] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:300!
[..]
[ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
[ 3196.798768]  [<ffffffffa03207e4>] ? br_handle_frame_finish+0x0/0x13b [bridge]
[ 3196.798768]  [<ffffffffa02a61a5>] ? alloc_null_binding+0x47/0x4c [iptable_nat]
[ 3196.798768]  [<ffffffffa02a64eb>] ? nf_nat_fn+0x193/0x1fb [iptable_nat]
[ 3196.798768]  [<ffffffff8120d4c5>] ? nf_iterate+0x40/0x9f
[ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
[ 3196.798768]  [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
[ 3196.798768]  [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
[ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
[ 3196.798768]  [<ffffffff8121369c>] ? ip_rcv_finish+0x0/0x340
[ 3196.798768]  [<ffffffff81213ed7>] ? ip_local_deliver+0x52/0x6c
[ 3196.798768]  [<ffffffff812139c2>] ? ip_rcv_finish+0x326/0x340
[ 3196.798768]  [<ffffffff81213c4f>] ? ip_rcv+0x273/0x2b8
[ 3196.798768]  [<ffffffff811f1384>] ? process_backlog+0x8d/0xc6
[ 3196.798768]  [<ffffffff811f2f85>] ? net_rx_action+0xa2/0x1cf
[ 3196.798768]  [<ffffffff8103d3c2>] ? __do_softirq+0x8b/0x10b
[ 3196.798768]  [<ffffffff8100c9dc>] ? call_softirq+0x1c/0x28
[ 3196.798768]  [<ffffffff8100dd15>] ? do_softirq+0x31/0x66
[ 3196.798768]  [<ffffffff8103d267>] ? irq_exit+0x36/0x78
[ 3196.798768]  [<ffffffff8100d41a>] ? do_IRQ+0xa0/0xb6
[ 3196.798768]  [<ffffffff8100c253>] ? ret_from_intr+0x0/0xa
[..]
[ 3196.798768] Code: be 2b 01 00 00 48 c7 c7 e8 cd 29 a0 e8 e8 d7 d9 e0 45 85 ff 49 8b 45 78 75 06 48 c1 e8 07 eb 04 48 c1 e8 08 83 e0 01 85 c0 74 04 <0f> 0b eb fe 49 8d 75 50 48 8d bc 24 80 00 00 00 e8 83 38 f7 ff
[ 3196.798768] RIP  [<ffffffffa029b68f>] nf_nat_setup_info+0x8a/0x564 [nf_nat]
[ 3196.798768]  RSP <ffff880001603bf0>

Fix this by changing ->nfct of all clones to untracked.

This should be OK, because if we do a full copy of ->nfct we'd
end up trying to confirm the same tuples multiple times, which results in
NF_DROP for the cloned skbs.

Also, we only need to do this if the conntrack is unconfirmed.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/bridge/br_netfilter.c |   34 ++++++++++++++++++++++++++++++++++
 1 files changed, 34 insertions(+), 0 deletions(-)

 I have one alternate patch that changes nf_nat_setup_info
 to detect conflicts by forcing serialization via ct->lock spinlock.

 But it is silly to do this for the sake of bridge netfilter only...

 Any other ideas?
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3fa1231..7d47f34 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -42,6 +42,10 @@
 #include <linux/sysctl.h>
 #endif
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
 #define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
 				 (skb->nf_bridge->data))->daddr.ipv4)
 #define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
@@ -158,10 +162,40 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 	return skb->nf_bridge;
 }
 
+
+/* conntrack assumes exclusive ownership of skb->nfct
+ * if conntrack has not yet been confirmed.
+ *
+ * Without this, we may BUG because we might try to set up
+ * NAT bindings for the same conntrack struct simultaneously.
+ *
+ * Work around this by forcing untracked state.
+ */
+static inline void nf_bridge_unshare_nfct(struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	struct nf_conn *ct, *ct_orig = (void *) skb->nfct;
+
+	if (!ct_orig || nf_ct_is_untracked(ct_orig))
+		return;
+
+	if (likely(nf_ct_is_confirmed(ct_orig)) ||
+	    atomic_read(&ct_orig->ct_general.use) == 1)
+		return;
+
+	ct = nf_ct_untracked_get();
+	atomic_inc(&ct->ct_general.use);
+	nf_conntrack_put(skb->nfct);
+	skb->nfct = &ct->ct_general;
+#endif
+}
+
 static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 
+	nf_bridge_unshare_nfct(skb);
+
 	if (atomic_read(&nf_bridge->use) > 1) {
 		struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
 
-- 
1.7.3.4


^ permalink raw reply related

* Re: [PATCH 05/24] batman-adv: Remove unnecessary OOM logging messages
From: Marek Lindner @ 2011-08-30 10:58 UTC (permalink / raw)
  To: b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r
  Cc: Joe Perches, netdev-u79uwXL29TY76Z2rM5mHXA, David S. Miller,
	Simon Wunderlich, linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <c469b9fb57fb75547e3e9092ebfdb3a79480b28f.1314650069.git.joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>

On Monday, August 29, 2011 23:17:24 Joe Perches wrote:
> Removing unnecessary messages saves code and text.
> 
> Site specific OOM messages are duplications of a generic MM
> out of memory message and aren't really useful, so just
> delete them.

Applied in our tree.

Thanks,
Marek

^ permalink raw reply

* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 11:11 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel

File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.
Patch was done against Linux 2.6.38-8

Signed-off-by: Igor Maravić <igorm@etf.rs>

--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918
+0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h	2011-08-30 10:06:01.000000000 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
 	if (err < 0)
 		return err;

-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;

@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto

 		if (f->handle != handle && handle)
 			goto errout2;
-		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+		if (tb[TCA_RSVP_CLASSID]) {
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 			tcf_bind_filter(tp, &f->res, base);
 		}

@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
 	err = -EINVAL;
 	if (handle)
 		goto errout2;
-	if (tb[TCA_RSVP_DST-1] == NULL)
+	if (tb[TCA_RSVP_DST] == NULL)
 		goto errout2;

 	err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
 		goto errout2;

 	h2 = 16;
-	if (tb[TCA_RSVP_SRC-1]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+	if (tb[TCA_RSVP_SRC]) {
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
-	if (tb[TCA_RSVP_PINFO-1]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+	if (tb[TCA_RSVP_PINFO]) {
+		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+	if (tb[TCA_RSVP_CLASSID])
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

-	dst = nla_data(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid
: 0);

 	err = -ENOMEM;
@@ -642,7 +642,7 @@ nla_put_failure:
 }

 static struct tcf_proto_ops RSVP_OPS = {
-	.next		=	NULL,
+	//.next		=	NULL,
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
 	.init		=	rsvp_init,

^ permalink raw reply

* Re: [PATCH] MAINTAINERS: Update ATLX driver maintainers
From: Ian Campbell @ 2011-08-30 11:31 UTC (permalink / raw)
  To: Franco Fichtner
  Cc: netdev@vger.kernel.org, Jay Cliburn, Chris Snook, Jie Yang,
	Andrew Morton, Joe Perches
In-Reply-To: <4E5CC8C5.2080705@lastsummer.de>

On Tue, 2011-08-30 at 12:25 +0100, Franco Fichtner wrote:
> Hi Ian,
> 
> On 08/30/2011 11:34 AM, Ian Campbell wrote:
> > jie.yang@atheros.com bounces and I get a 550 "Unknown address error". Perhaps
> > they have moved on?
> 
> Atheros is now part of Qualcomm. There is a patch by Luis floating
> around fixing the MAINTAINERS file properly, but it hasn't been applied
> yet. No need to apply this one here.

OK thanks Franco.

Ian.

^ permalink raw reply

* Re: [PATCH] cls_rsvp.h was outdated
From: Eric Dumazet @ 2011-08-30 11:38 UTC (permalink / raw)
  To: igorm; +Cc: netdev, linux-kernel
In-Reply-To: <0f79c88be72b75d0526d3f3c2ebf826b.squirrel@kondor.etf.bg.ac.rs>

Le mardi 30 août 2011 à 13:11 +0200, "Igor Maravić" a écrit :
> File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
> file.
> Patch was done against Linux 2.6.38-8
> 
> Signed-off-by: Igor Maravić <igorm@etf.rs>
> 
> --- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918
> +0200

Two last points :

1) Your mail client added line wraps, please take a look at
Documentation/email-clients.txt


> 
>  static struct tcf_proto_ops RSVP_OPS = {
> -	.next		=	NULL,
> +	//.next		=	NULL,
>  	.kind		=	RSVP_ID,
>  	.classify	=	rsvp_classify,
>  	.init		=	rsvp_init,
> 

2) Dont add // comments, just remove the line.

You also could add __read_mostly here :

static struct tcf_proto_ops RSVP_OPS __read_mostly = {


Thanks

^ permalink raw reply

* Re: [PATCH] MAINTAINERS: Update ATLX driver maintainers
From: Franco Fichtner @ 2011-08-30 11:25 UTC (permalink / raw)
  To: Ian Campbell
  Cc: netdev, Jay Cliburn, Chris Snook, Jie Yang, Andrew Morton,
	Joe Perches
In-Reply-To: <1314696887-22518-1-git-send-email-ian.campbell@citrix.com>

Hi Ian,

On 08/30/2011 11:34 AM, Ian Campbell wrote:
> jie.yang@atheros.com bounces and I get a 550 "Unknown address error". Perhaps
> they have moved on?

Atheros is now part of Qualcomm. There is a patch by Luis floating
around fixing the MAINTAINERS file properly, but it hasn't been applied
yet. No need to apply this one here.


Franco

^ permalink raw reply

* RE: [PATCH] MAINTAINERS: Update Cisco VIC driver maintainers
From: David Wang (dwang2) @ 2011-08-30 11:42 UTC (permalink / raw)
  To: Ian Campbell, netdev
  Cc: Christian Benvenuti (benve), Roopa Prabhu (roprabhu),
	Andrew Morton, Joe Perches
In-Reply-To: <1314697269-22594-1-git-send-email-ian.campbell@citrix.com>

Ian,

Vasanthy is no longer with Cisco; we will be removing her from any
future submissions.

Regards,

- Dave 

> -----Original Message-----
> From: Ian Campbell [mailto:ian.campbell@citrix.com] 
> Sent: Tuesday, August 30, 2011 2:41 AM
> To: netdev@vger.kernel.org
> Cc: Ian Campbell; Christian Benvenuti (benve); Roopa Prabhu 
> (roprabhu); David Wang (dwang2); Andrew Morton; Joe Perches
> Subject: [PATCH] MAINTAINERS: Update Cisco VIC driver maintainers
> 
> vkolluri@cisco.com bounces and I get "Unknown address error 550".
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Cc: Christian Benvenuti <benve@cisco.com>
> Cc: Roopa Prabhu <roprabhu@cisco.com>
> Cc: David Wang <dwang2@cisco.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Joe Perches <joe@perches.com>
> Cc: netdev@vger.kernel.org
> ---
>  MAINTAINERS |    1 -
>  1 files changed, 0 insertions(+), 1 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index cb6ad5f..a5e0b11 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1757,7 +1757,6 @@ F:	Documentation/zh_CN/
>  
>  CISCO VIC ETHERNET NIC DRIVER
>  M:	Christian Benvenuti <benve@cisco.com>
> -M:	Vasanthy Kolluri <vkolluri@cisco.com>
>  M:	Roopa Prabhu <roprabhu@cisco.com>
>  M:	David Wang <dwang2@cisco.com>
>  S:	Supported
> --
> 1.7.2.5
> 
> 

^ permalink raw reply

* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 12:12 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, eric.dumazet

File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.
Patch was done against Linux 2.6.38-8

Signed-off-by: Igor Maravić <igorm@etf.rs>

---

--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918
+0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h	2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
 	if (err < 0)
 		return err;

-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;

@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto

 		if (f->handle != handle && handle)
 			goto errout2;
-		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+		if (tb[TCA_RSVP_CLASSID]) {
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 			tcf_bind_filter(tp, &f->res, base);
 		}

@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
 	err = -EINVAL;
 	if (handle)
 		goto errout2;
-	if (tb[TCA_RSVP_DST-1] == NULL)
+	if (tb[TCA_RSVP_DST] == NULL)
 		goto errout2;

 	err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
 		goto errout2;

 	h2 = 16;
-	if (tb[TCA_RSVP_SRC-1]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+	if (tb[TCA_RSVP_SRC]) {
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
-	if (tb[TCA_RSVP_PINFO-1]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+	if (tb[TCA_RSVP_PINFO]) {
+		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+	if (tb[TCA_RSVP_CLASSID])
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

-	dst = nla_data(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid
: 0);

 	err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
 	return -1;
 }

-static struct tcf_proto_ops RSVP_OPS = {
-	.next		=	NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
 	.init		=	rsvp_init,

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Patrick McHardy @ 2011-08-30 12:43 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <1314701827-21702-1-git-send-email-fw@strlen.de>

On 30.08.2011 12:57, Florian Westphal wrote:
> When incoking iptables hooks from bridge netfilter, the assumption
> that non-confirmed skb->nfct is never shared does no longer hold,
> as bridge code clones skbs when e.g. forwarding packets to multiple
> bridge ports.
> 
> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> invoked simultaneously for the same conntrack:

I'm wondering how this can happen, when flooding packets to multiple
ports, they are still processed by the same CPU one after another,
so for the second and further packets, nf_nat should notice that
the mappings are already set up.

> [ 3196.798768] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:300!
> [..]
> [ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768]  [<ffffffffa03207e4>] ? br_handle_frame_finish+0x0/0x13b [bridge]
> [ 3196.798768]  [<ffffffffa02a61a5>] ? alloc_null_binding+0x47/0x4c [iptable_nat]
> [ 3196.798768]  [<ffffffffa02a64eb>] ? nf_nat_fn+0x193/0x1fb [iptable_nat]
> [ 3196.798768]  [<ffffffff8120d4c5>] ? nf_iterate+0x40/0x9f
> [ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768]  [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
> [ 3196.798768]  [<ffffffff81213c94>] ? ip_local_deliver_finish+0x0/0x1f1
> [ 3196.798768]  [<ffffffff8120d73e>] ? nf_hook_slow+0x21a/0x282
> [ 3196.798768]  [<ffffffff8121369c>] ? ip_rcv_finish+0x0/0x340
> [ 3196.798768]  [<ffffffff81213ed7>] ? ip_local_deliver+0x52/0x6c
> [ 3196.798768]  [<ffffffff812139c2>] ? ip_rcv_finish+0x326/0x340
> [ 3196.798768]  [<ffffffff81213c4f>] ? ip_rcv+0x273/0x2b8
> [ 3196.798768]  [<ffffffff811f1384>] ? process_backlog+0x8d/0xc6
> [ 3196.798768]  [<ffffffff811f2f85>] ? net_rx_action+0xa2/0x1cf
> [ 3196.798768]  [<ffffffff8103d3c2>] ? __do_softirq+0x8b/0x10b
> [ 3196.798768]  [<ffffffff8100c9dc>] ? call_softirq+0x1c/0x28
> [ 3196.798768]  [<ffffffff8100dd15>] ? do_softirq+0x31/0x66
> [ 3196.798768]  [<ffffffff8103d267>] ? irq_exit+0x36/0x78
> [ 3196.798768]  [<ffffffff8100d41a>] ? do_IRQ+0xa0/0xb6
> [ 3196.798768]  [<ffffffff8100c253>] ? ret_from_intr+0x0/0xa
> [..]
> [ 3196.798768] Code: be 2b 01 00 00 48 c7 c7 e8 cd 29 a0 e8 e8 d7 d9 e0 45 85 ff 49 8b 45 78 75 06 48 c1 e8 07 eb 04 48 c1 e8 08 83 e0 01 85 c0 74 04 <0f> 0b eb fe 49 8d 75 50 48 8d bc 24 80 00 00 00 e8 83 38 f7 ff
> [ 3196.798768] RIP  [<ffffffffa029b68f>] nf_nat_setup_info+0x8a/0x564 [nf_nat]
> [ 3196.798768]  RSP <ffff880001603bf0>
> 
> Fix this by changing ->nfct of all clones to untracked.
> 
> This should be OK, because if we do a full copy of ->nfct we'd
> end up trying to confirm the same tuples multiple times, which results in
> NF_DROP for the cloned skbs.
> 
> Also, we only need to do this if the conntrack is unconfirmed.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/bridge/br_netfilter.c |   34 ++++++++++++++++++++++++++++++++++
>  1 files changed, 34 insertions(+), 0 deletions(-)
> 
>  I have one alternate patch that changes nf_nat_setup_info
>  to detect conflicts by forcing serialization via ct->lock spinlock.
> 
>  But it is silly to do this for the sake of bridge netfilter only...
> 
>  Any other ideas?
> diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
> index 3fa1231..7d47f34 100644
> --- a/net/bridge/br_netfilter.c
> +++ b/net/bridge/br_netfilter.c
> @@ -42,6 +42,10 @@
>  #include <linux/sysctl.h>
>  #endif
>  
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> +#include <net/netfilter/nf_conntrack.h>
> +#endif
> +
>  #define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
>  				 (skb->nf_bridge->data))->daddr.ipv4)
>  #define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
> @@ -158,10 +162,40 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
>  	return skb->nf_bridge;
>  }
>  
> +
> +/* conntrack assumes exclusive ownership of skb->nfct
> + * if conntrack has not yet been confirmed.
> + *
> + * Without this, we may BUG because we might try to set up
> + * NAT bindings for the same conntrack struct simultaneously.
> + *
> + * Work around this by forcing untracked state.
> + */
> +static inline void nf_bridge_unshare_nfct(struct sk_buff *skb)
> +{
> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
> +	struct nf_conn *ct, *ct_orig = (void *) skb->nfct;
> +
> +	if (!ct_orig || nf_ct_is_untracked(ct_orig))
> +		return;
> +
> +	if (likely(nf_ct_is_confirmed(ct_orig)) ||
> +	    atomic_read(&ct_orig->ct_general.use) == 1)
> +		return;
> +
> +	ct = nf_ct_untracked_get();

This will introduce a module dependency on nf_conntrack, which we really
shouldn't be doing.

> +	atomic_inc(&ct->ct_general.use);
> +	nf_conntrack_put(skb->nfct);
> +	skb->nfct = &ct->ct_general;
> +#endif

^ permalink raw reply

* Re: [PATCH 06/24] netfilter: Remove unnecessary OOM logging messages
From: Patrick McHardy @ 2011-08-30 12:46 UTC (permalink / raw)
  To: Joe Perches
  Cc: Bart De Schuymer, Wensong Zhang, Simon Horman, Julian Anastasov,
	Stephen Hemminger, David S. Miller, Alexey Kuznetsov,
	James Morris, Hideaki YOSHIFUJI, netfilter-devel, netfilter,
	coreteam, bridge, netdev, linux-kernel, lvs-devel
In-Reply-To: <13c1c12486cae409dfa5254b1435e660f2b17e05.1314650069.git.joe@perches.com>

On 29.08.2011 23:17, Joe Perches wrote:
> Removing unnecessary messages saves code and text.
> 
> Site specific OOM messages are duplications of a generic MM
> out of memory message and aren't really useful, so just
> delete them.

Looks good to me. Do you want me to apply this patch or are you
intending to have the entire series go through Dave?

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 12:54 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Florian Westphal, netfilter-devel, netdev
In-Reply-To: <4E5CDADC.7000902@trash.net>

Patrick McHardy <kaber@trash.net> wrote:
> On 30.08.2011 12:57, Florian Westphal wrote:
> > When incoking iptables hooks from bridge netfilter, the assumption
> > that non-confirmed skb->nfct is never shared does no longer hold,
> > as bridge code clones skbs when e.g. forwarding packets to multiple
> > bridge ports.
> > 
> > When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> > invoked simultaneously for the same conntrack:
> 
> I'm wondering how this can happen, when flooding packets to multiple
> ports, they are still processed by the same CPU one after another,
> so for the second and further packets, nf_nat should notice that
> the mappings are already set up.

Main problem is that we end up with same ->nfct in both
INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).

its extremely unlikely but reproduceable with something like
hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255

(assuming bridge interface has an address within that network).

Also, with recent change nf_reinject can be run in parallel.
(the original problem was observed on 2.6.32.24, but i can
 reproduce it with nf-next, too).

^ permalink raw reply

* 802.1Q VLAN random tag injected when vlan configured on forcedeth interface
From: Ruslan N. Marchenko @ 2011-08-30 12:51 UTC (permalink / raw)
  To: netdev

Hi guys,
I've faced with strange behaviour of 8021q driver: when enabling vlan subinterface on eth interface I'm getting ~50% packetloss due to packets are marked with incorrect tags (and eventually dropped by kernel since no vlans configured for such IDs).
Scenario:
[    0.476950] cpufreq-nforce2: No nForce2 chipset.
[    1.519133] forcedeth: Reverse Engineered nForce ethernet driver. Version 0.64.
[    1.519991] forcedeth 0000:00:0a.0: PCI INT A -> Link[LMAC] -> GSI 22 (level, low) -> IRQ 22
[    1.520037] forcedeth 0000:00:0a.0: setting latency timer to 64
[    1.586526] forcedeth 0000:00:0a.0: ifname eth0, PHY OUI 0x732 @ 3, addr 00:26:18:40:21:61
[    1.586542] forcedeth 0000:00:0a.0: highdma csum pwrctl gbit lnktim msi desc-v3

modprobe 8021q

- network still works properly, packets are comming not marked at all.

ip li add link eth0 name vl6 type vlan id 6

- from this moment massive packetdrop starting to happen, almost half of the *incoming* packets are shown in tcpdump as 
14:15:52.859296 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 64, p 3, ethertype IPv4, [|ip]
14:15:56.869572 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 7, ethertype IPv4, [|ip]

mostly only these two tags appears (64 & 2112). Moreover this happens as on native vlan level (pure ethernet) so on tagged subinterface (as if qinq double tagging) for properly tagged with ID 6 incomming packets.

I've tried disabling all offloads:

Offload parameters for eth0:
rx-checksumming: off
tx-checksumming: off
scatter-gather: off
tcp-segmentation-offload: off
udp-fragmentation-offload: off
generic-segmentation-offload: off
generic-receive-offload: off
large-receive-offload: off
rx-vlan-offload: off
tx-vlan-offload: off
ntuple-filters: off
receive-hashing: off

- doesn't have any effect.
Once executing 
ip li del vl6 type vlan
misterious tags disappear and everything works smoothly. Don't know who injects that garbage into frames - 8021q or forcedeth driver :(
Any ideas or suggestions to narrow the problem down?

Additional data.
Link level data dump example for broken frame:
12:35:32.175523 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 2, ethertype IPv4, [|ip]
        0x0000:  0026 1840 2161 0013 f71e fee4 8100 4840
        0x0010:  0800 4500 0054 7a12 0000 4001 eb0f
0x0C-0D - TPID: ethertype 802.1Q (0x8100) 
0x0E-0F - TCI (0100100001000000) PCP 010, CFI 0, VID 100001000000/0x840/2112
0x10-11 - ethertype IPv4
normal ping reply follows, which appears untagged in 50% cases with vlan configured and 100% cases without.

Interface is plugged into openwrt box into non-switched (wan) gigabit port with vid 6 subinterface configured.

Regards,
Ruslan

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Patrick McHardy @ 2011-08-30 13:08 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel, netdev
In-Reply-To: <20110830125453.GC7548@Chamillionaire.breakpoint.cc>

On 30.08.2011 14:54, Florian Westphal wrote:
> Patrick McHardy <kaber@trash.net> wrote:
>> On 30.08.2011 12:57, Florian Westphal wrote:
>>> When incoking iptables hooks from bridge netfilter, the assumption
>>> that non-confirmed skb->nfct is never shared does no longer hold,
>>> as bridge code clones skbs when e.g. forwarding packets to multiple
>>> bridge ports.
>>>
>>> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
>>> invoked simultaneously for the same conntrack:
>>
>> I'm wondering how this can happen, when flooding packets to multiple
>> ports, they are still processed by the same CPU one after another,
>> so for the second and further packets, nf_nat should notice that
>> the mappings are already set up.
> 
> Main problem is that we end up with same ->nfct in both
> INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).
> 
> its extremely unlikely but reproduceable with something like
> hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255
> 
> (assuming bridge interface has an address within that network).
> 
> Also, with recent change nf_reinject can be run in parallel.
> (the original problem was observed on 2.6.32.24, but i can
>  reproduce it with nf-next, too).

I see. We still need to avoid the module dependency on nf_conntrack
though, so I think this will have to be fixed in nf_nat_fn().

^ permalink raw reply

* [PATCH] net: sh_eth: remove duplicated #include
From: Huang Weiyi @ 2011-08-30 13:09 UTC (permalink / raw)
  To: davem; +Cc: netdev, Huang Weiyi

Remove duplicated #include('s) in
  drivers/net/sh_eth.c

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
---
 drivers/net/sh_eth.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index 1c1666e..190f619 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -31,7 +31,6 @@
 #include <linux/phy.h>
 #include <linux/cache.h>
 #include <linux/io.h>
-#include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/ethtool.h>
-- 
1.6.1.3

^ permalink raw reply related

* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 13:10 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel

File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.

Sorry for the word-wrap in previous messages

Signed-off-by: Igor Maravić <igorm@etf.rs>

---

--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918
+0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h	2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
 	if (err < 0)
 		return err;

-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;

@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto

 		if (f->handle != handle && handle)
 			goto errout2;
-		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+		if (tb[TCA_RSVP_CLASSID]) {
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 			tcf_bind_filter(tp, &f->res, base);
 		}

@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
 	err = -EINVAL;
 	if (handle)
 		goto errout2;
-	if (tb[TCA_RSVP_DST-1] == NULL)
+	if (tb[TCA_RSVP_DST] == NULL)
 		goto errout2;

 	err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
 		goto errout2;

 	h2 = 16;
-	if (tb[TCA_RSVP_SRC-1]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+	if (tb[TCA_RSVP_SRC]) {
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
-	if (tb[TCA_RSVP_PINFO-1]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+	if (tb[TCA_RSVP_PINFO]) {
+		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+	if (tb[TCA_RSVP_CLASSID])
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

-	dst = nla_data(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid
: 0);

 	err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
 	return -1;
 }

-static struct tcf_proto_ops RSVP_OPS = {
-	.next		=	NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
 	.init		=	rsvp_init,

^ permalink raw reply

* [PATCH] cls_rsvp.h was outdated
From: "Igor Maravić" @ 2011-08-30 13:12 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel

File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this
file.

Sorry for the word-wrap in previous messages

Signed-off-by: Igor Maravić <igorm@etf.rs>

---

--- linux-2.6.38.8/net/sched/cls_rsvp.h.orig	2011-08-30 12:46:42.663443918 +0200
+++ linux-2.6.38.8/net/sched/cls_rsvp.h	2011-08-30 13:45:02.135445119 +0200
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto
 	struct rsvp_filter *f, **fp;
 	struct rsvp_session *s, **sp;
 	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS-1];
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_RSVP_MAX + 1];
 	struct tcf_exts e;
 	unsigned h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto
 	if (err < 0)
 		return err;

-	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 	if (err < 0)
 		return err;

@@ -448,8 +448,8 @@ static int rsvp_change(struct tcf_proto

 		if (f->handle != handle && handle)
 			goto errout2;
-		if (tb[TCA_RSVP_CLASSID-1]) {
-			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+		if (tb[TCA_RSVP_CLASSID]) {
+			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 			tcf_bind_filter(tp, &f->res, base);
 		}

@@ -461,7 +461,7 @@ static int rsvp_change(struct tcf_proto
 	err = -EINVAL;
 	if (handle)
 		goto errout2;
-	if (tb[TCA_RSVP_DST-1] == NULL)
+	if (tb[TCA_RSVP_DST] == NULL)
 		goto errout2;

 	err = -ENOBUFS;
@@ -470,19 +470,19 @@ static int rsvp_change(struct tcf_proto
 		goto errout2;

 	h2 = 16;
-	if (tb[TCA_RSVP_SRC-1]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+	if (tb[TCA_RSVP_SRC]) {
+		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 		h2 = hash_src(f->src);
 	}
-	if (tb[TCA_RSVP_PINFO-1]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+	if (tb[TCA_RSVP_PINFO]) {
+		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 		f->spi = pinfo->spi;
 		f->tunnelhdr = pinfo->tunnelhdr;
 	}
-	if (tb[TCA_RSVP_CLASSID-1])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+	if (tb[TCA_RSVP_CLASSID])
+		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);

-	dst = nla_data(tb[TCA_RSVP_DST-1]);
+	dst = nla_data(tb[TCA_RSVP_DST]);
 	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);

 	err = -ENOMEM;
@@ -641,8 +641,7 @@ nla_put_failure:
 	return -1;
 }

-static struct tcf_proto_ops RSVP_OPS = {
-	.next		=	NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 	.kind		=	RSVP_ID,
 	.classify	=	rsvp_classify,
 	.init		=	rsvp_init,

^ permalink raw reply

* Re: [PATCH] bridge: netfilter: work around shared nfct struct
From: Florian Westphal @ 2011-08-30 13:19 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Florian Westphal, netfilter-devel, netdev
In-Reply-To: <4E5CE0BD.7040103@trash.net>

Patrick McHardy <kaber@trash.net> wrote:
> On 30.08.2011 14:54, Florian Westphal wrote:
> > Patrick McHardy <kaber@trash.net> wrote:
> >> On 30.08.2011 12:57, Florian Westphal wrote:
> >>> When incoking iptables hooks from bridge netfilter, the assumption
> >>> that non-confirmed skb->nfct is never shared does no longer hold,
> >>> as bridge code clones skbs when e.g. forwarding packets to multiple
> >>> bridge ports.
> >>>
> >>> When NFQUEUE is used, we can BUG because nf_nat_setup_info can be
> >>> invoked simultaneously for the same conntrack:
> >>
> >> I'm wondering how this can happen, when flooding packets to multiple
> >> ports, they are still processed by the same CPU one after another,
> >> so for the second and further packets, nf_nat should notice that
> >> the mappings are already set up.
> > 
> > Main problem is that we end up with same ->nfct in both
> > INPUT and POSTROUTING (br_pass_frame_up vs. br_forward).
> > 
> > its extremely unlikely but reproduceable with something like
> > hping2 -i u1200 -2 -p 138 -d 128 192.168.0.255
> > 
> > (assuming bridge interface has an address within that network).
> > 
> > Also, with recent change nf_reinject can be run in parallel.
> > (the original problem was observed on 2.6.32.24, but i can
> >  reproduce it with nf-next, too).
> 
> I see. We still need to avoid the module dependency on nf_conntrack
> though, so I think this will have to be fixed in nf_nat_fn().

Right, I failed to spot the call to the destroy hook 8-/

I'll submit an alternate patch shortly.

^ permalink raw reply

* [PATCH v2] tcp: Change possible SYN flooding messages
From: Eric Dumazet @ 2011-08-30 13:21 UTC (permalink / raw)
  To: Tom Herbert, David Miller; +Cc: netdev
In-Reply-To: <alpine.DEB.2.00.1108102229130.5341@pokey.mtv.corp.google.com>

"Possible SYN flooding on port xxxx " messages can fill logs on servers.

Change logic to log the message only once per listener, and add two new
SNMP counters to track :

TCPReqQFullDoCookies : number of times a SYNCOOKIE was replied to client

TCPReqQFullDrop : number of times a SYN request was dropped because
syncookies were not enabled.

Based on a prior patch from Tom Herbert, and suggestions from David.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
---
 include/linux/snmp.h       |    2 +
 include/net/request_sock.h |    3 +-
 include/net/tcp.h          |    3 ++
 net/ipv4/proc.c            |    2 +
 net/ipv4/tcp_ipv4.c        |   49 ++++++++++++++++++++---------------
 net/ipv6/tcp_ipv6.c        |   31 ++--------------------
 6 files changed, 40 insertions(+), 50 deletions(-)

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 12b2b18..e16557a 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -231,6 +231,8 @@ enum
 	LINUX_MIB_TCPDEFERACCEPTDROP,
 	LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
 	LINUX_MIB_TCPTIMEWAITOVERFLOW,		/* TCPTimeWaitOverflow */
+	LINUX_MIB_TCPREQQFULLDOCOOKIES,		/* TCPReqQFullDoCookies */
+	LINUX_MIB_TCPREQQFULLDROP,		/* TCPReqQFullDrop */
 	__LINUX_MIB_MAX
 };
 
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 99e6e19..4c0766e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog;
  */
 struct listen_sock {
 	u8			max_qlen_log;
-	/* 3 bytes hole, try to use */
+	u8			synflood_warned;
+	/* 2 bytes hole, try to use */
 	int			qlen;
 	int			qlen_young;
 	int			clock_hand;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 149a415..e9b48b0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -460,6 +460,9 @@ extern int tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
 extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 extern int tcp_send_synack(struct sock *);
+extern int tcp_syn_flood_action(struct sock *sk,
+				const struct sk_buff *skb,
+				const char *proto);
 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b14ec7d..4bfad5d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
 	SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
 	SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
+	SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
+	SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b3f2611..c29912c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -808,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }
 
-static void syn_flood_warning(const struct sk_buff *skb)
+/*
+ * Return 1 if a syncookie should be sent
+ */
+int tcp_syn_flood_action(struct sock *sk,
+			 const struct sk_buff *skb,
+			 const char *proto)
 {
-	const char *msg;
+	const char *msg = "Dropping request";
+	int want_cookie = 0;
+	struct listen_sock *lopt;
+
+
 
 #ifdef CONFIG_SYN_COOKIES
-	if (sysctl_tcp_syncookies)
+	if (sysctl_tcp_syncookies) {
 		msg = "Sending cookies";
-	else
+		want_cookie = 1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+	} else
 #endif
-		msg = "Dropping request";
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
 
-	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
-				ntohs(tcp_hdr(skb)->dest), msg);
+	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+	if (!lopt->synflood_warned) {
+		lopt->synflood_warned = 1;
+		pr_info("%s: Possible SYN flooding on port %d. %s. "
+			" Check SNMP counters.\n",
+			proto, ntohs(tcp_hdr(skb)->dest), msg);
+	}
+	return want_cookie;
 }
+EXPORT_SYMBOL(tcp_syn_flood_action);
 
 /*
  * Save and compile IPv4 options into the request_sock if needed.
@@ -1235,11 +1253,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-#ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
-#else
-#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
-#endif
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1250,14 +1264,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * evidently real one.
 	 */
 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
-		if (net_ratelimit())
-			syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
-		if (sysctl_tcp_syncookies) {
-			want_cookie = 1;
-		} else
-#endif
-		goto drop;
+		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
+		if (!want_cookie)
+			goto drop;
 	}
 
 	/* Accept backlog is full. If we have already queued enough
@@ -1303,9 +1312,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		while (l-- > 0)
 			*c++ ^= *hash_location++;
 
-#ifdef CONFIG_SYN_COOKIES
 		want_cookie = 0;	/* not our kind of cookie */
-#endif
 		tmp_ext.cookie_out_never = 0; /* false */
 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
 	} else if (!tp->rx_opt.cookie_in_always) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 44a5859..12bdb9a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -531,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
 	return tcp_v6_send_synack(sk, req, rvp);
 }
 
-static inline void syn_flood_warning(struct sk_buff *skb)
-{
-#ifdef CONFIG_SYN_COOKIES
-	if (sysctl_tcp_syncookies)
-		printk(KERN_INFO
-		       "TCPv6: Possible SYN flooding on port %d. "
-		       "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
-	else
-#endif
-		printk(KERN_INFO
-		       "TCPv6: Possible SYN flooding on port %d. "
-		       "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
-}
-
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
 	kfree_skb(inet6_rsk(req)->pktopts);
@@ -1179,11 +1165,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
-#ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
-#else
-#define want_cookie 0
-#endif
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
@@ -1192,14 +1174,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 
 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
-		if (net_ratelimit())
-			syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
-		if (sysctl_tcp_syncookies)
-			want_cookie = 1;
-		else
-#endif
-		goto drop;
+		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+		if (!want_cookie)
+			goto drop;
 	}
 
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
@@ -1249,9 +1226,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		while (l-- > 0)
 			*c++ ^= *hash_location++;
 
-#ifdef CONFIG_SYN_COOKIES
 		want_cookie = 0;	/* not our kind of cookie */
-#endif
 		tmp_ext.cookie_out_never = 0; /* false */
 		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
 	} else if (!tp->rx_opt.cookie_in_always) {

^ permalink raw reply related

* Re: 802.1Q VLAN random tag injected when vlan configured on forcedeth interface
From: Eric Dumazet @ 2011-08-30 13:23 UTC (permalink / raw)
  To: Ruslan N. Marchenko; +Cc: netdev
In-Reply-To: <20110830125111.GA28341@ruff.mobi>

Le mardi 30 août 2011 à 14:51 +0200, Ruslan N. Marchenko a écrit :
> Hi guys,
> I've faced with strange behaviour of 8021q driver: when enabling vlan subinterface on eth interface I'm getting ~50% packetloss due to packets are marked with incorrect tags (and eventually dropped by kernel since no vlans configured for such IDs).
> Scenario:
> [    0.476950] cpufreq-nforce2: No nForce2 chipset.
> [    1.519133] forcedeth: Reverse Engineered nForce ethernet driver. Version 0.64.
> [    1.519991] forcedeth 0000:00:0a.0: PCI INT A -> Link[LMAC] -> GSI 22 (level, low) -> IRQ 22
> [    1.520037] forcedeth 0000:00:0a.0: setting latency timer to 64
> [    1.586526] forcedeth 0000:00:0a.0: ifname eth0, PHY OUI 0x732 @ 3, addr 00:26:18:40:21:61
> [    1.586542] forcedeth 0000:00:0a.0: highdma csum pwrctl gbit lnktim msi desc-v3
> 
> modprobe 8021q
> 
> - network still works properly, packets are comming not marked at all.
> 
> ip li add link eth0 name vl6 type vlan id 6
> 
> - from this moment massive packetdrop starting to happen, almost half of the *incoming* packets are shown in tcpdump as 
> 14:15:52.859296 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 64, p 3, ethertype IPv4, [|ip]
> 14:15:56.869572 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 7, ethertype IPv4, [|ip]
> 
> mostly only these two tags appears (64 & 2112). Moreover this happens as on native vlan level (pure ethernet) so on tagged subinterface (as if qinq double tagging) for properly tagged with ID 6 incomming packets.
> 
> I've tried disabling all offloads:
> 
> Offload parameters for eth0:
> rx-checksumming: off
> tx-checksumming: off
> scatter-gather: off
> tcp-segmentation-offload: off
> udp-fragmentation-offload: off
> generic-segmentation-offload: off
> generic-receive-offload: off
> large-receive-offload: off
> rx-vlan-offload: off
> tx-vlan-offload: off
> ntuple-filters: off
> receive-hashing: off
> 
> - doesn't have any effect.
> Once executing 
> ip li del vl6 type vlan
> misterious tags disappear and everything works smoothly. Don't know who injects that garbage into frames - 8021q or forcedeth driver :(
> Any ideas or suggestions to narrow the problem down?
> 
> Additional data.
> Link level data dump example for broken frame:
> 12:35:32.175523 00:13:f7:1e:fe:e4 > 00:26:18:40:21:61, ethertype 802.1Q (0x8100), length 102: vlan 2112, p 2, ethertype IPv4, [|ip]
>         0x0000:  0026 1840 2161 0013 f71e fee4 8100 4840
>         0x0010:  0800 4500 0054 7a12 0000 4001 eb0f
> 0x0C-0D - TPID: ethertype 802.1Q (0x8100) 
> 0x0E-0F - TCI (0100100001000000) PCP 010, CFI 0, VID 100001000000/0x840/2112
> 0x10-11 - ethertype IPv4
> normal ping reply follows, which appears untagged in 50% cases with vlan configured and 100% cases without.
> 
> Interface is plugged into openwrt box into non-switched (wan) gigabit port with vid 6 subinterface configured.
> 

What kernel version are you using ?

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox