Netdev List
 help / color / mirror / Atom feed
* [PATCH 02/15] rtlwifi: rtl8723ae: Add new driver - Part 2
From: Larry Finger @ 2012-09-17 21:18 UTC (permalink / raw)
  To: linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA, Larry Finger,
	netdev-u79uwXL29TY76Z2rM5mHXA, chaoming_li-kXabqFNEczNtrwSWzY7KCg
In-Reply-To: <1347483294-6943-1-git-send-email-Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

This patch is part 2 of the addition of files for a new driver to handle
the Realtek RTL8723AE wireless device.

Signed-off-by: Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
Cc: <chaoming_li-kXabqFNEczNtrwSWzY7KCg@public.gmane.org>
---
 drivers/net/wireless/rtlwifi/rtl8723ae/fw.c |  758 +++++++++++++++++++++++++++
 drivers/net/wireless/rtlwifi/rtl8723ae/fw.h |   99 ++++
 2 files changed, 857 insertions(+)
 create mode 100644 drivers/net/wireless/rtlwifi/rtl8723ae/fw.c
 create mode 100644 drivers/net/wireless/rtlwifi/rtl8723ae/fw.h

Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8723ae/fw.c
===================================================================
--- /dev/null
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8723ae/fw.c
@@ -0,0 +1,758 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2009-2012  Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ * Contact Information:
+ * wlanfae <wlanfae-Rasf1IRRPZFBDgjK7y7TUQ@public.gmane.org>
+ * Realtek Corporation, No. 2, Innovation Road II, Hsinchu Science Park,
+ * Hsinchu 300, Taiwan.
+ *
+ * Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
+ *
+ *****************************************************************************/
+
+#include "../wifi.h"
+#include "../pci.h"
+#include "../base.h"
+#include "reg.h"
+#include "def.h"
+#include "fw.h"
+
+static void _rtl8723ae_enable_fw_download(struct ieee80211_hw *hw, bool enable)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	u8 tmp;
+	if (enable) {
+		tmp = rtl_read_byte(rtlpriv, REG_SYS_FUNC_EN + 1);
+		rtl_write_byte(rtlpriv, REG_SYS_FUNC_EN + 1,
+				       tmp | 0x04);
+
+		tmp = rtl_read_byte(rtlpriv, REG_MCUFWDL);
+		rtl_write_byte(rtlpriv, REG_MCUFWDL, tmp | 0x01);
+
+		tmp = rtl_read_byte(rtlpriv, REG_MCUFWDL + 2);
+		rtl_write_byte(rtlpriv, REG_MCUFWDL + 2, tmp & 0xf7);
+	} else {
+		tmp = rtl_read_byte(rtlpriv, REG_MCUFWDL);
+		rtl_write_byte(rtlpriv, REG_MCUFWDL, tmp & 0xfe);
+
+		rtl_write_byte(rtlpriv, REG_MCUFWDL + 1, 0x00);
+	}
+}
+
+static void _rtl8723ae_fw_block_write(struct ieee80211_hw *hw,
+				      const u8 *buffer, u32 size)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	u32 blockSize = sizeof(u32);
+	u8 *bufferPtr = (u8 *) buffer;
+	u32 *pu4BytePtr = (u32 *) buffer;
+	u32 i, offset, blockCount, remainSize;
+
+	blockCount = size / blockSize;
+	remainSize = size % blockSize;
+
+	for (i = 0; i < blockCount; i++) {
+		offset = i * blockSize;
+		rtl_write_dword(rtlpriv, (FW_8192C_START_ADDRESS + offset),
+				*(pu4BytePtr + i));
+	}
+
+	if (remainSize) {
+		offset = blockCount * blockSize;
+		bufferPtr += offset;
+		for (i = 0; i < remainSize; i++) {
+			rtl_write_byte(rtlpriv, (FW_8192C_START_ADDRESS +
+						 offset + i), *(bufferPtr + i));
+		}
+	}
+}
+
+static void _rtl8723ae_fw_page_write(struct ieee80211_hw *hw,
+				     u32 page, const u8 *buffer, u32 size)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	u8 value8;
+	u8 u8page = (u8) (page & 0x07);
+
+	value8 = (rtl_read_byte(rtlpriv, REG_MCUFWDL + 2) & 0xF8) | u8page;
+
+	rtl_write_byte(rtlpriv, (REG_MCUFWDL + 2), value8);
+	_rtl8723ae_fw_block_write(hw, buffer, size);
+}
+
+static void _rtl8723ae_write_fw(struct ieee80211_hw *hw,
+				enum version_8723e version, u8 *buffer,
+				u32 size)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	u8 *bufferPtr = (u8 *) buffer;
+	u32 page_nums, remain_size;
+	u32 page, offset;
+
+	RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE, "FW size is %d bytes,\n", size);
+
+	page_nums = size / FW_8192C_PAGE_SIZE;
+	remain_size = size % FW_8192C_PAGE_SIZE;
+
+	if (page_nums > 6) {
+		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
+			 "Page numbers should not greater then 6\n");
+	}
+
+	for (page = 0; page < page_nums; page++) {
+		offset = page * FW_8192C_PAGE_SIZE;
+		_rtl8723ae_fw_page_write(hw, page, (bufferPtr + offset),
+					 FW_8192C_PAGE_SIZE);
+	}
+
+	if (remain_size) {
+		offset = page_nums * FW_8192C_PAGE_SIZE;
+		page = page_nums;
+		_rtl8723ae_fw_page_write(hw, page, (bufferPtr + offset),
+					 remain_size);
+	}
+
+	RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE, "FW write done.\n");
+
+}
+
+static int _rtl8723ae_fw_free_to_go(struct ieee80211_hw *hw)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	int err = -EIO;
+	u32 counter = 0;
+	u32 value32;
+
+	do {
+		value32 = rtl_read_dword(rtlpriv, REG_MCUFWDL);
+	} while ((counter++ < FW_8192C_POLLING_TIMEOUT_COUNT) &&
+		 (!(value32 & FWDL_ChkSum_rpt)));
+
+	if (counter >= FW_8192C_POLLING_TIMEOUT_COUNT) {
+		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
+			 "chksum report faill ! REG_MCUFWDL:0x%08x .\n",
+			 value32);
+		goto exit;
+	}
+
+	RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
+		 "Checksum report OK ! REG_MCUFWDL:0x%08x .\n", value32);
+
+	value32 = rtl_read_dword(rtlpriv, REG_MCUFWDL);
+	value32 |= MCUFWDL_RDY;
+	value32 &= ~WINTINI_RDY;
+	rtl_write_dword(rtlpriv, REG_MCUFWDL, value32);
+
+	counter = 0;
+
+	do {
+		value32 = rtl_read_dword(rtlpriv, REG_MCUFWDL);
+		if (value32 & WINTINI_RDY) {
+			RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
+				 "Polling FW ready success!! REG_MCUFWDL:0x%08x .\n",
+				 value32);
+			err = 0;
+			goto exit;
+		}
+
+		mdelay(FW_8192C_POLLING_DELAY);
+
+	} while (counter++ < FW_8192C_POLLING_TIMEOUT_COUNT);
+
+	RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
+		 "Polling FW ready fail!! REG_MCUFWDL:0x%08x .\n", value32);
+
+exit:
+	return err;
+}
+
+int rtl8723ae_download_fw(struct ieee80211_hw *hw)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
+	struct rtl8723ae_firmware_header *pfwheader;
+	u8 *pfwdata;
+	u32 fwsize;
+	int err;
+	enum version_8723e version = rtlhal->version;
+
+	if (!rtlhal->pfirmware)
+		return 1;
+
+	pfwheader = (struct rtl8723ae_firmware_header *)rtlhal->pfirmware;
+	pfwdata = (u8 *) rtlhal->pfirmware;
+	fwsize = rtlhal->fwsize;
+
+	if (IS_FW_HEADER_EXIST(pfwheader)) {
+		RT_TRACE(rtlpriv, COMP_FW, DBG_DMESG,
+			 "Firmware Version(%d), Signature(%#x),Size(%d)\n",
+			 pfwheader->version, pfwheader->signature,
+			 (int)sizeof(struct rtl8723ae_firmware_header));
+
+		pfwdata = pfwdata + sizeof(struct rtl8723ae_firmware_header);
+		fwsize = fwsize - sizeof(struct rtl8723ae_firmware_header);
+	}
+
+	if (rtl_read_byte(rtlpriv, REG_MCUFWDL)&BIT(7)) {
+		rtl8723ae_firmware_selfreset(hw);
+		rtl_write_byte(rtlpriv, REG_MCUFWDL, 0x00);
+	}
+	_rtl8723ae_enable_fw_download(hw, true);
+	_rtl8723ae_write_fw(hw, version, pfwdata, fwsize);
+	_rtl8723ae_enable_fw_download(hw, false);
+
+	err = _rtl8723ae_fw_free_to_go(hw);
+	if (err) {
+		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
+			 "Firmware is not ready to run!\n");
+	} else {
+		RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
+			 "Firmware is ready to run!\n");
+	}
+
+	return 0;
+}
+
+static bool rtl8723ae_check_fw_read_last_h2c(struct ieee80211_hw *hw, u8 boxnum)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	u8 val_hmetfr, val_mcutst_1;
+	bool result = false;
+
+	val_hmetfr = rtl_read_byte(rtlpriv, REG_HMETFR);
+	val_mcutst_1 = rtl_read_byte(rtlpriv, (REG_MCUTST_1 + boxnum));
+
+	if (((val_hmetfr >> boxnum) & BIT(0)) == 0 && val_mcutst_1 == 0)
+		result = true;
+	return result;
+}
+
+static void _rtl8723ae_fill_h2c_command(struct ieee80211_hw *hw,
+					u8 element_id, u32 cmd_len,
+					u8 *p_cmdbuffer)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
+	u8 boxnum;
+	u16 box_reg = 0, box_extreg = 0;
+	u8 u1tmp;
+	bool isfw_read = false;
+	u8 buf_index = 0;
+	bool bwrite_sucess = false;
+	u8 wait_h2c_limmit = 100;
+	u8 wait_writeh2c_limmit = 100;
+	u8 boxcontent[4], boxextcontent[2];
+	u32 h2c_waitcounter = 0;
+	unsigned long flag;
+	u8 idx;
+
+	RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD, "come in\n");
+
+	while (true) {
+		spin_lock_irqsave(&rtlpriv->locks.h2c_lock, flag);
+		if (rtlhal->h2c_setinprogress) {
+			RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD,
+				 "H2C set in progress! Wait to set..element_id(%d).\n",
+				 element_id);
+
+			while (rtlhal->h2c_setinprogress) {
+				spin_unlock_irqrestore(&rtlpriv->locks.h2c_lock,
+						       flag);
+				h2c_waitcounter++;
+				RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD,
+					 "Wait 100 us (%d times)...\n",
+					 h2c_waitcounter);
+				udelay(100);
+
+				if (h2c_waitcounter > 1000)
+					return;
+				spin_lock_irqsave(&rtlpriv->locks.h2c_lock,
+						  flag);
+			}
+			spin_unlock_irqrestore(&rtlpriv->locks.h2c_lock, flag);
+		} else {
+			rtlhal->h2c_setinprogress = true;
+			spin_unlock_irqrestore(&rtlpriv->locks.h2c_lock, flag);
+			break;
+		}
+	}
+
+	while (!bwrite_sucess) {
+		wait_writeh2c_limmit--;
+		if (wait_writeh2c_limmit == 0) {
+			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
+				 "Write H2C fail because no trigger "
+				 "for FW INT!\n");
+			break;
+		}
+
+		boxnum = rtlhal->last_hmeboxnum;
+		switch (boxnum) {
+		case 0:
+			box_reg = REG_HMEBOX_0;
+			box_extreg = REG_HMEBOX_EXT_0;
+			break;
+		case 1:
+			box_reg = REG_HMEBOX_1;
+			box_extreg = REG_HMEBOX_EXT_1;
+			break;
+		case 2:
+			box_reg = REG_HMEBOX_2;
+			box_extreg = REG_HMEBOX_EXT_2;
+			break;
+		case 3:
+			box_reg = REG_HMEBOX_3;
+			box_extreg = REG_HMEBOX_EXT_3;
+			break;
+		default:
+			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
+				 "switch case not process\n");
+			break;
+		}
+
+		isfw_read = rtl8723ae_check_fw_read_last_h2c(hw, boxnum);
+		while (!isfw_read) {
+
+			wait_h2c_limmit--;
+			if (wait_h2c_limmit == 0) {
+				RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD,
+					 "Wating too long for FW read "
+					  "clear HMEBox(%d)!\n", boxnum);
+				break;
+			}
+
+			udelay(10);
+
+			isfw_read = rtl8723ae_check_fw_read_last_h2c(hw,
+								     boxnum);
+			u1tmp = rtl_read_byte(rtlpriv, 0x1BF);
+			RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD,
+				 "Wating for FW read clear HMEBox(%d)!!! "
+				 "0x1BF = %2x\n", boxnum, u1tmp);
+		}
+
+		if (!isfw_read) {
+			RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD,
+				 "Write H2C register BOX[%d] fail!!!!! "
+				 "Fw do not read.\n", boxnum);
+			break;
+		}
+
+		memset(boxcontent, 0, sizeof(boxcontent));
+		memset(boxextcontent, 0, sizeof(boxextcontent));
+		boxcontent[0] = element_id;
+		RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD,
+			 "Write element_id box_reg(%4x) = %2x\n",
+			  box_reg, element_id);
+
+		switch (cmd_len) {
+		case 1:
+			boxcontent[0] &= ~(BIT(7));
+			memcpy((u8 *) (boxcontent) + 1,
+			       p_cmdbuffer + buf_index, 1);
+
+			for (idx = 0; idx < 4; idx++) {
+				rtl_write_byte(rtlpriv, box_reg + idx,
+					       boxcontent[idx]);
+			}
+			break;
+		case 2:
+			boxcontent[0] &= ~(BIT(7));
+			memcpy((u8 *) (boxcontent) + 1,
+			       p_cmdbuffer + buf_index, 2);
+
+			for (idx = 0; idx < 4; idx++) {
+				rtl_write_byte(rtlpriv, box_reg + idx,
+					       boxcontent[idx]);
+			}
+			break;
+		case 3:
+			boxcontent[0] &= ~(BIT(7));
+			memcpy((u8 *) (boxcontent) + 1,
+			       p_cmdbuffer + buf_index, 3);
+
+			for (idx = 0; idx < 4; idx++) {
+				rtl_write_byte(rtlpriv, box_reg + idx,
+					       boxcontent[idx]);
+			}
+			break;
+		case 4:
+			boxcontent[0] |= (BIT(7));
+			memcpy((u8 *) (boxextcontent),
+			       p_cmdbuffer + buf_index, 2);
+			memcpy((u8 *) (boxcontent) + 1,
+			       p_cmdbuffer + buf_index + 2, 2);
+
+			for (idx = 0; idx < 2; idx++) {
+				rtl_write_byte(rtlpriv, box_extreg + idx,
+					       boxextcontent[idx]);
+			}
+
+			for (idx = 0; idx < 4; idx++) {
+				rtl_write_byte(rtlpriv, box_reg + idx,
+					       boxcontent[idx]);
+			}
+			break;
+		case 5:
+			boxcontent[0] |= (BIT(7));
+			memcpy((u8 *) (boxextcontent),
+			       p_cmdbuffer + buf_index, 2);
+			memcpy((u8 *) (boxcontent) + 1,
+			       p_cmdbuffer + buf_index + 2, 3);
+
+			for (idx = 0; idx < 2; idx++) {
+				rtl_write_byte(rtlpriv, box_extreg + idx,
+					       boxextcontent[idx]);
+			}
+
+			for (idx = 0; idx < 4; idx++) {
+				rtl_write_byte(rtlpriv, box_reg + idx,
+					       boxcontent[idx]);
+			}
+			break;
+		default:
+			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
+				 "switch case not process\n");
+			break;
+		}
+
+		bwrite_sucess = true;
+
+		rtlhal->last_hmeboxnum = boxnum + 1;
+		if (rtlhal->last_hmeboxnum == 4)
+			rtlhal->last_hmeboxnum = 0;
+
+		RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD,
+			 "pHalData->last_hmeboxnum  = %d\n",
+			 rtlhal->last_hmeboxnum);
+	}
+
+	spin_lock_irqsave(&rtlpriv->locks.h2c_lock, flag);
+	rtlhal->h2c_setinprogress = false;
+	spin_unlock_irqrestore(&rtlpriv->locks.h2c_lock, flag);
+
+	RT_TRACE(rtlpriv, COMP_CMD, DBG_LOUD, "go out\n");
+}
+
+void rtl8723ae_fill_h2c_cmd(struct ieee80211_hw *hw,
+			    u8 element_id, u32 cmd_len, u8 *p_cmdbuffer)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
+	u32 tmp_cmdbuf[2];
+
+	if (rtlhal->fw_ready == false) {
+		RT_ASSERT(false,
+			 "return H2C cmd because of Fw download fail!!!\n");
+		return;
+	}
+
+	memset(tmp_cmdbuf, 0, 8);
+	memcpy(tmp_cmdbuf, p_cmdbuffer, cmd_len);
+	_rtl8723ae_fill_h2c_command(hw, element_id, cmd_len, (u8 *)&tmp_cmdbuf);
+
+	return;
+}
+
+void rtl8723ae_firmware_selfreset(struct ieee80211_hw *hw)
+{
+	u8 u1tmp;
+	u8 delay = 100;
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+
+	rtl_write_byte(rtlpriv, REG_HMETFR + 3, 0x20);
+	u1tmp = rtl_read_byte(rtlpriv, REG_SYS_FUNC_EN + 1);
+
+	while (u1tmp & BIT(2)) {
+		delay--;
+		if (delay == 0)
+			break;
+		udelay(50);
+		u1tmp = rtl_read_byte(rtlpriv, REG_SYS_FUNC_EN + 1);
+	}
+	if (delay == 0) {
+		u1tmp = rtl_read_byte(rtlpriv, REG_SYS_FUNC_EN + 1);
+		rtl_write_byte(rtlpriv, REG_SYS_FUNC_EN + 1, u1tmp&(~BIT(2)));
+	}
+}
+
+void rtl8723ae_set_fw_pwrmode_cmd(struct ieee80211_hw *hw, u8 mode)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	u8 u1_h2c_set_pwrmode[3] = { 0 };
+	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
+
+	RT_TRACE(rtlpriv, COMP_POWER, DBG_LOUD, "FW LPS mode = %d\n", mode);
+
+	SET_H2CCMD_PWRMODE_PARM_MODE(u1_h2c_set_pwrmode, mode);
+	SET_H2CCMD_PWRMODE_PARM_SMART_PS(u1_h2c_set_pwrmode, 1);
+	SET_H2CCMD_PWRMODE_PARM_BCN_PASS_TIME(u1_h2c_set_pwrmode,
+					      ppsc->reg_max_lps_awakeintvl);
+
+	RT_PRINT_DATA(rtlpriv, COMP_CMD, DBG_DMESG,
+		      "rtl8723ae_set_fw_rsvdpagepkt(): u1_h2c_set_pwrmode\n",
+		      u1_h2c_set_pwrmode, 3);
+	rtl8723ae_fill_h2c_cmd(hw, H2C_SETPWRMODE, 3, u1_h2c_set_pwrmode);
+
+}
+
+static bool _rtl8723ae_cmd_send_packet(struct ieee80211_hw *hw,
+				       struct sk_buff *skb)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
+	struct rtl8192_tx_ring *ring;
+	struct rtl_tx_desc *pdesc;
+	u8 own;
+	unsigned long flags;
+	struct sk_buff *pskb = NULL;
+
+	ring = &rtlpci->tx_ring[BEACON_QUEUE];
+
+	pskb = __skb_dequeue(&ring->queue);
+	if (pskb)
+		kfree_skb(pskb);
+
+	spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags);
+
+	pdesc = &ring->desc[0];
+	own = (u8) rtlpriv->cfg->ops->get_desc((u8 *) pdesc, true, HW_DESC_OWN);
+
+	rtlpriv->cfg->ops->fill_tx_cmddesc(hw, (u8 *) pdesc, 1, 1, skb);
+
+	__skb_queue_tail(&ring->queue, skb);
+
+	spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, flags);
+
+	rtlpriv->cfg->ops->tx_polling(hw, BEACON_QUEUE);
+
+	return true;
+}
+
+#define BEACON_PG		0 /* ->1 */
+#define PSPOLL_PG		2
+#define NULL_PG			3
+#define PROBERSP_PG		4 /* ->5 */
+
+#define TOTAL_RESERVED_PKT_LEN	768
+
+static u8 reserved_page_packet[TOTAL_RESERVED_PKT_LEN] = {
+	/* page 0 beacon */
+	0x80, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0x00, 0xE0, 0x4C, 0x76, 0x00, 0x42,
+	0x00, 0x40, 0x10, 0x10, 0x00, 0x03, 0x50, 0x08,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x64, 0x00, 0x00, 0x04, 0x00, 0x0C, 0x6C, 0x69,
+	0x6E, 0x6B, 0x73, 0x79, 0x73, 0x5F, 0x77, 0x6C,
+	0x61, 0x6E, 0x01, 0x04, 0x82, 0x84, 0x8B, 0x96,
+	0x03, 0x01, 0x01, 0x06, 0x02, 0x00, 0x00, 0x2A,
+	0x01, 0x00, 0x32, 0x08, 0x24, 0x30, 0x48, 0x6C,
+	0x0C, 0x12, 0x18, 0x60, 0x2D, 0x1A, 0x6C, 0x18,
+	0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x3D, 0x00, 0xDD, 0x06, 0x00, 0xE0, 0x4C, 0x02,
+	0x01, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+	/* page 1 beacon */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x10, 0x00, 0x20, 0x8C, 0x00, 0x12, 0x10, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+	/* page 2  ps-poll */
+	0xA4, 0x10, 0x01, 0xC0, 0x00, 0x40, 0x10, 0x10,
+	0x00, 0x03, 0x00, 0xE0, 0x4C, 0x76, 0x00, 0x42,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x18, 0x00, 0x20, 0x8C, 0x00, 0x12, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+	0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+	/* page 3  null */
+	0x48, 0x01, 0x00, 0x00, 0x00, 0x40, 0x10, 0x10,
+	0x00, 0x03, 0x00, 0xE0, 0x4C, 0x76, 0x00, 0x42,
+	0x00, 0x40, 0x10, 0x10, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x72, 0x00, 0x20, 0x8C, 0x00, 0x12, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+	0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+	/* page 4  probe_resp */
+	0x50, 0x00, 0x00, 0x00, 0x00, 0x40, 0x10, 0x10,
+	0x00, 0x03, 0x00, 0xE0, 0x4C, 0x76, 0x00, 0x42,
+	0x00, 0x40, 0x10, 0x10, 0x00, 0x03, 0x00, 0x00,
+	0x9E, 0x46, 0x15, 0x32, 0x27, 0xF2, 0x2D, 0x00,
+	0x64, 0x00, 0x00, 0x04, 0x00, 0x0C, 0x6C, 0x69,
+	0x6E, 0x6B, 0x73, 0x79, 0x73, 0x5F, 0x77, 0x6C,
+	0x61, 0x6E, 0x01, 0x04, 0x82, 0x84, 0x8B, 0x96,
+	0x03, 0x01, 0x01, 0x06, 0x02, 0x00, 0x00, 0x2A,
+	0x01, 0x00, 0x32, 0x08, 0x24, 0x30, 0x48, 0x6C,
+	0x0C, 0x12, 0x18, 0x60, 0x2D, 0x1A, 0x6C, 0x18,
+	0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x3D, 0x00, 0xDD, 0x06, 0x00, 0xE0, 0x4C, 0x02,
+	0x01, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+	/* page 5  probe_resp */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+void rtl8723ae_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool dl_finished)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
+	struct sk_buff *skb = NULL;
+
+	u32 totalpacketlen;
+	bool rtstatus;
+	u8 u1RsvdPageLoc[3] = { 0 };
+	bool dlok = false;
+
+	u8 *beacon;
+	u8 *p_pspoll;
+	u8 *nullfunc;
+	u8 *p_probersp;
+	/*---------------------------------------------------------
+				(1) beacon
+	---------------------------------------------------------*/
+	beacon = &reserved_page_packet[BEACON_PG * 128];
+	SET_80211_HDR_ADDRESS2(beacon, mac->mac_addr);
+	SET_80211_HDR_ADDRESS3(beacon, mac->bssid);
+
+	/*-------------------------------------------------------
+				(2) ps-poll
+	--------------------------------------------------------*/
+	p_pspoll = &reserved_page_packet[PSPOLL_PG * 128];
+	SET_80211_PS_POLL_AID(p_pspoll, (mac->assoc_id | 0xc000));
+	SET_80211_PS_POLL_BSSID(p_pspoll, mac->bssid);
+	SET_80211_PS_POLL_TA(p_pspoll, mac->mac_addr);
+
+	SET_H2CCMD_RSVDPAGE_LOC_PSPOLL(u1RsvdPageLoc, PSPOLL_PG);
+
+	/*--------------------------------------------------------
+				(3) null data
+	---------------------------------------------------------*/
+	nullfunc = &reserved_page_packet[NULL_PG * 128];
+	SET_80211_HDR_ADDRESS1(nullfunc, mac->bssid);
+	SET_80211_HDR_ADDRESS2(nullfunc, mac->mac_addr);
+	SET_80211_HDR_ADDRESS3(nullfunc, mac->bssid);
+
+	SET_H2CCMD_RSVDPAGE_LOC_NULL_DATA(u1RsvdPageLoc, NULL_PG);
+
+	/*---------------------------------------------------------
+				(4) probe response
+	----------------------------------------------------------*/
+	p_probersp = &reserved_page_packet[PROBERSP_PG * 128];
+	SET_80211_HDR_ADDRESS1(p_probersp, mac->bssid);
+	SET_80211_HDR_ADDRESS2(p_probersp, mac->mac_addr);
+	SET_80211_HDR_ADDRESS3(p_probersp, mac->bssid);
+
+	SET_H2CCMD_RSVDPAGE_LOC_PROBE_RSP(u1RsvdPageLoc, PROBERSP_PG);
+
+	totalpacketlen = TOTAL_RESERVED_PKT_LEN;
+
+	RT_PRINT_DATA(rtlpriv, COMP_CMD, DBG_LOUD,
+		      "rtl8723ae_set_fw_rsvdpagepkt(): HW_VAR_SET_TX_CMD: ALL\n",
+		      &reserved_page_packet[0], totalpacketlen);
+	RT_PRINT_DATA(rtlpriv, COMP_CMD, DBG_DMESG,
+		      "rtl8723ae_set_fw_rsvdpagepkt(): HW_VAR_SET_TX_CMD: ALL\n",
+		      u1RsvdPageLoc, 3);
+
+
+	skb = dev_alloc_skb(totalpacketlen);
+	memcpy((u8 *) skb_put(skb, totalpacketlen),
+	       &reserved_page_packet, totalpacketlen);
+
+	rtstatus = _rtl8723ae_cmd_send_packet(hw, skb);
+
+	if (rtstatus)
+		dlok = true;
+
+	if (dlok) {
+		RT_TRACE(rtlpriv, COMP_POWER, DBG_LOUD,
+			 "Set RSVD page location to Fw.\n");
+		RT_PRINT_DATA(rtlpriv, COMP_CMD, DBG_DMESG,
+				"H2C_RSVDPAGE:\n",
+				u1RsvdPageLoc, 3);
+		rtl8723ae_fill_h2c_cmd(hw, H2C_RSVDPAGE,
+				       sizeof(u1RsvdPageLoc), u1RsvdPageLoc);
+	} else
+		RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING,
+			 "Set RSVD page location to Fw FAIL!!!!!!.\n");
+}
+
+void rtl8723ae_set_fw_joinbss_report_cmd(struct ieee80211_hw *hw, u8 mstatus)
+{
+	u8 u1_joinbssrpt_parm[1] = { 0 };
+
+	SET_H2CCMD_JOINBSSRPT_PARM_OPMODE(u1_joinbssrpt_parm, mstatus);
+
+	rtl8723ae_fill_h2c_cmd(hw, H2C_JOINBSSRPT, 1, u1_joinbssrpt_parm);
+}
+
Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8723ae/fw.h
===================================================================
--- /dev/null
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8723ae/fw.h
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2009-2012  Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called LICENSE.
+ *
+ * Contact Information:
+ * wlanfae <wlanfae-Rasf1IRRPZFBDgjK7y7TUQ@public.gmane.org>
+ * Realtek Corporation, No. 2, Innovation Road II, Hsinchu Science Park,
+ * Hsinchu 300, Taiwan.
+ * Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
+ *
+ *****************************************************************************/
+
+#ifndef __RTL92C__FW__H__
+#define __RTL92C__FW__H__
+
+#define FW_8192C_SIZE				0x3000
+#define FW_8192C_START_ADDRESS			0x1000
+#define FW_8192C_END_ADDRESS			0x3FFF
+#define FW_8192C_PAGE_SIZE			4096
+#define FW_8192C_POLLING_DELAY			5
+#define FW_8192C_POLLING_TIMEOUT_COUNT		1000
+
+#define IS_FW_HEADER_EXIST(_pfwhdr)		\
+	((_pfwhdr->signature&0xFF00) == 0x2300 || \
+	(_pfwhdr->signature&0xFF00) == 0x2301 ||  \
+	(_pfwhdr->signature&0xFF00) == 0x2302)
+
+struct rtl8723ae_firmware_header {
+	u16 signature;
+	u8 category;
+	u8 function;
+	u16 version;
+	u8 subversion;
+	u8 rsvd1;
+	u8 month;
+	u8 date;
+	u8 hour;
+	u8 minute;
+	u16 ramcodeSize;
+	u16 rsvd2;
+	u32 svnindex;
+	u32 rsvd3;
+	u32 rsvd4;
+	u32 rsvd5;
+};
+
+enum rtl8192c_h2c_cmd {
+	H2C_AP_OFFLOAD = 0,
+	H2C_SETPWRMODE = 1,
+	H2C_JOINBSSRPT = 2,
+	H2C_RSVDPAGE = 3,
+	H2C_RSSI_REPORT = 5,
+	H2C_RA_MASK = 6,
+	MAX_H2CCMD
+};
+
+#define pagenum_128(_len)	(u32)(((_len)>>7) + ((_len)&0x7F ? 1 : 0))
+
+#define SET_H2CCMD_PWRMODE_PARM_MODE(__ph2ccmd, __val)			\
+	SET_BITS_TO_LE_1BYTE(__ph2ccmd, 0, 8, __val)
+#define SET_H2CCMD_PWRMODE_PARM_SMART_PS(__ph2ccmd, __val)		\
+	SET_BITS_TO_LE_1BYTE((__ph2ccmd)+1, 0, 8, __val)
+#define SET_H2CCMD_PWRMODE_PARM_BCN_PASS_TIME(__ph2ccmd, __val)	\
+	SET_BITS_TO_LE_1BYTE((__ph2ccmd)+2, 0, 8, __val)
+#define SET_H2CCMD_JOINBSSRPT_PARM_OPMODE(__ph2ccmd, __val)		\
+	SET_BITS_TO_LE_1BYTE(__ph2ccmd, 0, 8, __val)
+#define SET_H2CCMD_RSVDPAGE_LOC_PROBE_RSP(__ph2ccmd, __val)		\
+	SET_BITS_TO_LE_1BYTE(__ph2ccmd, 0, 8, __val)
+#define SET_H2CCMD_RSVDPAGE_LOC_PSPOLL(__ph2ccmd, __val)		\
+	SET_BITS_TO_LE_1BYTE((__ph2ccmd)+1, 0, 8, __val)
+#define SET_H2CCMD_RSVDPAGE_LOC_NULL_DATA(__ph2ccmd, __val)		\
+	SET_BITS_TO_LE_1BYTE((__ph2ccmd)+2, 0, 8, __val)
+
+int rtl8723ae_download_fw(struct ieee80211_hw *hw);
+void rtl8723ae_fill_h2c_cmd(struct ieee80211_hw *hw, u8 element_id,
+			    u32 cmd_len, u8 *p_cmdbuffer);
+void rtl8723ae_firmware_selfreset(struct ieee80211_hw *hw);
+void rtl8723ae_set_fw_pwrmode_cmd(struct ieee80211_hw *hw, u8 mode);
+void rtl8723ae_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished);
+void rtl8723ae_set_fw_joinbss_report_cmd(struct ieee80211_hw *hw, u8 mstatus);
+
+#endif
+
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [patch net] sky2: fix rx filter setup on link up
From: Stephen Hemminger @ 2012-09-17 21:15 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev, davem, mlindner, linux-kernel
In-Reply-To: <20120917204724.GA1749@minipsycho.orion>

On Mon, 17 Sep 2012 22:47:24 +0200
Jiri Pirko <jiri@resnulli.us> wrote:

> Mon, Sep 17, 2012 at 06:12:14PM CEST, shemminger@vyatta.com wrote:
> >On Mon, 17 Sep 2012 17:10:17 +0200
> >Jiri Pirko <jiri@resnulli.us> wrote:
> >
> >> In my case I have following problem. sky2_set_multicast() sets registers
> >> GM_MC_ADDR_H[1-4] correctly to:
> >> 0000 0800 0001 0410
> >> However, when adapter gets link and sky2_link_up() is called, the values
> >> are for some reason different:
> >> 0000 0800 0016 0410
> >
> >Rather than papering over the problem, it would be better to
> >trace back what is setting those registers and fix that code.
> 
> Yes, I did that. No code at sky2.[ch] is writing to this registers other
> than sky2_set_multicast() and sky2_gmac_reset() (I hooked on sky2_write*()).
> So I strongly believe this is a HW issue (maybe only issue of my revision
> "Yukon-2 EC chip revision 2")
> 
> >
> >> This in my case prevents iface to be able to receive packets with dst mac
> >> 01:80:C2:00:00:02 (LACPDU dst mac), which I set up previously by
> >> SIOCADDMULTI.
> >> 
> >> So remember computed rx_filter data and write it to GM_MC_ADDR_H[1-4] on
> >> link_up.
> >>
> >
> >Please do some more root cause analysis. Just save/restoring the
> >registers is just a temporary workaround.

Are you sure it isn't IPv6 or something else setting additional mulitcast
addresses. You may need to instrument the set_multicast call.

^ permalink raw reply

* Re: [net] e1000: Small packets may get corrupted during padding by HW
From: Eric Dumazet @ 2012-09-17 21:02 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Dave, Tushar N, Fastabend, John R, Michal Miroslaw,
	Kirsher, Jeffrey T, davem@davemloft.net, netdev@vger.kernel.org,
	gospo@redhat.com, sassmann@redhat.com
In-Reply-To: <50578DE4.7080806@intel.com>

On Mon, 2012-09-17 at 13:53 -0700, Alexander Duyck wrote:
> On 09/17/2012 12:58 AM, Eric Dumazet wrote:
> > On Mon, 2012-09-17 at 07:33 +0000, Dave, Tushar N wrote:
> >>> -----Original Message-----
> >>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> >>> On Behalf Of John Fastabend
> >>> Also wouldn't you want an unlikely() in your patch?
> >> No because it is quite normal to have packet < ETH_ZLEN. e.g. ARP packets.
> > ARP packets ? Hardly a performance problem.
> >
> > Or make sure all these packets have enough tailroom, or else you are
> > going to hit the cost of reallocating packets.
> >
> > I would better point TCP pure ACK packets, since their size can be 54
> > bytes.
> >
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index cfe6ffe..aefc681 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -3083,8 +3083,9 @@ void tcp_send_ack(struct sock *sk)
> >  	/* We are not putting this on the write queue, so
> >  	 * tcp_transmit_skb() will set the ownership to this
> >  	 * sock.
> > +	 * Add 64 bytes of tailroom so that some drivers can use skb_pad()
> >  	 */
> > -	buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
> > +	buff = alloc_skb(MAX_TCP_HEADER + 64, sk_gfp_atomic(sk, GFP_ATOMIC));
> >  	if (buff == NULL) {
> >  		inet_csk_schedule_ack(sk);
> >  		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
> For most systems that extra padding should already be added since
> alloc_skb will cache line align the buffer anyway.
> 

Please define 'most systems' ?

> A more general fix might be to make it so that alloc_skb cannot allocate
> less than 60 byte buffers on systems with a cache line size smaller than
> 64 bytes.

Nope, because we do a skb_reserve(skb, MAX_TCP_HEADER)

So we might have no bytes available at all after this MAX_TCP_HEADER
area.

Relying on extra padding in alloc_skb() is hacky anyway, as it
depends on external factors (external to TCP stack)

^ permalink raw reply

* Re: [net] e1000: Small packets may get corrupted during padding by HW
From: Alexander Duyck @ 2012-09-17 20:53 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Dave, Tushar N, Fastabend, John R, Michal Miroslaw,
	Kirsher, Jeffrey T, davem@davemloft.net, netdev@vger.kernel.org,
	gospo@redhat.com, sassmann@redhat.com
In-Reply-To: <1347868702.26523.79.camel@edumazet-glaptop>

On 09/17/2012 12:58 AM, Eric Dumazet wrote:
> On Mon, 2012-09-17 at 07:33 +0000, Dave, Tushar N wrote:
>>> -----Original Message-----
>>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>>> On Behalf Of John Fastabend
>>> Also wouldn't you want an unlikely() in your patch?
>> No because it is quite normal to have packet < ETH_ZLEN. e.g. ARP packets.
> ARP packets ? Hardly a performance problem.
>
> Or make sure all these packets have enough tailroom, or else you are
> going to hit the cost of reallocating packets.
>
> I would better point TCP pure ACK packets, since their size can be 54
> bytes.
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index cfe6ffe..aefc681 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -3083,8 +3083,9 @@ void tcp_send_ack(struct sock *sk)
>  	/* We are not putting this on the write queue, so
>  	 * tcp_transmit_skb() will set the ownership to this
>  	 * sock.
> +	 * Add 64 bytes of tailroom so that some drivers can use skb_pad()
>  	 */
> -	buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
> +	buff = alloc_skb(MAX_TCP_HEADER + 64, sk_gfp_atomic(sk, GFP_ATOMIC));
>  	if (buff == NULL) {
>  		inet_csk_schedule_ack(sk);
>  		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
For most systems that extra padding should already be added since
alloc_skb will cache line align the buffer anyway.

A more general fix might be to make it so that alloc_skb cannot allocate
less than 60 byte buffers on systems with a cache line size smaller than
64 bytes.

Thanks,

Alex

^ permalink raw reply

* Re: [patch net] sky2: fix rx filter setup on link up
From: Jiri Pirko @ 2012-09-17 20:47 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, davem, mlindner, linux-kernel
In-Reply-To: <20120917091214.5cd9b0f5@nehalam.linuxnetplumber.net>

Mon, Sep 17, 2012 at 06:12:14PM CEST, shemminger@vyatta.com wrote:
>On Mon, 17 Sep 2012 17:10:17 +0200
>Jiri Pirko <jiri@resnulli.us> wrote:
>
>> In my case I have following problem. sky2_set_multicast() sets registers
>> GM_MC_ADDR_H[1-4] correctly to:
>> 0000 0800 0001 0410
>> However, when adapter gets link and sky2_link_up() is called, the values
>> are for some reason different:
>> 0000 0800 0016 0410
>
>Rather than papering over the problem, it would be better to
>trace back what is setting those registers and fix that code.

Yes, I did that. No code at sky2.[ch] is writing to this registers other
than sky2_set_multicast() and sky2_gmac_reset() (I hooked on sky2_write*()).
So I strongly believe this is a HW issue (maybe only issue of my revision
"Yukon-2 EC chip revision 2")

>
>> This in my case prevents iface to be able to receive packets with dst mac
>> 01:80:C2:00:00:02 (LACPDU dst mac), which I set up previously by
>> SIOCADDMULTI.
>> 
>> So remember computed rx_filter data and write it to GM_MC_ADDR_H[1-4] on
>> link_up.
>>
>
>Please do some more root cause analysis. Just save/restoring the
>registers is just a temporary workaround.

^ permalink raw reply

* [PATCH NEXT V2] rtlwifi: rtl8192c: rtl8192ce: Add support for B-CUT version of RTL8188CE
From: Larry Finger @ 2012-09-17 20:35 UTC (permalink / raw)
  To: linville; +Cc: linux-wireless, Larry Finger, netdev, Anisse Astier, Li Chaoming

Realtek devices with designation RTL8188CE-VL have the so-called B-cut
of the wireless chip. This patch adds the special programming needed by
these devices.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Anisse Astier <anisse@astier.eu>
Cc: Li Chaoming <chaoming_li@realsil.com.cn>
---
 drivers/net/wireless/rtlwifi/rtl8192c/phy_common.c |   21 +++++++
 drivers/net/wireless/rtlwifi/rtl8192ce/def.h       |    3 +
 drivers/net/wireless/rtlwifi/rtl8192ce/hw.c        |   61 ++++++++++++++++++--
 drivers/net/wireless/rtlwifi/rtl8192ce/phy.c       |    4 +-
 drivers/net/wireless/rtlwifi/rtl8192ce/sw.c        |    6 +-
 drivers/net/wireless/rtlwifi/rtl8192ce/trx.c       |    4 +-
 6 files changed, 87 insertions(+), 12 deletions(-)
---
V1 => V2	Remove extraneous white space.


John,

This patch is too invasive to backport to the stable kernels, thus it should
be applied to 3.7.

Thanks,

Larry
---

Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192c/phy_common.c
===================================================================
--- wireless-testing-new.orig/drivers/net/wireless/rtlwifi/rtl8192c/phy_common.c
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192c/phy_common.c
@@ -724,6 +724,26 @@ u8 rtl92c_phy_sw_chnl(struct ieee80211_h
 }
 EXPORT_SYMBOL(rtl92c_phy_sw_chnl);
 
+static void _rtl92c_phy_sw_rf_setting(struct ieee80211_hw *hw, u8 channel)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct rtl_phy *rtlphy = &(rtlpriv->phy);
+	struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
+
+	if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) {
+		if (channel == 6 && rtlphy->current_chan_bw ==
+		    HT_CHANNEL_WIDTH_20)
+			rtl_set_rfreg(hw, RF90_PATH_A, RF_RX_G1, MASKDWORD,
+				      0x00255);
+		else{
+			u32 backupRF0x1A = (u32)rtl_get_rfreg(hw, RF90_PATH_A,
+					    RF_RX_G1, RFREG_OFFSET_MASK);
+			rtl_set_rfreg(hw, RF90_PATH_A, RF_RX_G1, MASKDWORD,
+				      backupRF0x1A);
+		}
+	}
+}
+
 static bool _rtl92c_phy_set_sw_chnl_cmdarray(struct swchnlcmd *cmdtable,
 					     u32 cmdtableidx, u32 cmdtablesz,
 					     enum swchnlcmd_id cmdid,
@@ -837,6 +857,7 @@ bool _rtl92c_phy_sw_chnl_step_by_step(st
 					      currentcmd->para1,
 					      RFREG_OFFSET_MASK,
 					      rtlphy->rfreg_chnlval[rfpath]);
+			_rtl92c_phy_sw_rf_setting(hw, channel);
 			}
 			break;
 		default:
Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/def.h
===================================================================
--- wireless-testing-new.orig/drivers/net/wireless/rtlwifi/rtl8192ce/def.h
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/def.h
@@ -116,6 +116,9 @@
 	LE_BITS_TO_4BYTE(((__pcmdfbhdr) + 4), 20, 12)
 
 #define CHIP_VER_B			BIT(4)
+#define CHIP_BONDING_IDENTIFIER(_value) (((_value) >> 22) & 0x3)
+#define CHIP_BONDING_92C_1T2R		0x1
+#define RF_TYPE_1T2R			BIT(1)
 #define CHIP_92C_BITMASK		BIT(0)
 #define CHIP_UNKNOWN			BIT(7)
 #define CHIP_92C_1T2R			0x03
Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
===================================================================
--- wireless-testing-new.orig/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
@@ -896,7 +896,6 @@ int rtl92ce_hw_init(struct ieee80211_hw
 	struct rtl_phy *rtlphy = &(rtlpriv->phy);
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
 	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
-	static bool iqk_initialized; /* initialized to false */
 	bool rtstatus = true;
 	bool is92c;
 	int err;
@@ -921,9 +920,28 @@ int rtl92ce_hw_init(struct ieee80211_hw
 
 	rtlhal->last_hmeboxnum = 0;
 	rtl92c_phy_mac_config(hw);
+	/* because last function modify RCR, so we update
+	 * rcr var here, or TP will unstable for receive_config
+	 * is wrong, RX RCR_ACRC32 will cause TP unstabel & Rx
+	 * RCR_APP_ICV will cause mac80211 unassoc for cisco 1252*/
+	rtlpci->receive_config = rtl_read_dword(rtlpriv, REG_RCR);
+	rtlpci->receive_config &= ~(RCR_ACRC32 | RCR_AICV);
+	rtl_write_dword(rtlpriv, REG_RCR, rtlpci->receive_config);
 	rtl92c_phy_bb_config(hw);
 	rtlphy->rf_mode = RF_OP_BY_SW_3WIRE;
 	rtl92c_phy_rf_config(hw);
+	if (IS_VENDOR_UMC_A_CUT(rtlhal->version) &&
+	    !IS_92C_SERIAL(rtlhal->version)) {
+		rtl_set_rfreg(hw, RF90_PATH_A, RF_RX_G1, MASKDWORD, 0x30255);
+		rtl_set_rfreg(hw, RF90_PATH_A, RF_RX_G2, MASKDWORD, 0x50a00);
+	} else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) {
+		rtl_set_rfreg(hw, RF90_PATH_A, 0x0C, MASKDWORD, 0x894AE);
+		rtl_set_rfreg(hw, RF90_PATH_A, 0x0A, MASKDWORD, 0x1AF31);
+		rtl_set_rfreg(hw, RF90_PATH_A, RF_IPA, MASKDWORD, 0x8F425);
+		rtl_set_rfreg(hw, RF90_PATH_A, RF_SYN_G2, MASKDWORD, 0x4F200);
+		rtl_set_rfreg(hw, RF90_PATH_A, RF_RCK1, MASKDWORD, 0x44053);
+		rtl_set_rfreg(hw, RF90_PATH_A, RF_RCK2, MASKDWORD, 0x80201);
+	}
 	rtlphy->rfreg_chnlval[0] = rtl_get_rfreg(hw, (enum radio_path)0,
 						 RF_CHNLBW, RFREG_OFFSET_MASK);
 	rtlphy->rfreg_chnlval[1] = rtl_get_rfreg(hw, (enum radio_path)1,
@@ -945,11 +963,11 @@ int rtl92ce_hw_init(struct ieee80211_hw
 
 	if (ppsc->rfpwr_state == ERFON) {
 		rtl92c_phy_set_rfpath_switch(hw, 1);
-		if (iqk_initialized) {
+		if (rtlphy->iqk_initialized) {
 			rtl92c_phy_iq_calibrate(hw, true);
 		} else {
 			rtl92c_phy_iq_calibrate(hw, false);
-			iqk_initialized = true;
+			rtlphy->iqk_initialized = true;
 		}
 
 		rtl92c_dm_check_txpower_tracking(hw);
@@ -1004,6 +1022,13 @@ static enum version_8192c _rtl92ce_read_
 				   ? CHIP_VENDOR_UMC_B_CUT : CHIP_UNKNOWN) |
 				   CHIP_VENDOR_UMC));
 		}
+		if (IS_92C_SERIAL(version)) {
+			value32 = rtl_read_dword(rtlpriv, REG_HPON_FSM);
+			version = (enum version_8192c)(version |
+				   ((CHIP_BONDING_IDENTIFIER(value32)
+				   == CHIP_BONDING_92C_1T2R) ?
+				   RF_TYPE_1T2R : 0));
+		}
 	}
 
 	switch (version) {
@@ -1019,12 +1044,30 @@ static enum version_8192c _rtl92ce_read_
 	case VERSION_A_CHIP_88C:
 		versionid = "A_CHIP_88C";
 		break;
+	case VERSION_NORMAL_UMC_CHIP_92C_1T2R_A_CUT:
+		versionid = "A_CUT_92C_1T2R";
+		break;
+	case VERSION_NORMAL_UMC_CHIP_92C_A_CUT:
+		versionid = "A_CUT_92C";
+		break;
+	case VERSION_NORMAL_UMC_CHIP_88C_A_CUT:
+		versionid = "A_CUT_88C";
+		break;
+	case VERSION_NORMAL_UMC_CHIP_92C_1T2R_B_CUT:
+		versionid = "B_CUT_92C_1T2R";
+		break;
+	case VERSION_NORMAL_UMC_CHIP_92C_B_CUT:
+		versionid = "B_CUT_92C";
+		break;
+	case VERSION_NORMAL_UMC_CHIP_88C_B_CUT:
+		versionid = "B_CUT_88C";
+		break;
 	default:
 		versionid = "Unknown. Bug?";
 		break;
 	}
 
-	RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
+	RT_TRACE(rtlpriv, COMP_INIT, DBG_EMERG,
 		 "Chip Version ID: %s\n", versionid);
 
 	switch (version & 0x3) {
@@ -1197,6 +1240,7 @@ static void _rtl92ce_poweroff_adapter(st
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_pci_priv *rtlpcipriv = rtl_pcipriv(hw);
+	struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
 	u8 u1b_tmp;
 	u32 u4b_tmp;
 
@@ -1225,7 +1269,8 @@ static void _rtl92ce_poweroff_adapter(st
 	rtl_write_word(rtlpriv, REG_GPIO_IO_SEL, 0x0790);
 	rtl_write_word(rtlpriv, REG_LEDCFG0, 0x8080);
 	rtl_write_byte(rtlpriv, REG_AFE_PLL_CTRL, 0x80);
-	rtl_write_byte(rtlpriv, REG_SPS0_CTRL, 0x23);
+	if (!IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version))
+		rtl_write_byte(rtlpriv, REG_SPS0_CTRL, 0x23);
 	if (rtlpcipriv->bt_coexist.bt_coexistence) {
 		u4b_tmp = rtl_read_dword(rtlpriv, REG_AFE_XTAL_CTRL);
 		u4b_tmp |= 0x03824800;
@@ -1254,6 +1299,9 @@ void rtl92ce_card_disable(struct ieee802
 		rtlpriv->cfg->ops->led_control(hw, LED_CTL_POWER_OFF);
 	RT_SET_PS_LEVEL(ppsc, RT_RF_OFF_LEVL_HALT_NIC);
 	_rtl92ce_poweroff_adapter(hw);
+
+	/* after power off we should do iqk again */
+	rtlpriv->phy.iqk_initialized = false;
 }
 
 void rtl92ce_interrupt_recognized(struct ieee80211_hw *hw,
@@ -1355,9 +1403,9 @@ static void _rtl92ce_read_txpower_info_f
 			tempval = hwinfo[EEPROM_TXPOWERHT40_2SDIFF + i];
 		else
 			tempval = EEPROM_DEFAULT_HT40_2SDIFF;
-		rtlefuse->eeprom_chnlarea_txpwr_ht40_2sdiif[RF90_PATH_A][i] =
+		rtlefuse->eprom_chnl_txpwr_ht40_2sdf[RF90_PATH_A][i] =
 		    (tempval & 0xf);
-		rtlefuse->eeprom_chnlarea_txpwr_ht40_2sdiif[RF90_PATH_B][i] =
+		rtlefuse->eprom_chnl_txpwr_ht40_2sdf[RF90_PATH_B][i] =
 		    ((tempval & 0xf0) >> 4);
 	}
 
@@ -1381,7 +1429,7 @@ static void _rtl92ce_read_txpower_info_f
 				"RF(%d) EEPROM HT40 2S Diff Area(%d) = 0x%x\n",
 				rf_path, i,
 				rtlefuse->
-				eeprom_chnlarea_txpwr_ht40_2sdiif[rf_path][i]);
+				eprom_chnl_txpwr_ht40_2sdf[rf_path][i]);
 
 	for (rf_path = 0; rf_path < 2; rf_path++) {
 		for (i = 0; i < 14; i++) {
@@ -1396,14 +1444,14 @@ static void _rtl92ce_read_txpower_info_f
 			if ((rtlefuse->
 			     eeprom_chnlarea_txpwr_ht40_1s[rf_path][index] -
 			     rtlefuse->
-			     eeprom_chnlarea_txpwr_ht40_2sdiif[rf_path][index])
+			     eprom_chnl_txpwr_ht40_2sdf[rf_path][index])
 			    > 0) {
 				rtlefuse->txpwrlevel_ht40_2s[rf_path][i] =
 				    rtlefuse->
 				    eeprom_chnlarea_txpwr_ht40_1s[rf_path]
 				    [index] -
 				    rtlefuse->
-				    eeprom_chnlarea_txpwr_ht40_2sdiif[rf_path]
+				    eprom_chnl_txpwr_ht40_2sdf[rf_path]
 				    [index];
 			} else {
 				rtlefuse->txpwrlevel_ht40_2s[rf_path][i] = 0;
@@ -1912,6 +1960,8 @@ static void rtl92ce_update_hal_rate_mask
 			ratr_bitmap &= 0x0f0ff0ff;
 		break;
 	}
+	sta_entry->ratr_index = ratr_index;
+
 	RT_TRACE(rtlpriv, COMP_RATR, DBG_DMESG,
 		 "ratr_bitmap :%x\n", ratr_bitmap);
 	*(u32 *)&rate_mask = (ratr_bitmap & 0x0fffffff) |
Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/phy.c
===================================================================
--- wireless-testing-new.orig/drivers/net/wireless/rtlwifi/rtl8192ce/phy.c
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/phy.c
@@ -82,6 +82,8 @@ bool rtl92c_phy_mac_config(struct ieee80
 
 	if (is92c)
 		rtl_write_byte(rtlpriv, 0x14, 0x71);
+	else
+		rtl_write_byte(rtlpriv, 0x04CA, 0x0A);
 	return rtstatus;
 }
 
Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c
===================================================================
--- wireless-testing-new.orig/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/sw.c
@@ -162,12 +162,10 @@ int rtl92c_init_sw_vars(struct ieee80211
 
 	/* request fw */
 	if (IS_VENDOR_UMC_A_CUT(rtlhal->version) &&
-	    !IS_92C_SERIAL(rtlhal->version)) {
+	    !IS_92C_SERIAL(rtlhal->version))
 		rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU.bin";
-	} else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) {
+	else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version))
 		rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU_B.bin";
-		pr_info("****** This B_CUT device may not work with kernels 3.6 and earlier\n");
-	}
 
 	rtlpriv->max_fw_size = 0x4000;
 	pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name);
Index: wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
===================================================================
--- wireless-testing-new.orig/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
+++ wireless-testing-new/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
@@ -127,11 +127,11 @@ static void _rtl92ce_query_rxphystatus(s
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct phy_sts_cck_8192s_t *cck_buf;
+	struct rtl_ps_ctl *ppsc = rtl_psc(rtlpriv);
 	s8 rx_pwr_all = 0, rx_pwr[4];
 	u8 evm, pwdb_all, rf_rx_num = 0;
 	u8 i, max_spatial_stream;
 	u32 rssi, total_rssi = 0;
-	bool in_powersavemode = false;
 	bool is_cck_rate;
 
 	is_cck_rate = RX_HAL_IS_CCK_RATE(pdesc);
@@ -147,7 +147,7 @@ static void _rtl92ce_query_rxphystatus(s
 		u8 report, cck_highpwr;
 		cck_buf = (struct phy_sts_cck_8192s_t *)p_drvinfo;
 
-		if (!in_powersavemode)
+		if (ppsc->rfpwr_state == ERFON)
 			cck_highpwr = (u8) rtl_get_bbreg(hw,
 						 RFPGA0_XA_HSSIPARAMETER2,
 						 BIT(9));

^ permalink raw reply

* Re: [PATCH v3] net-tcp: TCP/IP stack bypass for loopback connections
From: Eric Dumazet @ 2012-09-17 20:20 UTC (permalink / raw)
  To: Bruce "Brutus" Curtis; +Cc: David S. Miller, Eric Dumazet, netdev
In-Reply-To: <1347908305-13541-1-git-send-email-brutus@google.com>

On Mon, 2012-09-17 at 11:58 -0700, Bruce "Brutus" Curtis wrote:
> From: "Bruce \"Brutus\" Curtis" <brutus@google.com>
> 
> TCP/IP loopback socket pair stack bypass, based on an idea by, and
> rough upstream patch from, David Miller <davem@davemloft.net> called
> "friends", the data structure modifcations and connection scheme are
> reused with extensive data-path changes.

...

>  
> +		if (skb->friend) {
> +			/*
> +			 * If friends haven't been made yet, our sk_friend
> +			 * still == NULL, then update with the ACK's friend
> +			 * value (the listen()er's sock addr) which is used
> +			 * as a place holder.
> +			 */
> +			cmpxchg(&sk->sk_friend, NULL, skb->friend);
> +		}


There is a fundamental issue with this patch

Setting skb->friend to a socket structure, without holding a reference
on it is going to add subtle races and bugs.

In this code, we have no guarantee the socket pointed by skb->friend was
eventually freed and/or reused.

But adding references might be overkill, as we need to unref them in
some places, in hot path.

^ permalink raw reply

* Re: [PATCH] xfrm_user: return error pointer instead of NULL
From: Mathias Krause @ 2012-09-17 20:15 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: David S. Miller, netdev, linux-kernel, stable
In-Reply-To: <20120917071642.GC13023@secunet.com>

On Mon, Sep 17, 2012 at 9:16 AM, Steffen Klassert
<steffen.klassert@secunet.com> wrote:
> On Thu, Sep 13, 2012 at 11:41:26PM +0200, Mathias Krause wrote:
>> When dump_one_state() returns an error, e.g. because of a too small
>> buffer to dump the whole xfrm state, xfrm_state_netlink() returns NULL
>> instead of an error pointer. But its callers expect an error pointer
>> and therefore continue to operate on a NULL skbuff.
>>
>> This could lead to a privilege escalation (execution of user code in
>> kernel context) if the attacker has CAP_NET_ADMIN and is able to map
>> address 0.
>
> Or it simply crashes with a NULL pointer dereference.

..while holding the xfrm_cfg_mutex, therefore effectively disabling
the XFRM netlink interface. So it's at least a DOS in that case ;)

Regards,
Mathias

^ permalink raw reply

* [PATCH net-next] netdev: make address const in device address management
From: Stephen Hemminger @ 2012-09-17 20:03 UTC (permalink / raw)
  To: David Miller, Jeff Kirsher, John Fastabend; +Cc: netdev

The internal functions for add/deleting addresses don't change
their argument.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    4 +-
 drivers/net/macvlan.c                         |    4 +-
 include/linux/netdevice.h                     |   28 +++++++++---------
 net/bridge/br_fdb.c                           |    6 +--
 net/bridge/br_private.h                       |    4 +-
 net/core/dev_addr_lists.c                     |   40 +++++++++++++-------------
 6 files changed, 44 insertions(+), 42 deletions(-)

--- a/drivers/net/macvlan.c	2012-09-17 11:02:16.230810156 -0700
+++ b/drivers/net/macvlan.c	2012-09-17 11:24:34.577369142 -0700
@@ -548,7 +548,7 @@ static int macvlan_vlan_rx_kill_vid(stru
 
 static int macvlan_fdb_add(struct ndmsg *ndm,
 			   struct net_device *dev,
-			   unsigned char *addr,
+			   const unsigned char *addr,
 			   u16 flags)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -567,7 +567,7 @@ static int macvlan_fdb_add(struct ndmsg
 
 static int macvlan_fdb_del(struct ndmsg *ndm,
 			   struct net_device *dev,
-			   unsigned char *addr)
+			   const unsigned char *addr)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	int err = -EINVAL;
--- a/include/linux/netdevice.h	2012-09-17 11:02:16.230810156 -0700
+++ b/include/linux/netdevice.h	2012-09-17 11:24:34.577369142 -0700
@@ -907,10 +907,10 @@ struct netdev_fcoe_hbainfo {
  *	Must return >0 or -errno if it changed dev->features itself.
  *
  * int (*ndo_fdb_add)(struct ndmsg *ndm, struct net_device *dev,
- *		      unsigned char *addr, u16 flags)
+ *		      const unsigned char *addr, u16 flags)
  *	Adds an FDB entry to dev for addr.
  * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev,
- *		      unsigned char *addr)
+ *		      const unsigned char *addr)
  *	Deletes the FDB entry from dev coresponding to addr.
  * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
  *		       struct net_device *dev, int idx)
@@ -1017,11 +1017,11 @@ struct net_device_ops {
 
 	int			(*ndo_fdb_add)(struct ndmsg *ndm,
 					       struct net_device *dev,
-					       unsigned char *addr,
+					       const unsigned char *addr,
 					       u16 flags);
 	int			(*ndo_fdb_del)(struct ndmsg *ndm,
 					       struct net_device *dev,
-					       unsigned char *addr);
+					       const unsigned char *addr);
 	int			(*ndo_fdb_dump)(struct sk_buff *skb,
 						struct netlink_callback *cb,
 						struct net_device *dev,
@@ -2561,9 +2561,9 @@ extern void __hw_addr_flush(struct netde
 extern void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
-extern int dev_addr_add(struct net_device *dev, unsigned char *addr,
+extern int dev_addr_add(struct net_device *dev, const unsigned char *addr,
 			unsigned char addr_type);
-extern int dev_addr_del(struct net_device *dev, unsigned char *addr,
+extern int dev_addr_del(struct net_device *dev, const unsigned char *addr,
 			unsigned char addr_type);
 extern int dev_addr_add_multiple(struct net_device *to_dev,
 				 struct net_device *from_dev,
@@ -2575,20 +2575,20 @@ extern void dev_addr_flush(struct net_de
 extern int dev_addr_init(struct net_device *dev);
 
 /* Functions used for unicast addresses handling */
-extern int dev_uc_add(struct net_device *dev, unsigned char *addr);
-extern int dev_uc_add_excl(struct net_device *dev, unsigned char *addr);
-extern int dev_uc_del(struct net_device *dev, unsigned char *addr);
+extern int dev_uc_add(struct net_device *dev, const unsigned char *addr);
+extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
+extern int dev_uc_del(struct net_device *dev, const unsigned char *addr);
 extern int dev_uc_sync(struct net_device *to, struct net_device *from);
 extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
 extern void dev_uc_flush(struct net_device *dev);
 extern void dev_uc_init(struct net_device *dev);
 
 /* Functions used for multicast addresses handling */
-extern int dev_mc_add(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_add_global(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_add_excl(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_del(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_del_global(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_add(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_del(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
 extern int dev_mc_sync(struct net_device *to, struct net_device *from);
 extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
 extern void dev_mc_flush(struct net_device *dev);
--- a/net/bridge/br_fdb.c	2012-09-17 11:02:16.230810156 -0700
+++ b/net/bridge/br_fdb.c	2012-09-17 11:24:34.577369142 -0700
@@ -609,7 +609,7 @@ static int fdb_add_entry(struct net_brid
 
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
 int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
-	       unsigned char *addr, u16 nlh_flags)
+	       const unsigned char *addr, u16 nlh_flags)
 {
 	struct net_bridge_port *p;
 	int err = 0;
@@ -639,7 +639,7 @@ int br_fdb_add(struct ndmsg *ndm, struct
 	return err;
 }
 
-static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
+static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
 {
 	struct net_bridge *br = p->br;
 	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
@@ -655,7 +655,7 @@ static int fdb_delete_by_addr(struct net
 
 /* Remove neighbor entry with RTM_DELNEIGH */
 int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
-		  unsigned char *addr)
+		  const unsigned char *addr)
 {
 	struct net_bridge_port *p;
 	int err;
--- a/net/bridge/br_private.h	2012-09-17 11:02:16.230810156 -0700
+++ b/net/bridge/br_private.h	2012-09-17 11:24:34.577369142 -0700
@@ -363,10 +363,10 @@ extern void br_fdb_update(struct net_bri
 
 extern int br_fdb_delete(struct ndmsg *ndm,
 			 struct net_device *dev,
-			 unsigned char *addr);
+			 const unsigned char *addr);
 extern int br_fdb_add(struct ndmsg *nlh,
 		      struct net_device *dev,
-		      unsigned char *addr,
+		      const unsigned char *addr,
 		      u16 nlh_flags);
 extern int br_fdb_dump(struct sk_buff *skb,
 		       struct netlink_callback *cb,
--- a/net/core/dev_addr_lists.c	2012-08-15 08:59:22.934704464 -0700
+++ b/net/core/dev_addr_lists.c	2012-09-17 11:21:55.302968736 -0700
@@ -22,7 +22,7 @@
  */
 
 static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
-			       unsigned char *addr, int addr_len,
+			       const unsigned char *addr, int addr_len,
 			       unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
@@ -46,7 +46,7 @@ static int __hw_addr_create_ex(struct ne
 }
 
 static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
-			    unsigned char *addr, int addr_len,
+			    const unsigned char *addr, int addr_len,
 			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
@@ -72,14 +72,15 @@ static int __hw_addr_add_ex(struct netde
 	return __hw_addr_create_ex(list, addr, addr_len, addr_type, global);
 }
 
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_add(struct netdev_hw_addr_list *list,
+			 const unsigned char *addr, int addr_len,
+			 unsigned char addr_type)
 {
 	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
 }
 
 static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
-			    unsigned char *addr, int addr_len,
+			    const unsigned char *addr, int addr_len,
 			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
@@ -104,8 +105,9 @@ static int __hw_addr_del_ex(struct netde
 	return -ENOENT;
 }
 
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_del(struct netdev_hw_addr_list *list,
+			 const unsigned char *addr, int addr_len,
+			 unsigned char addr_type)
 {
 	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
 }
@@ -278,7 +280,7 @@ EXPORT_SYMBOL(dev_addr_init);
  *
  *	The caller must hold the rtnl_mutex.
  */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
+int dev_addr_add(struct net_device *dev, const unsigned char *addr,
 		 unsigned char addr_type)
 {
 	int err;
@@ -303,7 +305,7 @@ EXPORT_SYMBOL(dev_addr_add);
  *
  *	The caller must hold the rtnl_mutex.
  */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
+int dev_addr_del(struct net_device *dev, const unsigned char *addr,
 		 unsigned char addr_type)
 {
 	int err;
@@ -390,7 +392,7 @@ EXPORT_SYMBOL(dev_addr_del_multiple);
  *	@dev: device
  *	@addr: address to add
  */
-int dev_uc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
 {
 	struct netdev_hw_addr *ha;
 	int err;
@@ -421,7 +423,7 @@ EXPORT_SYMBOL(dev_uc_add_excl);
  *	Add a secondary unicast address to the device or increase
  *	the reference count if it already exists.
  */
-int dev_uc_add(struct net_device *dev, unsigned char *addr)
+int dev_uc_add(struct net_device *dev, const unsigned char *addr)
 {
 	int err;
 
@@ -443,7 +445,7 @@ EXPORT_SYMBOL(dev_uc_add);
  *	Release reference to a secondary unicast address and remove it
  *	from the device if the reference count drops to zero.
  */
-int dev_uc_del(struct net_device *dev, unsigned char *addr)
+int dev_uc_del(struct net_device *dev, const unsigned char *addr)
 {
 	int err;
 
@@ -543,7 +545,7 @@ EXPORT_SYMBOL(dev_uc_init);
  *	@dev: device
  *	@addr: address to add
  */
-int dev_mc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
 {
 	struct netdev_hw_addr *ha;
 	int err;
@@ -566,7 +568,7 @@ out:
 }
 EXPORT_SYMBOL(dev_mc_add_excl);
 
-static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
 			bool global)
 {
 	int err;
@@ -587,7 +589,7 @@ static int __dev_mc_add(struct net_devic
  *	Add a multicast address to the device or increase
  *	the reference count if it already exists.
  */
-int dev_mc_add(struct net_device *dev, unsigned char *addr)
+int dev_mc_add(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_add(dev, addr, false);
 }
@@ -600,13 +602,13 @@ EXPORT_SYMBOL(dev_mc_add);
  *
  *	Add a global multicast address to the device.
  */
-int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_global(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_add(dev, addr, true);
 }
 EXPORT_SYMBOL(dev_mc_add_global);
 
-static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
 			bool global)
 {
 	int err;
@@ -628,7 +630,7 @@ static int __dev_mc_del(struct net_devic
  *	Release reference to a multicast address and remove it
  *	from the device if the reference count drops to zero.
  */
-int dev_mc_del(struct net_device *dev, unsigned char *addr)
+int dev_mc_del(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_del(dev, addr, false);
 }
@@ -642,7 +644,7 @@ EXPORT_SYMBOL(dev_mc_del);
  *	Release reference to a multicast address and remove it
  *	from the device if the reference count drops to zero.
  */
-int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_del_global(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_del(dev, addr, true);
 }
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c	2012-09-17 11:02:16.230810156 -0700
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c	2012-09-17 11:25:15.112962043 -0700
@@ -6890,7 +6890,7 @@ static int ixgbe_set_features(struct net
 
 static int ixgbe_ndo_fdb_add(struct ndmsg *ndm,
 			     struct net_device *dev,
-			     unsigned char *addr,
+			     const unsigned char *addr,
 			     u16 flags)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
@@ -6927,7 +6927,7 @@ static int ixgbe_ndo_fdb_add(struct ndms
 
 static int ixgbe_ndo_fdb_del(struct ndmsg *ndm,
 			     struct net_device *dev,
-			     unsigned char *addr)
+			     const unsigned char *addr)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	int err = -EOPNOTSUPP;

^ permalink raw reply

* Re: D-Link DGE-530T and r8169.c
From: James R. Hay @ 2012-09-17 20:01 UTC (permalink / raw)
  To: Francois Romieu; +Cc: nic_swsd, netdev
In-Reply-To: <20120917174706.GA24838@electric-eye.fr.zoreil.com>


Thank you very much.  Evidently I'm a bit further behind on the kernel 
version than I thought.

Jim.

On Mon, 17 Sep 2012, Francois Romieu wrote:

--(snip)--

>>
>> 	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4302), 0, 0, RTL_CFG_0 },
>
> The ID above was added by 93a3aa25933461d76141179fc94aa32d5f9d954a between
> v3.0 and v3.1. See:
>
> http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=93a3aa25933461d76141179fc94aa32d5f9d954a
>
> -- 
> Ueimor
>

James R. Hay				jrhay@HayA.QC.CA
Hay-Net Networks			http://www.hay-net.net
P.O. Box 46051
Pointe Claire, QC
H9R 5R4

^ permalink raw reply

* Re: [PATCH net-next v3 0/4] Take care of xfrm policy when checking dst entries
From: David Miller @ 2012-09-17 19:54 UTC (permalink / raw)
  To: nicolas.dichtel
  Cc: vyasevich, eric.dumazet, sds, james.l.morris, eparis, sri,
	linux-sctp, netdev
In-Reply-To: <50577F74.9040200@6wind.com>

From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 17 Sep 2012 21:52:20 +0200

> Le 17/09/2012 20:25, David Miller a écrit :
>> From: Vlad Yasevich <vyasevich@gmail.com>
>> Date: Mon, 17 Sep 2012 14:14:35 -0400
>>
>>> I think he expected you to take Eric's patch that removed those
>>> pieces.
>>
>> Eric's patch was a cleanup, so it went into 'net-next'.
>>
>> Nicolas's patch is a bonafide bug fix so needs to be
>> targetted at 'net'
> My fault, it was 'net-next'. I will rebase them against 'net'.
> 
> Should I post the last one (4/4) separately, against net-next? Because
> without the '3/4', the cleanup is a bit larger and will cause a
> conflict during the merge.

Actually Nicolas, hold off on this for a bit.

I'm reconsidering putting Eric's patch into 'net' and that would
allow me to use your v3 patches as-is.

Thanks.

^ permalink raw reply

* Re: [PATCH net-next v3 0/4] Take care of xfrm policy when checking dst entries
From: Nicolas Dichtel @ 2012-09-17 19:52 UTC (permalink / raw)
  To: David Miller
  Cc: vyasevich, eric.dumazet, sds, james.l.morris, eparis, sri,
	linux-sctp, netdev
In-Reply-To: <20120917.142504.115219640757684297.davem@davemloft.net>

Le 17/09/2012 20:25, David Miller a écrit :
> From: Vlad Yasevich <vyasevich@gmail.com>
> Date: Mon, 17 Sep 2012 14:14:35 -0400
>
>> I think he expected you to take Eric's patch that removed those
>> pieces.
>
> Eric's patch was a cleanup, so it went into 'net-next'.
>
> Nicolas's patch is a bonafide bug fix so needs to be
> targetted at 'net'
My fault, it was 'net-next'. I will rebase them against 'net'.

Should I post the last one (4/4) separately, against net-next? Because without 
the '3/4', the cleanup is a bit larger and will cause a conflict during the merge.

^ permalink raw reply

* Re: [net] e1000: Small packets may get corrupted during padding by HW
From: Alexander Duyck @ 2012-09-17 19:40 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Jeff Kirsher, davem, Tushar Dave, netdev, gospo, sassmann
In-Reply-To: <CAHXqBFJZnGUSUutN=L-_LpCN7dXLPFY0ndJnQJ10NCcbzPVLww@mail.gmail.com>

On 09/15/2012 01:44 PM, Michał Mirosław wrote:
> 2012/9/15 Jeff Kirsher <jeffrey.t.kirsher@intel.com>:
>> From: Tushar Dave <tushar.n.dave@intel.com>
>>
>> On PCI/PCI-X HW, if packet size is less than ETH_ZLEN,
>> packets may get corrupted during padding by HW.
>> To WA this issue, pad all small packets manually.
>>
>> Signed-off-by: Tushar Dave <tushar.n.dave@intel.com>
>> Tested-by: Aaron Brown <aaron.f.brown@intel.com>
>> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
>> ---
>>  drivers/net/ethernet/intel/e1000/e1000_main.c | 11 +++++++++++
>>  1 file changed, 11 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
>> index 3bfbb8d..bde337e 100644
>> --- a/drivers/net/ethernet/intel/e1000/e1000_main.c
>> +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
>> @@ -3149,6 +3149,17 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
>>                 return NETDEV_TX_OK;
>>         }
>>
>> +       /* On PCI/PCI-X HW, if packet size is less than ETH_ZLEN,
>> +        * packets may get corrupted during padding by HW.
>> +        * To WA this issue, pad all small packets manually.
>> +        */
>> +       if (skb->len < ETH_ZLEN) {
>> +               if (skb_pad(skb, ETH_ZLEN - skb->len))
>> +                       return NETDEV_TX_OK;
>> +               skb->len = ETH_ZLEN;
>> +               skb_set_tail_pointer(skb, ETH_ZLEN);
>> +       }
>> +
> Isn't there a skb_padto() that does just this?
>
> Best Regards,
> Michał Mirosław
>
The problem is skb_padto() doesn't update the packet length, it just
adds the padding but doesn't do anything to account for it.

Thanks,

Alex

^ permalink raw reply

* [PATCH] asix: Support DLink DUB-E100 H/W Ver C1
From: Søren Holm @ 2012-09-17 19:23 UTC (permalink / raw)
  To: netdev; +Cc: Søren Holm, stable

Signed-off-by: Søren Holm <sgh@sgh.dk>
Cc: stable@vger.kernel.org
---
 drivers/net/usb/asix.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 3ae80ec..12f372e 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -1604,6 +1604,10 @@ static const struct usb_device_id	products [] = {
 	USB_DEVICE (0x2001, 0x3c05),
 	.driver_info = (unsigned long) &ax88772_info,
 }, {
+	// DLink DUB-E100 H/W Ver C1
+	USB_DEVICE (0x2001, 0x1a02),
+	.driver_info = (unsigned long) &ax88772_info,
+}, {
 	// Linksys USB1000
 	USB_DEVICE (0x1737, 0x0039),
 	.driver_info = (unsigned long) &ax88178_info,
-- 
1.7.10.4

^ permalink raw reply related

* Re: [TCP]: functionality of delayed_ack in Bic and Cubic Algorithm ?
From: Stephen Hemminger @ 2012-09-17 19:29 UTC (permalink / raw)
  To: Yi Li; +Cc: netdev
In-Reply-To: <50568C3A.9060908@gmail.com>

On Mon, 17 Sep 2012 10:34:34 +0800
Yi Li <lovelylich@gmail.com> wrote:

> Hi All,
> I am try to understand the patch:
> http://patchwork.usersys.redhat.com/patch/43827/.
> But I am not sure of the functionality of delayed_ack filed in Bic and
> Cubic.
> I have found the following mails:
> http://oss.sgi.com/archives/netdev/2005-02/msg00808.html
> which is the first patch introducing the *delayed_ack* field.
> ( But I am not fully understand that material, That's why I have asked
> here.)
> 
> So, here is my understanding of this field, and I am not sure whether it
> is right. :-(
> Question One:
> From comment in *struct bictcp*, delayed_ack is "the ratio of
> Packets/ACKs << 4"
> and it's updating is in function bictcp_acked():
> 
>     /* Track delayed acknowledgment ratio using sliding window
>     * ratio = (15*ratio + sample) / 16
>     */
>     static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
>     {
>     const struct inet_connection_sock *icsk = inet_csk(sk);
>     const struct tcp_sock *tp = tcp_sk(sk);
>     struct bictcp *ca = inet_csk_ca(sk);
>     u32 delay;
> 
>     if (icsk->icsk_ca_state == TCP_CA_Open) {
>     cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
>     ca->delayed_ack += cnt;
>     }
> 
> After googling, I know ratio == delayed_ack >> ACK_RATIO_SHIFT. so here
> we are updating
> the Packets/Acks ratio, basing on the history of ratio (15/16) and the
> current ratio(1/16).
> The current ratio is cnt packets acked by the current acknowledgement,
> divided by the current
> count of acknowledgements(of course it is 1 ack packet). Right?
> 
> Question Two:
> And we update the ca->cnt in function bictcp_update():
> ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
> if (ca->cnt == 0) /* cannot be zero */
> ca->cnt = 1;
> It means ca->cnt= ca->cnt * Acks/Packets. Suppose normal delayed ack,
> Acks/Packets should be 1/2.
> So, ca->cnt will be cut in half. As a result, snd_cwnd will increase one
> times more rapidly, and this is just a
> compensation for delayed ack. So, TCP will still work normally. Right?
>


In Cubic, congestion window is always increased if the ack is okay,
and the flow is limited by the congestion window.  If the receiver
is using delayed acknowledgement, the code wants to adapt to that
problem. MacOS was particularly bad and would only ack every four
packets, and without this change, the window would grow very slowly.

The delayed_ack is maintained as a scaled value (instead of floating point)
because floating point is slow and not easily used in the kernel.

The update is a sliding average is updated while processing a TCP
acknowledgement. 1/16 of the average is the value cnt (the number
of packets acked in this pass) and the rest (15/16) is the
last sliding average.  If the receiver was acking every other packet
the field ca->delayed_ack would converge to 32 = 2<<4.

The ca->cnt update is first estimated based on curves an
the tcp_friendliness limitation.  The ca->cnt is then adjusted using
delayed_ack. The idea is that congestion window should grow at
the same rate independent of the delayed ack strategy used by the
receiver.


Note: Only look at the CUBIC code , ignore BIC, it is deprecated and should not be used.
It remains in Linux kernel only for historical reference.

^ permalink raw reply

* [PATCH v3] net-tcp: TCP/IP stack bypass for loopback connections
From: Bruce "Brutus" Curtis @ 2012-09-17 18:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: Eric Dumazet, netdev, Bruce "Brutus" Curtis

From: "Bruce \"Brutus\" Curtis" <brutus@google.com>

TCP/IP loopback socket pair stack bypass, based on an idea by, and
rough upstream patch from, David Miller <davem@davemloft.net> called
"friends", the data structure modifcations and connection scheme are
reused with extensive data-path changes.

A new sysctl, net.ipv4.tcp_friends, is added:
  0: disable friends and use the stock data path.
  1: enable friends and bypass the stack data path, the default.

Note, when friends is enabled any loopback interpose, e.g. tcpdump,
will only see the TCP/IP packets during connection establishment and
finish, all data bypasses the stack and instead is delivered to the
destination socket directly.

Testing done on a 4 socket 2.2GHz "Quad-Core AMD Opteron(tm) Processor
8354 CPU" based system, netperf results for a single connection show
increased TCP_STREAM throughput, increased TCP_RR and TCP_CRR transaction
rate for most message sizes vs baseline and comparable to AF_UNIX.

Significant increase (up to 4.88x) in aggregate throughput for multiple
netperf runs (STREAM 32KB I/O x N) is seen.

Some netperf results:

Default netperf: netperf
                 netperf -t STREAM_STREAM
                 netperf

         Baseline  AF_UNIX      Friends
         Mbits/S   Mbits/S      Mbits/S
           9319       669   7%   11798 127% 1764%

Note, for the default netperf runs AF_UNIX (STREAM_STREAM) is at a big
disadvantage as it's default socket buffer sizes and messages sizes
(derived from the socket buffer sizes) are small compared with TCP,
also for baseline TCP as no socket buffer sizes are set autosizing is
used, in the above baseline results the final netperf send buffer size
was 650k+ and the netserver recieve buffer was 2g+ while friends is
fixed at 16384 and 87380.

A second set of runs done with the same fixed socket buffers and message
sizes are done.

Orange-to-Orange netperf:
                 netperf -- -s 8192,43690 -S 8192,43690 -m 16384 -M 87380
                 netperf -t STREAM_STREAM -- -s 51882 -m 16384 -M 87380
                 netperf -- -s 8192,43690 -S 8192,43690 -m 16384 -M 87380

         Baseline  AF_UNIX      Friends
         Mbits/S   Mbits/S      Mbits/S
           4014      7717 192%   11716 292% 152%

All subsequent AF_UNIX (STREAM_STREAM, STREAM_RR) tests are done with
"-s 51882" such that the same total effective socket buffering is used
as for the TCP runs defaults (16384+87380/2).

STREAM 32KB I/O x N: netperf -l 100 -t TCP_STREAM -- -m 32K -M 32K
                     netperf -l 100 -t STREAM_STREAM -- -s 51882 -m 32K -M 32K
                     netperf -l 100 -t TCP_STREAM -- -m 32K -M 32K

          Baseline  AF_UNIX      Friends
   N  COC Mbits/S   Mbits/S      Mbits/S
   1   -    9510      7472  97%   11851 125% 159%
   2   -   18037     19602  86%   22942 127% 117%
  16   2   75132    324053 402%  351214 467% 108%
  32   4   66346    282396 332%  303547 458% 107%
 256  32   72427     81883 138%  294526 407% 360%
 512  64   79874     85047 116%  295999 371% 348%
1600 200  110380    148549 223%  426542 386% 287%

COC = Cpu Over Commit ratio (16 core platform)

STREAM: netperf -l 100 -t TCP_STREAM
        netperf -l 100 -t STREAM_STREAM -- -s 51882 -m 16384 -M 87380
        netperf -l 100 -t TCP_STREAM

netperf  Baseline  AF_UNIX      Friends
-m/-M N  Mbits/S   Mbits/S      Mbits/S
  64        930       415  45%     414  45% 100%
  1K       5127      4362  85%    3235  63%  74%
  8K       6302      7830 124%    9041 155% 124%
 32K       8557      9284 108%   14469 169% 156%
 64K       9411      9438 100%   15206 162% 161%
128K      10233      9560  93%   15495 151% 162%
256K      10690      9639  90%   15410 144% 160%
512K      10984      9465  86%   14045 128% 148%
  1M       9384      9434 101%   12913 138% 137%
 16M       7520      9195 122%   12233 163% 133%

RR: netperf -l 100 -t TCP_RR
    netperf -l 100 -t STREAM_RR -- -s 51882
    netperf -l 100 -t TCP_RR

netperf  Baseline  AF_UNIX      Friends
-r N,N   Trans./S  Trans./S     Trans./S
  64      49808     88076 177%   92371 185% 105%
  1K      45111     81297 180%   84171 187% 104%
  8K      27256     29888 110%   31861 117% 107%
 32K      11138     11860 106%   12535 113% 106%
 64K       7210      6307  87%    7682 107% 122%
128K       4399      3335  76%    3911  89% 117%
256K       2550      1856  73%    2342  92% 126%
512K       1030      1026 100%    1236 120% 120%
  1M        424       501 118%     515 121% 103%
 16M       27.1      33.5 124%    34.5 127% 103%

CRR: netperf -l 100 -t TCP_CRR
     netperf -l 100 -t TCP_CRR

netperf  Baseline  AF_UNIX      Friends
  -r N   Trans./S  Trans./S     Trans./S
  64      16848         -        19374 115%   -
  1K      15575         -        18938 122%   -
  8K      12581         -        14993 119%   -
 32K       7740         -         8451 109%   -
 64K       4484         -         6003 134%   -
128K       2394         -         3387 141%   -
256K       1341         -         2126 159%   -
512K        651         -         1055 162%   -
  1M        375         -          475 127%   -
 16M       28.8         -         34.1 118%   -

SPLICE 32KB I/O:

Where: "Z" source File /dev/zero, "-" source user memory
       "N" sink File /dev/null, "-" sink user memory
       "S" Splice on, "-" Splice off

Source
 Sink   Baseline  AF_UNIX      Friends
 FSFS   Mbits/S   Mbits/S      Mbits/S
 ----     9147         -        12020 131%   -
 Z---     7731         -        11680 151%   -
 --N-     9200         -        11728 127%   -
 Z-N-     8877         -        11633 131%   -
 -S--    20894         -        21956 105%   -
 ZS--     8226         -         9218 112%   -
 -SN-    18729         -        24025 128%   -
 ZSN-     8023         -         9043 113%   -
 ---S     8636         -         8255  96%   -
 Z--S     8399         -         8417 100%   -
 --NS    13698         -        10222  75%   -
 Z-NS    11851         -        10237  86%   -
 -S-S    14254         -        18314 128%   -
 ZS-S     7788         -         8504 109%   -
 -SNS    19005         -        20409 107%   -
 ZSNS    13161         -        15079 115%   -

Signed-off-by: Bruce \"Brutus\" Curtis <brutus@google.com>
---
 Documentation/networking/ip-sysctl.txt |    8 +
 include/linux/skbuff.h                 |    2 +
 include/net/request_sock.h             |    1 +
 include/net/sock.h                     |   32 ++-
 include/net/tcp.h                      |   13 +-
 net/core/skbuff.c                      |    1 +
 net/core/sock.c                        |    1 +
 net/core/stream.c                      |   36 ++
 net/ipv4/inet_connection_sock.c        |   20 +
 net/ipv4/sysctl_net_ipv4.c             |    7 +
 net/ipv4/tcp.c                         |  603 +++++++++++++++++++++++++++-----
 net/ipv4/tcp_input.c                   |   22 +-
 net/ipv4/tcp_ipv4.c                    |    2 +
 net/ipv4/tcp_minisocks.c               |    4 +
 net/ipv4/tcp_output.c                  |   16 +-
 net/ipv6/tcp_ipv6.c                    |    1 +
 16 files changed, 679 insertions(+), 90 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index c7fc107..cccc948 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -214,6 +214,14 @@ tcp_fack - BOOLEAN
 	Enable FACK congestion avoidance and fast retransmission.
 	The value is not used, if tcp_sack is not enabled.
 
+tcp_friends - BOOLEAN
+	If set, TCP loopback socket pair stack bypass is enabled such
+	that all data sent will be directly queued to the receiver's
+	socket for receive. Note, normal connection establishment and
+	finish is used to make friends so any loopback interpose, e.g.
+	tcpdump, will see these TCP segements but no data segments.
+	Default: 1
+
 tcp_fin_timeout - INTEGER
 	Time to hold socket in state FIN-WAIT-2, if it was closed
 	by our side. Peer can be broken and never close its side,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b33a3a1..a2e86a6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -332,6 +332,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
  *	@_skb_refdst: destination entry (with norefcount bit)
  *	@sp: the security path, used for xfrm
+ *	@friend: loopback friend socket
  *	@len: Length of actual data
  *	@data_len: Data length
  *	@mac_len: Length of link layer header
@@ -407,6 +408,7 @@ struct sk_buff {
 #ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
 #endif
+	struct sock		*friend;
 	unsigned int		len,
 				data_len;
 	__u16			mac_len,
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b01d8dd..f83d0a1 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -63,6 +63,7 @@ struct request_sock {
 	unsigned long			expires;
 	const struct request_sock_ops	*rsk_ops;
 	struct sock			*sk;
+	struct sock			*friend;
 	u32				secid;
 	u32				peer_secid;
 };
diff --git a/include/net/sock.h b/include/net/sock.h
index 84bdaec..c97ed53 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -197,6 +197,7 @@ struct cg_proto;
   *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
   *	@sk_lock:	synchronizer
   *	@sk_rcvbuf: size of receive buffer in bytes
+  *	@sk_friend: loopback friend socket
   *	@sk_wq: sock wait queue and async head
   *	@sk_rx_dst: receive input route used by early tcp demux
   *	@sk_dst_cache: destination cache
@@ -287,6 +288,14 @@ struct sock {
 	socket_lock_t		sk_lock;
 	struct sk_buff_head	sk_receive_queue;
 	/*
+	 * If socket has a friend (sk_friend != NULL) then a send skb is
+	 * enqueued directly to the friend's sk_receive_queue such that:
+	 *
+	 *        sk_sndbuf -> sk_sndbuf + sk_friend->sk_rcvbuf
+	 *   sk_wmem_queued -> sk_friend->sk_rmem_alloc
+	 */
+	struct sock		*sk_friend;
+	/*
 	 * The backlog queue is special, it is always used with
 	 * the per-socket spinlock held and requires low latency
 	 * access. Therefore we special case it's implementation.
@@ -705,24 +714,40 @@ static inline bool sk_acceptq_is_full(const struct sock *sk)
 	return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
 }
 
+static inline int sk_wmem_queued_get(const struct sock *sk)
+{
+	if (sk->sk_friend)
+		return atomic_read(&sk->sk_friend->sk_rmem_alloc);
+	else
+		return sk->sk_wmem_queued;
+}
+
+static inline int sk_sndbuf_get(const struct sock *sk)
+{
+	if (sk->sk_friend)
+		return sk->sk_sndbuf + sk->sk_friend->sk_rcvbuf;
+	else
+		return sk->sk_sndbuf;
+}
+
 /*
  * Compute minimal free write space needed to queue new packets.
  */
 static inline int sk_stream_min_wspace(const struct sock *sk)
 {
-	return sk->sk_wmem_queued >> 1;
+	return sk_wmem_queued_get(sk) >> 1;
 }
 
 static inline int sk_stream_wspace(const struct sock *sk)
 {
-	return sk->sk_sndbuf - sk->sk_wmem_queued;
+	return sk_sndbuf_get(sk) - sk_wmem_queued_get(sk);
 }
 
 extern void sk_stream_write_space(struct sock *sk);
 
 static inline bool sk_stream_memory_free(const struct sock *sk)
 {
-	return sk->sk_wmem_queued < sk->sk_sndbuf;
+	return sk_wmem_queued_get(sk) < sk_sndbuf_get(sk);
 }
 
 /* OOB backlog add */
@@ -831,6 +856,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
 	})
 
 extern int sk_stream_wait_connect(struct sock *sk, long *timeo_p);
+extern int sk_stream_wait_friend(struct sock *sk, long *timeo_p);
 extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);
 extern void sk_stream_wait_close(struct sock *sk, long timeo_p);
 extern int sk_stream_error(struct sock *sk, int flags, int err);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a8cb00c..ffe82e7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -292,6 +292,7 @@ extern int sysctl_tcp_thin_dupack;
 extern int sysctl_tcp_early_retrans;
 extern int sysctl_tcp_limit_output_bytes;
 extern int sysctl_tcp_challenge_ack_limit;
+extern int sysctl_tcp_friends;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -688,6 +689,15 @@ void tcp_send_window_probe(struct sock *sk);
 #define TCPHDR_ECE 0x40
 #define TCPHDR_CWR 0x80
 
+/* If skb->friend, TCP friends per packet state.
+ */
+struct friend_skb_parm {
+	bool	tail_inuse;		/* In use by skb->friend send while */
+					/* on sk_receive_queue for tail put */
+};
+
+#define TCP_FRIEND_CB(tcb) (&(tcb)->header.hf)
+
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission code.
  * We also store the host-order sequence numbers in here too.
@@ -700,6 +710,7 @@ struct tcp_skb_cb {
 #if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
+		struct friend_skb_parm	hf;
 	} header;	/* For incoming frames		*/
 	__u32		seq;		/* Starting sequence number	*/
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
@@ -1042,7 +1053,7 @@ static inline bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (sysctl_tcp_low_latency || !tp->ucopy.task)
+	if (sysctl_tcp_low_latency || !tp->ucopy.task || sk->sk_friend)
 		return false;
 
 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fe00d12..7cb73e6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -703,6 +703,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
+	new->friend		= old->friend;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->csum		= old->csum;
 	new->local_df		= old->local_df;
diff --git a/net/core/sock.c b/net/core/sock.c
index d765156..1670bb7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2134,6 +2134,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 #ifdef CONFIG_NET_DMA
 	skb_queue_head_init(&sk->sk_async_wait_queue);
 #endif
+	sk->sk_friend		=	NULL;
 
 	sk->sk_send_head	=	NULL;
 
diff --git a/net/core/stream.c b/net/core/stream.c
index f5df85d..85e5b03 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -83,6 +83,42 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 EXPORT_SYMBOL(sk_stream_wait_connect);
 
 /**
+ * sk_stream_wait_friend - Wait for a socket to make friends
+ * @sk: sock to wait on
+ * @timeo_p: for how long to wait
+ *
+ * Must be called with the socket locked.
+ */
+int sk_stream_wait_friend(struct sock *sk, long *timeo_p)
+{
+	struct task_struct *tsk = current;
+	DEFINE_WAIT(wait);
+	int done;
+
+	do {
+		int err = sock_error(sk);
+		if (err)
+			return err;
+		if (!sk->sk_friend)
+			return -EBADFD;
+		if (!*timeo_p)
+			return -EAGAIN;
+		if (signal_pending(tsk))
+			return sock_intr_errno(*timeo_p);
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		sk->sk_write_pending++;
+		done = sk_wait_event(sk, timeo_p,
+				     !sk->sk_err &&
+				     sk->sk_friend->sk_friend);
+		finish_wait(sk_sleep(sk), &wait);
+		sk->sk_write_pending--;
+	} while (!done);
+	return 0;
+}
+EXPORT_SYMBOL(sk_stream_wait_friend);
+
+/**
  * sk_stream_closing - Return 1 if we still have things to send in our buffers.
  * @sk: socket to verify
  */
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8464b79..9a0be59 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -648,6 +648,26 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 	if (newsk != NULL) {
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 
+		if (req->friend) {
+			/*
+			 * Make friends with the requestor but the ACK of
+			 * the request is already in-flight so the race is
+			 * on to make friends before the ACK is processed.
+			 * If the requestor's sk_friend value is != NULL
+			 * then the requestor has already processed the
+			 * ACK so indicate state change to wake'm up.
+			 */
+			struct sock *was;
+
+			sock_hold(req->friend);
+			newsk->sk_friend = req->friend;
+			sock_hold(newsk);
+			was = xchg(&req->friend->sk_friend, newsk);
+			/* If requester already connect()ed, maybe sleeping */
+			if (was && !sock_flag(req->friend, SOCK_DEAD))
+				sk->sk_state_change(req->friend);
+		}
+
 		newsk->sk_state = TCP_SYN_RECV;
 		newicsk->icsk_bind_hash = NULL;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9205e49..9048381 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -794,6 +794,13 @@ static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero
 	},
+	{
+		.procname	= "tcp_friends",
+		.data		= &sysctl_tcp_friends,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index df83d74..f45f243 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -310,6 +310,56 @@ struct tcp_splice_state {
 };
 
 /*
+ * Validate friendp, if not a friend return 0, else if friend is also a
+ * friend return 1, else friendp points to a listen()er so wait for our
+ * friend to be ready then update friendp with pointer to the real friend
+ * and return 1, else an error has occurred so return a -errno.
+ */
+static inline int tcp_friend_validate(struct sock *sk, struct sock **friendp,
+			      long *timeo)
+{
+	struct sock *friend = *friendp;
+
+	if (!friend)
+		return 0;
+	if (unlikely(!friend->sk_friend)) {
+		/* Friendship not complete, wait? */
+		int err;
+
+		if (!timeo)
+			return -EAGAIN;
+		err = sk_stream_wait_friend(sk, timeo);
+		if (err < 0)
+			return err;
+		*friendp = sk->sk_friend;
+	}
+	return 1;
+}
+
+static inline int tcp_friend_send_lock(struct sock *friend)
+{
+	int err = 0;
+
+	spin_lock_bh(&friend->sk_lock.slock);
+	if (unlikely(friend->sk_shutdown & RCV_SHUTDOWN)) {
+		spin_unlock_bh(&friend->sk_lock.slock);
+		err = -ECONNRESET;
+	}
+
+	return err;
+}
+
+static inline void tcp_friend_recv_lock(struct sock *friend)
+{
+	spin_lock_bh(&friend->sk_lock.slock);
+}
+
+static void tcp_friend_unlock(struct sock *friend)
+{
+	spin_unlock_bh(&friend->sk_lock.slock);
+}
+
+/*
  * Pressure flag: try to collapse.
  * Technical note: it is used by multiple contexts non atomically.
  * All the __sk_mem_schedule() is of this nature: accounting
@@ -590,6 +640,76 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(tcp_ioctl);
 
+/*
+ * Friend receive_queue tail skb space? If true, set tail_inuse.
+ * Else if RCV_SHUTDOWN, return *copy = -ECONNRESET.
+ */
+static inline struct sk_buff *tcp_friend_tail(struct sock *friend, int *copy)
+{
+	struct sk_buff	*skb = NULL;
+	int		sz = 0;
+
+	if (skb_peek_tail(&friend->sk_receive_queue)) {
+		sz = tcp_friend_send_lock(friend);
+		if (!sz) {
+			skb = skb_peek_tail(&friend->sk_receive_queue);
+			if (skb && skb->friend) {
+				if (!*copy)
+					sz = skb_tailroom(skb);
+				else {
+					sz = *copy - skb->len;
+					if (sz < 0)
+						sz = 0;
+				}
+				if (sz > 0)
+					TCP_FRIEND_CB(TCP_SKB_CB(skb))->
+							tail_inuse = true;
+			}
+			tcp_friend_unlock(friend);
+		}
+	}
+
+	*copy = sz;
+	return skb;
+}
+
+static inline void tcp_friend_seq(struct sock *sk, int copy, int charge)
+{
+	struct sock	*friend = sk->sk_friend;
+	struct tcp_sock *tp = tcp_sk(friend);
+
+	if (charge) {
+		sk_mem_charge(friend, charge);
+		atomic_add(charge, &friend->sk_rmem_alloc);
+	}
+	tp->rcv_nxt += copy;
+	tp->rcv_wup += copy;
+	tcp_friend_unlock(friend);
+
+	tp = tcp_sk(sk);
+	tp->snd_nxt += copy;
+	tp->pushed_seq += copy;
+	tp->snd_una += copy;
+	tp->snd_up += copy;
+}
+
+static inline bool tcp_friend_push(struct sock *sk, struct sk_buff *skb)
+{
+	struct sock	*friend = sk->sk_friend;
+	int		wait = false;
+
+	skb_set_owner_r(skb, friend);
+	__skb_queue_tail(&friend->sk_receive_queue, skb);
+	if (!sk_rmem_schedule(friend, skb, skb->truesize))
+		wait = true;
+
+	tcp_friend_seq(sk, skb->len, 0);
+	if (skb == skb_peek(&friend->sk_receive_queue))
+		friend->sk_data_ready(friend, 0);
+
+	return wait;
+}
+
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
@@ -606,8 +726,13 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
-	skb->csum    = 0;
 	tcb->seq     = tcb->end_seq = tp->write_seq;
+	if (sk->sk_friend) {
+		skb->friend = sk;
+		TCP_FRIEND_CB(tcb)->tail_inuse = false;
+		return;
+	}
+	skb->csum    = 0;
 	tcb->tcp_flags = TCPHDR_ACK;
 	tcb->sacked  = 0;
 	skb_header_release(skb);
@@ -627,7 +752,10 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
 static inline void tcp_push(struct sock *sk, int flags, int mss_now,
 			    int nonagle)
 {
-	if (tcp_send_head(sk)) {
+	if (sk->sk_friend) {
+		if (skb_peek(&sk->sk_friend->sk_receive_queue))
+			sk->sk_friend->sk_data_ready(sk->sk_friend, 0);
+	} else if (tcp_send_head(sk)) {
 		struct tcp_sock *tp = tcp_sk(sk);
 
 		if (!(flags & MSG_MORE) || forced_push(tp))
@@ -759,6 +887,21 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 }
 EXPORT_SYMBOL(tcp_splice_read);
 
+static inline struct sk_buff *tcp_friend_alloc_skb(struct sock *sk, int size)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(size, sk->sk_allocation);
+	if (skb)
+		skb->avail_size = skb_tailroom(skb);
+	else {
+		sk->sk_prot->enter_memory_pressure(sk);
+		sk_stream_moderate_sndbuf(sk);
+	}
+
+	return skb;
+}
+
 struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 {
 	struct sk_buff *skb;
@@ -822,21 +965,62 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 	return max(xmit_size_goal, mss_now);
 }
 
+static unsigned int tcp_friend_xmit_size_goal(struct sock *sk, int size_goal)
+{
+	u32 size = SKB_DATA_ALIGN(size_goal);
+	u32 overhead = sizeof(struct skb_shared_info) + sizeof(struct sk_buff);
+
+	/*
+	 * If alloc >= largest skb use largest order, else check
+	 * for optimal tail fill size, else use largest order.
+	 */
+	if (size >= SKB_MAX_ORDER(0, 4))
+		size = SKB_MAX_ORDER(0, 4);
+	else if (size <= (SKB_MAX_ORDER(0, 0) >> 3))
+		size = SKB_MAX_ORDER(0, 0);
+	else if (size <= (SKB_MAX_ORDER(0, 1) >> 3))
+		size = SKB_MAX_ORDER(0, 1);
+	else if (size <= (SKB_MAX_ORDER(0, 0) >> 1))
+		size = SKB_MAX_ORDER(0, 0);
+	else if (size <= (SKB_MAX_ORDER(0, 1) >> 1))
+		size = SKB_MAX_ORDER(0, 1);
+	else if (size <= (SKB_MAX_ORDER(0, 2) >> 1))
+		size = SKB_MAX_ORDER(0, 2);
+	else if (size <= (SKB_MAX_ORDER(0, 3) >> 1))
+		size = SKB_MAX_ORDER(0, 3);
+	else
+		size = SKB_MAX_ORDER(0, 4);
+
+	/* At least 2 true sized in sk_buf */
+	if (size + overhead > (sk_sndbuf_get(sk) >> 1))
+		size = (sk_sndbuf_get(sk) >> 1) - overhead;
+
+	return size;
+}
+
 static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
 {
 	int mss_now;
+	int tmp;
 
-	mss_now = tcp_current_mss(sk);
-	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
+	if (sk->sk_friend) {
+		mss_now = tcp_friend_xmit_size_goal(sk, *size_goal);
+		tmp = mss_now;
+	} else {
+		mss_now = tcp_current_mss(sk);
+		tmp = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
+	}
 
+	*size_goal = tmp;
 	return mss_now;
 }
 
 static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
 			 size_t psize, int flags)
 {
+	struct sock *friend = sk->sk_friend;
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mss_now, size_goal;
+	int mss_now, size_goal = psize;
 	int err;
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -851,6 +1035,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 			goto out_err;
 	}
 
+	err = tcp_friend_validate(sk, &friend, &timeo);
+	if (err < 0)
+		goto out_err;
+
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -861,25 +1049,47 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		goto out_err;
 
 	while (psize > 0) {
-		struct sk_buff *skb = tcp_write_queue_tail(sk);
+		struct sk_buff *skb;
+		struct tcp_skb_cb *tcb;
 		struct page *page = pages[poffset / PAGE_SIZE];
 		int copy, i;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
 		bool can_coalesce;
 
-		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
+		if (friend) {
+			copy = size_goal;
+			skb = tcp_friend_tail(friend, &copy);
+			if (copy < 0) {
+				sk->sk_err = -copy;
+				err = -EPIPE;
+				goto out_err;
+			}
+		} else if (!tcp_send_head(sk)) {
+			skb = NULL;
+			copy = 0;
+		} else {
+			skb = tcp_write_queue_tail(sk);
+			copy = size_goal - skb->len;
+		}
+
+		if (copy <= 0) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
-			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+			if (friend)
+				skb = tcp_friend_alloc_skb(sk, 0);
+			else
+				skb = sk_stream_alloc_skb(sk, 0,
+							  sk->sk_allocation);
 			if (!skb)
 				goto wait_for_memory;
 
 			skb_entail(sk, skb);
 			copy = size_goal;
 		}
+		tcb = TCP_SKB_CB(skb);
 
 		if (copy > size)
 			copy = size;
@@ -887,10 +1097,14 @@ new_segment:
 		i = skb_shinfo(skb)->nr_frags;
 		can_coalesce = skb_can_coalesce(skb, i, page, offset);
 		if (!can_coalesce && i >= MAX_SKB_FRAGS) {
-			tcp_mark_push(tp, skb);
+			if (friend) {
+				if (TCP_FRIEND_CB(tcb)->tail_inuse)
+					TCP_FRIEND_CB(tcb)->tail_inuse = false;
+			} else
+				tcp_mark_push(tp, skb);
 			goto new_segment;
 		}
-		if (!sk_wmem_schedule(sk, copy))
+		if (!friend && !sk_wmem_schedule(sk, copy))
 			goto wait_for_memory;
 
 		if (can_coalesce) {
@@ -903,19 +1117,42 @@ new_segment:
 		skb->len += copy;
 		skb->data_len += copy;
 		skb->truesize += copy;
+		tp->write_seq += copy;
+
+		copied += copy;
+		poffset += copy;
+		psize -= copy;
+
+		if (friend) {
+			err = tcp_friend_send_lock(friend);
+			if (err) {
+				sk->sk_err = -err;
+				err = -EPIPE;
+				goto out_err;
+			}
+			tcb->end_seq += copy;
+			if (TCP_FRIEND_CB(tcb)->tail_inuse) {
+				TCP_FRIEND_CB(tcb)->tail_inuse = false;
+				tcp_friend_seq(sk, copy, copy);
+			} else {
+				if (tcp_friend_push(sk, skb))
+					goto wait_for_sndbuf;
+			}
+			if (!psize)
+				goto out;
+			continue;
+		}
+
+		tcb->end_seq += copy;
+		skb_shinfo(skb)->gso_segs = 0;
 		sk->sk_wmem_queued += copy;
 		sk_mem_charge(sk, copy);
 		skb->ip_summed = CHECKSUM_PARTIAL;
-		tp->write_seq += copy;
-		TCP_SKB_CB(skb)->end_seq += copy;
-		skb_shinfo(skb)->gso_segs = 0;
 
-		if (!copied)
-			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+		if (copied == copy)
+			tcb->tcp_flags &= ~TCPHDR_PSH;
 
-		copied += copy;
-		poffset += copy;
-		if (!(psize -= copy))
+		if (!psize)
 			goto out;
 
 		if (skb->len < size_goal || (flags & MSG_OOB))
@@ -936,7 +1173,8 @@ wait_for_memory:
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
 
-		mss_now = tcp_send_mss(sk, &size_goal, flags);
+		if (!friend)
+			mss_now = tcp_send_mss(sk, &size_goal, flags);
 	}
 
 out:
@@ -1027,10 +1265,12 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
 	struct iovec *iov;
+	struct sock *friend = sk->sk_friend;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
+	struct tcp_skb_cb *tcb;
 	int iovlen, flags, err, copied = 0;
-	int mss_now = 0, size_goal, copied_syn = 0, offset = 0;
+	int mss_now = 0, size_goal = size, copied_syn = 0, offset = 0;
 	bool sg;
 	long timeo;
 
@@ -1058,6 +1298,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			goto do_error;
 	}
 
+	err = tcp_friend_validate(sk, &friend, &timeo);
+	if (err < 0)
+		goto out;
+
 	if (unlikely(tp->repair)) {
 		if (tp->repair_queue == TCP_RECV_QUEUE) {
 			copied = tcp_send_rcvq(sk, msg, size);
@@ -1106,24 +1350,38 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			int copy = 0;
 			int max = size_goal;
 
-			skb = tcp_write_queue_tail(sk);
-			if (tcp_send_head(sk)) {
-				if (skb->ip_summed == CHECKSUM_NONE)
-					max = mss_now;
-				copy = max - skb->len;
+			if (friend) {
+				skb = tcp_friend_tail(friend, &copy);
+				if (copy < 0) {
+					sk->sk_err = -copy;
+					err = -EPIPE;
+					goto out_err;
+				}
+			} else {
+				skb = tcp_write_queue_tail(sk);
+				if (tcp_send_head(sk)) {
+					if (skb->ip_summed == CHECKSUM_NONE)
+						max = mss_now;
+					copy = max - skb->len;
+				}
 			}
 
 			if (copy <= 0) {
 new_segment:
-				/* Allocate new segment. If the interface is SG,
-				 * allocate skb fitting to single page.
-				 */
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
 
-				skb = sk_stream_alloc_skb(sk,
-							  select_size(sk, sg),
-							  sk->sk_allocation);
+				if (friend)
+					skb = tcp_friend_alloc_skb(sk, max);
+				else {
+					/* Allocate new segment. If the
+					 * interface is SG, allocate skb
+					 * fitting to single page.
+					 */
+					skb = sk_stream_alloc_skb(sk,
+							select_size(sk, sg),
+							sk->sk_allocation);
+				}
 				if (!skb)
 					goto wait_for_memory;
 
@@ -1137,6 +1395,7 @@ new_segment:
 				copy = size_goal;
 				max = size_goal;
 			}
+			tcb = TCP_SKB_CB(skb);
 
 			/* Try to append data to the end of skb. */
 			if (copy > seglen)
@@ -1155,6 +1414,8 @@ new_segment:
 				struct page *page = sk->sk_sndmsg_page;
 				int off;
 
+				BUG_ON(friend);
+
 				if (page && page_count(page) == 1)
 					sk->sk_sndmsg_off = 0;
 
@@ -1224,16 +1485,37 @@ new_segment:
 				sk->sk_sndmsg_off = off + copy;
 			}
 
-			if (!copied)
-				TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
-
 			tp->write_seq += copy;
-			TCP_SKB_CB(skb)->end_seq += copy;
-			skb_shinfo(skb)->gso_segs = 0;
 
 			from += copy;
 			copied += copy;
-			if ((seglen -= copy) == 0 && iovlen == 0)
+			seglen -= copy;
+
+			if (friend) {
+				err = tcp_friend_send_lock(friend);
+				if (err) {
+					sk->sk_err = -err;
+					err = -EPIPE;
+					goto out_err;
+				}
+				tcb->end_seq += copy;
+				if (TCP_FRIEND_CB(tcb)->tail_inuse) {
+					TCP_FRIEND_CB(tcb)->tail_inuse = false;
+					tcp_friend_seq(sk, copy, 0);
+				} else {
+					if (tcp_friend_push(sk, skb))
+						goto wait_for_sndbuf;
+				}
+				continue;
+			}
+
+			tcb->end_seq += copy;
+			skb_shinfo(skb)->gso_segs = 0;
+
+			if (copied == copy)
+				tcb->tcp_flags &= ~TCPHDR_PSH;
+
+			if (seglen == 0 && iovlen == 0)
 				goto out;
 
 			if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
@@ -1255,7 +1537,8 @@ wait_for_memory:
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 				goto do_error;
 
-			mss_now = tcp_send_mss(sk, &size_goal, flags);
+			if (!friend)
+				mss_now = tcp_send_mss(sk, &size_goal, flags);
 		}
 	}
 
@@ -1266,7 +1549,12 @@ out:
 	return copied + copied_syn;
 
 do_fault:
-	if (!skb->len) {
+	if (skb->friend) {
+		if (TCP_FRIEND_CB(tcb)->tail_inuse)
+			TCP_FRIEND_CB(tcb)->tail_inuse = false;
+		else
+			__kfree_skb(skb);
+	} else if (!skb->len) {
 		tcp_unlink_write_queue(skb, sk);
 		/* It is the one place in all of TCP, except connection
 		 * reset, where we can be unlinking the send_head.
@@ -1285,6 +1573,13 @@ out_err:
 }
 EXPORT_SYMBOL(tcp_sendmsg);
 
+static inline void tcp_friend_write_space(struct sock *sk)
+{
+	/* Queued data below 1/4th of sndbuf? */
+	if ((sk_sndbuf_get(sk) >> 2) > sk_wmem_queued_get(sk))
+		sk->sk_friend->sk_write_space(sk->sk_friend);
+}
+
 /*
  *	Handle reading urgent data. BSD has very simple semantics for
  *	this, no blocking and very strange errors 8)
@@ -1363,7 +1658,12 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool time_to_ack = false;
 
-	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+	struct sk_buff *skb;
+
+	if (sk->sk_friend)
+		return;
+
+	skb = skb_peek(&sk->sk_receive_queue);
 
 	WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
 	     "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
@@ -1467,17 +1767,27 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
 }
 #endif
 
-static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
+static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off,
+					   size_t *len)
 {
 	struct sk_buff *skb;
 	u32 offset;
+	size_t avail;
 
 	skb_queue_walk(&sk->sk_receive_queue, skb) {
-		offset = seq - TCP_SKB_CB(skb)->seq;
-		if (tcp_hdr(skb)->syn)
-			offset--;
-		if (offset < skb->len || tcp_hdr(skb)->fin) {
+		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+		offset = seq - tcb->seq;
+		if (skb->friend)
+			avail = (u32)(tcb->end_seq - seq);
+		else {
+			if (tcp_hdr(skb)->syn)
+				offset--;
+			avail = skb->len - offset;
+		}
+		if (avail > 0 || (!skb->friend && tcp_hdr(skb)->fin)) {
 			*off = offset;
+			*len = avail;
 			return skb;
 		}
 	}
@@ -1503,15 +1813,24 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 	u32 seq = tp->copied_seq;
 	u32 offset;
 	int copied = 0;
+	size_t len;
+	int err;
+	struct sock *friend = sk->sk_friend;
+	long timeo = sock_rcvtimeo(sk, false);
 
 	if (sk->sk_state == TCP_LISTEN)
 		return -ENOTCONN;
-	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
-		if (offset < skb->len) {
-			int used;
-			size_t len;
 
-			len = skb->len - offset;
+	err = tcp_friend_validate(sk, &friend, &timeo);
+	if (err < 0)
+		return err;
+	if (friend)
+		tcp_friend_recv_lock(sk);
+
+	while ((skb = tcp_recv_skb(sk, seq, &offset, &len)) != NULL) {
+		if (len > 0) {
+			int used;
+	again:
 			/* Stop reading if we hit a patch of urgent data */
 			if (tp->urg_data) {
 				u32 urg_offset = tp->urg_seq - seq;
@@ -1520,6 +1839,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 				if (!len)
 					break;
 			}
+			if (friend)
+				tcp_friend_unlock(sk);
+
 			used = recv_actor(desc, skb, offset, len);
 			if (used < 0) {
 				if (!copied)
@@ -1530,33 +1852,65 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 				copied += used;
 				offset += used;
 			}
-			/*
-			 * If recv_actor drops the lock (e.g. TCP splice
-			 * receive) the skb pointer might be invalid when
-			 * getting here: tcp_collapse might have deleted it
-			 * while aggregating skbs from the socket queue.
-			 */
-			skb = tcp_recv_skb(sk, seq-1, &offset);
-			if (!skb || (offset+1 != skb->len))
-				break;
+
+			if (friend)
+				tcp_friend_recv_lock(sk);
+			if (skb->friend) {
+				len = (u32)(TCP_SKB_CB(skb)->end_seq - seq);
+				if (len > 0) {
+					/*
+					 * Friend did an skb_put() while we
+					 * were away so process the same skb.
+					 */
+					if (!desc->count)
+						break;
+					tp->copied_seq = seq;
+					goto again;
+				}
+			} else {
+				/*
+				 * If recv_actor drops the lock (e.g. TCP
+				 * splice receive) the skb pointer might be
+				 * invalid when getting here: tcp_collapse
+				 * might have deleted it while aggregating
+				 * skbs from the socket queue.
+				 */
+				skb = tcp_recv_skb(sk, seq-1, &offset, &len);
+				if (!skb || (offset+1 != skb->len))
+					break;
+			}
 		}
-		if (tcp_hdr(skb)->fin) {
+		if (!skb->friend && tcp_hdr(skb)->fin) {
 			sk_eat_skb(sk, skb, false);
 			++seq;
 			break;
 		}
-		sk_eat_skb(sk, skb, false);
+		if (skb->friend) {
+			if (!TCP_FRIEND_CB(TCP_SKB_CB(skb))->tail_inuse) {
+				__skb_unlink(skb, &sk->sk_receive_queue);
+				__kfree_skb(skb);
+				tcp_friend_write_space(sk);
+			}
+			tcp_friend_unlock(sk);
+			tcp_friend_recv_lock(sk);
+		} else
+			sk_eat_skb(sk, skb, 0);
 		if (!desc->count)
 			break;
 		tp->copied_seq = seq;
 	}
 	tp->copied_seq = seq;
 
-	tcp_rcv_space_adjust(sk);
+	if (friend) {
+		tcp_friend_unlock(sk);
+		tcp_friend_write_space(sk);
+	} else {
+		tcp_rcv_space_adjust(sk);
 
-	/* Clean up data we have read: This will do ACK frames. */
-	if (copied > 0)
-		tcp_cleanup_rbuf(sk, copied);
+		/* Clean up data we have read: This will do ACK frames. */
+		if (copied > 0)
+			tcp_cleanup_rbuf(sk, copied);
+	}
 	return copied;
 }
 EXPORT_SYMBOL(tcp_read_sock);
@@ -1572,6 +1926,7 @@ EXPORT_SYMBOL(tcp_read_sock);
 int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t len, int nonblock, int flags, int *addr_len)
 {
+	struct sock *friend = sk->sk_friend;
 	struct tcp_sock *tp = tcp_sk(sk);
 	int copied = 0;
 	u32 peek_seq;
@@ -1584,6 +1939,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	bool copied_early = false;
 	struct sk_buff *skb;
 	u32 urg_hole = 0;
+	bool locked = false;
 
 	lock_sock(sk);
 
@@ -1593,6 +1949,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	timeo = sock_rcvtimeo(sk, nonblock);
 
+	err = tcp_friend_validate(sk, &friend, &timeo);
+	if (err < 0)
+		goto out;
+
 	/* Urgent data needs to be handled specially. */
 	if (flags & MSG_OOB)
 		goto recv_urg;
@@ -1631,7 +1991,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
 		if ((available < target) &&
 		    (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
-		    !sysctl_tcp_low_latency &&
+		    !sysctl_tcp_low_latency && !friend &&
 		    net_dma_find_channel()) {
 			preempt_enable_no_resched();
 			tp->ucopy.pinned_list =
@@ -1642,7 +2002,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	}
 #endif
 
+	err = 0;
+
 	do {
+		struct tcp_skb_cb *tcb;
 		u32 offset;
 
 		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
@@ -1650,37 +2013,77 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			if (copied)
 				break;
 			if (signal_pending(current)) {
-				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
+				err = timeo ? sock_intr_errno(timeo) : -EAGAIN;
 				break;
 			}
 		}
 
-		/* Next get a buffer. */
+		/*
+		 * Next get a buffer. Note, for friends sendmsg() queues
+		 * data directly to our sk_receive_queue by holding our
+		 * slock and either tail queuing a new skb or adding new
+		 * data to the tail skb. In the later case tail_inuse is
+		 * set, slock dropped, copyin, skb->len updated, re-hold
+		 * slock, end_seq updated, so we can only use the bytes
+		 * from *seq to end_seq!
+		 */
+		if (friend && !locked) {
+			tcp_friend_recv_lock(sk);
+			locked = true;
+		}
 
 		skb_queue_walk(&sk->sk_receive_queue, skb) {
+			tcb = TCP_SKB_CB(skb);
+			offset = *seq - tcb->seq;
+			if (friend) {
+				if (skb->friend) {
+					used = (u32)(tcb->end_seq - *seq);
+					if (used > 0) {
+						tcp_friend_unlock(sk);
+						locked = false;
+						/* Can use it all */
+						goto found_ok_skb;
+					}
+					/* No data to copyout */
+					if (flags & MSG_PEEK)
+						continue;
+					if (!TCP_FRIEND_CB(tcb)->tail_inuse)
+						goto unlink;
+					break;
+				}
+				tcp_friend_unlock(sk);
+				locked = false;
+			}
+
 			/* Now that we have two receive queues this
 			 * shouldn't happen.
 			 */
-			if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
+			if (WARN(before(*seq, tcb->seq),
 				 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
-				 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
-				 flags))
+				 *seq, tcb->seq, tp->rcv_nxt, flags))
 				break;
 
-			offset = *seq - TCP_SKB_CB(skb)->seq;
 			if (tcp_hdr(skb)->syn)
 				offset--;
-			if (offset < skb->len)
+			if (offset < skb->len) {
+				/* Ok so how much can we use? */
+				used = skb->len - offset;
 				goto found_ok_skb;
+			}
 			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
 			WARN(!(flags & MSG_PEEK),
 			     "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
-			     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
+			     *seq, tcb->seq, tp->rcv_nxt, flags);
 		}
 
 		/* Well, if we have backlog, try to process it now yet. */
 
+		if (friend && locked) {
+			tcp_friend_unlock(sk);
+			locked = false;
+		}
+
 		if (copied >= target && !sk->sk_backlog.tail)
 			break;
 
@@ -1727,7 +2130,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 		tcp_cleanup_rbuf(sk, copied);
 
-		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
+		if (!sysctl_tcp_low_latency && !friend &&
+		    tp->ucopy.task == user_recv) {
 			/* Install new reader */
 			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
 				user_recv = current;
@@ -1821,8 +2225,6 @@ do_prequeue:
 		continue;
 
 	found_ok_skb:
-		/* Ok so how much can we use? */
-		used = skb->len - offset;
 		if (len < used)
 			used = len;
 
@@ -1879,7 +2281,7 @@ do_prequeue:
 				if (err) {
 					/* Exception. Bailout! */
 					if (!copied)
-						copied = -EFAULT;
+						copied = err;
 					break;
 				}
 			}
@@ -1888,6 +2290,7 @@ do_prequeue:
 		*seq += used;
 		copied += used;
 		len -= used;
+		offset += used;
 
 		tcp_rcv_space_adjust(sk);
 
@@ -1896,11 +2299,45 @@ skip_copy:
 			tp->urg_data = 0;
 			tcp_fast_path_check(sk);
 		}
-		if (used + offset < skb->len)
+
+		if (skb->friend) {
+			tcp_friend_recv_lock(sk);
+			locked = true;
+			used = (u32)(tcb->end_seq - *seq);
+			if (used) {
+				/*
+				 * Friend did an skb_put() while we were away
+				 * so if more to do process the same skb.
+				 */
+				if (len > 0) {
+					tcp_friend_unlock(sk);
+					locked = false;
+					goto found_ok_skb;
+				}
+				continue;
+			}
+			if (TCP_FRIEND_CB(tcb)->tail_inuse) {
+				/* Give sendmsg a chance */
+				tcp_friend_unlock(sk);
+				locked = false;
+				continue;
+			}
+			if (!(flags & MSG_PEEK)) {
+		unlink:
+				__skb_unlink(skb, &sk->sk_receive_queue);
+				__kfree_skb(skb);
+				tcp_friend_unlock(sk);
+				locked = false;
+				tcp_friend_write_space(sk);
+			}
 			continue;
+		}
 
-		if (tcp_hdr(skb)->fin)
+		if (offset < skb->len)
+			continue;
+		else if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
+
 		if (!(flags & MSG_PEEK)) {
 			sk_eat_skb(sk, skb, copied_early);
 			copied_early = false;
@@ -1917,6 +2354,9 @@ skip_copy:
 		break;
 	} while (len > 0);
 
+	if (friend && locked)
+		tcp_friend_unlock(sk);
+
 	if (user_recv) {
 		if (!skb_queue_empty(&tp->ucopy.prequeue)) {
 			int chunk;
@@ -2095,6 +2535,9 @@ void tcp_close(struct sock *sk, long timeout)
 		goto adjudge_to_death;
 	}
 
+	if (sk->sk_friend)
+		sock_put(sk->sk_friend);
+
 	/*  We need to flush the recv. buffs.  We do this only on the
 	 *  descriptor close, not protocol-sourced closes, because the
 	 *  reader process may not have drained the data yet!
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e2bec81..39dd601 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -530,6 +530,9 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	int time;
 	int space;
 
+	if (sk->sk_friend)
+		return;
+
 	if (tp->rcvq_space.time == 0)
 		goto new_measure;
 
@@ -4326,8 +4329,9 @@ static int tcp_prune_queue(struct sock *sk);
 static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 				 unsigned int size)
 {
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, skb, size)) {
+	if (!sk->sk_friend &&
+	    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_rmem_schedule(sk, skb, size))) {
 
 		if (tcp_prune_queue(sk) < 0)
 			return -1;
@@ -5712,6 +5716,16 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 *    state to ESTABLISHED..."
 		 */
 
+		if (skb->friend) {
+			/*
+			 * If friends haven't been made yet, our sk_friend
+			 * still == NULL, then update with the ACK's friend
+			 * value (the listen()er's sock addr) which is used
+			 * as a place holder.
+			 */
+			cmpxchg(&sk->sk_friend, NULL, skb->friend);
+		}
+
 		TCP_ECN_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
@@ -5787,9 +5801,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		    tcp_rcv_fastopen_synack(sk, skb, &foc))
 			return -1;
 
-		if (sk->sk_write_pending ||
+		if (!skb->friend && (sk->sk_write_pending ||
 		    icsk->icsk_accept_queue.rskq_defer_accept ||
-		    icsk->icsk_ack.pingpong) {
+		    icsk->icsk_ack.pingpong)) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
 			 *
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e64abed..cf1c4a5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1512,6 +1512,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
 #endif
 
+	req->friend = skb->friend;
+
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e965319..48c57cc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -268,6 +268,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	bool recycle_ok = false;
 
+	if (sk->sk_friend)
+		goto out;
+
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
 		recycle_ok = tcp_remember_stamp(sk);
 
@@ -347,6 +350,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	}
 
 	tcp_update_metrics(sk);
+out:
 	tcp_done(sk);
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe6ffe..e4fb111 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,6 +65,9 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+/* By default, TCP loopback bypass */
+int sysctl_tcp_friends __read_mostly = 1;
+
 int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
 EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
 
@@ -1025,9 +1028,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
 
-	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
+	if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
+		/* Only try to make friends if enabled */
+		if (sysctl_tcp_friends)
+			skb->friend = sk;
+
 		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
-	else
+	} else
 		tcp_options_size = tcp_established_options(sk, skb, &opts,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
@@ -2721,6 +2728,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	}
 
 	memset(&opts, 0, sizeof(opts));
+
+	/* Only try to make friends if enabled */
+	if (sysctl_tcp_friends)
+		skb->friend = sk;
+
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
 		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 09078b9..f71592e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1049,6 +1049,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
 #endif
 
+	req->friend = skb->friend;
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-- 
1.7.7.3

^ permalink raw reply related

* Re: [PATCH net-next v3 0/4] Take care of xfrm policy when checking dst entries
From: David Miller @ 2012-09-17 18:25 UTC (permalink / raw)
  To: vyasevich
  Cc: nicolas.dichtel, eric.dumazet, sds, james.l.morris, eparis, sri,
	linux-sctp, netdev
In-Reply-To: <5057688B.3030509@gmail.com>

From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 17 Sep 2012 14:14:35 -0400

> I think he expected you to take Eric's patch that removed those
> pieces.

Eric's patch was a cleanup, so it went into 'net-next'.

Nicolas's patch is a bonafide bug fix so needs to be
targetted at 'net'

^ permalink raw reply

* Re: [PATCH net-next v3 0/4] Take care of xfrm policy when checking dst entries
From: Vlad Yasevich @ 2012-09-17 18:14 UTC (permalink / raw)
  To: David Miller
  Cc: nicolas.dichtel, eric.dumazet, sds, james.l.morris, eparis, sri,
	linux-sctp, netdev
In-Reply-To: <20120917.124953.1599275868994343219.davem@davemloft.net>

On 09/17/2012 12:49 PM, David Miller wrote:
> From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Date: Tue, 11 Sep 2012 10:09:43 +0200
>
>> The goal of these patches is to fix the following problem: a session is
>> established (TCP, SCTP) and after a new policy is inserted. The current
>> code does not recalculate the route, thus the traffic is not encrypted.
>>
>> The patch propose to check flow_cache_genid value when checking a dst
>> entry, which is incremented each time a policy is inserted or deleted.
>>
>> v2: use net->ipv4.rt_genid instead of flow_cache_genid (and thus save a test
>>      in fast path). Also move it to net->rt_genid, to be able to use it for IPv6
>>      too. Note that IPv6 will have one more test in fast path.
>>
>> v3: remove unrelated "#ifdef CONFIG_XFRM" in IPv6 part
>>      bump rt_genid in selinux code (same place than flow_cache_genid)
>>
>> Patches are tested with TCP and SCTP, IPv4 and IPv6.
>
> These patches don't apply cleanly at all.
>
> In the net/ipv4/route.c code we don't initialize the genid to zero,
> we stick a random value there.
>
> And we don't increment it by one on flushes, instead we increment
> it by a random amount.
>
> I wonder what tree these were even against, the differences were
> so great.
>

I think he expected you to take Eric's patch that removed those pieces.

-vlad

^ permalink raw reply

* Re: D-Link DGE-530T and r8169.c
From: Francois Romieu @ 2012-09-17 17:47 UTC (permalink / raw)
  To: James R. Hay; +Cc: nic_swsd, netdev
In-Reply-To: <alpine.LNX.2.00.1209171139540.11109@hay.haya.qc.ca>

James R. Hay <jrhay@haya.qc.ca> :
[...]
> At line 199 of r8169.c is the following:
> 
> 	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4300), 0, 0, RTL_CFG_0 },
> 
> While I changed the 0x4300 to 0x4302 and recompiled the kernel to
> get the driver to recognize the card I believe that the appropriate
> fix would be to add the following line immediately after:
> 
> 	{ PCI_DEVICE(PCI_VENDOR_ID_DLINK,	0x4302), 0, 0, RTL_CFG_0 },

The ID above was added by 93a3aa25933461d76141179fc94aa32d5f9d954a between
v3.0 and v3.1. See: 

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=93a3aa25933461d76141179fc94aa32d5f9d954a

-- 
Ueimor

^ permalink raw reply

* RE: [PATCH] netxen: check for root bus in netxen_mask_aer_correctable
From: Rajesh Borundia @ 2012-09-17 17:53 UTC (permalink / raw)
  To: nikolay@redhat.com, Sony Chacko; +Cc: netdev
In-Reply-To: <1347637803-4837-1-git-send-email-nikolay@redhat.com>


________________________________________
From: nikolay@redhat.com [nikolay@redhat.com]
Sent: Friday, September 14, 2012 9:20 PM
To: Sony Chacko
Cc: Rajesh Borundia; netdev
Subject: [PATCH] netxen: check for root bus in netxen_mask_aer_correctable

From: Nikolay Aleksandrov <nikolay@redhat.com>

Add a check if pdev->bus->self == NULL (root bus). When attaching
a netxen NIC to a VM it can be on the root bus and the guest would
crash in netxen_mask_aer_correctable() because of a NULL pointer
dereference if CONFIG_PCIEAER is present.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 342b3a7..e2a4858 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1378,6 +1378,10 @@ static void netxen_mask_aer_correctable(struct netxen_adapter *adapter)
        struct pci_dev *root = pdev->bus->self;
        u32 aer_pos;

+       /* root bus? */
+       if (!root)
+               return;
+
        if (adapter->ahw.board_type != NETXEN_BRDTYPE_P3_4_GB_MM &&
                adapter->ahw.board_type != NETXEN_BRDTYPE_P3_10G_TP)
                return;
--
1.7.11.4


Looks okay.

^ permalink raw reply related

* Re: [PATCH] ncm: allow for NULL terminations
From: Rick Jones @ 2012-09-17 17:39 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Alan Cox, netdev
In-Reply-To: <1347896724.2685.2.camel@bwh-desktop.uk.solarflarecom.com>

On 09/17/2012 08:45 AM, Ben Hutchings wrote:
> On Mon, 2012-09-17 at 11:58 +0100, Alan Cox wrote:
>> From: Alan Cox <alan@linux.intel.com>
>>
>> The strings are passed to snprintf so must be null terminated. It seems the
>> copy length is incorrectly set.
>
> Please use strlcpy() instead.  (I thought someone had already gone round
> the get_drvinfo implementations and fixed them to do that, actually.)

That may have been my "floor sweeping" exercise of before, but I didn't 
go into drivers/net/usb/ at the time.

rick

>
> Ben.
>
>> Signed-off-by: Alan Cox <alan@linux.intel.com>
>> ---
>>
>>   drivers/net/usb/cdc_ncm.c |    6 +++---
>>   1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
>> index 4cd582a..af8cce7 100644
>> --- a/drivers/net/usb/cdc_ncm.c
>> +++ b/drivers/net/usb/cdc_ncm.c
>> @@ -145,10 +145,10 @@ cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
>>   {
>>   	struct usbnet *dev = netdev_priv(net);
>>
>> -	strncpy(info->driver, dev->driver_name, sizeof(info->driver));
>> -	strncpy(info->version, DRIVER_VERSION, sizeof(info->version));
>> +	strncpy(info->driver, dev->driver_name, sizeof(info->driver) - 1);
>> +	strncpy(info->version, DRIVER_VERSION, sizeof(info->version) - 1);
>>   	strncpy(info->fw_version, dev->driver_info->description,
>> -		sizeof(info->fw_version));
>> +		sizeof(info->fw_version) - 1);
>>   	usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
>>   }
>>
>>
>

^ permalink raw reply

* Re: [PATCH] tcp: restore rcv_wscale in a repair mode
From: David Miller @ 2012-09-17 17:25 UTC (permalink / raw)
  To: avagin; +Cc: netdev, linux-kernel, xemul, kuznet, jmorris, yoshfuji, kaber
In-Reply-To: <1347571687-3539973-1-git-send-email-avagin@openvz.org>

From: Andrew Vagin <avagin@openvz.org>
Date: Fri, 14 Sep 2012 01:28:07 +0400

> This patch doesn't break a backward compatibility.
> If someone uses it in a old scheme, a rcv window
> will be restored with the same bug (rcv_wscale = 0).

The whole world is not little-endian.  This does in fact break
backwards compatability.

You will need to extend this in a more reasonable manner.

^ permalink raw reply

* Re: [PATCH] phy: Replace genphy_update_link() call with phy_read_status()
From: David Miller @ 2012-09-17 17:22 UTC (permalink / raw)
  To: asv; +Cc: netdev, afleming
In-Reply-To: <5051894B.5060001@sysgo.com>

From: Alexander Sverdlin <asv@sysgo.com>
Date: Thu, 13 Sep 2012 09:20:43 +0200

> From: Alexander Sverdlin <alexander.sverdlin@sysgo.com>
> 
> Replace genphy_update_link() call with phy_read_status() 
> 
> Code in phy.c should not call genphy_*() functions directly, this breaks PHY layer abstraction.
> Some drivers may re-implement "read_status" callback and it's not being called in one place of
> PHY state machine, where genphy_update_link() is called instead. So fix it.
> For drivers that rely on genphy_* implementation nothing changed, as genphy_read_status() calls
> genphy_update_link() anyway.
> 
> Signed-off-by: Alexander Sverdlin <alexander.sverdlin@sysgo.com>

This is a behavioral change, not just a change to make sure the right
interfaces are used.

genphy_update_link() does only a small subset of the operations
performed by the generic read_status callback.

I'm not applying this patch, because all of those extra operations
might be unexpected in some situations and break some configurations.

^ permalink raw reply

* Re: [PATCH] Generalise "auto-negotiation done" function, move generic PHY code to phy_device.c
From: David Miller @ 2012-09-17 17:19 UTC (permalink / raw)
  To: asv; +Cc: netdev, afleming, alexander.sverdlin.ext
In-Reply-To: <504DCF39.3000704@sysgo.com>

From: Alexander Sverdlin <asv@sysgo.com>
Date: Mon, 10 Sep 2012 13:30:01 +0200

> From: Alexander Sverdlin <alexander.sverdlin@sysgo.com>
> 
> Generalise "auto-negotiation done" function, move generic PHY code to phy_device.c 
> 
> Not all devices have "auto-negotiation done" bit at the place, as expected by
> phy_aneg_done() in phy.c. Example of such device is Marvell 88E61xx Ethernet 
> switch which could be controlled by Linux PHY layer, if struct phy_driver had
> abstraction for above function. So move hardware-dependent implementation details
> for "generic" PHY to phy_device.c, and modify all PHY drivers to use new field.
> Now phy.c contains only high-level state-machine functionality, leaving 
> hardware-layer to different drivers.
> 
> Signed-off-by: Alexander Sverdlin <alexander.sverdlin@sysgo.com>

You're adding this abstration, and even describe where it would be needed,
but you aren't providing the changes that make use of this new abstration
at all.

That's rather pointless, and we want to see the users of new interfaces
before we add them.

Resubmit this when you can also submit the patch which the Marvell driver
needs, which would actually make use of this.

^ permalink raw reply

* Re: [PATCH 0/6] llc2: Simplify llc_station
From: David Miller @ 2012-09-17 17:12 UTC (permalink / raw)
  To: ben; +Cc: acme, netdev
In-Reply-To: <20120917.131017.1646567691887140571.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Mon, 17 Sep 2012 13:10:17 -0400 (EDT)

> From: David Miller <davem@davemloft.net>
> Date: Mon, 17 Sep 2012 13:05:31 -0400 (EDT)
> 
>> From: Ben Hutchings <ben@decadent.org.uk>
>> Date: Sun, 16 Sep 2012 04:09:42 +0100
>> 
>>> There seem to have been some grand plans for llc_station, but as they
>>> haven't been fulfilled it's just unnecessarily complicated.
>> 
>> I went over these a few times, they look correct, so I am going
>> to apply them to net-next.
>> 
>> If there are any problems let's hope that exposure in the tree
>> helps shake those out.
> 
> It doesn't even build properly, please fix this and resubmit:
> 
> ERROR: "sysctl_llc_station_ack_timeout" [net/llc/llc2.ko] undefined!

Actually, since I trusted you when you said you build tested this,
I pushed it out to net-next pre-maturely.

I'm going to fix this meanwhile by simply removing the sysctl that
references this symbol.

But you need to check for me whether that's ok or not.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox