* [PATCH net-next-2.6 1/19 v4] can: EG20T PCH: Separate Interface Register(IF1/IF2)
From: Tomoya MORINAGA @ 2010-11-26 0:29 UTC (permalink / raw)
To: Wolfgang Grandegger, Wolfram Sang, Christian Pellegrin,
Barry Song, Samuel Ortiz
Cc: qi.wang, yong.y.wang, andrew.chih.howe.khor, joel.clark,
kok.howg.ewe, margie.foster
Separate interface register from whole of register structure.
CAN register of Intel PCH EG20T has 2 sets of interface register.
To reduce whole of code size, separate interface register.
As a result, the number of function also can be reduced.
Signed-off-by: Tomoya MORINAGA <tomoya-linux@dsn.okisemi.com>
---
drivers/net/can/pch_can.c | 445
++++++++++++++++++++-------------------------
1 files changed, 201 insertions(+), 244 deletions(-)
diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c
index 238622a..dae8ed1 100644
--- a/drivers/net/can/pch_can.c
+++ b/drivers/net/can/pch_can.c
@@ -113,6 +113,11 @@
#define PCH_FIFO_THRESH 16
+enum pch_ifreg {
+ PCH_RX_IFREG,
+ PCH_TX_IFREG,
+};
+
enum pch_can_mode {
PCH_CAN_ENABLE,
PCH_CAN_DISABLE,
@@ -122,6 +127,21 @@ enum pch_can_mode {
PCH_CAN_RUN
};
+struct pch_can_if_regs {
+ u32 creq;
+ u32 cmask;
+ u32 mask1;
+ u32 mask2;
+ u32 id1;
+ u32 id2;
+ u32 mcont;
+ u32 dataa1;
+ u32 dataa2;
+ u32 datab1;
+ u32 datab2;
+ u32 rsv[13];
+};
+
struct pch_can_regs {
u32 cont;
u32 stat;
@@ -130,38 +150,21 @@ struct pch_can_regs {
u32 intr;
u32 opt;
u32 brpe;
- u32 reserve1;
- u32 if1_creq;
- u32 if1_cmask;
- u32 if1_mask1;
- u32 if1_mask2;
- u32 if1_id1;
- u32 if1_id2;
- u32 if1_mcont;
- u32 if1_dataa1;
- u32 if1_dataa2;
- u32 if1_datab1;
- u32 if1_datab2;
- u32 reserve2;
- u32 reserve3[12];
- u32 if2_creq;
- u32 if2_cmask;
- u32 if2_mask1;
- u32 if2_mask2;
- u32 if2_id1;
- u32 if2_id2;
- u32 if2_mcont;
- u32 if2_dataa1;
- u32 if2_dataa2;
- u32 if2_datab1;
- u32 if2_datab2;
- u32 reserve4;
- u32 reserve5[20];
+ u32 reserve;
+ struct pch_can_if_regs ifregs[2]; /* [0]=if1 [1]=if2 */
+ u32 reserve1[8];
u32 treq1;
u32 treq2;
- u32 reserve6[2];
- u32 reserve7[56];
- u32 reserve8[3];
+ u32 reserve2[6];
+ u32 data1;
+ u32 data2;
+ u32 reserve3[6];
+ u32 canipend1;
+ u32 canipend2;
+ u32 reserve4[6];
+ u32 canmval1;
+ u32 canmval2;
+ u32 reserve5[37];
u32 srst;
};
@@ -303,143 +306,87 @@ static void pch_can_check_if_busy(u32 __iomem
*creq_addr, u32 num)
pr_err("%s:IF1 BUSY Flag is set forever.\n", __func__);
}
-static void pch_can_set_rx_enable(struct pch_can_priv *priv, u32 buff_num,
- u32 set)
+static void pch_can_set_rxtx(struct pch_can_priv *priv, u32 buff_num,
+ u32 set, enum pch_ifreg dir)
{
unsigned long flags;
+ u32 ie;
+
+ if (dir)
+ ie = PCH_IF_MCONT_TXIE;
+ else
+ ie = PCH_IF_MCONT_RXIE;
spin_lock_irqsave(&priv->msgif_reg_lock, flags);
/* Reading the receive buffer data from RAM to Interface1 registers */
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, buff_num);
+ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[dir].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[dir].creq, buff_num);
/* Setting the IF1MASK1 register to access MsgVal and RxIE bits */
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_ARB | PCH_CMASK_CTRL,
- &priv->regs->if1_cmask);
+ &priv->regs->ifregs[dir].cmask);
if (set == PCH_ENABLE) {
/* Setting the MsgVal and RxIE bits */
- pch_can_bit_set(&priv->regs->if1_mcont, PCH_IF_MCONT_RXIE);
- pch_can_bit_set(&priv->regs->if1_id2, PCH_ID_MSGVAL);
+ pch_can_bit_set(&priv->regs->ifregs[dir].mcont, ie);
+ pch_can_bit_set(&priv->regs->ifregs[dir].id2, PCH_ID_MSGVAL);
} else if (set == PCH_DISABLE) {
/* Resetting the MsgVal and RxIE bits */
- pch_can_bit_clear(&priv->regs->if1_mcont, PCH_IF_MCONT_RXIE);
- pch_can_bit_clear(&priv->regs->if1_id2, PCH_ID_MSGVAL);
+ pch_can_bit_clear(&priv->regs->ifregs[dir].mcont, ie);
+ pch_can_bit_clear(&priv->regs->ifregs[dir].id2, PCH_ID_MSGVAL);
}
- pch_can_check_if_busy(&priv->regs->if1_creq, buff_num);
+ pch_can_check_if_busy(&priv->regs->ifregs[dir].creq, buff_num);
spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
}
-static void pch_can_rx_enable_all(struct pch_can_priv *priv)
-{
- int i;
-
- /* Traversing to obtain the object configured as receivers. */
- for (i = 0; i < PCH_OBJ_NUM; i++) {
- if (priv->msg_obj[i] == PCH_MSG_OBJ_RX)
- pch_can_set_rx_enable(priv, i + 1, PCH_ENABLE);
- }
-}
-static void pch_can_rx_disable_all(struct pch_can_priv *priv)
+static void pch_can_set_rx_all(struct pch_can_priv *priv, u32 set)
{
int i;
/* Traversing to obtain the object configured as receivers. */
for (i = 0; i < PCH_OBJ_NUM; i++) {
if (priv->msg_obj[i] == PCH_MSG_OBJ_RX)
- pch_can_set_rx_enable(priv, i + 1, PCH_DISABLE);
- }
-}
-
-static void pch_can_set_tx_enable(struct pch_can_priv *priv, u32 buff_num,
- u32 set)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&priv->msgif_reg_lock, flags);
- /* Reading the Msg buffer from Message RAM to Interface2 registers. */
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if2_cmask);
- pch_can_check_if_busy(&priv->regs->if2_creq, buff_num);
-
- /* Setting the IF2CMASK register for accessing the
- MsgVal and TxIE bits */
- iowrite32(PCH_CMASK_RDWR | PCH_CMASK_ARB | PCH_CMASK_CTRL,
- &priv->regs->if2_cmask);
-
- if (set == PCH_ENABLE) {
- /* Setting the MsgVal and TxIE bits */
- pch_can_bit_set(&priv->regs->if2_mcont, PCH_IF_MCONT_TXIE);
- pch_can_bit_set(&priv->regs->if2_id2, PCH_ID_MSGVAL);
- } else if (set == PCH_DISABLE) {
- /* Resetting the MsgVal and TxIE bits. */
- pch_can_bit_clear(&priv->regs->if2_mcont, PCH_IF_MCONT_TXIE);
- pch_can_bit_clear(&priv->regs->if2_id2, PCH_ID_MSGVAL);
- }
-
- pch_can_check_if_busy(&priv->regs->if2_creq, buff_num);
- spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
-}
-
-static void pch_can_tx_enable_all(struct pch_can_priv *priv)
-{
- int i;
-
- /* Traversing to obtain the object configured as transmit object. */
- for (i = 0; i < PCH_OBJ_NUM; i++) {
- if (priv->msg_obj[i] == PCH_MSG_OBJ_TX)
- pch_can_set_tx_enable(priv, i + 1, PCH_ENABLE);
+ pch_can_set_rxtx(priv, i + 1, set, PCH_RX_IFREG);
}
}
-static void pch_can_tx_disable_all(struct pch_can_priv *priv)
+static void pch_can_set_tx_all(struct pch_can_priv *priv, u32 set)
{
int i;
/* Traversing to obtain the object configured as transmit object. */
for (i = 0; i < PCH_OBJ_NUM; i++) {
if (priv->msg_obj[i] == PCH_MSG_OBJ_TX)
- pch_can_set_tx_enable(priv, i + 1, PCH_DISABLE);
+ pch_can_set_rxtx(priv, i + 1, set, PCH_TX_IFREG);
}
}
-static void pch_can_get_rx_enable(struct pch_can_priv *priv, u32 buff_num,
- u32 *enable)
+static u32 pch_can_get_rxtx_ir(struct pch_can_priv *priv, u32 buff_num,
+ enum pch_ifreg dir)
{
unsigned long flags;
+ u32 ie, enable;
- spin_lock_irqsave(&priv->msgif_reg_lock, flags);
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, buff_num);
-
- if (((ioread32(&priv->regs->if1_id2)) & PCH_ID_MSGVAL) &&
- ((ioread32(&priv->regs->if1_mcont)) &
- PCH_IF_MCONT_RXIE))
- *enable = PCH_ENABLE;
+ if (dir)
+ ie = PCH_IF_MCONT_RXIE;
else
- *enable = PCH_DISABLE;
- spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
-}
-
-static void pch_can_get_tx_enable(struct pch_can_priv *priv, u32 buff_num,
- u32 *enable)
-{
- unsigned long flags;
+ ie = PCH_IF_MCONT_TXIE;
spin_lock_irqsave(&priv->msgif_reg_lock, flags);
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if2_cmask);
- pch_can_check_if_busy(&priv->regs->if2_creq, buff_num);
+ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[dir].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[dir].creq, buff_num);
- if (((ioread32(&priv->regs->if2_id2)) & PCH_ID_MSGVAL) &&
- ((ioread32(&priv->regs->if2_mcont)) &
- PCH_IF_MCONT_TXIE)) {
- *enable = PCH_ENABLE;
+ if (((ioread32(&priv->regs->ifregs[dir].id2)) & PCH_ID_MSGVAL) &&
+ ((ioread32(&priv->regs->ifregs[dir].mcont)) & ie)) {
+ enable = PCH_ENABLE;
} else {
- *enable = PCH_DISABLE;
+ enable = PCH_DISABLE;
}
spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
+ return enable;
}
static int pch_can_int_pending(struct pch_can_priv *priv)
@@ -453,15 +400,17 @@ static void pch_can_set_rx_buffer_link(struct
pch_can_priv *priv,
unsigned long flags;
spin_lock_irqsave(&priv->msgif_reg_lock, flags);
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, buffer_num);
- iowrite32(PCH_CMASK_RDWR | PCH_CMASK_CTRL, &priv->regs->if1_cmask);
+ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[0].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, buffer_num);
+ iowrite32(PCH_CMASK_RDWR | PCH_CMASK_CTRL,
+ &priv->regs->ifregs[0].cmask);
if (set == PCH_ENABLE)
- pch_can_bit_clear(&priv->regs->if1_mcont, PCH_IF_MCONT_EOB);
+ pch_can_bit_clear(&priv->regs->ifregs[0].mcont,
+ PCH_IF_MCONT_EOB);
else
- pch_can_bit_set(&priv->regs->if1_mcont, PCH_IF_MCONT_EOB);
+ pch_can_bit_set(&priv->regs->ifregs[0].mcont, PCH_IF_MCONT_EOB);
- pch_can_check_if_busy(&priv->regs->if1_creq, buffer_num);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, buffer_num);
spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
}
@@ -471,10 +420,10 @@ static void pch_can_get_rx_buffer_link(struct
pch_can_priv *priv,
unsigned long flags;
spin_lock_irqsave(&priv->msgif_reg_lock, flags);
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, buffer_num);
+ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[0].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, buffer_num);
- if (ioread32(&priv->regs->if1_mcont) & PCH_IF_MCONT_EOB)
+ if (ioread32(&priv->regs->ifregs[0].mcont) & PCH_IF_MCONT_EOB)
*link = PCH_DISABLE;
else
*link = PCH_ENABLE;
@@ -486,37 +435,37 @@ static void pch_can_clear_buffers(struct
pch_can_priv *priv)
int i;
for (i = 0; i < PCH_RX_OBJ_NUM; i++) {
- iowrite32(PCH_CMASK_RX_TX_SET, &priv->regs->if1_cmask);
- iowrite32(0xffff, &priv->regs->if1_mask1);
- iowrite32(0xffff, &priv->regs->if1_mask2);
- iowrite32(0x0, &priv->regs->if1_id1);
- iowrite32(0x0, &priv->regs->if1_id2);
- iowrite32(0x0, &priv->regs->if1_mcont);
- iowrite32(0x0, &priv->regs->if1_dataa1);
- iowrite32(0x0, &priv->regs->if1_dataa2);
- iowrite32(0x0, &priv->regs->if1_datab1);
- iowrite32(0x0, &priv->regs->if1_datab2);
+ iowrite32(PCH_CMASK_RX_TX_SET, &priv->regs->ifregs[0].cmask);
+ iowrite32(0xffff, &priv->regs->ifregs[0].mask1);
+ iowrite32(0xffff, &priv->regs->ifregs[0].mask2);
+ iowrite32(0x0, &priv->regs->ifregs[0].id1);
+ iowrite32(0x0, &priv->regs->ifregs[0].id2);
+ iowrite32(0x0, &priv->regs->ifregs[0].mcont);
+ iowrite32(0x0, &priv->regs->ifregs[0].dataa1);
+ iowrite32(0x0, &priv->regs->ifregs[0].dataa2);
+ iowrite32(0x0, &priv->regs->ifregs[0].datab1);
+ iowrite32(0x0, &priv->regs->ifregs[0].datab2);
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_MASK |
PCH_CMASK_ARB | PCH_CMASK_CTRL,
- &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, i+1);
+ &priv->regs->ifregs[0].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, i+1);
}
for (i = i; i < PCH_OBJ_NUM; i++) {
- iowrite32(PCH_CMASK_RX_TX_SET, &priv->regs->if2_cmask);
- iowrite32(0xffff, &priv->regs->if2_mask1);
- iowrite32(0xffff, &priv->regs->if2_mask2);
- iowrite32(0x0, &priv->regs->if2_id1);
- iowrite32(0x0, &priv->regs->if2_id2);
- iowrite32(0x0, &priv->regs->if2_mcont);
- iowrite32(0x0, &priv->regs->if2_dataa1);
- iowrite32(0x0, &priv->regs->if2_dataa2);
- iowrite32(0x0, &priv->regs->if2_datab1);
- iowrite32(0x0, &priv->regs->if2_datab2);
+ iowrite32(PCH_CMASK_RX_TX_SET, &priv->regs->ifregs[1].cmask);
+ iowrite32(0xffff, &priv->regs->ifregs[1].mask1);
+ iowrite32(0xffff, &priv->regs->ifregs[1].mask2);
+ iowrite32(0x0, &priv->regs->ifregs[1].id1);
+ iowrite32(0x0, &priv->regs->ifregs[1].id2);
+ iowrite32(0x0, &priv->regs->ifregs[1].mcont);
+ iowrite32(0x0, &priv->regs->ifregs[1].dataa1);
+ iowrite32(0x0, &priv->regs->ifregs[1].dataa2);
+ iowrite32(0x0, &priv->regs->ifregs[1].datab1);
+ iowrite32(0x0, &priv->regs->ifregs[1].datab2);
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_MASK |
PCH_CMASK_ARB | PCH_CMASK_CTRL,
- &priv->regs->if2_cmask);
- pch_can_check_if_busy(&priv->regs->if2_creq, i+1);
+ &priv->regs->ifregs[1].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq, i+1);
}
}
@@ -530,58 +479,60 @@ static void pch_can_config_rx_tx_buffers(struct
pch_can_priv *priv)
for (i = 0; i < PCH_OBJ_NUM; i++) {
if (priv->msg_obj[i] == PCH_MSG_OBJ_RX) {
iowrite32(PCH_CMASK_RX_TX_GET,
- &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, i+1);
+ &priv->regs->ifregs[0].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, i+1);
- iowrite32(0x0, &priv->regs->if1_id1);
- iowrite32(0x0, &priv->regs->if1_id2);
+ iowrite32(0x0, &priv->regs->ifregs[0].id1);
+ iowrite32(0x0, &priv->regs->ifregs[0].id2);
- pch_can_bit_set(&priv->regs->if1_mcont,
+ pch_can_bit_set(&priv->regs->ifregs[0].mcont,
PCH_IF_MCONT_UMASK);
/* Set FIFO mode set to 0 except last Rx Obj*/
- pch_can_bit_clear(&priv->regs->if1_mcont,
+ pch_can_bit_clear(&priv->regs->ifregs[0].mcont,
PCH_IF_MCONT_EOB);
/* In case FIFO mode, Last EoB of Rx Obj must be 1 */
if (i == (PCH_RX_OBJ_NUM - 1))
- pch_can_bit_set(&priv->regs->if1_mcont,
+ pch_can_bit_set(&priv->regs->ifregs[0].mcont,
PCH_IF_MCONT_EOB);
- iowrite32(0, &priv->regs->if1_mask1);
- pch_can_bit_clear(&priv->regs->if1_mask2,
+ iowrite32(0, &priv->regs->ifregs[0].mask1);
+ pch_can_bit_clear(&priv->regs->ifregs[0].mask2,
0x1fff | PCH_MASK2_MDIR_MXTD);
/* Setting CMASK for writing */
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_MASK |
PCH_CMASK_ARB | PCH_CMASK_CTRL,
- &priv->regs->if1_cmask);
+ &priv->regs->ifregs[0].cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, i+1);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, i+1);
} else if (priv->msg_obj[i] == PCH_MSG_OBJ_TX) {
iowrite32(PCH_CMASK_RX_TX_GET,
- &priv->regs->if2_cmask);
- pch_can_check_if_busy(&priv->regs->if2_creq, i+1);
+ &priv->regs->ifregs[1].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq, i+1);
/* Resetting DIR bit for reception */
- iowrite32(0x0, &priv->regs->if2_id1);
- iowrite32(0x0, &priv->regs->if2_id2);
- pch_can_bit_set(&priv->regs->if2_id2, PCH_ID2_DIR);
+ iowrite32(0x0, &priv->regs->ifregs[1].id1);
+ iowrite32(0x0, &priv->regs->ifregs[1].id2);
+ pch_can_bit_set(&priv->regs->ifregs[1].id2,
+ PCH_ID2_DIR);
/* Setting EOB bit for transmitter */
- iowrite32(PCH_IF_MCONT_EOB, &priv->regs->if2_mcont);
+ iowrite32(PCH_IF_MCONT_EOB,
+ &priv->regs->ifregs[1].mcont);
- pch_can_bit_set(&priv->regs->if2_mcont,
+ pch_can_bit_set(&priv->regs->ifregs[1].mcont,
PCH_IF_MCONT_UMASK);
- iowrite32(0, &priv->regs->if2_mask1);
- pch_can_bit_clear(&priv->regs->if2_mask2, 0x1fff);
+ iowrite32(0, &priv->regs->ifregs[1].mask1);
+ pch_can_bit_clear(&priv->regs->ifregs[1].mask2, 0x1fff);
/* Setting CMASK for writing */
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_MASK |
PCH_CMASK_ARB | PCH_CMASK_CTRL,
- &priv->regs->if2_cmask);
+ &priv->regs->ifregs[1].cmask);
- pch_can_check_if_busy(&priv->regs->if2_creq, i+1);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq, i+1);
}
}
spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
@@ -611,10 +562,10 @@ static void pch_can_release(struct pch_can_priv *priv)
pch_can_set_int_enables(priv, PCH_CAN_NONE);
/* Disabling all the receive object. */
- pch_can_rx_disable_all(priv);
+ pch_can_set_rx_all(priv, 0);
/* Disabling all the transmit object. */
- pch_can_tx_disable_all(priv);
+ pch_can_set_tx_all(priv, 0);
}
/* This function clears interrupt(s) from the CAN device. */
@@ -630,31 +581,31 @@ static void pch_can_int_clr(struct pch_can_priv
*priv, u32 mask)
/* Setting CMASK for clearing interrupts for
frame transmission. */
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_CTRL | PCH_CMASK_ARB,
- &priv->regs->if2_cmask);
+ &priv->regs->ifregs[1].cmask);
/* Resetting the ID registers. */
- pch_can_bit_set(&priv->regs->if2_id2,
+ pch_can_bit_set(&priv->regs->ifregs[1].id2,
PCH_ID2_DIR | (0x7ff << 2));
- iowrite32(0x0, &priv->regs->if2_id1);
+ iowrite32(0x0, &priv->regs->ifregs[1].id1);
/* Claring NewDat, TxRqst & IntPnd */
- pch_can_bit_clear(&priv->regs->if2_mcont,
+ pch_can_bit_clear(&priv->regs->ifregs[1].mcont,
PCH_IF_MCONT_NEWDAT | PCH_IF_MCONT_INTPND |
PCH_IF_MCONT_TXRQXT);
- pch_can_check_if_busy(&priv->regs->if2_creq, mask);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq, mask);
} else if (priv->msg_obj[mask - 1] == PCH_MSG_OBJ_RX) {
/* Setting CMASK for clearing the reception interrupts. */
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_CTRL | PCH_CMASK_ARB,
- &priv->regs->if1_cmask);
+ &priv->regs->ifregs[0].cmask);
/* Clearing the Dir bit. */
- pch_can_bit_clear(&priv->regs->if1_id2, PCH_ID2_DIR);
+ pch_can_bit_clear(&priv->regs->ifregs[0].id2, PCH_ID2_DIR);
/* Clearing NewDat & IntPnd */
- pch_can_bit_clear(&priv->regs->if1_mcont,
+ pch_can_bit_clear(&priv->regs->ifregs[0].mcont,
PCH_IF_MCONT_NEWDAT | PCH_IF_MCONT_INTPND);
- pch_can_check_if_busy(&priv->regs->if1_creq, mask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, mask);
}
}
@@ -685,8 +636,8 @@ static void pch_can_error(struct net_device *ndev,
u32 status)
return;
if (status & PCH_BUS_OFF) {
- pch_can_tx_disable_all(priv);
- pch_can_rx_disable_all(priv);
+ pch_can_set_tx_all(priv, 0);
+ pch_can_set_rx_all(priv, 0);
state = CAN_STATE_BUS_OFF;
cf->can_id |= CAN_ERR_BUSOFF;
can_bus_off(ndev);
@@ -783,22 +734,22 @@ static int pch_can_rx_normal(struct net_device
*ndev, u32 int_stat)
struct net_device_stats *stats = &(priv->ndev->stats);
/* Reading the messsage object from the Message RAM */
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, int_stat);
+ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[0].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, int_stat);
/* Reading the MCONT register. */
- reg = ioread32(&priv->regs->if1_mcont);
+ reg = ioread32(&priv->regs->ifregs[0].mcont);
reg &= 0xffff;
for (k = int_stat; !(reg & PCH_IF_MCONT_EOB); k++) {
/* If MsgLost bit set. */
if (reg & PCH_IF_MCONT_MSGLOST) {
dev_err(&priv->ndev->dev, "Msg Obj is overwritten.\n");
- pch_can_bit_clear(&priv->regs->if1_mcont,
+ pch_can_bit_clear(&priv->regs->ifregs[0].mcont,
PCH_IF_MCONT_MSGLOST);
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_CTRL,
- &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, k);
+ &priv->regs->ifregs[0].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, k);
skb = alloc_can_err_skb(ndev, &cf);
if (!skb)
@@ -824,29 +775,30 @@ static int pch_can_rx_normal(struct net_device
*ndev, u32 int_stat)
return -ENOMEM;
/* Get Received data */
- ide = ((ioread32(&priv->regs->if1_id2)) & PCH_ID2_XTD) >> 14;
+ ide = ((ioread32(&priv->regs->ifregs[0].id2)) & PCH_ID2_XTD) >>
+ 14;
if (ide) {
- id = (ioread32(&priv->regs->if1_id1) & 0xffff);
- id |= (((ioread32(&priv->regs->if1_id2)) &
+ id = (ioread32(&priv->regs->ifregs[0].id1) & 0xffff);
+ id |= (((ioread32(&priv->regs->ifregs[0].id2)) &
0x1fff) << 16);
cf->can_id = (id & CAN_EFF_MASK) | CAN_EFF_FLAG;
} else {
- id = (((ioread32(&priv->regs->if1_id2)) &
- (CAN_SFF_MASK << 2)) >> 2);
+ id = (((ioread32(&priv->regs->ifregs[0].id2)) &
+ (CAN_SFF_MASK << 2)) >> 2);
cf->can_id = (id & CAN_SFF_MASK);
}
- rtr = (ioread32(&priv->regs->if1_id2) & PCH_ID2_DIR);
+ rtr = (ioread32(&priv->regs->ifregs[0].id2) & PCH_ID2_DIR);
if (rtr) {
cf->can_dlc = 0;
cf->can_id |= CAN_RTR_FLAG;
} else {
- cf->can_dlc = ((ioread32(&priv->regs->if1_mcont)) &
- 0x0f);
+ cf->can_dlc = ((ioread32(&priv->regs->ifregs[0].mcont))
+ & 0x0f);
}
for (i = 0, j = 0; i < cf->can_dlc; j++) {
- reg = ioread32(&priv->regs->if1_dataa1 + j*4);
+ reg = ioread32(&priv->regs->ifregs[0].dataa1 + j*4);
cf->data[i++] = cpu_to_le32(reg & 0xff);
if (i == cf->can_dlc)
break;
@@ -860,15 +812,16 @@ static int pch_can_rx_normal(struct net_device
*ndev, u32 int_stat)
if (k < PCH_FIFO_THRESH) {
iowrite32(PCH_CMASK_RDWR | PCH_CMASK_CTRL |
- PCH_CMASK_ARB, &priv->regs->if1_cmask);
+ PCH_CMASK_ARB, &priv->regs->ifregs[0].cmask);
/* Clearing the Dir bit. */
- pch_can_bit_clear(&priv->regs->if1_id2, PCH_ID2_DIR);
+ pch_can_bit_clear(&priv->regs->ifregs[0].id2,
+ PCH_ID2_DIR);
/* Clearing NewDat & IntPnd */
- pch_can_bit_clear(&priv->regs->if1_mcont,
+ pch_can_bit_clear(&priv->regs->ifregs[0].mcont,
PCH_IF_MCONT_INTPND);
- pch_can_check_if_busy(&priv->regs->if1_creq, k);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, k);
} else if (k > PCH_FIFO_THRESH) {
pch_can_int_clr(priv, k);
} else if (k == PCH_FIFO_THRESH) {
@@ -878,9 +831,9 @@ static int pch_can_rx_normal(struct net_device
*ndev, u32 int_stat)
}
RX_NEXT:
/* Reading the messsage object from the Message RAM */
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if1_cmask);
- pch_can_check_if_busy(&priv->regs->if1_creq, k + 1);
- reg = ioread32(&priv->regs->if1_mcont);
+ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[0].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[0].creq, k + 1);
+ reg = ioread32(&priv->regs->ifregs[0].mcont);
}
return rcv_pkts;
@@ -910,8 +863,9 @@ INT_STAT:
if (reg_stat & PCH_TX_OK) {
spin_lock_irqsave(&priv->msgif_reg_lock, flags);
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if2_cmask);
- pch_can_check_if_busy(&priv->regs->if2_creq,
+ iowrite32(PCH_CMASK_RX_TX_GET,
+ &priv->regs->ifregs[1].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq,
ioread32(&priv->regs->intr));
spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
pch_can_bit_clear(&priv->regs->stat, PCH_TX_OK);
@@ -938,10 +892,11 @@ MSG_OBJ:
can_get_echo_skb(ndev, int_stat - PCH_RX_OBJ_NUM - 1);
spin_lock_irqsave(&priv->msgif_reg_lock, flags);
iowrite32(PCH_CMASK_RX_TX_GET | PCH_CMASK_CLRINTPND,
- &priv->regs->if2_cmask);
- dlc = ioread32(&priv->regs->if2_mcont) &
+ &priv->regs->ifregs[1].cmask);
+ dlc = ioread32(&priv->regs->ifregs[1].mcont) &
PCH_IF_MCONT_DLC;
- pch_can_check_if_busy(&priv->regs->if2_creq, int_stat);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq,
+ int_stat);
spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
if (dlc > 8)
dlc = 8;
@@ -996,8 +951,8 @@ static void pch_can_start(struct net_device *ndev)
pch_set_bittiming(ndev);
pch_can_set_optmode(priv);
- pch_can_tx_enable_all(priv);
- pch_can_rx_enable_all(priv);
+ pch_can_set_tx_all(priv, 1);
+ pch_can_set_rx_all(priv, 1);
/* Setting the CAN to run mode. */
pch_can_set_run_mode(priv, PCH_CAN_RUN);
@@ -1125,54 +1080,55 @@ static netdev_tx_t pch_xmit(struct sk_buff *skb,
struct net_device *ndev)
spin_lock_irqsave(&priv->msgif_reg_lock, flags);
/* Reading the Msg Obj from the Msg RAM to the Interface register. */
- iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->if2_cmask);
- pch_can_check_if_busy(&priv->regs->if2_creq, tx_buffer_avail);
+ iowrite32(PCH_CMASK_RX_TX_GET, &priv->regs->ifregs[1].cmask);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq, tx_buffer_avail);
/* Setting the CMASK register. */
- pch_can_bit_set(&priv->regs->if2_cmask, PCH_CMASK_ALL);
+ pch_can_bit_set(&priv->regs->ifregs[1].cmask, PCH_CMASK_ALL);
/* If ID extended is set. */
- pch_can_bit_clear(&priv->regs->if2_id1, 0xffff);
- pch_can_bit_clear(&priv->regs->if2_id2, 0x1fff | PCH_ID2_XTD);
+ pch_can_bit_clear(&priv->regs->ifregs[1].id1, 0xffff);
+ pch_can_bit_clear(&priv->regs->ifregs[1].id2, 0x1fff | PCH_ID2_XTD);
if (cf->can_id & CAN_EFF_FLAG) {
- pch_can_bit_set(&priv->regs->if2_id1, cf->can_id & 0xffff);
- pch_can_bit_set(&priv->regs->if2_id2,
+ pch_can_bit_set(&priv->regs->ifregs[1].id1,
+ cf->can_id & 0xffff);
+ pch_can_bit_set(&priv->regs->ifregs[1].id2,
((cf->can_id >> 16) & 0x1fff) | PCH_ID2_XTD);
} else {
- pch_can_bit_set(&priv->regs->if2_id1, 0);
- pch_can_bit_set(&priv->regs->if2_id2,
+ pch_can_bit_set(&priv->regs->ifregs[1].id1, 0);
+ pch_can_bit_set(&priv->regs->ifregs[1].id2,
(cf->can_id & CAN_SFF_MASK) << 2);
}
/* If remote frame has to be transmitted.. */
if (cf->can_id & CAN_RTR_FLAG)
- pch_can_bit_clear(&priv->regs->if2_id2, PCH_ID2_DIR);
+ pch_can_bit_clear(&priv->regs->ifregs[1].id2, PCH_ID2_DIR);
for (i = 0, j = 0; i < cf->can_dlc; j++) {
iowrite32(le32_to_cpu(cf->data[i++]),
- (&priv->regs->if2_dataa1) + j*4);
+ (&priv->regs->ifregs[1].dataa1) + j*4);
if (i == cf->can_dlc)
break;
iowrite32(le32_to_cpu(cf->data[i++] << 8),
- (&priv->regs->if2_dataa1) + j*4);
+ (&priv->regs->ifregs[1].dataa1) + j*4);
}
can_put_echo_skb(skb, ndev, tx_buffer_avail - PCH_RX_OBJ_NUM - 1);
/* Updating the size of the data. */
- pch_can_bit_clear(&priv->regs->if2_mcont, 0x0f);
- pch_can_bit_set(&priv->regs->if2_mcont, cf->can_dlc);
+ pch_can_bit_clear(&priv->regs->ifregs[1].mcont, 0x0f);
+ pch_can_bit_set(&priv->regs->ifregs[1].mcont, cf->can_dlc);
/* Clearing IntPend, NewDat & TxRqst */
- pch_can_bit_clear(&priv->regs->if2_mcont,
+ pch_can_bit_clear(&priv->regs->ifregs[1].mcont,
PCH_IF_MCONT_NEWDAT | PCH_IF_MCONT_INTPND |
PCH_IF_MCONT_TXRQXT);
/* Setting NewDat, TxRqst bits */
- pch_can_bit_set(&priv->regs->if2_mcont,
+ pch_can_bit_set(&priv->regs->ifregs[1].mcont,
PCH_IF_MCONT_NEWDAT | PCH_IF_MCONT_TXRQXT);
- pch_can_check_if_busy(&priv->regs->if2_creq, tx_buffer_avail);
+ pch_can_check_if_busy(&priv->regs->ifregs[1].creq, tx_buffer_avail);
spin_unlock_irqrestore(&priv->msgif_reg_lock, flags);
@@ -1234,25 +1190,25 @@ static int pch_can_suspend(struct pci_dev *pdev,
pm_message_t state)
/* Save Tx buffer enable state */
for (i = 0; i < PCH_OBJ_NUM; i++) {
if (priv->msg_obj[i] == PCH_MSG_OBJ_TX)
- pch_can_get_tx_enable(priv, i + 1,
- &(priv->tx_enable[i]));
+ priv->tx_enable[i] = pch_can_get_rxtx_ir(priv, i + 1,
+ PCH_TX_IFREG);
}
/* Disable all Transmit buffers */
- pch_can_tx_disable_all(priv);
+ pch_can_set_tx_all(priv, 0);
/* Save Rx buffer enable state */
for (i = 0; i < PCH_OBJ_NUM; i++) {
if (priv->msg_obj[i] == PCH_MSG_OBJ_RX) {
- pch_can_get_rx_enable(priv, i + 1,
- &(priv->rx_enable[i]));
+ priv->rx_enable[i] = pch_can_get_rxtx_ir(priv, i + 1,
+ PCH_RX_IFREG);
pch_can_get_rx_buffer_link(priv, i + 1,
&(priv->rx_link[i]));
}
}
/* Disable all Receive buffers */
- pch_can_rx_disable_all(priv);
+ pch_can_set_rx_all(priv, 0);
retval = pci_save_state(pdev);
if (retval) {
dev_err(&pdev->dev, "pci_save_state failed.\n");
@@ -1301,10 +1257,9 @@ static int pch_can_resume(struct pci_dev *pdev)
/* Enabling the transmit buffer. */
for (i = 0; i < PCH_OBJ_NUM; i++) {
- if (priv->msg_obj[i] == PCH_MSG_OBJ_TX) {
- pch_can_set_tx_enable(priv, i + 1,
- priv->tx_enable[i]);
- }
+ if (priv->msg_obj[i] == PCH_MSG_OBJ_TX)
+ pch_can_set_rxtx(priv, i, priv->tx_enable[i],
+ PCH_TX_IFREG);
}
/* Configuring the receive buffer and enabling them. */
@@ -1315,7 +1270,9 @@ static int pch_can_resume(struct pci_dev *pdev)
priv->rx_link[i]);
/* Restore buffer enables */
- pch_can_set_rx_enable(priv, i + 1, priv->rx_enable[i]);
+ pch_can_set_rxtx(priv, i, priv->rx_enable[i],
+ PCH_RX_IFREG);
+
}
}
--
1.6.0.6
^ permalink raw reply related
* skge.c patch to limit DMA for ASUS A8N-SLI Premium board
From: Shannon Wynter @ 2010-11-26 0:06 UTC (permalink / raw)
To: netdev
[-- Attachment #1: Type: text/plain, Size: 421 bytes --]
Hi
I would like to contribute a patch to add ASUS A8N-SLI Premium to the
list of boards that require the quirk to limit DMA to 32bit in the
skge.c module.
It took 2 hours of fiddling with my machine last night to get the
network card working - the lights were on but no-one was at home.
I never would have found it if not for the original bug listed on redhat
for the Gigabyte motherboard.
Kind regards
Shannon.
[-- Attachment #2: skge-quirk-for-asus-a8n-sli-DMA.patch --]
[-- Type: text/x-patch, Size: 516 bytes --]
diff -uNr linux-2.6.37-rc3.old/drivers/net/skge.c linux-2.6.37-rc3.new//drivers/net/skge.c
--- linux-2.6.37-rc3.old/drivers/net/skge.c 2010-11-26 08:35:21.000000000 +1000
+++ linux-2.6.37-rc3.new//drivers/net/skge.c 2010-11-26 08:58:52.000000000 +1000
@@ -4158,6 +4158,13 @@
DMI_MATCH(DMI_BOARD_NAME, "nForce"),
},
},
+ {
+ .ident = "ASUSTeK A8N-SLI Premium",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"),
+ },
+ },
{}
};
^ permalink raw reply
* Re: [PATCH net-next-2.6 12/17 v3] can: EG20T PCH: Fix bit timing calculation issue
From: Tomoya MORINAGA @ 2010-11-26 0:06 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: andrew.chih.howe.khor-ral2JQCrhuEAvxtiuMwx3w, Samuel Ortiz,
margie.foster-ral2JQCrhuEAvxtiuMwx3w,
netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
yong.y.wang-ral2JQCrhuEAvxtiuMwx3w,
kok.howg.ewe-ral2JQCrhuEAvxtiuMwx3w, Wolfgang Grandegger,
joel.clark-ral2JQCrhuEAvxtiuMwx3w, David S. Miller,
Christian Pellegrin, qi.wang-ral2JQCrhuEAvxtiuMwx3w
In-Reply-To: <4CED0BF1.1060305@pengutronix.de>
On Wednesday, November 24, 2010 9:58 PM, Marc Kleine-Budde wrote :
> Masking here shouldn't be necessary but won't hurt, better play safe.
EG20T's baud rate pre-scalar register is divided to 2 register BITT and BRPE.
Thus, This mask is necessary.
In fact, deleting this mask, it couldn't communicate correctly.
---
Thanks,
Tomoya MORINAGA
OKI SEMICONDUCTOR CO., LTD.
^ permalink raw reply
* Re: [PATCH 1/1] NET: wan/x25_asy, move lapb_unregister to x25_asy_close_tty
From: Andrew Hendry @ 2010-11-25 23:39 UTC (permalink / raw)
To: Sergey Lapin
Cc: Jiri Slaby, davem, netdev, linux-kernel, jirislaby,
Mikhail Ulyanov
In-Reply-To: <20101125225659.GA13807@build.ihdev.net>
Looks like some more X.25 users. Not sure on your usage, but I should point out:
https://patchwork.kernel.org/patch/332981/
X.25 protocol needs to have the big kernel lock removed so it can stay
around in the future.
I have been working through them, but there are still some of the more
complex ones remaining.
The test setups I am using are:
Sockets<->X25<->x25loop<->X25<->Sockets
Sockets<->X25<->xotd<->network<->xotd<->X25<->Sockets
and I know some users have:
Sockets<->X25<->xotd<->network<->CiscoXOT<->devices.
Where x25loop is a userspace tun device which shuffles the LCIs, does
some basic call handling and loops the calls back into kernel X25.
xotd is a basic implementation of the X25 over TCP RFC.
Are you using something like this?
Sockets<->X25<->lapb<->x25_async<->.....
If you are using X25 I would be interested to see how your setup goes
with the X25 BKL cleanup work, and if anyone wanted to help with the
remaining BKLs :)
On Fri, Nov 26, 2010 at 9:56 AM, Sergey Lapin <slapin@ossfans.org> wrote:
> On Thu, Nov 25, 2010 at 12:54:54AM +0100, Jiri Slaby wrote:
>> We register lapb when tty is created, but unregister it only when the
>> device is UP. So move the lapb_unregister to x25_asy_close_tty after
>> the device is down.
>>
>> Signed-off-by: Jiri Slaby <jslaby@suse.cz>
>> Reported-by: Sergey Lapin <slapin@ossfans.org>
>> Cc: Andrew Hendry <andrew.hendry@gmail.com>
> Tested-by: Sergey Lapin <slapin@ossfans.org>
> Tested-by: Mikhail Ulyanov <ulyanov.mikhail@gmail.com>
>
^ permalink raw reply
* Re: [PATCH 1/1] NET: wan/x25_asy, move lapb_unregister to x25_asy_close_tty
From: Sergey Lapin @ 2010-11-25 22:56 UTC (permalink / raw)
To: Jiri Slaby
Cc: davem, netdev, linux-kernel, jirislaby, Andrew Hendry,
Mikhail Ulyanov
In-Reply-To: <1290642894-4577-1-git-send-email-jslaby@suse.cz>
On Thu, Nov 25, 2010 at 12:54:54AM +0100, Jiri Slaby wrote:
> We register lapb when tty is created, but unregister it only when the
> device is UP. So move the lapb_unregister to x25_asy_close_tty after
> the device is down.
>
> Signed-off-by: Jiri Slaby <jslaby@suse.cz>
> Reported-by: Sergey Lapin <slapin@ossfans.org>
> Cc: Andrew Hendry <andrew.hendry@gmail.com>
Tested-by: Sergey Lapin <slapin@ossfans.org>
Tested-by: Mikhail Ulyanov <ulyanov.mikhail@gmail.com>
^ permalink raw reply
* Re: Unplug ethernet cable, the route persists. Why?
From: Jarek Poplawski @ 2010-11-25 22:18 UTC (permalink / raw)
To: Ben Gamari; +Cc: Mike Caoco, Stephen Hemminger, Netdev, LKML
In-Reply-To: <87bp5df9j9.fsf@gmail.com>
Ben Gamari wrote:
> On Wed, 24 Nov 2010 12:29:43 -0800 (PST), Mike Caoco <caoco2002@yahoo.com> wrote:
>> So if you rely on NetworkManager or Connman or Quagga to remove the
>> route, the routing daemons will recompute the route table anyway. So
>> why cannot this be done in the kernel?
>
> This is policy. In the Linux world we generally strive to separate
> policy from mechanism, leaving the former to userspace. This allows
> (potentially complex) policy decisions to be made in user-space. The
> reason for this is two-fold: First, every line of kernel code introduces
> the potentially for a bug and error handling in the kernel is generally
> more complex than it is in user-space. Secondly, allowing user-space to
> handle policy allows users to do things with the kernel that kernel
> developers did not envision. This flexibility is one reason why the
> kernel is so suited for running on anything from your cell-phone to 4000
> processor big iron.
Secondly and a half, if you add a specific route you may really
mean it, and prefer not to send at all than use default.
Cheers,
Jarek P.
^ permalink raw reply
* Re: [PATCH 2/2] The new jhash implementation
From: Jozsef Kadlecsik @ 2010-11-25 21:14 UTC (permalink / raw)
To: Eric Dumazet
Cc: linux-kernel, netdev, netfilter-devel, Linus Torvalds,
Rusty Russell
In-Reply-To: <1290705669.2858.381.camel@edumazet-laptop>
On Thu, 25 Nov 2010, Eric Dumazet wrote:
> Le jeudi 25 novembre 2010 ? 15:41 +0100, Jozsef Kadlecsik a ?crit :
>
> > +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
> > +#define __jhash_final(a, b, c) \
> > +{ \
> > + c ^= b; c -= rol32(b, 14); \
> > + a ^= c; a -= rol32(c, 11); \
> > + b ^= a; b -= rol32(a, 25); \
> > + c ^= b; c -= rol32(b, 16); \
> > + a ^= c; a -= rol32(c, 4); \
> > + b ^= a; b -= rol32(a, 14); \
> > + c ^= b; c -= rol32(b, 24); \
> > +}
> > +
>
> So we now have a special __jhash_final(a, b, c) thing for the last
> values.
Yes, and...
> > +/* jhash_3words - hash exactly 3, 2 or 1 word(s) */
> > +u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
> > +{
> > + a += JHASH_INITVAL;
> > + b += JHASH_INITVAL;
> > + c += initval;
> > +
> > + __jhash_mix(a, b, c);
> > +
> > + return c;
> > +}
> > +EXPORT_SYMBOL(jhash_3words);
>
> But you dont use it in jhash_3words().
... excellent spotting! That should be __jhash_final in jhash_3words.
> I do think jhash_3words() should stay inlined, unless maybe
> CONFIG_CC_OPTIMIZE_FOR_SIZE=y
>
> We hit it several time per packet in network stack in RX path.
>
> Once in skb_get_rxhash() (unless device fills skb->rxhash)
> Once at least in conntrack (if used).
> Once in UDP or TCP stack
There follows the new version of the second patch: jhash_3words is moved
back to inlined form and the bug in it fixed. Thanks for your thorough
reviewing indeed!
The current jhash.h implements the lookup2() hash function by Bob Jenkins.
However, lookup2() is outdated as Bob wrote a new hash function called
lookup3(). The new hash function
- mixes better than lookup2(): it passes the check that every input bit
changes every output bit 50% of the time, while lookup2() failed it.
- performs better: compiled with -O2 on Core2 Duo, lookup3() 20-40% faster
than lookup2() depending on the key length.
The patch replaces the lookup2() implementation of the 'jhash*'
functions with that of lookup3().
You can read a longer comparison of the two and other hash functions at
http://burtleburtle.net/bob/hash/doobs.html.
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
include/linux/jhash.h | 140 ++++++++-----------------------------------------
lib/Makefile | 2 +-
lib/jhash.c | 127 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 151 insertions(+), 118 deletions(-)
create mode 100644 lib/jhash.c
diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index ced1159..963abad 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -1,131 +1,37 @@
#ifndef _LINUX_JHASH_H
#define _LINUX_JHASH_H
-/* jhash.h: Jenkins hash support.
- *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
- *
- * http://burtleburtle.net/bob/hash/
- *
- * These are the credits from Bob's sources:
- *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose. It has no warranty.
- *
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- *
- * I've modified Bob's hash to be useful in the Linux kernel, and
- * any bugs present are surely my fault. -DaveM
- */
-
-/* NOTE: Arguments are modified. */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= b; a -= c; a ^= (c>>13); \
- b -= c; b -= a; b ^= (a<<8); \
- c -= a; c -= b; c ^= (b>>13); \
- a -= b; a -= c; a ^= (c>>12); \
- b -= c; b -= a; b ^= (a<<16); \
- c -= a; c -= b; c ^= (b>>5); \
- a -= b; a -= c; a ^= (c>>3); \
- b -= c; b -= a; b ^= (a<<10); \
- c -= a; c -= b; c ^= (b>>15); \
-}
-
-/* The golden ration: an arbitrary value */
-#define JHASH_GOLDEN_RATIO 0x9e3779b9
-
-/* The most generic version, hashes an arbitrary sequence
- * of bytes. No alignment or length assumptions are made about
- * the input key.
- */
-static inline u32 jhash(const void *key, u32 length, u32 initval)
-{
- u32 a, b, c, len;
- const u8 *k = key;
-
- len = length;
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
-
- while (len >= 12) {
- a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
- b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
- c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
-
- __jhash_mix(a,b,c);
-
- k += 12;
- len -= 12;
- }
-
- c += length;
- switch (len) {
- case 11: c += ((u32)k[10]<<24);
- case 10: c += ((u32)k[9]<<16);
- case 9 : c += ((u32)k[8]<<8);
- case 8 : b += ((u32)k[7]<<24);
- case 7 : b += ((u32)k[6]<<16);
- case 6 : b += ((u32)k[5]<<8);
- case 5 : b += k[4];
- case 4 : a += ((u32)k[3]<<24);
- case 3 : a += ((u32)k[2]<<16);
- case 2 : a += ((u32)k[1]<<8);
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
+/* Best hash sizes are of power of two */
+#define jhash_size(n) ((u32)1<<(n))
+/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n) (jhash_size(n)-1)
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
}
-/* A special optimized version that handles 1 or more of u32s.
- * The length parameter here is the number of u32s in the key.
- */
-static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
-{
- u32 a, b, c, len;
-
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
- len = length;
-
- while (len >= 3) {
- a += k[0];
- b += k[1];
- c += k[2];
- __jhash_mix(a, b, c);
- k += 3; len -= 3;
- }
-
- c += length * 4;
-
- switch (len) {
- case 2 : b += k[1];
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL 0xdeadbeef
+extern u32 jhash(const void *key, u32 length, u32 initval);
+extern u32 jhash2(const u32 *k, u32 length, u32 initval);
-/* A special ultra-optimized versions that knows they are hashing exactly
- * 3, 2 or 1 word(s).
- *
- * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
- * done at the end is not done here.
- */
+/* jhash_3words - hash exactly 3, 2 or 1 word(s) */
static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
{
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
+ a += JHASH_INITVAL;
+ b += JHASH_INITVAL;
c += initval;
- __jhash_mix(a, b, c);
+ __jhash_final(a, b, c);
return c;
}
diff --git a/lib/Makefile b/lib/Makefile
index e6a3763..a1a4932 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -10,7 +10,7 @@ endif
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o \
idr.o int_sqrt.o extable.o prio_tree.o \
- sha1.o irq_regs.o reciprocal_div.o argv_split.o \
+ jhash.o sha1.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o prio_heap.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o flex_array.o
diff --git a/lib/jhash.c b/lib/jhash.c
new file mode 100644
index 0000000..e0c8d11
--- /dev/null
+++ b/lib/jhash.c
@@ -0,0 +1,127 @@
+/* jhash.c: Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions. Routines to test the hash are included
+ * if SELF_TEST is defined. You can use this free for any purpose. It's in
+ * the public domain. It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/unaligned/packed_struct.h>
+#include <linux/jhash.h>
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const u8 *k = key;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ /* All but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12) {
+ a += __get_unaligned_cpu32(k);
+ b += __get_unaligned_cpu32(k + 4);
+ c += __get_unaligned_cpu32(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ /* Last block: affect all 32 bits of (c) */
+ /* All the case statements fall through */
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+EXPORT_SYMBOL(jhash);
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * Returns the hash value of the key.
+ */
+u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+ /* Handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /* Handle the last 3 u32's: all the case statements fall through */
+ switch (length) {
+ case 3: c += k[2];
+ case 2: b += k[1];
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+EXPORT_SYMBOL(jhash2);
+
--
1.7.0.4
Best regards,
Jozsef
-
E-mail : kadlec@blackhole.kfki.hu, kadlec@mail.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
H-1525 Budapest 114, POB. 49, Hungary
^ permalink raw reply related
* Re: Unplug ethernet cable, the route persists. Why?
From: Ben Gamari @ 2010-11-25 20:11 UTC (permalink / raw)
To: Mike Caoco, Stephen Hemminger; +Cc: Netdev, LKML
In-Reply-To: <144174.46619.qm@web63401.mail.re1.yahoo.com>
On Wed, 24 Nov 2010 12:29:43 -0800 (PST), Mike Caoco <caoco2002@yahoo.com> wrote:
> So if you rely on NetworkManager or Connman or Quagga to remove the
> route, the routing daemons will recompute the route table anyway. So
> why cannot this be done in the kernel?
This is policy. In the Linux world we generally strive to separate
policy from mechanism, leaving the former to userspace. This allows
(potentially complex) policy decisions to be made in user-space. The
reason for this is two-fold: First, every line of kernel code introduces
the potentially for a bug and error handling in the kernel is generally
more complex than it is in user-space. Secondly, allowing user-space to
handle policy allows users to do things with the kernel that kernel
developers did not envision. This flexibility is one reason why the
kernel is so suited for running on anything from your cell-phone to 4000
processor big iron.
> Even when no NetworkManager/Quagga is present, I think it is a
> legitimate reason to recompute the route when a cable is unplugged,
> which should not be a frequent event unless when under error
> conditions.
There have to be real, demonstrable benefits for moving policy into the
kernel (i.e. the recent discussions concerning per-session
cgroups). Considering how long the Linux networking subsystem has
existed, I highly doubt there is a good reason to move this sort of
routing policy into the kernel that has not already been discussed.
Cheers,
- Ben
^ permalink raw reply
* Re: Unplug ethernet cable, the route persists. Why?
From: Hans de Bruin @ 2010-11-25 19:10 UTC (permalink / raw)
To: Mike Caoco; +Cc: Netdev, LKML
In-Reply-To: <242082.99180.qm@web63407.mail.re1.yahoo.com>
On 11/24/2010 08:48 PM, Mike Caoco wrote:
> Hello,
>
> This may have been discussed, but all search engines couldn't give me a good answer...
>
> I notice that when an interface is up/running, a local route is in the routing table:
>
> $ ifconfig eth1
> eth1 Link encap:Ethernet HWaddr 00:13:20:0e:2f:ed
> inet addr:192.168.1.125 Bcast:192.168.1.255 Mask:255.255.255.0
> inet6 addr: fe80::213:20ff:fe0e:2fed/64 Scope:Link
> UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
> RX packets:35984995 errors:0 dropped:0 overruns:0 frame:0
> TX packets:7409151 errors:0 dropped:0 overruns:0 carrier:0
> collisions:0 txqueuelen:1000
> RX bytes:3252413825 (3.2 GB) TX bytes:1340077250 (1.3 GB)
>
> $ ip route
> 192.168.20.0/24 dev eth0 proto kernel scope link src 192.168.20.120
> 192.168.1.0/24 dev eth1 proto kernel scope link src 192.168.1.125
> default via 192.168.20.254 dev eth1 metric 100
>
> After I unplug the cable from eth1, the RUNNING flag disappears, but the route is still there:
>
> $ ifconfig eth1
> eth1 Link encap:Ethernet HWaddr 00:13:20:0e:2f:ed
> inet addr:192.168.1.125 Bcast:192.168.1.255 Mask:255.255.255.0
> inet6 addr: fe80::213:20ff:fe0e:2fed/64 Scope:Link
> UP BROADCAST MULTICAST MTU:1500 Metric:1
> RX packets:35985023 errors:0 dropped:0 overruns:0 frame:0
> TX packets:7409151 errors:0 dropped:0 overruns:0 carrier:0
> collisions:0 txqueuelen:1000
> RX bytes:3252415633 (3.2 GB) TX bytes:1340077250 (1.3 GB)
>
> $ ip route
> 192.168.20.0/24 dev eth0 proto kernel scope link src 192.168.20.120
> 192.168.1.0/24 dev eth1 proto kernel scope link src 192.168.1.125
> default via 192.168.20.254 dev eth1 metric 100
>
> And that *prevents* from using the default route to reach 192.168.1/24 subnet after eth1 is out.
Well suppose the default route is used, and the source address is of the
packets stay 192.168.1.125, then there is no way for the peers to
respond to the packets. I do not know wat sets the source address in
linux, but on windows it will allway's be the one you do not want.
--
Hans
^ permalink raw reply
* [PATCH] cxgb3: Removing unused return variable
From: leitao @ 2010-11-25 17:53 UTC (permalink / raw)
To: divy; +Cc: netdev, Breno Leitao
Currently the ret variable is not used for anything other than
receive the value of the t3_adapter_error(), which will always be 0,
because the reset parameter is 0.
Signed-off-by: Breno Leitao <leitao@linux.vnet.ibm.com>
---
drivers/net/cxgb3/cxgb3_main.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 046d846..3864617 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -3006,12 +3006,11 @@ static pci_ers_result_t t3_io_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct adapter *adapter = pci_get_drvdata(pdev);
- int ret;
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
- ret = t3_adapter_error(adapter, 0, 0);
+ t3_adapter_error(adapter, 0, 0);
/* Request a slot reset. */
return PCI_ERS_RESULT_NEED_RESET;
--
1.7.1
^ permalink raw reply related
* Re: [PATCH 2/2] The new jhash implementation
From: Eric Dumazet @ 2010-11-25 17:21 UTC (permalink / raw)
To: Jozsef Kadlecsik
Cc: linux-kernel, netdev, netfilter-devel, Linus Torvalds,
Rusty Russell
In-Reply-To: <alpine.DEB.2.00.1011251520270.1198@blackhole.kfki.hu>
Le jeudi 25 novembre 2010 à 15:41 +0100, Jozsef Kadlecsik a écrit :
...
> +/* __jhash_mix -- mix 3 32-bit values reversibly. */
> +#define __jhash_mix(a, b, c) \
> +{ \
> + a -= c; a ^= rol32(c, 4); c += b; \
> + b -= a; b ^= rol32(a, 6); a += c; \
> + c -= b; c ^= rol32(b, 8); b += a; \
> + a -= c; a ^= rol32(c, 16); c += b; \
> + b -= a; b ^= rol32(a, 19); a += c; \
> + c -= b; c ^= rol32(b, 4); b += a; \
> +}
> +
> +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
> +#define __jhash_final(a, b, c) \
> +{ \
> + c ^= b; c -= rol32(b, 14); \
> + a ^= c; a -= rol32(c, 11); \
> + b ^= a; b -= rol32(a, 25); \
> + c ^= b; c -= rol32(b, 16); \
> + a ^= c; a -= rol32(c, 4); \
> + b ^= a; b -= rol32(a, 14); \
> + c ^= b; c -= rol32(b, 24); \
> +}
> +
So we now have a special __jhash_final(a, b, c) thing for the last
values.
> +/* jhash_3words - hash exactly 3, 2 or 1 word(s) */
> +u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
> +{
> + a += JHASH_INITVAL;
> + b += JHASH_INITVAL;
> + c += initval;
> +
> + __jhash_mix(a, b, c);
> +
> + return c;
> +}
> +EXPORT_SYMBOL(jhash_3words);
But you dont use it in jhash_3words().
I do think jhash_3words() should stay inlined, unless maybe
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
We hit it several time per packet in network stack in RX path.
Once in skb_get_rxhash() (unless device fills skb->rxhash)
Once at least in conntrack (if used).
Once in UDP or TCP stack
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH 2/2 v7] xps: Transmit Packet Steering
From: Ben Hutchings @ 2010-11-25 17:12 UTC (permalink / raw)
To: Tom Herbert; +Cc: davem, netdev, eric.dumazet
In-Reply-To: <alpine.DEB.2.00.1011211501430.14906@pokey.mtv.corp.google.com>
On Sun, 2010-11-21 at 15:17 -0800, Tom Herbert wrote:
[...]
> Each transmit queue can be associated with a number of CPUs which will
> use the queue to send packets. This is configured as a CPU mask on a
> per queue basis in:
>
> /sys/class/net/eth<n>/queues/tx-<n>/xps_cpus
>
> The mappings are stored per device in an inverted data structure that
> maps CPUs to queues. In the netdevice structure this is an array of
> num_possible_cpu structures where each structure holds and array of
> queue_indexes for queues which that CPU can use.
[...]
I was thinking we could also use CPU reverse-mapping from IRQ affinity
to set this up automatically - maybe leaving an option for manual
override?
Ben.
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: [PATCH] ipv4: mitigate an integer underflow when comparing tcp timestamps
From: ZHANG, Le @ 2010-11-25 16:55 UTC (permalink / raw)
To: David Miller
Cc: eric.dumazet, netdev, linux-kernel, kuznet, pekkas, jmorris,
yoshfuji, kaber
In-Reply-To: <20101114.115536.71112799.davem@davemloft.net>
[-- Attachment #1: Type: text/plain, Size: 953 bytes --]
On 11:55 Sun 14 Nov , David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Sun, 14 Nov 2010 09:52:25 +0100
>
> > Really, if you have multiple clients behind a common NAT, you cannot use
> > this code at all, since NAT doesnt usually change TCP timestamps.
>
> NAT is %100 incompatible with TW recycling, full stop.
>
> There is no maybe, or maybe not.
>
> If you are behind NAT you must not turn this feature on, ever.
Sorry, this question may be OT on this list, but I am just curious:
Is there any other OS has implemented this feature like Linux?
To be very specific, by this feature, I mean rejecting old duplicates based
on per-host cache of last timestamp received from any connections.
As suggested in RFC1323 Appendix B.2 (b).
Does anyone, by any chance, know the answer? Thanks in advance!
--
ZHANG, Le
http://zhangle.is-a-geek.org
0260 C902 B8F8 6506 6586 2B90 BC51 C808 1E4E 2973
[-- Attachment #2: Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply
* Re: TCP_MAXSEG vs TCP/generic segmentation offload (tso/gso)
From: Eric Dumazet @ 2010-11-25 16:44 UTC (permalink / raw)
To: Niels Möller; +Cc: linux-kernel, netdev
In-Reply-To: <nn1v69qsjd.fsf@stalhein.lysator.liu.se>
Le jeudi 25 novembre 2010 à 17:25 +0100, Niels Möller a écrit :
>
> I was under the impression that TSO (and maybe GSO) implied more
> cleverness in the network card; that the network card more or less gets
> to decide by itself how to divide a tcp stream into segments. And for
> example in the atl1c driver which I looked a bit into, this was what the
> REG_MTU register was for. Seems I have gotten this totally wrong.
>
You were not totally wrong, but device does not use its own MTU to
perform the split : We give it the MSS of the flow.
You can have multiple flows in parallel, each with its own MSS, while
device has a single MTU.
> Maybe Documentation/networking/netdevices.txt could clarify how it
> works. Currently, it says
>
> : Segmentation Offload (GSO, TSO) is an exception to this rule. The
> : upper layer protocol may pass a large socket buffer to the device
> : transmit routine, and the device will break that up into separate
> : packets based on the current MTU.
MTU means : maximum transmission unit. But each layer has its own :)
In this context, TCP protocol, so MSS should be taken into account.
By default, MSS derives from device MTU (ipv4 without options case :
MSS = MTU - 40), but user can change it with TCP_MAXSEG.
^ permalink raw reply
* Re: TCP_MAXSEG vs TCP/generic segmentation offload (tso/gso)
From: Niels Möller @ 2010-11-25 16:25 UTC (permalink / raw)
To: Eric Dumazet; +Cc: linux-kernel, netdev
In-Reply-To: <1290698312.2858.341.camel@edumazet-laptop>
Eric Dumazet <eric.dumazet@gmail.com> writes:
> I believe TCP_MAXSEG is working fine, but GRO/GSO dont care at all :
> They coalesce frames whatever their size is.
I was under the impression that TSO (and maybe GSO) implied more
cleverness in the network card; that the network card more or less gets
to decide by itself how to divide a tcp stream into segments. And for
example in the atl1c driver which I looked a bit into, this was what the
REG_MTU register was for. Seems I have gotten this totally wrong.
Maybe Documentation/networking/netdevices.txt could clarify how it
works. Currently, it says
: Segmentation Offload (GSO, TSO) is an exception to this rule. The
: upper layer protocol may pass a large socket buffer to the device
: transmit routine, and the device will break that up into separate
: packets based on the current MTU.
Regards, and thanks for your patience,
/Niels
--
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.
^ permalink raw reply
* Re: [PATCHv2] hso: fix disable_net
From: Johan Hovold @ 2010-11-25 16:25 UTC (permalink / raw)
To: Filip Aben
Cc: davem-fT/PcQaiUtIeIZ0/mPfg9Q, linux-usb-u79uwXL29TY76Z2rM5mHXA,
netdev-u79uwXL29TY76Z2rM5mHXA, pki-/L4m51SJ8HhmR6Xm/wNWPw
In-Reply-To: <1290700866.2112.60.camel@filip-linux>
On Thu, Nov 25, 2010 at 5:01 PM, Filip Aben <f.aben-x9gZzRpC1QbQT0dZR+AlfA@public.gmane.org> wrote:
> On Thu, 2010-11-25 at 16:03 +0100, Johan Hovold wrote:
>> Please add the appropriate Reported-by-tags as well, e.g.
>>
>> Reported-by: Piotr Isajew <pki-/L4m51SJ8HhmR6Xm/wNWPw@public.gmane.org>
>> Reported-by: Johan Hovold <jhovold-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>
> I would have gladly done so if you included that in your comment on the
> previous patch :)
What is stopping you from submitting a v3? :)
But perhaps David can pick those up from the thread. Not sure how this
is usually dealt with on netdev.
Thanks,
Johan
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCHv2] hso: fix disable_net
From: Filip Aben @ 2010-11-25 16:01 UTC (permalink / raw)
To: Johan Hovold
Cc: davem-fT/PcQaiUtIeIZ0/mPfg9Q, linux-usb-u79uwXL29TY76Z2rM5mHXA,
netdev-u79uwXL29TY76Z2rM5mHXA, pki-/L4m51SJ8HhmR6Xm/wNWPw
In-Reply-To: <AANLkTikODWoXB1PmB+SFbFWenYm_-o+RJ+czK9LnaXmn-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
On Thu, 2010-11-25 at 16:03 +0100, Johan Hovold wrote:
> On Thu, Nov 25, 2010 at 2:40 PM, Filip Aben <f.aben-x9gZzRpC1QbQT0dZR+AlfA@public.gmane.org> wrote:
> > The HSO driver incorrectly creates a serial device instead of a net
> > device when disable_net is set. It shouldn't create anything for the
> > network interface.
> >
> > Signed-off-by: Filip Aben <f.aben-x9gZzRpC1QbQT0dZR+AlfA@public.gmane.org>
>
> Please add the appropriate Reported-by-tags as well, e.g.
>
> Reported-by: Piotr Isajew <pki-/L4m51SJ8HhmR6Xm/wNWPw@public.gmane.org>
> Reported-by: Johan Hovold <jhovold-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
I would have gladly done so if you included that in your comment on the
previous patch :)
>
> As this patch is meant to fix a driver crash, perhaps it should be marked for
> stable as well?
>
I don't think it's that critical: it only applies to people explicitly
loading the module with disable_net set. But feel free to queue it up
for stable.
Thanks,
Filip-
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: TCP_MAXSEG vs TCP/generic segmentation offload (tso/gso)
From: Eric Dumazet @ 2010-11-25 15:18 UTC (permalink / raw)
To: Niels Möller; +Cc: linux-kernel, netdev
In-Reply-To: <nnaakxqw1u.fsf@stalhein.lysator.liu.se>
Le jeudi 25 novembre 2010 à 16:09 +0100, Niels Möller a écrit :
> Eric Dumazet <eric.dumazet@gmail.com> writes:
>
> > So... there is no 'bug', unless you trust too much tcpdump output.
>
> I really expected tcpdump -e to display the actual values in the link
> layer header, including the correct frame size. It's more than a bit
> confusing if that is not the case...
>
> In the future, I will try to remember to always run tcpdump on a network
> node which (i) is different from the sending one, and (ii) has GRO
> disabled (and hence will discard packets if it has trouble processing
> them all, rather than coalesce them).
>
Just disable GSO and TSO on sending machine, then tcpdump will show you
individual frames.
> What about the TCP_MAXSEG socket option, should that work? From a quick
> look at driver source code, I could only see the handling of the
> per-interface MTU, no per-socket segment size.
TCP_MAXSEG is certainly not handled in driver layer, but TCP layer.
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
I believe TCP_MAXSEG is working fine, but GRO/GSO dont care at all :
They coalesce frames whatever their size is.
^ permalink raw reply
* Re: TCP_MAXSEG vs TCP/generic segmentation offload (tso/gso)
From: Niels Möller @ 2010-11-25 15:09 UTC (permalink / raw)
To: Eric Dumazet; +Cc: linux-kernel, netdev
In-Reply-To: <1290695253.2858.336.camel@edumazet-laptop>
Eric Dumazet <eric.dumazet@gmail.com> writes:
> So... there is no 'bug', unless you trust too much tcpdump output.
I really expected tcpdump -e to display the actual values in the link
layer header, including the correct frame size. It's more than a bit
confusing if that is not the case...
In the future, I will try to remember to always run tcpdump on a network
node which (i) is different from the sending one, and (ii) has GRO
disabled (and hence will discard packets if it has trouble processing
them all, rather than coalesce them).
What about the TCP_MAXSEG socket option, should that work? From a quick
look at driver source code, I could only see the handling of the
per-interface MTU, no per-socket segment size.
Thanks for the quick reply,
/Niels
--
Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26.
Internet email is subject to wholesale government surveillance.
^ permalink raw reply
* Re: [PATCHv2] hso: fix disable_net
From: Johan Hovold @ 2010-11-25 15:03 UTC (permalink / raw)
To: Filip Aben; +Cc: davem, linux-usb, netdev, pki
In-Reply-To: <1290692450.2112.21.camel@filip-linux>
On Thu, Nov 25, 2010 at 2:40 PM, Filip Aben <f.aben@option.com> wrote:
> The HSO driver incorrectly creates a serial device instead of a net
> device when disable_net is set. It shouldn't create anything for the
> network interface.
>
> Signed-off-by: Filip Aben <f.aben@option.com>
Please add the appropriate Reported-by-tags as well, e.g.
Reported-by: Piotr Isajew <pki@ex.com.pl>
Reported-by: Johan Hovold <jhovold@gmail.com>
As this patch is meant to fix a driver crash, perhaps it should be marked for
stable as well?
Thanks,
Johan
^ permalink raw reply
* Re: [PATCH 2/2] The new jhash implementation
From: Jozsef Kadlecsik @ 2010-11-25 14:41 UTC (permalink / raw)
To: Eric Dumazet
Cc: linux-kernel, netdev, netfilter-devel, Linus Torvalds,
Rusty Russell
In-Reply-To: <1290692943.2858.303.camel@edumazet-laptop>
On Thu, 25 Nov 2010, Eric Dumazet wrote:
> I agree jhash() should be not be inlined.
>
> I am not sure for other variants.
Yeah, I have got the same feelings. I decided to un-inline all of them
because that way the internal macros are not exposed at all.
> > +u32 jhash(const void *key, u32 length, u32 initval)
> > +{
> > + u32 a, b, c;
> > + const u8 *k = key;
> > +
> > + /* Set up the internal state */
> > + a = b = c = JHASH_INITVAL + length + initval;
> > +
> > + /* All but the last block: affect some 32 bits of (a,b,c) */
> > + while (length > 12) {
> > + a += k[0] + ((u32)k[1]<<8) + ((u32)k[2]<<16) + ((u32)k[3]<<24);
>
> disassembly code on x86_32 for the previous line :
>
> 26: 66 90 xchg %ax,%ax
> 28: 0f b6 72 01 movzbl 0x1(%edx),%esi
> 2c: 0f b6 4a 02 movzbl 0x2(%edx),%ecx
> 30: c1 e6 08 shl $0x8,%esi
> 33: c1 e1 10 shl $0x10,%ecx
> 36: 8d 0c 0e lea (%esi,%ecx,1),%ecx
> 39: 0f b6 32 movzbl (%edx),%esi
> 3c: 8d 34 31 lea (%ecx,%esi,1),%esi
> 3f: 0f b6 4a 03 movzbl 0x3(%edx),%ecx
> 43: c1 e1 18 shl $0x18,%ecx
> 46: 8d 0c 0e lea (%esi,%ecx,1),%ecx
>
> or (CONFIG_CC_OPTIMIZE_FOR_SIZE=y) :
>
> 1b: 0f b6 7b 01 movzbl 0x1(%ebx),%edi
> 1f: c1 e7 08 shl $0x8,%edi
> 22: 89 7d f0 mov %edi,-0x10(%ebp)
> 25: 0f b6 7b 02 movzbl 0x2(%ebx),%edi
> 29: c1 e7 10 shl $0x10,%edi
> 2c: 03 7d f0 add -0x10(%ebp),%edi
> 2f: 89 7d f0 mov %edi,-0x10(%ebp)
> 32: 0f b6 3b movzbl (%ebx),%edi
> 35: 03 7d f0 add -0x10(%ebp),%edi
> 38: 89 7d f0 mov %edi,-0x10(%ebp)
> 3b: 0f b6 7b 03 movzbl 0x3(%ebx),%edi
> 3f: c1 e7 18 shl $0x18,%edi
> 42: 03 7d f0 add -0x10(%ebp),%edi
>
>
> I suggest :
>
> #include <linux/unaligned/packed_struct.h>
> ...
> a += __get_unaligned_cpu32(k);
> b += __get_unaligned_cpu32(k+4);
> c += __get_unaligned_cpu32(k+8);
>
> Fits nicely in registers.
Good idea, thanks!
Here follows the updated second patch:
The current jhash.h implements the lookup2() hash function by Bob Jenkins.
However, lookup2() is outdated as Bob wrote a new hash function called
lookup3(). The new hash function
- mixes better than lookup2(): it passes the check that every input bit
changes every output bit 50% of the time, while lookup2() failed it.
- performs better: compiled with -O2 on Core2 Duo, lookup3() 20-40% faster
than lookup2() depending on the key length.
The patch replaces the lookup2() implementation of the 'jhash*'
functions with that of lookup3().
You can read a longer comparison of the two and other hash functions at
http://burtleburtle.net/bob/hash/doobs.html.
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
include/linux/jhash.h | 136 +++----------------------------------------
lib/Makefile | 2 +-
lib/jhash.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 163 insertions(+), 129 deletions(-)
create mode 100644 lib/jhash.c
diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index ced1159..ca69ac3 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -1,134 +1,14 @@
#ifndef _LINUX_JHASH_H
#define _LINUX_JHASH_H
-/* jhash.h: Jenkins hash support.
- *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
- *
- * http://burtleburtle.net/bob/hash/
- *
- * These are the credits from Bob's sources:
- *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose. It has no warranty.
- *
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- *
- * I've modified Bob's hash to be useful in the Linux kernel, and
- * any bugs present are surely my fault. -DaveM
- */
-
-/* NOTE: Arguments are modified. */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= b; a -= c; a ^= (c>>13); \
- b -= c; b -= a; b ^= (a<<8); \
- c -= a; c -= b; c ^= (b>>13); \
- a -= b; a -= c; a ^= (c>>12); \
- b -= c; b -= a; b ^= (a<<16); \
- c -= a; c -= b; c ^= (b>>5); \
- a -= b; a -= c; a ^= (c>>3); \
- b -= c; b -= a; b ^= (a<<10); \
- c -= a; c -= b; c ^= (b>>15); \
-}
-
-/* The golden ration: an arbitrary value */
-#define JHASH_GOLDEN_RATIO 0x9e3779b9
-
-/* The most generic version, hashes an arbitrary sequence
- * of bytes. No alignment or length assumptions are made about
- * the input key.
- */
-static inline u32 jhash(const void *key, u32 length, u32 initval)
-{
- u32 a, b, c, len;
- const u8 *k = key;
-
- len = length;
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
-
- while (len >= 12) {
- a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
- b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
- c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
-
- __jhash_mix(a,b,c);
-
- k += 12;
- len -= 12;
- }
-
- c += length;
- switch (len) {
- case 11: c += ((u32)k[10]<<24);
- case 10: c += ((u32)k[9]<<16);
- case 9 : c += ((u32)k[8]<<8);
- case 8 : b += ((u32)k[7]<<24);
- case 7 : b += ((u32)k[6]<<16);
- case 6 : b += ((u32)k[5]<<8);
- case 5 : b += k[4];
- case 4 : a += ((u32)k[3]<<24);
- case 3 : a += ((u32)k[2]<<16);
- case 2 : a += ((u32)k[1]<<8);
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-/* A special optimized version that handles 1 or more of u32s.
- * The length parameter here is the number of u32s in the key.
- */
-static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
-{
- u32 a, b, c, len;
-
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
- len = length;
-
- while (len >= 3) {
- a += k[0];
- b += k[1];
- c += k[2];
- __jhash_mix(a, b, c);
- k += 3; len -= 3;
- }
-
- c += length * 4;
-
- switch (len) {
- case 2 : b += k[1];
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-
-/* A special ultra-optimized versions that knows they are hashing exactly
- * 3, 2 or 1 word(s).
- *
- * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
- * done at the end is not done here.
- */
-static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
-{
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
- c += initval;
-
- __jhash_mix(a, b, c);
-
- return c;
-}
+/* Best hash sizes are of power of two */
+#define jhash_size(n) ((u32)1<<(n))
+/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n) (jhash_size(n)-1)
+
+extern u32 jhash(const void *key, u32 length, u32 initval);
+extern u32 jhash2(const u32 *k, u32 length, u32 initval);
+extern u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval);
static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
diff --git a/lib/Makefile b/lib/Makefile
index e6a3763..a1a4932 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -10,7 +10,7 @@ endif
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o \
idr.o int_sqrt.o extable.o prio_tree.o \
- sha1.o irq_regs.o reciprocal_div.o argv_split.o \
+ jhash.o sha1.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o prio_heap.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o flex_array.o
diff --git a/lib/jhash.c b/lib/jhash.c
new file mode 100644
index 0000000..538277b
--- /dev/null
+++ b/lib/jhash.c
@@ -0,0 +1,154 @@
+/* jhash.c: Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions. Routines to test the hash are included
+ * if SELF_TEST is defined. You can use this free for any purpose. It's in
+ * the public domain. It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/unaligned/packed_struct.h>
+#include <linux/jhash.h>
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL 0xdeadbeef
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const u8 *k = key;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ /* All but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12) {
+ a += __get_unaligned_cpu32(k);
+ b += __get_unaligned_cpu32(k + 4);
+ c += __get_unaligned_cpu32(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ /* Last block: affect all 32 bits of (c) */
+ /* All the case statements fall through */
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+EXPORT_SYMBOL(jhash);
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * Returns the hash value of the key.
+ */
+u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+ /* Handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /* Handle the last 3 u32's: all the case statements fall through */
+ switch (length) {
+ case 3: c += k[2];
+ case 2: b += k[1];
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+EXPORT_SYMBOL(jhash2);
+
+/* jhash_3words - hash exactly 3, 2 or 1 word(s) */
+u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += JHASH_INITVAL;
+ b += JHASH_INITVAL;
+ c += initval;
+
+ __jhash_mix(a, b, c);
+
+ return c;
+}
+EXPORT_SYMBOL(jhash_3words);
--
1.7.0.4
Best regards,
Jozsef
-
E-mail : kadlec@blackhole.kfki.hu, kadlec@mail.kfki.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : KFKI Research Institute for Particle and Nuclear Physics
H-1525 Budapest 114, POB. 49, Hungary
^ permalink raw reply related
* Re: TCP_MAXSEG vs TCP/generic segmentation offload (tso/gso)
From: Eric Dumazet @ 2010-11-25 14:27 UTC (permalink / raw)
To: Niels Möller; +Cc: linux-kernel, netdev
In-Reply-To: <nny68hqzzn.fsf@stalhein.lysator.liu.se>
Le jeudi 25 novembre 2010 à 14:44 +0100, Niels Möller a écrit :
> [ This is a slightly updated repost of a an October 21 mail to the
> linux-net list. Any hints or advice appreciated. /Niels ]
>
CC netdev
> I have been observing large ethernet packets when generating TCP traffic
> over a local ethernet, up to a bit over 20000 bytes, even though the
> interface MTU is 1500 bytes.
>
> Furthermore, I tried to use setsockopt with TCP_MAXSEG to limit the TCP
> segment size further, to 1000 bytes, and that didn't have any effect.
>
> When bugreporting a related problem to the debian kernel maintainers, I
> was told that the behaviour may be linked to the use of TCP segmentation
> offload (see http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=600286).
>
> Disabling TSO and GSO using ethtool solves both problems: Generated
> packets now are limited in size by both the interface MTU and the
> segment size set with setsockopt. (Except the atl1c driver, where
> ethtool -K eth0 tso off only results in a "Cannot set device tcp
> segmentation offload settings: Operation not supported").
>
> Before I try to write proper bug reports on specific network drivers (I
> have seen problems with several network drivers on different machines,
> unfortunately using different linux versions), I would like to know:
>
> 1. Is TCP_MAXSEG supposed to work at all with network drivers that do
> tcp segmentation offload?
>
> 2. If it is supposed to work, can someone give a rough sketch on how the
> per-socket segment size, set with setsockopt(... TCP_MAXSEG,...), is
> passed down to the driver and to the network hardware? I suspect it
> ought to be passed with each "pseudo-packet" to be transmitted.
>
> I have spent some time searching the documentation and the net for
> answers, without result, hence I'm posting to this list. I'm not
> subscribed, so please cc any replies.
>
> (Regarding packets larger than the interface MTU, that seems clearly
> buggy to me, and I think I already know enough to be able to file proper
> bug reports. And in the atl1c driver, it appears to have been fixed
> between 1.0.0.1-NAPI and 1.0.1.0-NAPI).
GSO is a software technique. Same for GRO.
Physical frames are indeed 1500 bytes (on regular ethernet links)
tcpdump gives you the high level view, before segmentation done in lower
levels (by NIC itself or in linux stack) in Transmit path.
We also have GRO in receive path, able to coalesce several 1500 bytes
frames into a single one (if same tcp flow), so that overhead in stacks
is lowered (netfilter, IP stack, tcp stack, bridge, routing ...)
So... there is no 'bug', unless you trust too much tcpdump output.
^ permalink raw reply
* [PATCHv2 2/2] USB CDC NCM host driver
From: Alexey Orishko @ 2010-11-25 14:14 UTC (permalink / raw)
To: gregkh, linux-usb
Cc: netdev, oliver, yauheni.kaliuta, balbi, sjur.brandeland,
Alexey Orishko
In-Reply-To: <1290694472-2680-1-git-send-email-alexey.orishko@stericsson.com>
The patch provides USB CDC NCM host driver support in the Linux Kernel.
Changes:
drivers/net/usb/cdc_ncm.c:
- initial submission of the CDC NCM host driver;
- verified on Intel 32/64 bit, Intel Atom, ST-Ericsson U8500 (ARM)
- throughput measured over 100 Mbits duplex;
- driver supports 16-bit NTB format only, but it is more than enough for
transfers up to 64K;
- driver can handle up to 32 datagrams in received NTB;
- timer is used to collect several packets in Tx direction
drivers/net/usb/Kconfig:
- a new entry to compile CDC NCM host driver
drivers/net/usb/Makefile:
- a new entry to compile CDC NCM host driver
Signed-off-by: Alexey Orishko <alexey.orishko@stericsson.com>
---
Patch v2:
based on suggestion moved Changes to the comment log
drivers/net/usb/Kconfig | 19 +
drivers/net/usb/Makefile | 1 +
drivers/net/usb/cdc_ncm.c | 1224 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1244 insertions(+), 0 deletions(-)
create mode 100644 drivers/net/usb/cdc_ncm.c
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index 52ffabe..6f600cc 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -196,6 +196,25 @@ config USB_NET_CDC_EEM
IEEE 802 "local assignment" bit is set in the address, a "usbX"
name is used instead.
+config USB_NET_CDC_NCM
+ tristate "CDC NCM support"
+ depends on USB_USBNET
+ default y
+ help
+ This driver provides support for CDC NCM (Network Control Model
+ Device USB Class Specification). The CDC NCM specification is
+ available from <http://www.usb.org/>.
+
+ Say "y" to link the driver statically, or "m" to build a
+ dynamically linked module.
+
+ This driver should work with at least the following devices:
+ * ST-Ericsson M700 LTE FDD/TDD Mobile Broadband Modem (ref. design)
+ * ST-Ericsson M5730 HSPA+ Mobile Broadband Modem (reference design)
+ * ST-Ericsson M570 HSPA+ Mobile Broadband Modem (reference design)
+ * ST-Ericsson M343 HSPA Mobile Broadband Modem (reference design)
+ * Ericsson F5521gw Mobile Broadband Module
+
config USB_NET_DM9601
tristate "Davicom DM9601 based USB 1.1 10/100 ethernet devices"
depends on USB_USBNET
diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile
index a19b025..cac1703 100644
--- a/drivers/net/usb/Makefile
+++ b/drivers/net/usb/Makefile
@@ -26,4 +26,5 @@ obj-$(CONFIG_USB_CDC_PHONET) += cdc-phonet.o
obj-$(CONFIG_USB_IPHETH) += ipheth.o
obj-$(CONFIG_USB_SIERRA_NET) += sierra_net.o
obj-$(CONFIG_USB_NET_CX82310_ETH) += cx82310_eth.o
+obj-$(CONFIG_USB_NET_CDC_NCM) += cdc_ncm.o
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
new file mode 100644
index 0000000..80755cd
--- /dev/null
+++ b/drivers/net/usb/cdc_ncm.c
@@ -0,0 +1,1224 @@
+/*
+ * cdc_ncm.c
+ *
+ * Copyright (C) ST-Ericsson 2010
+ * Contact: Alexey Orishko <alexey.orishko@stericsson.com>
+ * Original author: Hans Petter Selasky <hans.petter.selasky@stericsson.com>
+ *
+ * USB Host Driver for Network Control Model (NCM)
+ * http://www.usb.org/developers/devclass_docs/NCM10.zip
+ *
+ * The NCM encoding, decoding and initialization logic
+ * derives from FreeBSD 8.x. if_cdce.c and if_cdcereg.h
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose this file to be licensed under the terms
+ * of the GNU General Public License (GPL) Version 2 or the 2-clause
+ * BSD license listed below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/ctype.h>
+#include <linux/ethtool.h>
+#include <linux/workqueue.h>
+#include <linux/mii.h>
+#include <linux/crc32.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/usb/usbnet.h>
+#include <linux/usb/cdc.h>
+
+#define DRIVER_VERSION "23-Nov-2010"
+
+/* CDC NCM subclass 3.2.1 */
+#define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10
+
+/* Maximum NTB length */
+#define CDC_NCM_NTB_MAX_SIZE_TX 16384 /* bytes */
+#define CDC_NCM_NTB_MAX_SIZE_RX 16384 /* bytes */
+
+/* Minimum value for MaxDatagramSize, ch. 6.2.9 */
+#define CDC_NCM_MIN_DATAGRAM_SIZE 1514 /* bytes */
+
+#define CDC_NCM_MIN_TX_PKT 512 /* bytes */
+
+/* Default value for MaxDatagramSize */
+#define CDC_NCM_MAX_DATAGRAM_SIZE 2048 /* bytes */
+
+/*
+ * Maximum amount of datagrams in NCM Datagram Pointer Table, not counting
+ * the last NULL entry. Any additional datagrams in NTB would be discarded.
+ */
+#define CDC_NCM_DPT_DATAGRAMS_MAX 32
+
+/* Restart the timer, if amount of datagrams is less than given value */
+#define CDC_NCM_RESTART_TIMER_DATAGRAM_CNT 3
+
+/*
+ * We need to align the beginning of the IP packet (not the Ethernet framing).
+ * The following macro aligns the given offset to the given remainder
+ * and modulus, which must be power of two.
+ */
+#define CDC_NCM_ALIGN(rem, offset, mod) \
+ ((long)(((long)(rem)) - ((long)((-(long)(offset)) & (-(long)(mod))))))
+
+/* The following macro defines the minimum header space */
+#define CDC_NCM_MIN_HDR_SIZE \
+ (sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \
+ (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
+
+struct connection_speed_change {
+ __le32 USBitRate; /* holds 3GPP downlink value, bits per second */
+ __le32 DSBitRate; /* holds 3GPP uplink value, bits per second */
+} __attribute__ ((packed));
+
+struct cdc_ncm_data {
+ struct usb_cdc_ncm_nth16 nth16;
+ struct usb_cdc_ncm_ndp16 ndp16;
+ struct usb_cdc_ncm_dpe16 dpe16[CDC_NCM_DPT_DATAGRAMS_MAX + 1];
+};
+
+struct cdc_ncm_ctx {
+ struct cdc_ncm_data rx_ncm;
+ struct cdc_ncm_data tx_ncm;
+ struct usb_cdc_ncm_ntb_parameters ncm_parm;
+ struct timer_list tx_timer;
+
+ const struct usb_cdc_ncm_desc *func_desc;
+ const struct usb_cdc_header_desc *header_desc;
+ const struct usb_cdc_union_desc *union_desc;
+ const struct usb_cdc_ether_desc *ether_desc;
+
+ struct net_device *netdev;
+ struct usb_device *udev;
+ struct usb_host_endpoint *in_ep;
+ struct usb_host_endpoint *out_ep;
+ struct usb_host_endpoint *status_ep;
+ struct usb_interface *intf;
+ struct usb_interface *control;
+ struct usb_interface *data;
+
+ struct sk_buff *tx_curr_skb;
+ struct sk_buff *tx_rem_skb;
+
+ spinlock_t mtx;
+
+ u32 tx_timer_pending;
+ u32 tx_curr_offset;
+ u32 tx_curr_last_offset;
+ u32 tx_curr_frame_num;
+ u32 rx_speed;
+ u32 tx_speed;
+ u32 rx_max;
+ u32 tx_max;
+ u32 max_datagram_size;
+ u16 tx_max_datagrams;
+ u16 tx_remainder;
+ u16 tx_modulus;
+ u16 tx_ndp_modulus;
+ u16 tx_seq;
+ u16 connected;
+ u8 data_claimed;
+ u8 control_claimed;
+};
+
+static void cdc_ncm_tx_timeout(unsigned long arg);
+static const struct driver_info cdc_ncm_info;
+static struct usb_driver cdc_ncm_driver;
+static struct ethtool_ops cdc_ncm_ethtool_ops;
+
+static const struct usb_device_id cdc_devs[] = {
+ { USB_INTERFACE_INFO(USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE),
+ .driver_info = (unsigned long)&cdc_ncm_info,
+ },
+ {
+ },
+};
+
+MODULE_DEVICE_TABLE(usb, cdc_devs);
+
+static void
+cdc_ncm_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info)
+{
+ struct usbnet *dev = netdev_priv(net);
+
+ strncpy(info->driver, dev->driver_name, sizeof(info->driver));
+ strncpy(info->version, DRIVER_VERSION, sizeof(info->version));
+ strncpy(info->fw_version, dev->driver_info->description,
+ sizeof(info->fw_version));
+ usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
+}
+
+static int
+cdc_ncm_do_request(struct cdc_ncm_ctx *ctx, struct usb_cdc_notification *req,
+ void *data, u16 flags, u16 *actlen, u16 timeout)
+{
+ int err;
+
+ err = usb_control_msg(ctx->udev, (req->bmRequestType & USB_DIR_IN) ?
+ usb_rcvctrlpipe(ctx->udev, 0) :
+ usb_sndctrlpipe(ctx->udev, 0),
+ req->bNotificationType, req->bmRequestType,
+ req->wValue,
+ req->wIndex, data,
+ req->wLength, timeout);
+
+ if (err < 0) {
+ if (actlen)
+ *actlen = 0;
+ return err;
+ }
+
+ if (actlen)
+ *actlen = err;
+
+ return 0;
+}
+
+static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
+{
+ struct usb_cdc_notification req;
+ u32 val;
+ __le16 max_datagram_size;
+ u8 flags;
+ u8 iface_no;
+ int err;
+
+ iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber;
+
+ req.bmRequestType = USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE;
+ req.bNotificationType = USB_CDC_GET_NTB_PARAMETERS;
+ req.wValue = 0;
+ req.wIndex = cpu_to_le16(iface_no);
+ req.wLength = cpu_to_le16(sizeof(ctx->ncm_parm));
+
+ err = cdc_ncm_do_request(ctx, &req, &ctx->ncm_parm, 0, NULL, 1000);
+ if (err) {
+ pr_debug("failed GET_NTB_PARAMETERS\n");
+ return 1;
+ }
+
+ /* read correct set of parameters according to device mode */
+ ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize);
+ ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize);
+ ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder);
+ ctx->tx_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutDivisor);
+ ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment);
+
+ if (ctx->func_desc != NULL)
+ flags = ctx->func_desc->bmNetworkCapabilities;
+ else
+ flags = 0;
+
+ pr_debug("dwNtbInMaxSize=%u dwNtbOutMaxSize=%u "
+ "wNdpOutPayloadRemainder=%u wNdpOutDivisor=%u "
+ "wNdpOutAlignment=%u flags=0x%x\n",
+ ctx->rx_max, ctx->tx_max, ctx->tx_remainder, ctx->tx_modulus,
+ ctx->tx_ndp_modulus, flags);
+
+ /* max count of tx datagrams without terminating NULL entry */
+ ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX;
+
+ /* verify maximum size of received NTB in bytes */
+ if ((ctx->rx_max <
+ (CDC_NCM_MIN_HDR_SIZE + CDC_NCM_MIN_DATAGRAM_SIZE)) ||
+ (ctx->rx_max > CDC_NCM_NTB_MAX_SIZE_RX)) {
+ pr_debug("Using default maximum receive length=%d\n",
+ CDC_NCM_NTB_MAX_SIZE_RX);
+ ctx->rx_max = CDC_NCM_NTB_MAX_SIZE_RX;
+ }
+
+ /* verify maximum size of transmitted NTB in bytes */
+ if ((ctx->tx_max <
+ (CDC_NCM_MIN_HDR_SIZE + CDC_NCM_MIN_DATAGRAM_SIZE)) ||
+ (ctx->tx_max > CDC_NCM_NTB_MAX_SIZE_TX)) {
+ pr_debug("Using default maximum transmit length=%d\n",
+ CDC_NCM_NTB_MAX_SIZE_TX);
+ ctx->tx_max = CDC_NCM_NTB_MAX_SIZE_TX;
+ }
+
+ /*
+ * verify that the structure alignment is:
+ * - power of two
+ * - not greater than the maximum transmit length
+ * - not less than four bytes
+ */
+ val = ctx->tx_ndp_modulus;
+
+ if ((val < USB_CDC_NCM_NDP_ALIGN_MIN_SIZE) ||
+ (val != ((-val) & val)) || (val >= ctx->tx_max)) {
+ pr_debug("Using default alignment: 4 bytes\n");
+ ctx->tx_ndp_modulus = USB_CDC_NCM_NDP_ALIGN_MIN_SIZE;
+ }
+
+ /*
+ * verify that the payload alignment is:
+ * - power of two
+ * - not greater than the maximum transmit length
+ * - not less than four bytes
+ */
+ val = ctx->tx_modulus;
+
+ if ((val < USB_CDC_NCM_NDP_ALIGN_MIN_SIZE) ||
+ (val != ((-val) & val)) || (val >= ctx->tx_max)) {
+ pr_debug("Using default transmit modulus: 4 bytes\n");
+ ctx->tx_modulus = USB_CDC_NCM_NDP_ALIGN_MIN_SIZE;
+ }
+
+ /* verify the payload remainder */
+ if (ctx->tx_remainder >= ctx->tx_modulus) {
+ pr_debug("Using default transmit remainder: 0 bytes\n");
+ ctx->tx_remainder = 0;
+ }
+
+ /* adjust TX-remainder according to NCM specification. */
+ ctx->tx_remainder = ((ctx->tx_remainder - ETH_HLEN) &
+ (ctx->tx_modulus - 1));
+
+ /* additional configuration */
+
+ /* set CRC Mode */
+ req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE;
+ req.bNotificationType = USB_CDC_SET_CRC_MODE;
+ req.wValue = cpu_to_le16(USB_CDC_NCM_CRC_NOT_APPENDED);
+ req.wIndex = cpu_to_le16(iface_no);
+ req.wLength = 0;
+
+ err = cdc_ncm_do_request(ctx, &req, NULL, 0, NULL, 1000);
+ if (err)
+ pr_debug("Setting CRC mode off failed\n");
+
+ /* set NTB format */
+ req.bmRequestType = USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE;
+ req.bNotificationType = USB_CDC_SET_NTB_FORMAT;
+ req.wValue = cpu_to_le16(USB_CDC_NCM_NTB16_FORMAT);
+ req.wIndex = cpu_to_le16(iface_no);
+ req.wLength = 0;
+
+ err = cdc_ncm_do_request(ctx, &req, NULL, 0, NULL, 1000);
+ if (err)
+ pr_debug("Setting NTB format to 16-bit failed\n");
+
+ /* set Max Datagram Size (MTU) */
+ req.bmRequestType = USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE;
+ req.bNotificationType = USB_CDC_GET_MAX_DATAGRAM_SIZE;
+ req.wValue = 0;
+ req.wIndex = cpu_to_le16(iface_no);
+ req.wLength = cpu_to_le16(2);
+
+ err = cdc_ncm_do_request(ctx, &req, &max_datagram_size, 0, NULL, 1000);
+ if (err) {
+ pr_debug(" GET_MAX_DATAGRAM_SIZE failed, using size=%u\n",
+ CDC_NCM_MIN_DATAGRAM_SIZE);
+ /* use default */
+ ctx->max_datagram_size = CDC_NCM_MIN_DATAGRAM_SIZE;
+ } else {
+ ctx->max_datagram_size = le16_to_cpu(max_datagram_size);
+
+ if (ctx->max_datagram_size < CDC_NCM_MIN_DATAGRAM_SIZE)
+ ctx->max_datagram_size = CDC_NCM_MIN_DATAGRAM_SIZE;
+ else if (ctx->max_datagram_size > CDC_NCM_MAX_DATAGRAM_SIZE)
+ ctx->max_datagram_size = CDC_NCM_MAX_DATAGRAM_SIZE;
+ }
+
+ if (ctx->netdev->mtu != (ctx->max_datagram_size - ETH_HLEN))
+ ctx->netdev->mtu = ctx->max_datagram_size - ETH_HLEN;
+
+ return 0;
+}
+
+static void
+cdc_ncm_find_endpoints(struct cdc_ncm_ctx *ctx, struct usb_interface *intf)
+{
+ struct usb_host_endpoint *e;
+ u8 ep;
+
+ for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) {
+
+ e = intf->cur_altsetting->endpoint + ep;
+ switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
+ case USB_ENDPOINT_XFER_INT:
+ if (usb_endpoint_dir_in(&e->desc)) {
+ if (ctx->status_ep == NULL)
+ ctx->status_ep = e;
+ }
+ break;
+
+ case USB_ENDPOINT_XFER_BULK:
+ if (usb_endpoint_dir_in(&e->desc)) {
+ if (ctx->in_ep == NULL)
+ ctx->in_ep = e;
+ } else {
+ if (ctx->out_ep == NULL)
+ ctx->out_ep = e;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static void cdc_ncm_free(struct cdc_ncm_ctx *ctx)
+{
+ if (ctx == NULL)
+ return;
+
+ del_timer_sync(&ctx->tx_timer);
+
+ if (ctx->data_claimed) {
+ usb_set_intfdata(ctx->data, NULL);
+ usb_driver_release_interface(driver_of(ctx->intf), ctx->data);
+ }
+
+ if (ctx->control_claimed) {
+ usb_set_intfdata(ctx->control, NULL);
+ usb_driver_release_interface(driver_of(ctx->intf),
+ ctx->control);
+ }
+
+ if (ctx->tx_rem_skb != NULL) {
+ dev_kfree_skb_any(ctx->tx_rem_skb);
+ ctx->tx_rem_skb = NULL;
+ }
+
+ if (ctx->tx_curr_skb != NULL) {
+ dev_kfree_skb_any(ctx->tx_curr_skb);
+ ctx->tx_curr_skb = NULL;
+ }
+
+ kfree(ctx);
+}
+
+static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+ struct cdc_ncm_ctx *ctx;
+ struct usb_driver *driver;
+ u8 *buf;
+ int len;
+ int temp;
+ u8 iface_no;
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (ctx == NULL)
+ goto error;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ init_timer(&ctx->tx_timer);
+ spin_lock_init(&ctx->mtx);
+ ctx->netdev = dev->net;
+
+ /* store ctx pointer in device data field */
+ dev->data[0] = (unsigned long)ctx;
+
+ /* get some pointers */
+ driver = driver_of(intf);
+ buf = intf->cur_altsetting->extra;
+ len = intf->cur_altsetting->extralen;
+
+ ctx->udev = dev->udev;
+ ctx->intf = intf;
+
+ /* parse through descriptors associated with control interface */
+ while ((len > 0) && (buf[0] > 2) && (buf[0] <= len)) {
+
+ if (buf[1] != USB_DT_CS_INTERFACE)
+ goto advance;
+
+ switch (buf[2]) {
+ case USB_CDC_UNION_TYPE:
+ if (buf[0] < sizeof(*(ctx->union_desc)))
+ break;
+
+ ctx->union_desc =
+ (const struct usb_cdc_union_desc *)buf;
+
+ ctx->control = usb_ifnum_to_if(dev->udev,
+ ctx->union_desc->bMasterInterface0);
+ ctx->data = usb_ifnum_to_if(dev->udev,
+ ctx->union_desc->bSlaveInterface0);
+ break;
+
+ case USB_CDC_ETHERNET_TYPE:
+ if (buf[0] < sizeof(*(ctx->ether_desc)))
+ break;
+
+ ctx->ether_desc =
+ (const struct usb_cdc_ether_desc *)buf;
+
+ dev->hard_mtu =
+ le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
+
+ if (dev->hard_mtu <
+ (CDC_NCM_MIN_DATAGRAM_SIZE - ETH_HLEN))
+ dev->hard_mtu =
+ CDC_NCM_MIN_DATAGRAM_SIZE - ETH_HLEN;
+
+ else if (dev->hard_mtu >
+ (CDC_NCM_MAX_DATAGRAM_SIZE - ETH_HLEN))
+ dev->hard_mtu =
+ CDC_NCM_MAX_DATAGRAM_SIZE - ETH_HLEN;
+ break;
+
+ case USB_CDC_NCM_TYPE:
+ if (buf[0] < sizeof(*(ctx->func_desc)))
+ break;
+
+ ctx->func_desc = (const struct usb_cdc_ncm_desc *)buf;
+ break;
+
+ default:
+ break;
+ }
+advance:
+ /* advance to next descriptor */
+ temp = buf[0];
+ buf += temp;
+ len -= temp;
+ }
+
+ /* check if we got everything */
+ if ((ctx->control == NULL) || (ctx->data == NULL) ||
+ (ctx->ether_desc == NULL))
+ goto error;
+
+ /* claim interfaces, if any */
+ if (ctx->data != intf) {
+ temp = usb_driver_claim_interface(driver, ctx->data, dev);
+ if (temp)
+ goto error;
+ ctx->data_claimed = 1;
+ }
+
+ if (ctx->control != intf) {
+ temp = usb_driver_claim_interface(driver, ctx->control, dev);
+ if (temp)
+ goto error;
+ ctx->control_claimed = 1;
+ }
+
+ iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber;
+
+ /* reset data interface */
+ temp = usb_set_interface(dev->udev, iface_no, 0);
+ if (temp)
+ goto error;
+
+ /* initialize data interface */
+ if (cdc_ncm_setup(ctx))
+ goto error;
+
+ /* configure data interface */
+ temp = usb_set_interface(dev->udev, iface_no, 1);
+ if (temp)
+ goto error;
+
+ cdc_ncm_find_endpoints(ctx, ctx->data);
+ cdc_ncm_find_endpoints(ctx, ctx->control);
+
+ if ((ctx->in_ep == NULL) || (ctx->out_ep == NULL) ||
+ (ctx->status_ep == NULL))
+ goto error;
+
+ dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
+
+ usb_set_intfdata(ctx->data, dev);
+ usb_set_intfdata(ctx->control, dev);
+ usb_set_intfdata(ctx->intf, dev);
+
+ temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress);
+ if (temp)
+ goto error;
+
+ dev_info(&dev->udev->dev, "MAC-Address: "
+ "0x%02x:0x%02x:0x%02x:0x%02x:0x%02x:0x%02x\n",
+ dev->net->dev_addr[0], dev->net->dev_addr[1],
+ dev->net->dev_addr[2], dev->net->dev_addr[3],
+ dev->net->dev_addr[4], dev->net->dev_addr[5]);
+
+ dev->in = usb_rcvbulkpipe(dev->udev,
+ ctx->in_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
+ dev->out = usb_sndbulkpipe(dev->udev,
+ ctx->out_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
+ dev->status = ctx->status_ep;
+ dev->rx_urb_size = ctx->rx_max;
+
+ /*
+ * We should get an event when network connection is "connected" or
+ * "disconnected". Set network connection in "disconnected" state
+ * (carrier is OFF) during attach, so the IP network stack does not
+ * start IPv6 negotiation and more.
+ */
+ netif_carrier_off(dev->net);
+ ctx->tx_speed = ctx->rx_speed = 0;
+ return 0;
+
+error:
+ cdc_ncm_free((struct cdc_ncm_ctx *)dev->data[0]);
+ dev->data[0] = 0;
+ dev_info(&dev->udev->dev, "Descriptor failure\n");
+ return -ENODEV;
+}
+
+static void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
+{
+ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+ struct usb_driver *driver;
+
+ if (ctx == NULL)
+ return; /* no setup */
+
+ driver = driver_of(intf);
+
+ usb_set_intfdata(ctx->data, NULL);
+ usb_set_intfdata(ctx->control, NULL);
+ usb_set_intfdata(ctx->intf, NULL);
+
+ /* release interfaces, if any */
+ if (ctx->data_claimed) {
+ usb_driver_release_interface(driver, ctx->data);
+ ctx->data_claimed = 0;
+ }
+
+ if (ctx->control_claimed) {
+ usb_driver_release_interface(driver, ctx->control);
+ ctx->control_claimed = 0;
+ }
+
+ cdc_ncm_free(ctx);
+}
+
+static void cdc_ncm_zero_fill(u8 *ptr, u32 first, u32 end, u32 max)
+{
+ if (first >= max)
+ return;
+ if (first >= end)
+ return;
+ if (end > max)
+ end = max;
+ memset(ptr + first, 0, end - first);
+}
+
+static struct sk_buff *
+cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb)
+{
+ struct sk_buff *skb_out;
+ u32 rem;
+ u32 offset;
+ u32 last_offset;
+ u16 n = 0;
+ u8 timeout = 0;
+
+ /* if there is a remaining skb, it gets priority */
+ if (skb != NULL)
+ swap(skb, ctx->tx_rem_skb);
+ else
+ timeout = 1;
+
+ /*
+ * +----------------+
+ * | skb_out |
+ * +----------------+
+ * ^ offset
+ * ^ last_offset
+ */
+
+ /* check if we are resuming an OUT skb */
+ if (ctx->tx_curr_skb != NULL) {
+ /* pop variables */
+ skb_out = ctx->tx_curr_skb;
+ offset = ctx->tx_curr_offset;
+ last_offset = ctx->tx_curr_last_offset;
+ n = ctx->tx_curr_frame_num;
+
+ } else {
+ /* reset variables */
+ skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC);
+ if (skb_out == NULL) {
+ if (skb != NULL) {
+ dev_kfree_skb_any(skb);
+ ctx->netdev->stats.tx_dropped++;
+ }
+ goto exit_no_skb;
+ }
+
+ /* make room for NTH and NDP */
+ offset = CDC_NCM_ALIGN(0, sizeof(struct usb_cdc_ncm_nth16),
+ ctx->tx_ndp_modulus) +
+ sizeof(struct usb_cdc_ncm_ndp16) +
+ (ctx->tx_max_datagrams + 1) *
+ sizeof(struct usb_cdc_ncm_dpe16);
+
+ /* store last valid offset before alignment */
+ last_offset = offset;
+ /* align first Datagram offset correctly */
+ offset = CDC_NCM_ALIGN(ctx->tx_remainder, offset,
+ ctx->tx_modulus);
+ /* zero buffer till the first IP datagram */
+ cdc_ncm_zero_fill(skb_out->data, 0, offset, offset);
+ n = 0;
+ ctx->tx_curr_frame_num = 0;
+ }
+
+ for (; n < ctx->tx_max_datagrams; n++) {
+ /* check if end of transmit buffer is reached */
+ if (offset >= ctx->tx_max)
+ break;
+
+ /* compute maximum buffer size */
+ rem = ctx->tx_max - offset;
+
+ if (skb == NULL) {
+ skb = ctx->tx_rem_skb;
+ ctx->tx_rem_skb = NULL;
+
+ /* check for end of skb */
+ if (skb == NULL)
+ break;
+ }
+
+ if (skb->len > rem) {
+ if (n == 0) {
+ /* won't fit, MTU problem? */
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ ctx->netdev->stats.tx_dropped++;
+ } else {
+ /* no room for skb - store for later */
+ if (ctx->tx_rem_skb != NULL) {
+ dev_kfree_skb_any(ctx->tx_rem_skb);
+ ctx->netdev->stats.tx_dropped++;
+ }
+ ctx->tx_rem_skb = skb;
+ skb = NULL;
+
+ /* loop one more time */
+ timeout = 1;
+ }
+ break;
+ }
+
+ memcpy(((u8 *)skb_out->data) + offset, skb->data, skb->len);
+
+ ctx->tx_ncm.dpe16[n].wDatagramLength = cpu_to_le16(skb->len);
+ ctx->tx_ncm.dpe16[n].wDatagramIndex = cpu_to_le16(offset);
+
+ /* update offset */
+ offset += skb->len;
+
+ /* store last valid offset before alignment */
+ last_offset = offset;
+
+ /* align offset correctly */
+ offset = CDC_NCM_ALIGN(ctx->tx_remainder, offset,
+ ctx->tx_modulus);
+
+ /* zero padding */
+ cdc_ncm_zero_fill(skb_out->data, last_offset, offset,
+ ctx->tx_max);
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ }
+
+ /* free up any dangling skb */
+ if (skb != NULL) {
+ dev_kfree_skb_any(skb);
+ skb = NULL;
+ ctx->netdev->stats.tx_dropped++;
+ }
+
+ ctx->tx_curr_frame_num = n;
+
+ if (n == 0) {
+ /* wait for more frames */
+ /* push variables */
+ ctx->tx_curr_skb = skb_out;
+ ctx->tx_curr_offset = offset;
+ ctx->tx_curr_last_offset = last_offset;
+ goto exit_no_skb;
+
+ } else if ((n < ctx->tx_max_datagrams) && (timeout == 0)) {
+ /* wait for more frames */
+ /* push variables */
+ ctx->tx_curr_skb = skb_out;
+ ctx->tx_curr_offset = offset;
+ ctx->tx_curr_last_offset = last_offset;
+ /* set the pending count */
+ if (n < CDC_NCM_RESTART_TIMER_DATAGRAM_CNT)
+ ctx->tx_timer_pending = 2;
+ goto exit_no_skb;
+
+ } else {
+ /* frame goes out */
+ /* variables will be reset at next call */
+ }
+
+ /* check for overflow */
+ if (last_offset > ctx->tx_max)
+ last_offset = ctx->tx_max;
+
+ /* revert offset */
+ offset = last_offset;
+
+ /*
+ * If collected data size is less or equal CDC_NCM_MIN_TX_PKT bytes,
+ * we send buffers as it is. If we get more data, it would be more
+ * efficient for USB HS mobile device with DMA engine to receive a full
+ * size NTB, than canceling DMA transfer and receiving a short packet.
+ */
+ if (offset > CDC_NCM_MIN_TX_PKT)
+ offset = ctx->tx_max;
+
+ /* final zero padding */
+ cdc_ncm_zero_fill(skb_out->data, last_offset, offset, ctx->tx_max);
+
+ /* store last offset */
+ last_offset = offset;
+
+ if ((last_offset < ctx->tx_max) && ((last_offset %
+ le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0)) {
+ /* force short packet */
+ *(((u8 *)skb_out->data) + last_offset) = 0;
+ last_offset++;
+ }
+
+ /* zero the rest of the DPEs plus the last NULL entry */
+ for (; n <= CDC_NCM_DPT_DATAGRAMS_MAX; n++) {
+ ctx->tx_ncm.dpe16[n].wDatagramLength = 0;
+ ctx->tx_ncm.dpe16[n].wDatagramIndex = 0;
+ }
+
+ /* fill out 16-bit NTB header */
+ ctx->tx_ncm.nth16.dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
+ ctx->tx_ncm.nth16.wHeaderLength =
+ cpu_to_le16(sizeof(ctx->tx_ncm.nth16));
+ ctx->tx_ncm.nth16.wSequence = cpu_to_le16(ctx->tx_seq);
+ ctx->tx_ncm.nth16.wBlockLength = cpu_to_le16(last_offset);
+ ctx->tx_ncm.nth16.wFpIndex = CDC_NCM_ALIGN(0,
+ sizeof(struct usb_cdc_ncm_nth16),
+ ctx->tx_ndp_modulus);
+
+ memcpy(skb_out->data, &(ctx->tx_ncm.nth16), sizeof(ctx->tx_ncm.nth16));
+ ctx->tx_seq++;
+
+ /* fill out 16-bit NDP table */
+ ctx->tx_ncm.ndp16.dwSignature =
+ cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN);
+ rem = sizeof(ctx->tx_ncm.ndp16) + ((ctx->tx_curr_frame_num + 1) *
+ sizeof(struct usb_cdc_ncm_dpe16));
+ ctx->tx_ncm.ndp16.wLength = cpu_to_le16(rem);
+ ctx->tx_ncm.ndp16.wNextFpIndex = 0; /* reserved */
+
+ memcpy(((u8 *)skb_out->data) + ctx->tx_ncm.nth16.wFpIndex,
+ &(ctx->tx_ncm.ndp16),
+ sizeof(ctx->tx_ncm.ndp16));
+
+ memcpy(((u8 *)skb_out->data) + ctx->tx_ncm.nth16.wFpIndex +
+ sizeof(ctx->tx_ncm.ndp16),
+ &(ctx->tx_ncm.dpe16),
+ (ctx->tx_curr_frame_num + 1) *
+ sizeof(struct usb_cdc_ncm_dpe16));
+
+ /* set frame length */
+ skb_put(skb_out, last_offset);
+
+ /* return skb */
+ ctx->tx_curr_skb = NULL;
+ return skb_out;
+
+exit_no_skb:
+ return NULL;
+}
+
+static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx)
+{
+ /* start timer, if not already started */
+ if (timer_pending(&ctx->tx_timer) == 0) {
+ ctx->tx_timer.function = &cdc_ncm_tx_timeout;
+ ctx->tx_timer.data = (unsigned long)ctx;
+ ctx->tx_timer.expires = jiffies + ((HZ + 999) / 1000);
+ add_timer(&ctx->tx_timer);
+ }
+}
+
+static void cdc_ncm_tx_timeout(unsigned long arg)
+{
+ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)arg;
+ u8 restart;
+
+ spin_lock(&ctx->mtx);
+ if (ctx->tx_timer_pending != 0) {
+ ctx->tx_timer_pending--;
+ restart = 1;
+ } else
+ restart = 0;
+
+ spin_unlock(&ctx->mtx);
+
+ if (restart)
+ cdc_ncm_tx_timeout_start(ctx);
+ else if (ctx->netdev != NULL)
+ usbnet_start_xmit(NULL, ctx->netdev);
+}
+
+static struct sk_buff *
+cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
+{
+ struct sk_buff *skb_out;
+ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+ u8 need_timer = 0;
+
+ /*
+ * The Ethernet API we are using does not support transmitting
+ * multiple Ethernet frames in a single call. This driver will
+ * accumulate multiple Ethernet frames and send out a larger
+ * USB frame when the USB buffer is full or when a single jiffies
+ * timeout happens.
+ */
+ if (ctx == NULL)
+ goto error;
+
+ spin_lock(&ctx->mtx);
+ skb_out = cdc_ncm_fill_tx_frame(ctx, skb);
+ if (ctx->tx_curr_skb != NULL)
+ need_timer = 1;
+ spin_unlock(&ctx->mtx);
+
+ /* Start timer, if there is a remaining skb */
+ if (need_timer)
+ cdc_ncm_tx_timeout_start(ctx);
+
+ if (skb_out)
+ dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
+ return skb_out;
+
+error:
+ if (skb != NULL)
+ dev_kfree_skb_any(skb);
+
+ return NULL;
+}
+
+static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
+{
+ struct sk_buff *skb;
+ struct cdc_ncm_ctx *ctx;
+ int sumlen;
+ int actlen;
+ int temp;
+ int nframes;
+ int x;
+ int offset;
+
+ ctx = (struct cdc_ncm_ctx *)dev->data[0];
+ if (ctx == NULL)
+ goto error;
+
+ actlen = skb_in->len;
+ sumlen = CDC_NCM_NTB_MAX_SIZE_RX;
+
+ if (actlen < (sizeof(ctx->rx_ncm.nth16) + sizeof(ctx->rx_ncm.ndp16))) {
+ pr_debug("frame too short\n");
+ goto error;
+ }
+
+ memcpy(&(ctx->rx_ncm.nth16), ((u8 *)skb_in->data),
+ sizeof(ctx->rx_ncm.nth16));
+
+ if (le32_to_cpu(ctx->rx_ncm.nth16.dwSignature) !=
+ USB_CDC_NCM_NTH16_SIGN) {
+ pr_debug("invalid NTH16 signature <%u>\n",
+ le32_to_cpu(ctx->rx_ncm.nth16.dwSignature));
+ goto error;
+ }
+
+ temp = le16_to_cpu(ctx->rx_ncm.nth16.wBlockLength);
+ if (temp > sumlen) {
+ pr_debug("unsupported NTB block length %u/%u\n", temp, sumlen);
+ goto error;
+ }
+
+ temp = le16_to_cpu(ctx->rx_ncm.nth16.wFpIndex);
+ if ((temp + sizeof(ctx->rx_ncm.ndp16)) > actlen) {
+ pr_debug("invalid DPT16 index\n");
+ goto error;
+ }
+
+ memcpy(&(ctx->rx_ncm.ndp16), ((u8 *)skb_in->data) + temp,
+ sizeof(ctx->rx_ncm.ndp16));
+
+ if (le32_to_cpu(ctx->rx_ncm.ndp16.dwSignature) !=
+ USB_CDC_NCM_NDP16_NOCRC_SIGN) {
+ pr_debug("invalid DPT16 signature <%u>\n",
+ le32_to_cpu(ctx->rx_ncm.ndp16.dwSignature));
+ goto error;
+ }
+
+ if (le16_to_cpu(ctx->rx_ncm.ndp16.wLength) <
+ USB_CDC_NCM_NDP16_LENGTH_MIN) {
+ pr_debug("invalid DPT16 length <%u>\n",
+ le32_to_cpu(ctx->rx_ncm.ndp16.dwSignature));
+ goto error;
+ }
+
+ nframes = ((le16_to_cpu(ctx->rx_ncm.ndp16.wLength) -
+ sizeof(struct usb_cdc_ncm_ndp16)) /
+ sizeof(struct usb_cdc_ncm_dpe16));
+ nframes--; /* we process NDP entries except for the last one */
+
+ pr_debug("nframes = %u\n", nframes);
+
+ temp += sizeof(ctx->rx_ncm.ndp16);
+
+ if ((temp + nframes * (sizeof(struct usb_cdc_ncm_dpe16))) > actlen) {
+ pr_debug("Invalid nframes = %d\n", nframes);
+ goto error;
+ }
+
+ if (nframes > CDC_NCM_DPT_DATAGRAMS_MAX) {
+ pr_debug("Truncating number of frames from %u to %u\n",
+ nframes, CDC_NCM_DPT_DATAGRAMS_MAX);
+ nframes = CDC_NCM_DPT_DATAGRAMS_MAX;
+ }
+
+ memcpy(&(ctx->rx_ncm.dpe16), ((u8 *)skb_in->data) + temp,
+ nframes * (sizeof(struct usb_cdc_ncm_dpe16)));
+
+ for (x = 0; x < nframes; x++) {
+ offset = le16_to_cpu(ctx->rx_ncm.dpe16[x].wDatagramIndex);
+ temp = le16_to_cpu(ctx->rx_ncm.dpe16[x].wDatagramLength);
+
+ /*
+ * CDC NCM ch. 3.7
+ * All entries after first NULL entry are to be ignored
+ */
+ if ((offset == 0) || (temp == 0)) {
+ if (!x)
+ goto error; /* empty NTB */
+ break;
+ }
+
+ /* sanity checking */
+ if (((offset + temp) > actlen) ||
+ (temp > CDC_NCM_MAX_DATAGRAM_SIZE) || (temp < ETH_HLEN)) {
+ pr_debug("invalid frame detected (ignored)"
+ "offset[%u]=%u, length=%u, skb=%p\n",
+ x, offset, temp, skb);
+ if (!x)
+ goto error;
+ break;
+
+ } else {
+ skb = skb_clone(skb_in, GFP_ATOMIC);
+ skb->len = temp;
+ skb->data = ((u8 *)skb_in->data) + offset;
+ skb_set_tail_pointer(skb, temp);
+ usbnet_skb_return(dev, skb);
+ }
+ }
+ return 1;
+error:
+ return 0;
+}
+
+static void
+cdc_ncm_speed_change(struct cdc_ncm_ctx *ctx,
+ struct connection_speed_change *data)
+{
+ uint32_t rx_speed = le32_to_cpu(data->USBitRate);
+ uint32_t tx_speed = le32_to_cpu(data->DSBitRate);
+
+ /*
+ * Currently the USB-NET API does not support reporting the actual
+ * device speed. Do print it instead.
+ */
+ if ((tx_speed != ctx->tx_speed) || (rx_speed != ctx->rx_speed)) {
+ ctx->tx_speed = tx_speed;
+ ctx->rx_speed = rx_speed;
+
+ if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
+ printk(KERN_INFO KBUILD_MODNAME
+ ": %s: %u mbit/s downlink "
+ "%u mbit/s uplink\n",
+ ctx->netdev->name,
+ (unsigned int)(rx_speed / 1000000U),
+ (unsigned int)(tx_speed / 1000000U));
+ } else {
+ printk(KERN_INFO KBUILD_MODNAME
+ ": %s: %u kbit/s downlink "
+ "%u kbit/s uplink\n",
+ ctx->netdev->name,
+ (unsigned int)(rx_speed / 1000U),
+ (unsigned int)(tx_speed / 1000U));
+ }
+ }
+}
+
+static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
+{
+ struct cdc_ncm_ctx *ctx;
+ struct usb_cdc_notification *event;
+
+ ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+ if (urb->actual_length < sizeof(*event))
+ return;
+
+ /* test for split data in 8-byte chunks */
+ if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) {
+ cdc_ncm_speed_change(ctx,
+ (struct connection_speed_change *)urb->transfer_buffer);
+ return;
+ }
+
+ event = urb->transfer_buffer;
+
+ switch (event->bNotificationType) {
+ case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+ /*
+ * According to the CDC NCM specification ch.7.1
+ * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be
+ * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE.
+ */
+ ctx->connected = event->wValue;
+
+ printk(KERN_INFO KBUILD_MODNAME ": %s: network connection:"
+ " %sconnected\n",
+ ctx->netdev->name, ctx->connected ? "" : "dis");
+
+ if (ctx->connected)
+ netif_carrier_on(dev->net);
+ else {
+ netif_carrier_off(dev->net);
+ ctx->tx_speed = ctx->rx_speed = 0;
+ }
+ break;
+
+ case USB_CDC_NOTIFY_SPEED_CHANGE:
+ if (urb->actual_length <
+ (sizeof(*event) + sizeof(struct connection_speed_change)))
+ set_bit(EVENT_STS_SPLIT, &dev->flags);
+ else
+ cdc_ncm_speed_change(ctx,
+ (struct connection_speed_change *) &event[1]);
+ break;
+
+ default:
+ dev_err(&dev->udev->dev, "NCM: unexpected "
+ "notification 0x%02x!\n", event->bNotificationType);
+ break;
+ }
+}
+
+static int cdc_ncm_check_connect(struct usbnet *dev)
+{
+ struct cdc_ncm_ctx *ctx;
+
+ ctx = (struct cdc_ncm_ctx *)dev->data[0];
+ if (ctx == NULL)
+ return 1; /* disconnected */
+
+ return !ctx->connected;
+}
+
+static int
+cdc_ncm_probe(struct usb_interface *udev, const struct usb_device_id *prod)
+{
+ return usbnet_probe(udev, prod);
+}
+
+static void cdc_ncm_disconnect(struct usb_interface *intf)
+{
+ struct usbnet *dev = usb_get_intfdata(intf);
+
+ if (dev == NULL)
+ return; /* already disconnected */
+
+ usbnet_disconnect(intf);
+}
+
+static int cdc_ncm_manage_power(struct usbnet *dev, int status)
+{
+ dev->intf->needs_remote_wakeup = status;
+ return 0;
+}
+
+static const struct driver_info cdc_ncm_info = {
+ .description = "CDC NCM",
+ .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET,
+ .bind = cdc_ncm_bind,
+ .unbind = cdc_ncm_unbind,
+ .check_connect = cdc_ncm_check_connect,
+ .manage_power = cdc_ncm_manage_power,
+ .status = cdc_ncm_status,
+ .rx_fixup = cdc_ncm_rx_fixup,
+ .tx_fixup = cdc_ncm_tx_fixup,
+};
+
+static struct usb_driver cdc_ncm_driver = {
+ .name = "cdc_ncm",
+ .id_table = cdc_devs,
+ .probe = cdc_ncm_probe,
+ .disconnect = cdc_ncm_disconnect,
+ .suspend = usbnet_suspend,
+ .resume = usbnet_resume,
+ .supports_autosuspend = 1,
+};
+
+static struct ethtool_ops cdc_ncm_ethtool_ops = {
+ .get_drvinfo = cdc_ncm_get_drvinfo,
+ .get_link = usbnet_get_link,
+ .get_msglevel = usbnet_get_msglevel,
+ .set_msglevel = usbnet_set_msglevel,
+ .get_settings = usbnet_get_settings,
+ .set_settings = usbnet_set_settings,
+ .nway_reset = usbnet_nway_reset,
+};
+
+static int __init cdc_ncm_init(void)
+{
+ printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_VERSION "\n");
+ return usb_register(&cdc_ncm_driver);
+}
+
+module_init(cdc_ncm_init);
+
+static void __exit cdc_ncm_exit(void)
+{
+ usb_deregister(&cdc_ncm_driver);
+}
+
+module_exit(cdc_ncm_exit);
+
+MODULE_AUTHOR("Hans Petter Selasky");
+MODULE_DESCRIPTION("USB CDC NCM host driver");
+MODULE_LICENSE("Dual BSD/GPL");
--
1.7.0.4
^ permalink raw reply related
* [PATCHv2 1/2] usbnet: changes for upcoming cdc_ncm driver
From: Alexey Orishko @ 2010-11-25 14:14 UTC (permalink / raw)
To: gregkh, linux-usb
Cc: netdev, oliver, yauheni.kaliuta, balbi, sjur.brandeland,
Alexey Orishko
Changes:
include/linux/usb/usbnet.h:
- a new flag to indicate driver's capability to accumulate IP packets in Tx
direction and extract several packets from single skb in Rx direction.
drivers/net/usb/usbnet.c:
- the procedure of counting packets in usbnet was updated due to the
accumulating of IP packets in the driver;
- no short packets are sent if indicated by the flag in the driver_info
structure.
Signed-off-by: Alexey Orishko <alexey.orishko@stericsson.com>
---
Patch v2:
based on suggestion moved Changes to the comment log
updated comments in the code
drivers/net/usb/usbnet.c | 45 ++++++++++++++++++++++++++++++-------------
include/linux/usb/usbnet.h | 6 +++++
2 files changed, 37 insertions(+), 14 deletions(-)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index c04d49e..cff74b8 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -391,14 +391,19 @@ static inline void rx_process (struct usbnet *dev, struct sk_buff *skb)
goto error;
// else network stack removes extra byte if we forced a short packet
- if (skb->len)
- usbnet_skb_return (dev, skb);
- else {
- netif_dbg(dev, rx_err, dev->net, "drop\n");
-error:
- dev->net->stats.rx_errors++;
- skb_queue_tail (&dev->done, skb);
+ if (skb->len) {
+ /* all data was already cloned from skb inside the driver */
+ if (dev->driver_info->flags & FLAG_MULTI_PACKET)
+ dev_kfree_skb_any(skb);
+ else
+ usbnet_skb_return(dev, skb);
+ return;
}
+
+ netif_dbg(dev, rx_err, dev->net, "drop\n");
+error:
+ dev->net->stats.rx_errors++;
+ skb_queue_tail(&dev->done, skb);
}
/*-------------------------------------------------------------------------*/
@@ -971,7 +976,8 @@ static void tx_complete (struct urb *urb)
struct usbnet *dev = entry->dev;
if (urb->status == 0) {
- dev->net->stats.tx_packets++;
+ if (!(dev->driver_info->flags & FLAG_MULTI_PACKET))
+ dev->net->stats.tx_packets++;
dev->net->stats.tx_bytes += entry->length;
} else {
dev->net->stats.tx_errors++;
@@ -1044,8 +1050,13 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
if (info->tx_fixup) {
skb = info->tx_fixup (dev, skb, GFP_ATOMIC);
if (!skb) {
- netif_dbg(dev, tx_err, dev->net, "can't tx_fixup skb\n");
- goto drop;
+ if (netif_msg_tx_err(dev)) {
+ netif_dbg(dev, tx_err, dev->net, "can't tx_fixup skb\n");
+ goto drop;
+ } else {
+ /* cdc_ncm collected packet; waits for more */
+ goto not_drop;
+ }
}
}
length = skb->len;
@@ -1067,13 +1078,18 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
/* don't assume the hardware handles USB_ZERO_PACKET
* NOTE: strictly conforming cdc-ether devices should expect
* the ZLP here, but ignore the one-byte packet.
+ * NOTE2: CDC NCM specification is different from CDC ECM when
+ * handling ZLP/short packets, so cdc_ncm driver will make short
+ * packet itself if needed.
*/
if (length % dev->maxpacket == 0) {
if (!(info->flags & FLAG_SEND_ZLP)) {
- urb->transfer_buffer_length++;
- if (skb_tailroom(skb)) {
- skb->data[skb->len] = 0;
- __skb_put(skb, 1);
+ if (!(info->flags & FLAG_MULTI_PACKET)) {
+ urb->transfer_buffer_length++;
+ if (skb_tailroom(skb)) {
+ skb->data[skb->len] = 0;
+ __skb_put(skb, 1);
+ }
}
} else
urb->transfer_flags |= URB_ZERO_PACKET;
@@ -1122,6 +1138,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
netif_dbg(dev, tx_err, dev->net, "drop, code %d\n", retval);
drop:
dev->net->stats.tx_dropped++;
+not_drop:
if (skb)
dev_kfree_skb_any (skb);
usb_free_urb (urb);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 7ae27a4..44842c8 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -97,6 +97,12 @@ struct driver_info {
#define FLAG_LINK_INTR 0x0800 /* updates link (carrier) status */
+/*
+ * Indicates to usbnet, that USB driver accumulates multiple IP packets.
+ * Affects statistic (counters) and short packet handling.
+ */
+#define FLAG_MULTI_PACKET 0x1000
+
/* init device ... can sleep, or cause probe() failure */
int (*bind)(struct usbnet *, struct usb_interface *);
--
1.7.0.4
^ permalink raw reply related
* Re: Fwd: Simple kernel attack using socketpair. easy, 100% reproductiblle, works under guest. no way to protect :(
From: Eric Dumazet @ 2010-11-25 14:11 UTC (permalink / raw)
To: Марк Коренберг,
David Miller
Cc: netdev
In-Reply-To: <AANLkTi=NTFXXY42nh+j3xM4n-NTPaz=fWKmAY=MpsHze@mail.gmail.com>
Le jeudi 25 novembre 2010 à 13:35 +0500, Марк Коренберг a écrit :
> quick and dirty fix will be not to allow to pass unix socket inside
> unix socket. I think it would not break much applications.
Really, if it was not needed, net/unix/garbage.c would not exist at
all...
It is needed by some apps.
[PATCH] af_unix: limit recursion level
Its easy to eat all kernel memory and trigger NMI watchdog, using an
exploit program that queues unix sockets on top of others.
lkml ref : http://lkml.org/lkml/2010/11/25/8
This mechanism is used in applications, one choice we have is to have a
recursion limit.
Other limits might be needed as well (if we queue other types of files),
since the passfd mechanism is currently limited by socket receive queue
sizes only.
Add a recursion_level to unix socket, allowing up to 4 levels.
Each time we send an unix socket through sendfd mechanism, we copy its
recursion level (plus one) to receiver. This recursion level is cleared
when socket receive queue is emptied.
Reported-by: Марк Коренберг <socketpair@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
include/net/af_unix.h | 2 ++
net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++++++-----
net/unix/garbage.c | 2 +-
3 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 90c9e28..18e5c3f 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -10,6 +10,7 @@ extern void unix_inflight(struct file *fp);
extern void unix_notinflight(struct file *fp);
extern void unix_gc(void);
extern void wait_for_unix_gc(void);
+extern struct sock *unix_get_socket(struct file *filp);
#define UNIX_HASH_SIZE 256
@@ -56,6 +57,7 @@ struct unix_sock {
spinlock_t lock;
unsigned int gc_candidate : 1;
unsigned int gc_maybe_cycle : 1;
+ unsigned char recursion_level;
struct socket_wq peer_wq;
};
#define unix_sk(__sk) ((struct unix_sock *)__sk)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3c95304..2268e67 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
+#define MAX_RECURSION_LEVEL 4
+
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
+ unsigned char max_level = 0;
+ int unix_sock_count = 0;
+
+ for (i = scm->fp->count - 1; i >= 0; i--) {
+ struct sock *sk = unix_get_socket(scm->fp->fp[i]);
+
+ if (sk) {
+ unix_sock_count++;
+ max_level = max(max_level,
+ unix_sk(sk)->recursion_level);
+ }
+ }
+ if (unlikely(max_level > MAX_RECURSION_LEVEL))
+ return -ETOOMANYREFS;
/*
* Need to duplicate file references for the sake of garbage
@@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
if (!UNIXCB(skb).fp)
return -ENOMEM;
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
- return 0;
+ if (unix_sock_count) {
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->fp[i]);
+ }
+ return max_level;
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sk_buff *skb;
long timeo;
struct scm_cookie tmp_scm;
+ int max_level;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out;
err = unix_scm_to_skb(siocb->scm, skb, true);
- if (err)
+ if (err < 0)
goto out_free;
+ max_level = err + 1;
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
@@ -1514,6 +1534,8 @@ restart:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
skb_queue_tail(&other->sk_receive_queue, skb);
+ if (max_level > unix_sk(other)->recursion_level)
+ unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other, len);
sock_put(other);
@@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
int sent = 0;
struct scm_cookie tmp_scm;
bool fds_sent = false;
+ int max_level;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
/* Only send the fds in the first buffer */
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
- if (err) {
+ if (err < 0) {
kfree_skb(skb);
goto out_err;
}
+ max_level = err + 1;
fds_sent = true;
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto pipe_err_free;
skb_queue_tail(&other->sk_receive_queue, skb);
+ if (max_level > unix_sk(other)->recursion_level)
+ unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other, size);
sent += size;
@@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
unix_state_lock(sk);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb == NULL) {
+ unix_sk(sk)->recursion_level = 0;
if (copied >= target)
goto unlock;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c8df6fd..a2d99a5 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
unsigned int unix_tot_inflight;
-static struct sock *unix_get_socket(struct file *filp)
+struct sock *unix_get_socket(struct file *filp)
{
struct sock *u_sock = NULL;
struct inode *inode = filp->f_path.dentry->d_inode;
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox