* [net-next.git 4/7] stmmac: add Rx watchdog optimization to mitigate the DMA irqs
From: Giuseppe CAVALLARO @ 2012-09-05 15:03 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, davem, Giuseppe Cavallaro
In-Reply-To: <1346857432-24657-1-git-send-email-peppe.cavallaro@st.com>
GMAC devices from databook 3.50 onwards have an embedded timer
that can be used for mitigating the number of interrupts.
So this patch adds this optimization.
Old MAC will continue to use NAPI.
At any rate, the Rx watchdog can be disabled (on buggy HW) by
setting the riwt_off field in the platform data.
In this implementation the rx timer stored in the Reg9 is fixed
to the max value.
V2: added a platform parameter to force-disable the rx-watchdog,
for example on new cores where it is buggy.
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
drivers/net/ethernet/stmicro/stmmac/common.h | 7 ++
drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 3 -
.../net/ethernet/stmicro/stmmac/dwmac1000_dma.c | 6 ++
drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h | 3 +-
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 72 ++++++++++++++------
include/linux/stmmac.h | 1 +
7 files changed, 68 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1d6bd3e..63d4bad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -48,6 +48,10 @@
#define CHIP_DBG(fmt, args...) do { } while (0)
#endif
+/* Synopsys Core versions */
+#define DWMAC_CORE_3_40 0x34
+#define DWMAC_CORE_3_50 0x35
+
#undef FRAME_FILTER_DEBUG
/* #define FRAME_FILTER_DEBUG */
@@ -165,6 +169,7 @@ struct stmmac_extra_stats {
#define DMA_HW_FEAT_SAVLANINS 0x08000000 /* Source Addr or VLAN Insertion */
#define DMA_HW_FEAT_ACTPHYIF 0x70000000 /* Active/selected PHY interface */
#define DEFAULT_DMA_PBL 8
+#define DEFAULT_DMA_RIWT 0xff /* Max RI Watchdog Timer count */
enum rx_frame_status { /* IPC status */
good_frame = 0,
@@ -301,6 +306,8 @@ struct stmmac_dma_ops {
struct stmmac_extra_stats *x);
/* If supported then get the optional core features */
unsigned int (*get_hw_feature) (void __iomem *ioaddr);
+ /* Manage HW RX Watchdog*/
+ void (*rx_watchdog) (void __iomem *ioaddr, u8 timer);
};
struct stmmac_ops {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 0e4cace..7ad56af 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -230,8 +230,5 @@ enum rtc_control {
#define GMAC_MMC_TX_INTR 0x108
#define GMAC_MMC_RX_CSUM_OFFLOAD 0x208
-/* Synopsys Core versions */
-#define DWMAC_CORE_3_40 0x34
-
extern const struct stmmac_dma_ops dwmac1000_dma_ops;
#endif /* __DWMAC1000_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 0335000..e2c9431 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -174,6 +174,11 @@ static unsigned int dwmac1000_get_hw_feature(void __iomem *ioaddr)
return readl(ioaddr + DMA_HW_FEATURE);
}
+static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u8 timer)
+{
+ writel(timer, ioaddr + DMA_RX_WATCHDOG);
+}
+
const struct stmmac_dma_ops dwmac1000_dma_ops = {
.init = dwmac1000_dma_init,
.dump_regs = dwmac1000_dump_dma_regs,
@@ -187,4 +192,5 @@ const struct stmmac_dma_ops dwmac1000_dma_ops = {
.stop_rx = dwmac_dma_stop_rx,
.dma_interrupt = dwmac_dma_interrupt,
.get_hw_feature = dwmac1000_get_hw_feature,
+ .rx_watchdog = dwmac1000_rx_watchdog,
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index e49c9a0..4eeff5d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -35,7 +35,8 @@
#define DMA_CONTROL 0x00001018 /* Ctrl (Operational Mode) */
#define DMA_INTR_ENA 0x0000101c /* Interrupt Enable */
#define DMA_MISSED_FRAME_CTR 0x00001020 /* Missed Frame Counter */
-#define DMA_AXI_BUS_MODE 0x00001028 /* AXI Bus Mode */
+#define DMA_RX_WATCHDOG 0x00001024 /* Receive Int Watchdog Timer */
+#define DMA_AXI_BUS_MODE 0x00001028 /* AXI Bus Mode */
#define DMA_CUR_TX_BUF_ADDR 0x00001050 /* Current Host Tx Buffer */
#define DMA_CUR_RX_BUF_ADDR 0x00001054 /* Current Host Rx Buffer */
#define DMA_HW_FEATURE 0x00001058 /* HW Feature Register */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 0f5ab28..c113f28 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -89,6 +89,7 @@ struct stmmac_priv {
int eee_active;
int tx_lpi_timer;
struct timer_list txtimer;
+ int napi_mode;
u32 tx_count_frames;
u32 tx_coal_frames;
u32 tx_coal_timer;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7f5482..55bb3c9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -133,6 +133,7 @@ MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
#define STMMAC_LPI_TIMER(x) (jiffies + msecs_to_jiffies(x))
static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
+static int stmmac_rx(struct stmmac_priv *priv, int limit);
#ifdef CONFIG_STMMAC_DEBUG_FS
static int stmmac_init_fs(struct net_device *dev);
@@ -516,7 +517,7 @@ static void init_dma_desc_rings(struct net_device *dev)
unsigned int txsize = priv->dma_tx_size;
unsigned int rxsize = priv->dma_rx_size;
unsigned int bfsize;
- int dis_ic = 0;
+ int dis_ic = 1;
int des3_as_data_buf = 0;
/* Set the max buffer size according to the DESC mode
@@ -603,6 +604,8 @@ static void init_dma_desc_rings(struct net_device *dev)
priv->dirty_tx = 0;
priv->cur_tx = 0;
+ if (priv->napi_mode)
+ dis_ic = 0;
/* Clear the Rx/Tx descriptors */
priv->hw->desc->init_rx_desc(priv->dma_rx, rxsize, dis_ic);
priv->hw->desc->init_tx_desc(priv->dma_tx, txsize);
@@ -746,7 +749,7 @@ static void stmmac_tx(struct stmmac_priv *priv)
skb_recycle_check(skb, priv->dma_buf_sz))
__skb_queue_head(&priv->rx_recycle, skb);
else
- dev_kfree_skb(skb);
+ dev_kfree_skb_any(skb);
priv->tx_skbuff[entry] = NULL;
}
@@ -816,12 +819,15 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
netif_wake_queue(priv->dev);
}
-static void stmmac_rx_schedule(struct stmmac_priv *priv)
+static void stmmac_rx_work(struct stmmac_priv *priv)
{
- if (likely(napi_schedule_prep(&priv->napi))) {
- stmmac_disable_irq(priv);
- __napi_schedule(&priv->napi);
- }
+ if (priv->napi_mode) {
+ if (likely(napi_schedule_prep(&priv->napi))) {
+ stmmac_disable_irq(priv);
+ __napi_schedule(&priv->napi);
+ }
+ } else
+ stmmac_rx(priv, priv->dma_rx_size);
}
static void stmmac_dma_interrupt(struct stmmac_priv *priv)
@@ -831,7 +837,7 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
if (likely(status == handle_rx)) {
priv->xstats.rx_normal_irq_n++;
- stmmac_rx_schedule(priv);
+ stmmac_rx_work(priv);
}
if (likely(status == handle_tx)) {
priv->xstats.tx_normal_irq_n++;
@@ -1139,7 +1145,17 @@ static int stmmac_open(struct net_device *dev)
if (!ret)
add_timer(&priv->txtimer);
- napi_enable(&priv->napi);
+ /* Enable NAPI on chip older than the 3.50 where the Rx watchdog
+ * is not supported.
+ */
+ if (priv->napi_mode)
+ napi_enable(&priv->napi);
+ else if (priv->hw->dma->rx_watchdog)
+ /* Program RX Watchdog register to the default values
+ * FIXME: provide user value for RIWT
+ */
+ priv->hw->dma->rx_watchdog(priv->ioaddr, DEFAULT_DMA_RIWT);
+
skb_queue_head_init(&priv->rx_recycle);
netif_start_queue(dev);
@@ -1183,7 +1199,8 @@ static int stmmac_release(struct net_device *dev)
netif_stop_queue(dev);
- napi_disable(&priv->napi);
+ if (priv->napi_mode)
+ napi_disable(&priv->napi);
skb_queue_purge(&priv->rx_recycle);
/* Free the IRQ lines */
@@ -1448,14 +1465,15 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
#endif
skb->protocol = eth_type_trans(skb, priv->dev);
- if (unlikely(!priv->plat->rx_coe)) {
- /* No RX COE for old mac10/100 devices */
+ if (unlikely(!priv->plat->rx_coe))
skb_checksum_none_assert(skb);
- netif_receive_skb(skb);
- } else {
+ else
skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (priv->napi_mode)
napi_gro_receive(&priv->napi, skb);
- }
+ else
+ netif_rx(skb);
priv->dev->stats.rx_packets++;
priv->dev->stats.rx_bytes += frame_len;
@@ -2025,7 +2043,15 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
if (flow_ctrl)
priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */
- netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
+ /* Rx Watchdog is available in MAC newer than the 3.40.
+ * In some case, for example on bugged HW, this feature
+ * has to be disable and this can be done by passing the
+ * riwt_off field from the platform. In this case we will use NAPI. */
+ if ((priv->synopsys_id < DWMAC_CORE_3_50) || (priv->plat->riwt_off)) {
+ priv->napi_mode = 1;
+ netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
+ } else
+ pr_info(" Enable RX mitigation via HW Watchdog Timer\n");
spin_lock_init(&priv->lock);
spin_lock_init(&priv->tx_lock);
@@ -2068,7 +2094,8 @@ error_mdio_register:
error_clk_get:
unregister_netdev(ndev);
error_netdev_register:
- netif_napi_del(&priv->napi);
+ if (priv->napi_mode)
+ netif_napi_del(&priv->napi);
free_netdev(ndev);
return NULL;
@@ -2102,7 +2129,7 @@ int stmmac_dvr_remove(struct net_device *ndev)
int stmmac_suspend(struct net_device *ndev)
{
struct stmmac_priv *priv = netdev_priv(ndev);
- int dis_ic = 0;
+ int dis_ic = 1;
unsigned long flags;
if (!ndev || !netif_running(ndev))
@@ -2116,8 +2143,10 @@ int stmmac_suspend(struct net_device *ndev)
netif_device_detach(ndev);
netif_stop_queue(ndev);
- napi_disable(&priv->napi);
-
+ if (priv->napi_mode) {
+ dis_ic = 0;
+ napi_disable(&priv->napi);
+ }
/* Stop TX/RX DMA */
priv->hw->dma->stop_tx(priv->ioaddr);
priv->hw->dma->stop_rx(priv->ioaddr);
@@ -2166,7 +2195,8 @@ int stmmac_resume(struct net_device *ndev)
priv->hw->dma->start_tx(priv->ioaddr);
priv->hw->dma->start_rx(priv->ioaddr);
- napi_enable(&priv->napi);
+ if (priv->napi_mode)
+ napi_enable(&priv->napi);
netif_start_queue(ndev);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index a1547ea..de5b2f8 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -104,6 +104,7 @@ struct plat_stmmacenet_data {
int bugged_jumbo;
int pmt;
int force_sf_dma_mode;
+ int riwt_off;
void (*fix_mac_speed)(void *priv, unsigned int speed);
void (*bus_setup)(void __iomem *ioaddr);
int (*init)(struct platform_device *pdev);
--
1.7.4.4
^ permalink raw reply related
* [net-next.git 5/7] stmmac: get/set coalesce parameters via ethtool
From: Giuseppe CAVALLARO @ 2012-09-05 15:03 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, davem, Giuseppe Cavallaro
In-Reply-To: <1346857432-24657-1-git-send-email-peppe.cavallaro@st.com>
This patch is to get/set the tx/rx coalesce parameters
via ethtool interface.
Tests have been done on several platforms with different GMAC
chips, both with and without the RX watchdog feature.
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
drivers/net/ethernet/stmicro/stmmac/common.h | 8 ++-
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 +
.../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 74 ++++++++++++++++++++
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 56 +++++----------
4 files changed, 100 insertions(+), 39 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 63d4bad..ee9cd8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -169,7 +169,13 @@ struct stmmac_extra_stats {
#define DMA_HW_FEAT_SAVLANINS 0x08000000 /* Source Addr or VLAN Insertion */
#define DMA_HW_FEAT_ACTPHYIF 0x70000000 /* Active/selected PHY interface */
#define DEFAULT_DMA_PBL 8
-#define DEFAULT_DMA_RIWT 0xff /* Max RI Watchdog Timer count */
+
+/* Coalesce defines */
+#define MAX_DMA_RIWT 0xff /* Max RI Watchdog Timer count */
+#define MIN_DMA_RIWT 0x20
+#define STMMAC_COAL_TX_TIMER 40000
+#define STMMAC_MAX_COAL_TX_TICK 100000
+#define STMMAC_TX_MAX_FRAMES(x) (x/4)
enum rx_frame_status { /* IPC status */
good_frame = 0,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index c113f28..577dc13 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -93,6 +93,7 @@ struct stmmac_priv {
u32 tx_count_frames;
u32 tx_coal_frames;
u32 tx_coal_timer;
+ u32 rx_riwt;
};
extern int phyaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 48ad0bc..1428489 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -522,6 +522,78 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
return phy_ethtool_set_eee(priv->phydev, edata);
}
+static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv)
+{
+ unsigned long clk = clk_get_rate(priv->stmmac_clk);
+
+ if (!clk)
+ return 0;
+
+ return (usec * (clk / 1000000)) / 256;
+}
+
+static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv)
+{
+ unsigned long clk = clk_get_rate(priv->stmmac_clk);
+
+ if (!clk)
+ return 0;
+
+ return (riwt * 256) / (clk / 1000000);
+}
+
+static int stmmac_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ struct stmmac_priv *priv = netdev_priv(dev);
+
+ ec->tx_coalesce_usecs = priv->tx_coal_timer;
+ ec->tx_max_coalesced_frames = priv->tx_coal_frames;
+
+ if (!priv->napi_mode)
+ ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt, priv);
+
+ return 0;
+}
+
+static int stmmac_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *ec)
+{
+ struct stmmac_priv *priv = netdev_priv(dev);
+ unsigned int max_tx_coal_frames;
+ unsigned int rx_riwt;
+
+ /* No rx interrupts will be generated if both are zero */
+ if (ec->rx_coalesce_usecs == 0)
+ return -EINVAL;
+
+ /* No tx interrupts will be generated if both are zero */
+ if ((ec->tx_coalesce_usecs == 0) &&
+ (ec->tx_max_coalesced_frames == 0))
+ return -EINVAL;
+
+ max_tx_coal_frames = STMMAC_TX_MAX_FRAMES(priv->dma_tx_size);
+
+ if ((ec->tx_coalesce_usecs > STMMAC_COAL_TX_TIMER) ||
+ (ec->tx_max_coalesced_frames > max_tx_coal_frames))
+ return -EINVAL;
+
+ rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv);
+
+ if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT))
+ return -EINVAL;
+ else if (priv->napi_mode)
+ return -EOPNOTSUPP;
+
+ /* Only copy relevant parameters, ignore all others. */
+ priv->tx_coal_frames = ec->tx_max_coalesced_frames;
+ priv->tx_coal_timer = ec->tx_coalesce_usecs;
+ priv->rx_riwt = rx_riwt;
+ priv->hw->dma->rx_watchdog(priv->ioaddr, priv->rx_riwt);
+
+ return 0;
+}
+
static const struct ethtool_ops stmmac_ethtool_ops = {
.begin = stmmac_check_if_running,
.get_drvinfo = stmmac_ethtool_getdrvinfo,
@@ -542,6 +614,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
.set_eee = stmmac_ethtool_op_set_eee,
.get_sset_count = stmmac_get_sset_count,
.get_ts_info = ethtool_op_get_ts_info,
+ .get_coalesce = stmmac_get_coalesce,
+ .set_coalesce = stmmac_set_coalesce,
};
void stmmac_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 55bb3c9..cea4196 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -77,8 +77,6 @@
#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x)
#define JUMBO_LEN 9000
-#define STMMAC_TX_TM 40000
-#define STMMAC_TX_MAX_FRAMES 64 /* Max coalesced frame */
/* Module parameters */
#define TX_TIMEO 5000 /* default 5 seconds */
@@ -140,6 +138,8 @@ static int stmmac_init_fs(struct net_device *dev);
static void stmmac_exit_fs(void);
#endif
+#define STMMAC_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x))
+
/**
* stmmac_verify_args - verify the driver parameters.
* Description: it verifies if some wrong parameter is passed to the driver.
@@ -999,34 +999,16 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
priv->dma_rx_phy);
}
-static int stmmac_check_coal(int size, int max_coal_frames)
-{
- int ret = 0;
-
- if (max_coal_frames >= size)
- return ret;
-
- return max_coal_frames;
-}
-
-static int stmmac_init_tx_coalesce(struct stmmac_priv *priv)
+static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
{
- int ret = -EOPNOTSUPP;
-
- priv->tx_coal_frames = stmmac_check_coal(priv->dma_tx_size,
- STMMAC_TX_MAX_FRAMES);
- if (priv->tx_coal_frames) {
- /* Set Tx coalesce parameters and timers */
- priv->tx_coal_timer = jiffies + usecs_to_jiffies(STMMAC_TX_TM);
- init_timer(&priv->txtimer);
- priv->txtimer.expires = priv->tx_coal_timer;
- priv->txtimer.data = (unsigned long)priv;
- priv->txtimer.function = stmmac_txtimer;
-
- ret = 0;
- }
-
- return ret;
+ /* Set Tx coalesce parameters and timers */
+ priv->tx_coal_frames = STMMAC_TX_MAX_FRAMES(priv->dma_tx_size);
+ priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
+ init_timer(&priv->txtimer);
+ priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
+ priv->txtimer.data = (unsigned long)priv;
+ priv->txtimer.function = stmmac_txtimer;
+ add_timer(&priv->txtimer);
}
/**
@@ -1141,20 +1123,17 @@ static int stmmac_open(struct net_device *dev)
priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS_TIMER;
priv->eee_enabled = stmmac_eee_init(priv);
- ret = stmmac_init_tx_coalesce(priv);
- if (!ret)
- add_timer(&priv->txtimer);
+ stmmac_init_tx_coalesce(priv);
/* Enable NAPI on chip older than the 3.50 where the Rx watchdog
* is not supported.
*/
if (priv->napi_mode)
napi_enable(&priv->napi);
- else if (priv->hw->dma->rx_watchdog)
- /* Program RX Watchdog register to the default values
- * FIXME: provide user value for RIWT
- */
- priv->hw->dma->rx_watchdog(priv->ioaddr, DEFAULT_DMA_RIWT);
+ else if (priv->hw->dma->rx_watchdog) {
+ priv->rx_riwt = MAX_DMA_RIWT;
+ priv->hw->dma->rx_watchdog(priv->ioaddr, MAX_DMA_RIWT);
+ }
skb_queue_head_init(&priv->rx_recycle);
netif_start_queue(dev);
@@ -1318,7 +1297,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
if (priv->tx_coal_frames > priv->tx_count_frames) {
priv->hw->desc->clear_tx_ic(desc);
priv->xstats.tx_reset_ic_bit++;
- mod_timer(&priv->txtimer, priv->tx_coal_timer);
+ mod_timer(&priv->txtimer,
+ STMMAC_COAL_TIMER(priv->tx_coal_timer));
} else
priv->tx_count_frames = 0;
--
1.7.4.4
^ permalink raw reply related
* [net-next.git 6/7] stmmac: update the doc with new IRQ mitigation
From: Giuseppe CAVALLARO @ 2012-09-05 15:03 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, davem, Giuseppe Cavallaro
In-Reply-To: <1346857432-24657-1-git-send-email-peppe.cavallaro@st.com>
This patch updates the stmmac.txt adding some information
about the new rx/tx mitigation schema adopted in the driver.
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
Documentation/networking/stmmac.txt | 28 +++++++++++++++-------------
1 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index ef9ee71..f9fa6db 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -29,11 +29,9 @@ The kernel configuration option is STMMAC_ETH:
dma_txsize: DMA tx ring size;
buf_sz: DMA buffer size;
tc: control the HW FIFO threshold;
- tx_coe: Enable/Disable Tx Checksum Offload engine;
watchdog: transmit timeout (in milliseconds);
flow_ctrl: Flow control ability [on/off];
pause: Flow Control Pause Time;
- tmrate: timer period (only if timer optimisation is configured).
3) Command line options
Driver parameters can be also passed in command line by using:
@@ -60,17 +58,19 @@ Then the poll method will be scheduled at some future point.
The incoming packets are stored, by the DMA, in a list of pre-allocated socket
buffers in order to avoid the memcpy (Zero-copy).
-4.3) Timer-Driver Interrupt
-Instead of having the device that asynchronously notifies the frame receptions,
-the driver configures a timer to generate an interrupt at regular intervals.
-Based on the granularity of the timer, the frames that are received by the
-device will experience different levels of latency. Some NICs have dedicated
-timer device to perform this task. STMMAC can use either the RTC device or the
-TMU channel 2 on STLinux platforms.
-The timers frequency can be passed to the driver as parameter; when change it,
-take care of both hardware capability and network stability/performance impact.
-Several performance tests on STM platforms showed this optimisation allows to
-spare the CPU while having the maximum throughput.
+4.3) Interrupt Mitigation
+The driver is able to mitigate the number of its DMA interrupts
+using NAPI for the reception on chips older than the 3.50.
+New chips have an HW RX-Watchdog used for this mitigation.
+
+On Tx-side, the mitigation schema is based on a SW timer that calls the
+tx function (stmmac_tx) to reclaim the resource after transmitting the
+frames.
+Also there is another parameter (like a threshold) used to program
+the descriptors so that the interrupt-on-completion bit is not set
+when the frame is sent (xmit).
+
+Mitigation parameters can be tuned by ethtool.
4.4) WOL
Wake up on Lan feature through Magic and Unicast frames are supported for the
@@ -121,6 +121,7 @@ struct plat_stmmacenet_data {
int bugged_jumbo;
int pmt;
int force_sf_dma_mode;
+ int riwt_off;
void (*fix_mac_speed)(void *priv, unsigned int speed);
void (*bus_setup)(void __iomem *ioaddr);
int (*init)(struct platform_device *pdev);
@@ -156,6 +157,7 @@ Where:
o pmt: core has the embedded power module (optional).
o force_sf_dma_mode: force DMA to use the Store and Forward mode
instead of the Threshold.
+ o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode.
o fix_mac_speed: this callback is used for modifying some syscfg registers
(on ST SoCs) according to the link speed negotiated by the
physical layer .
--
1.7.4.4
^ permalink raw reply related
* [net-next.git 7/7] stmmac: update the driver version to Sept_2012
From: Giuseppe CAVALLARO @ 2012-09-05 15:03 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, davem, Giuseppe Cavallaro
In-Reply-To: <1346857432-24657-1-git-send-email-peppe.cavallaro@st.com>
Many new features have been introduced in the driver:
sysFS, Rx HW watchdog... so this patch updates the
driver's version.
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 577dc13..eda34c4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -24,7 +24,7 @@
#define __STMMAC_H__
#define STMMAC_RESOURCE_NAME "stmmaceth"
-#define DRV_MODULE_VERSION "March_2012"
+#define DRV_MODULE_VERSION "Sept_2012"
#include <linux/clk.h>
#include <linux/stmmac.h>
--
1.7.4.4
^ permalink raw reply related
* [net-next.git 0/7 (V2)] stmmac: remove dead code for STMMAC_TIMER and add new mitigation schema.
From: Giuseppe CAVALLARO @ 2012-09-05 15:03 UTC (permalink / raw)
To: netdev; +Cc: bhutchings, davem, Giuseppe Cavallaro
This patch series removes the STMMAC_TIMER option, which is no longer
updated and was never used, and adds a new mitigation scheme.
Removing the Timer option has made the driver slimmer.
On top of this work, it has been easier to introduce the new
mitigation schema based on HW RX-watchdog (available in new cores).
In fact, 3.50 and newer cores have an HW RX-Watchdog that can be used for
mitigating the Rx-interrupts and first results look promising.
Running n-u-t-t-c-p with the following parameters:
Throughput: 500Mbps
UDP Buffer size: 1328bytes
TCP Buffer size: 65536bytes
for example, I got on ST box (arm-based) these improvements:
--------------------------------------------------------------------
Original | With New Mitigation patch
--------------------------------------------------------------------
Test CPU usage pkt/loss | CPU usage pkt/loss
Type Mbps % % |Mbps % %
--------------------------------------------------------------------
UDP-RX 395.5065 95 20.89 |499.9966 25 0.00%
UDP-TX 499.5578 100 0.08915 |499.7156 100 0.06029%
TCP-RX 499.9221 77 |499.8648 41
TCP-TX 389.5719 99 |499.2802 79
--------------------------------------------------------------------
... no regression on ST boxes (SH based) I always test.
This is a brief explanation of the new mitigation schema although there
is a patch that updates the driver's documentation.
o On Rx-side I have:
New GMACs will use the RX-watchdog timer; old ones will continue to
use NAPI to mitigate the RX DMA interrupts.
For the RX-watchdog, there is a parameter that is the RI Watchdog
Timer count. It indicates the number of system clock cycles and can be
set via *ethtool*.
o On Tx-side, the mitigation schema is based on a SW timer
that calls the tx function (stmmac_tx) to reclaim the resource after
transmitting the frames.
Also there is another parameter (a threshold) used to program
the descriptors so that the interrupt-on-completion bit is not set
when the frame is sent (xmit). This means that the stmmac_tx can be
called by the ISR too. Also this parameter can be tuned via ethtool.
V2: the new version of the patches adds ethtool support to get/set
coalesce parameters and totally removes the sysFS support.
Giuseppe Cavallaro (7):
stmmac: remove dead code for TIMER
stmmac: manage tx clean out of rx_poll
stmmac: add the initial tx coalesce schema
stmmac: add Rx watchdog optimization to mitigate the DMA irqs
stmmac: get/set coalesce parameters via ethtool
stmmac: update the doc with new IRQ mitigation
stmmac: update the driver version to Sept_2012
Documentation/networking/stmmac.txt | 28 ++-
drivers/net/ethernet/stmicro/stmmac/Kconfig | 25 --
drivers/net/ethernet/stmicro/stmmac/Makefile | 1 -
drivers/net/ethernet/stmicro/stmmac/common.h | 30 ++-
drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 3 -
.../net/ethernet/stmicro/stmmac/dwmac1000_dma.c | 6 +
drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h | 3 +-
drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 7 +-
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 14 +-
.../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 83 +++++++-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 245 ++++++++------------
drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c | 134 -----------
drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h | 46 ----
include/linux/stmmac.h | 1 +
14 files changed, 239 insertions(+), 387 deletions(-)
delete mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.c
delete mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h
--
1.7.4.4
^ permalink raw reply
* Re: [PATCH 1/4] net: mvneta: driver for Marvell Armada 370/XP network unit
From: Florian Fainelli @ 2012-09-05 15:25 UTC (permalink / raw)
To: linux-arm-kernel
Cc: Lior Amsalem, Andrew Lunn, Ike Pan, Albert Stone, Nadav Haklai,
Ian Molton, Lennert Buytenhek, David Marlin, Rami Rosen,
Yehuda Yitschak, Jani Monoses, Tawfik Bayouk, Dan Frazier,
Eran Ben-Avi, Li Li, Leif Lindholm, Sebastian Hesselbarth,
Jason Cooper, Arnd Bergmann, Jon Masters, Ben Dooks,
Gregory Clement, Thomas Petazzoni
In-Reply-To: <1346764004-16332-2-git-send-email-thomas.petazzoni@free-electrons.com>
Hello Thomas,
The overall driver looks very nice, my biggest concern with this driver being
that it does not implement phylib and therefore reimplements a bit of existing
code. I am not commenting on how to represent these MDIO/PHY devices using
Device Tree since this has been addressed already.
Once you register a MDIO bus for your interface, please make sure that you
give it a unique name in the system (<pdev->name>-<pdev->id>).
Other comments inline.
On Tuesday 04 September 2012 15:06:41 Thomas Petazzoni wrote:
[snip]
> +
> +/* Increment txq get counter */
> +static void mvneta_inc_get(struct mvneta_tx_queue *txq)
> +{
> + txq->txq_get_index++;
> + if (txq->txq_get_index == txq->size)
> + txq->txq_get_index = 0;
> +}
> +
> +/* Increment txq put counter */
> +static void mvneta_inc_put(struct mvneta_tx_queue *txq)
> +{
> + txq->txq_put_index++;
> + if (txq->txq_put_index == txq->size)
> + txq->txq_put_index = 0;
> +}
I would make it clear that these helpers operate on the txq, and suffix it with
_txq.
> +
> +
> +/* Clear all MIB counters */
> +static void mvneta_mib_counters_clear(struct mvneta_port *pp)
> +{
> + int i;
> + u32 dummy;
> +
> + /* Perform dummy reads from MIB counters */
> + for (i = 0; i < MVNETA_MIB_LATE_COLLISION; i += 4)
> + dummy = mvreg_read(pp, (MVNETA_MIB_COUNTERS_BASE + i));
> +}
> +
> +/* Read speed, duplex, and flow control from port status register */
> +static int mvneta_link_status(struct mvneta_port *pp,
> + struct mvneta_lnk_status *status)
> +{
> + u32 val;
> +
> + val = mvreg_read(pp, MVNETA_GMAC_STATUS);
> +
> + if (val & MVNETA_GMAC_SPEED_1000_MASK)
> + status->speed = MVNETA_SPEED_1000;
> + else if (val & MVNETA_GMAC_SPEED_100_MASK)
> + status->speed = MVNETA_SPEED_100;
> + else
> + status->speed = MVNETA_SPEED_10;
> +
> + if (val & MVNETA_GMAC_LINK_UP_MASK)
> + status->linkup = 1;
> + else
> + status->linkup = 0;
> +
> + if (val & MVNETA_GMAC_FULL_DUPLEX_MASK)
> + status->duplex = MVNETA_DUPLEX_FULL;
> + else
> + status->duplex = MVNETA_DUPLEX_HALF;
> +
> + if (val & MVNETA_GMAC_TX_FLOW_CTRL_ACTIVE_MASK)
> + status->tx_fc = MVNETA_FC_ACTIVE;
> + else if (val & MVNETA_GMAC_TX_FLOW_CTRL_ENABLE_MASK)
> + status->tx_fc = MVNETA_FC_ENABLE;
> + else
> + status->tx_fc = MVNETA_FC_DISABLE;
> +
> + if (val & MVNETA_GMAC_RX_FLOW_CTRL_ACTIVE_MASK)
> + status->rx_fc = MVNETA_FC_ACTIVE;
> + else if (val & MVNETA_GMAC_RX_FLOW_CTRL_ENABLE_MASK)
> + status->rx_fc = MVNETA_FC_ENABLE;
> + else
> + status->rx_fc = MVNETA_FC_DISABLE;
> +
I would rather see you use a struct phy_device and update its properties
instead of keeping a local copy of it. This would allow you to have consistent
reporting through ethtool, I have more comments on this later on as well.
> +static void mvneta_rxq_non_occup_desc_add(struct mvneta_port *pp,
> + struct mvneta_rx_queue *rxq,
> + int rx_desc)
> +{
> + u32 val;
> +
> + /* Only 255 descriptors can be added at once */
> + while (rx_desc > 0xff) {
> + val = (0xff << MVNETA_RXQ_ADD_NON_OCCUPIED_OFFS);
> + mvreg_write(pp, MVNETA_RXQ_STATUS_UPDATE_REG(rxq->id), val);
> + rx_desc = rx_desc - 0xff;
> + }
You could probably use a define here for 255/0xff.
[snip]
> + m_delay = 0;
This does not look like a useful name; count would be better.
> + do {
> + if (m_delay >= MVNETA_RX_DISABLE_TIMEOUT_MSEC) {
> + netdev_info(pp->dev,
> + "TIMEOUT for RX stopped ! rx_queue_cmd: 0x08%x\n",
> + val);
Please use a different logging level such as netdev_err() or netdev_warn() for
instance.
> + break;
> + }
> + mdelay(1);
> + m_delay++;
What about using msleep() instead here?
> +
> + val = mvreg_read(pp, MVNETA_RXQ_CMD);
> + } while (val & 0xff);
> +
> + /* Stop Tx port activity. Check port Tx activity. Issue stop
> + command for active channels only */
> + val = (mvreg_read(pp, MVNETA_TXQ_CMD)) & MVNETA_TXQ_ENABLE_MASK;
> +
> + if (val != 0)
> + mvreg_write(pp, MVNETA_TXQ_CMD,
> + (val << MVNETA_TXQ_DISABLE_OFFS));
> +
> + /* Wait for all Tx activity to terminate. */
> + m_delay = 0;
> + do {
> + if (m_delay >= MVNETA_TX_DISABLE_TIMEOUT_MSEC) {
> + netdev_info(pp->dev,
> + "TIMEOUT for TX stopped tx_queue_cmd - 0x%08x\n",
> + val);
> + break;
> + }
> + mdelay(1);
> + m_delay++;
> +
> + /* Check TX Command reg that all Txqs are stopped */
> + val = mvreg_read(pp, MVNETA_TXQ_CMD);
Ditto
> +
> + } while (val & 0xff);
> + tx_fifo_empty_mask |= MVNETA_TX_FIFO_EMPTY_MASK;
> + tx_in_prog_mask |= MVNETA_TX_IN_PRGRS_MASK;
> +
> + /* Double check to verify that TX FIFO is empty */
> + m_delay = 0;
> + while (1) {
> + do {
> + if (m_delay >= MVNETA_TX_FIFO_EMPTY_TIMEOUT) {
> + netdev_info(pp->dev,
> + "TX FIFO empty timeout status=0x08%x,
empty=%x, in_prog=%x",
> + val, tx_fifo_empty_mask,
> + tx_in_prog_mask);
> + break;
> + }
> + mdelay(1);
> + m_delay++;
Ditto
> +
> + val = mvreg_read(pp, MVNETA_PORT_STATUS);
> + } while (((val & tx_fifo_empty_mask) != tx_fifo_empty_mask)
> + || ((val & tx_in_prog_mask) != 0));
> +
> + if (m_delay >= MVNETA_TX_FIFO_EMPTY_TIMEOUT)
> + break;
> +
> + val = mvreg_read(pp, MVNETA_PORT_STATUS);
> + if (((val & tx_fifo_empty_mask) == tx_fifo_empty_mask) &&
> + ((val & tx_in_prog_mask) == 0))
> + break;
> + else
> + netdev_info(pp->dev, "TX FIFO Empty double check failed. %d
msec status=0x%x, empty=0x%x, in_prog=0x%x\n",
> + m_delay, val, tx_fifo_empty_mask,
> + tx_in_prog_mask);
> + }
> +
> + udelay(200);
> +}
[snip]
> +
> +/* This method sets defaults to the NETA port:
> + * Clears interrupt Cause and Mask registers.
> + * Clears all MAC tables.
> + * Sets defaults to all registers.
> + * Resets RX and TX descriptor rings.
> + * Resets PHY.
> + * This method can be called after mvneta_port_down() to return the port
> + * settings to defaults.
> + */
Please use standard kernel-doc style comments.
[snip]
> +/* Read the Link Up bit (LinkUp) in port MAC control register */
> +static int mvneta_link_is_up(struct mvneta_port *pp)
> +{
> + u32 val;
> + val = mvreg_read(pp, MVNETA_GMAC_STATUS);
> + if (val & MVNETA_GMAC_LINK_UP_MASK)
> + return 1;
return mvreg_read(pp, MVNETA_GMAC_STATUS) & MVNETA_GMAC_LINK_UP_MASK;
and make it static inline.
> +
> + return 0;
> +}
> +
> +/* Get phy address */
> +static int mvneta_phy_addr_get(struct mvneta_port *pp)
> +{
> + unsigned int val;
> +
> + val = mvreg_read(pp, MVNETA_PHY_ADDR);
> + val &= 0x1f;
Use PHY_MAX_ADDR - 1 instead here.
> + return val;
> +}
> +
[snip]
> +/* Display status (link, duplex, speed) of the port */
> +void mvneta_link_status_print(struct mvneta_port *pp)
> +{
> + struct mvneta_lnk_status link;
> + char *speedstr, *duplexstr;
> +
> + mvneta_link_status(pp, &link);
> +
> + if (link.linkup) {
> + if (link.speed == MVNETA_SPEED_1000)
> + speedstr = "1 Gbps";
> + else if (link.speed == MVNETA_SPEED_100)
> + speedstr = "100 Mbps";
> + else
> + speedstr = "10 Mbps";
> +
> + if (link.duplex == MVNETA_DUPLEX_FULL)
> + duplexstr = "full";
> + else
> + duplexstr = "half";
> +
> + netdev_info(pp->dev,
> + "link up, %s duplex, speed %s\n",
> + duplexstr, speedstr);
> + } else
> + netdev_info(pp->dev, "link down\n");
> +}
You should rather define a phylib adjust_link callback to do this. Otherwise
please reduce the indentation by handling the case when the link is down first.
> +
> +/* Display more error info */
> +static void mvneta_rx_error(struct mvneta_port *pp,
> + struct mvneta_rx_desc *rx_desc)
> +{
> + u32 status = rx_desc->status;
> +
> + if (pp->dev)
> + pp->dev->stats.rx_errors++;
Please do this outside of this function and just let it print the error.
> +
> + if ((status & MVNETA_RXD_FIRST_LAST_DESC_MASK)
> + != MVNETA_RXD_FIRST_LAST_DESC_MASK) {
> + netdev_err(pp->dev,
> + "bad rx status %08x (buffer oversize), size=%d\n",
> + rx_desc->status, rx_desc->data_size);
> + return;
> + }
[snip]
> +
> +/* Refill processing */
> +static int mvneta_rx_refill(struct mvneta_port *pp,
> + struct mvneta_rx_desc *rx_desc)
> +
> +{
> + unsigned long phys_addr;
> + struct sk_buff *skb;
> +
> + skb = netdev_alloc_skb(pp->dev, pp->pkt_size);
> + if (!skb) {
> + mvneta_add_cleanup_timer(pp);
> + return 1;
> + }
> +
> + phys_addr = dma_map_single(pp->dev->dev.parent, skb->head,
> + MVNETA_RX_BUF_SIZE(pp->pkt_size),
> + DMA_FROM_DEVICE);
Check that your phys_addr cookie has been successfully mapped using
dma_mapping_error().
[snip]
> +/* Main tx processing */
> +static int mvneta_tx(struct sk_buff *skb, struct net_device *dev)
> +{
> + struct mvneta_port *pp = netdev_priv(dev);
> +
> + int frags = 0;
> + int res = NETDEV_TX_OK;
> + u32 tx_cmd;
> + struct mvneta_tx_queue *txq = NULL;
> + struct mvneta_tx_desc *tx_desc;
> +
> + if (!test_bit(MVNETA_F_STARTED_BIT, &pp->flags))
> + goto out;
Isn't this equivalent to !netif_running(dev)? At least print some message so
we know that this is not supposed to happen.
> +
> + txq = &pp->txqs[mvneta_txq_def];
> +
> + frags = skb_shinfo(skb)->nr_frags + 1;
> +
> + tx_desc = mvneta_tx_desc_get(pp, txq, frags);
> + if (tx_desc == NULL) {
> + frags = 0;
> + dev->stats.tx_dropped++;
> + res = NETDEV_TX_BUSY;
> + goto out;
> + }
> +
> + tx_cmd = mvneta_skb_tx_csum(pp, skb);
> +
> + tx_desc->data_size = skb_headlen(skb);
> +
> + tx_desc->buf_phys_addr = dma_map_single(dev->dev.parent, skb->data,
> + tx_desc->data_size,
> + DMA_TO_DEVICE);
Please check this dma_map_single() return value too.
[snip]
> +static int mvneta_addr_crc(unsigned char *addr)
> +{
> + int crc = 0;
> + int i;
> +
> + for (i = 0; i < 6; i++) {
ETH_ALEN instead of 6 to make it clear it operates on addresses.
> + int j;
> +
> + crc = (crc ^ addr[i]) << 8;
> + for (j = 7; j >= 0; j--) {
> + if (crc & (0x100 << j))
> + crc ^= 0x107 << j;
> + }
> + }
> +
> + return crc;
> +}
[snip]
> +
> +/* Interrupt handling - the callback for request_irq() */
> +static irqreturn_t mvneta_isr(int irq, void *dev_id)
> +{
> + struct mvneta_port *pp = (struct mvneta_port *)dev_id;
> +
> + /* Mask all interrupts */
> + mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
> +
> + /* Verify that the device not already on the polling list */
> + if (napi_schedule_prep(&pp->napi))
> + __napi_schedule(&pp->napi);
Doesn't the hardware generate interrupts for tx completion, PHY interrupts,
etc.?
> +
> + return IRQ_HANDLED;
> +}
> +
> +/* Handle link event */
> +static void mvneta_link_event(struct mvneta_port *pp)
> +{
> + struct net_device *dev = pp->dev;
> +
> + /* Check Link status on ethernet port */
> +
> + if (mvneta_link_is_up(pp)) {
> + mvneta_port_up(pp);
> + set_bit(MVNETA_F_LINK_UP_BIT, &pp->flags);
> +
> + if (dev) {
> + netif_carrier_on(dev);
> + netif_tx_wake_all_queues(dev);
> + }
> + } else {
> + if (dev) {
> + netif_carrier_off(dev);
> + netif_tx_stop_all_queues(dev);
> + }
> + mvneta_port_down(pp);
> + clear_bit(MVNETA_F_LINK_UP_BIT, &pp->flags);
> + }
> +
> + mvneta_link_status_print(pp);
Again, this is taken care of by phylib nicely, and does not require you to
have this F_LINK_UP_BIT.
[snip]
> +
> +/* Handle rxq fill: allocates rxq skbs; called when initializing a port */
> +static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue
*rxq,
> + int num)
> +{
> + int i;
> + struct sk_buff *skb;
> + struct mvneta_rx_desc *rx_desc;
> + unsigned long phys_addr;
> + struct net_device *dev = pp->dev;
> +
> + for (i = 0; i < num; i++) {
> + skb = dev_alloc_skb(pp->pkt_size);
> + if (!skb) {
> + netdev_err(pp->dev, "%s:rxq %d, %d of %d buffs filled\n",
> + __func__, rxq->id, i, num);
> + break;
> + }
> +
> + rx_desc = rxq->descs + i;
> + memset(rx_desc, 0, sizeof(struct mvneta_rx_desc));
> + phys_addr = dma_map_single(dev->dev.parent, skb->head,
> + MVNETA_RX_BUF_SIZE(pp->pkt_size),
> + DMA_FROM_DEVICE);
Here again, check phys_addr.
> + mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)skb);
> + }
> +
> + /* add this num of RX descriptors as non occupied (ready to get pkts) */
> + mvneta_rxq_non_occup_desc_add(pp, rxq, i);
> +
> + return i;
> +}
> +
[snip]
> +
> +/* Create a specified RX queue */
> +static int mvneta_rxq_init(struct mvneta_port *pp,
> + struct mvneta_rx_queue *rxq)
> +
> +{
> + rxq->size = pp->rx_ring_size;
> +
> + /* Allocate DMA descriptors array */
> + rxq->descs_orig = dma_alloc_coherent(pp->dev->dev.parent,
> + MVNETA_RX_TOTAL_DESCS_SIZE(rxq),
> + &rxq->descs_phys_orig,
> + GFP_KERNEL);
> + if (rxq->descs_orig == NULL) {
Use dma_mapping_error() instead.
> + netdev_err(pp->dev, "rxQ=%d: Can't allocate %d bytes for %d RX
descr\n",
> + rxq->id, MVNETA_RX_TOTAL_DESCS_SIZE(rxq), rxq->size);
> + return -ENOMEM;
> + }
> +
> + /* Make sure descriptor address is cache line size aligned */
> + rxq->descs = PTR_ALIGN(rxq->descs_orig, MVNETA_CPU_D_CACHE_LINE_SIZE);
> + rxq->descs_phys = ALIGN(rxq->descs_phys_orig,
> + MVNETA_CPU_D_CACHE_LINE_SIZE);
> +
> + rxq->last_desc = rxq->size - 1;
Don't you need some kind of barrier here? I do not know exactly how coherent
your peripherals and memory are, just wondering.
> +
> + /* Set Rx descriptors queue starting address */
> + mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
> + mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
> +
> + /* Set Offset */
> + mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD);
> +
> + /* Set coalescing pkts and time */
> + mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
> + mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
> +
> + /* Fill RXQ with buffers from RX pool */
> + mvneta_rxq_buf_size_set(pp, rxq, MVNETA_RX_BUF_SIZE(pp->pkt_size));
> + mvneta_rxq_bm_disable(pp, rxq);
> + mvneta_rxq_fill(pp, rxq, rxq->size);
> +
> + return 0;
> +}
> +
[snip]
> +static int mvneta_txq_init(struct mvneta_port *pp,
> + struct mvneta_tx_queue *txq)
> +{
> + txq->size = pp->tx_ring_size;
> +
> + /* Allocate DMA descriptors array */
> + txq->descs_orig = dma_alloc_coherent(pp->dev->dev.parent,
> + MVNETA_TX_TOTAL_DESCS_SIZE(txq),
> + &txq->descs_phys_orig,
> + GFP_KERNEL);
> + if (txq->descs_orig == NULL) {
Use dma_mapping_error().
> + netdev_err(pp->dev, "txQ=%d: Can't allocate %d bytes for %d TX
descr\n",
> + txq->id, MVNETA_TX_TOTAL_DESCS_SIZE(txq), txq->size);
> + return -ENOMEM;
> + }
> +
[snip]
> +/* Fill rx buffers, start Rx/Tx activity, set coalesing,
> +* clear and unmask interrupt bits
> +*/
> +static int mvneta_start_internals(struct mvneta_port *pp, int mtu)
> +{
> + int err = 0;
> +
> + pp->pkt_size = MVNETA_RX_PKT_SIZE(mtu);
> + if (test_bit(MVNETA_F_STARTED_BIT, &pp->flags))
> + return -EINVAL;
You probably mean -EBUSY here instead?
> +
> + if (mvneta_max_rx_size_set(pp, MVNETA_RX_PKT_SIZE(mtu))) {
> + netdev_err(pp->dev,
> + "%s: can't set maxRxSize=%d mtu=%d\n",
> + __func__, MVNETA_RX_PKT_SIZE(mtu), mtu);
> + return -EINVAL;
> + }
> +
> + err = mvneta_setup_rxqs(pp);
> + if (unlikely(err))
> + return err;
> +
> + err = mvneta_setup_txqs(pp);
> + if (unlikely(err)) {
> + mvneta_cleanup_rxqs(pp);
> + return err;
> + }
> +
> + mvneta_txq_max_tx_size_set(pp, MVNETA_RX_PKT_SIZE(mtu));
> +
> + /* start the Rx/Tx activity */
> + mvneta_port_enable(pp);
> +
> + set_bit(MVNETA_F_LINK_UP_BIT, &pp->flags);
> + set_bit(MVNETA_F_STARTED_BIT, &pp->flags);
> +
> + return 0;
> +}
> +
> +/* Stop port Rx/Tx activity, free skb's from Rx/Tx rings */
> +static int mvneta_stop_internals(struct mvneta_port *pp)
> +{
> + clear_bit(MVNETA_F_STARTED_BIT, &pp->flags);
> +
> + /* Stop the port activity */
> + mvneta_port_disable(pp);
> +
> + /* Clear all ethernet port interrupts */
> + mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
> + mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
> +
> + /* Mask all interrupts */
> + mvneta_interrupts_mask(pp);
> + smp_call_function_many(cpu_online_mask, mvneta_interrupts_mask,
> + pp, 1);
> +
> + /* Reset TX port here. */
> + mvneta_tx_reset(pp);
> +
> + mvneta_cleanup_rxqs(pp);
> + mvneta_cleanup_txqs(pp);
> +
> + return 0;
> +
> +}
> +
> +/* Start the port, connect to port interrupt line, unmask interrupts */
> +static int mvneta_start(struct net_device *dev)
> +{
> + struct mvneta_port *pp = netdev_priv(dev);
> +
> + /* In default link is down */
> + netif_carrier_off(dev);
> + netif_tx_stop_all_queues(dev);
> +
> + /* Fill rx buffers, start Rx/Tx activity, set coalescing */
> + if (mvneta_start_internals(pp, dev->mtu) != 0) {
> + netdev_err(dev, "start internals failed\n");
> + return -ENODEV;
> + }
> +
> + /* Enable polling on the port, must be used after netif_poll_disable */
> + napi_enable(&pp->napi);
> +
> + if (pp->flags & MVNETA_F_LINK_UP) {
> + netif_carrier_on(dev);
> + netif_tx_wake_all_queues(dev);
> + } else {
> + netdev_info(dev, "%s: NOT MVNETA_F_LINK_UP\n", __func__);
> + }
Remove this message as well.
> +
> + /* Connect to port interrupt line */
> + if (request_irq(dev->irq, mvneta_isr, (IRQF_DISABLED), "mv_eth", pp)) {
> + netdev_err(dev, "cannot request irq %d\n", dev->irq);
> + napi_disable(&pp->napi);
> + goto error;
> + }
You should probably request the interrupt prior to calling napi_enable()
> +
> + /* Unmask interrupts */
> + mvneta_interrupts_unmask(pp);
> + smp_call_function_many(cpu_online_mask,
> + mvneta_interrupts_unmask,
> + pp, 1);
> +
> + netdev_info(dev, "started\n");
Remove this please.
> + return 0;
> +
> +error:
> + netdev_err(dev, "start failed\n");
> + mvneta_cleanup_rxqs(pp);
> + mvneta_cleanup_txqs(pp);
> +
> + return -ENODEV;
> +}
> +
> + if (dev->irq != 0)
> + free_irq(dev->irq, pp);
This looks superfluous, you refuse to bring up the interface if the interrupt
requesting fails.
> +
> + netdev_info(dev, "stopped\n");
> +
> + return 0;
> +}
> +
> +
> +/* tx timeout callback - display a message and stop/start the network
device */
> +static void mvneta_tx_timeout(struct net_device *dev)
> +{
> + netdev_info(dev, "tx timeout\n");
> + if (netif_running(dev)) {
> + mvneta_stop(dev);
> + mvneta_start(dev);
> + }
You should never end up in the case where the interface is not running and
you face a transmit timeout.
[snip]
> +/* Change the device mtu */
> +static int mvneta_change_mtu(struct net_device *dev, int mtu)
> +{
> + int old_mtu = dev->mtu;
> +
> + mtu = mvneta_check_mtu_valid(dev, mtu);
> + if (mtu < 0)
> + return -EINVAL;
> +
> + dev->mtu = mtu;
> +
> + if (!netif_running(dev)) {
> + netdev_info(dev, "change mtu %d (buffer-size %d) to %d (buffer-size
%d)\n",
> + old_mtu, MVNETA_RX_PKT_SIZE(old_mtu),
> + dev->mtu, MVNETA_RX_PKT_SIZE(dev->mtu));
> + return 0;
Remove this message.
> + }
> +
> + if (mvneta_stop(dev)) {
> + netdev_err(dev, "stop interface failed\n");
> + goto error;
> + }
> +
> + if (mvneta_start(dev)) {
> + netdev_err(dev, "start interface failed\n");
> + goto error;
> + }
Propagate the returned error codes back to the caller.
> +
> + netdev_info(dev, "change mtu %d (buffer-size %d) to %d (buffer-size %d)\n",
> + old_mtu, MVNETA_RX_PKT_SIZE(old_mtu),
> + dev->mtu, MVNETA_RX_PKT_SIZE(dev->mtu));
Remove this message too.
> +
> + return 0;
> +
> +error:
> + netdev_info(dev, "change mtu failed\n");
> + return -EINVAL;
> +}
> +
> +/* Handle setting mac address (low level) */
> +static int mvneta_set_mac_addr_internals(struct net_device *dev, void
*addr)
> +{
> + struct mvneta_port *pp = netdev_priv(dev);
> + u8 *mac = addr + 2;
> + int i;
> +
> + /* Remove previous address table entry */
> + if (mvneta_mac_addr_set(pp, dev->dev_addr, -1) != 0) {
> + netdev_err(dev, "mvneta_mac_addr_set failed\n");
> + return -EINVAL;
> + }
> +
> + /* Set new addr in hw */
> + if (mvneta_mac_addr_set(pp, mac, mvneta_rxq_def) != 0) {
> + netdev_err(dev, "mvneta_mac_addr_set failed\n");
> + return -EINVAL;
> + }
> +
> + /* Set addr in the device */
> + for (i = 0; i < MVNETA_MAC_ADDR_SIZE; i++)
> + dev->dev_addr[i] = mac[i];
ETH_ALEN.
> +
> + netdev_info(dev, "mac address changed\n");
Remove this please.
> +
> + return 0;
> +}
> +
> +/* Handle setting mac address */
> +static int mvneta_set_mac_addr(struct net_device *dev, void *addr)
> +{
> + if (!netif_running(dev)) {
> + if (mvneta_set_mac_addr_internals(dev, addr) == -1)
> + goto error;
> + return 0;
> + }
Usually you just check if the interface is running, and if it is return
something like -EBUSY.
> +
> + if (mvneta_stop(dev)) {
> + netdev_err(dev, "stop interface failed\n");
> + goto error;
> + }
> +
> + if (mvneta_set_mac_addr_internals(dev, addr) == -1)
> + goto error;
> +
> + if (mvneta_start(dev)) {
> + netdev_err(dev, "start interface failed\n");
> + goto error;
> + }
Propagate error codes here too please.
> +
> + return 0;
> +
> +error:
> + netdev_err(dev, "set mac addr failed\n");
> + return -EINVAL;
> +}
> +
> +/*
> + * Called when a network interface is made active.
> + * Returns 0 on success, -EINVAL or =ENODEV on failure
> + * mvneta_open() is called when a network interface is made
> + * active by the system (IFF_UP). We set the mac address and
> + * invoke mvneta_start() to start the device.
> + */
> +static int mvneta_open(struct net_device *dev)
> +{
> + struct mvneta_port *pp = netdev_priv(dev);
> + int queue = mvneta_rxq_def;
> +
> + if (mvneta_mac_addr_set(pp, dev->dev_addr, queue) != 0) {
> + netdev_err(dev, "mvneta_mac_addr_set failed\n");
> + return -EINVAL;
> + }
> +
> + if (mvneta_start(dev)) {
> + netdev_err(dev, "start interface failed\n");
> + return -ENODEV;
> + }
> +
> + return 0;
Propagate the error code here too.
[snip]
> +static void mvneta_ethtool_get_drvinfo(struct net_device *dev,
> + struct ethtool_drvinfo *drvinfo)
> +{
> + strlcpy(drvinfo->driver, mvneta_driver_name,
> + sizeof(drvinfo->driver));
> + strlcpy(drvinfo->version, mvneta_driver_version,
> + sizeof(drvinfo->version));
You can probably also provide information about the firmware version, or at
least bus_info.
[snip]
> +/* Device initialization routine */
> +static int __devinit mvneta_probe(struct platform_device *pdev)
> +{
> + int err = -EINVAL;
> + struct mvneta_port *pp;
> + struct net_device *dev;
> + u32 phy_addr, clk;
> + int phy_mode;
> + const char *mac_addr;
> + const struct mbus_dram_target_info *dram_target_info;
> + struct device_node *dn = pdev->dev.of_node;
> +
> + dev = alloc_etherdev_mq(sizeof(struct mvneta_port), 8);
> + if (!dev)
> + return -ENOMEM;
> +
> + dev->irq = irq_of_parse_and_map(dn, 0);
> + if (dev->irq == 0) {
> + err = -EINVAL;
> + goto err_irq;
> + }
> +
> + if (of_property_read_u32(dn, "phy-addr", &phy_addr) != 0) {
> + dev_err(&pdev->dev, "could not read phy_addr\n");
> + err = -ENODEV;
> + goto err_node;
> + }
> +
> + phy_mode = of_get_phy_mode(dn);
> + if (phy_mode < 0) {
> + dev_err(&pdev->dev, "wrong phy-mode\n");
> + err = -EINVAL;
> + goto err_node;
> + }
> +
> + if (of_property_read_u32(dn, "clock-frequency", &clk) != 0) {
> + dev_err(&pdev->dev, "could not read clock-frequency\n");
> + err = -EINVAL;
> + goto err_node;
> + }
> +
> + mac_addr = of_get_mac_address(dn);
> +
> + if (!mac_addr || !is_valid_ether_addr(mac_addr))
> + eth_hw_addr_random(dev);
> + else
> + memcpy(dev->dev_addr, mac_addr, 6);
> +
> + dev->tx_queue_len = MVNETA_MAX_TXD;
> + dev->watchdog_timeo = 5 * HZ;
> + dev->netdev_ops = &mvneta_netdev_ops;
> +
> + SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops);
> +
> + pp = netdev_priv(dev);
> +
> + pp->tx_done_timer.function = mvneta_tx_done_timer_callback;
> + init_timer(&pp->tx_done_timer);
> + clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
> + pp->cleanup_timer.function = mvneta_cleanup_timer_callback;
> + init_timer(&pp->cleanup_timer);
> + clear_bit(MVNETA_F_CLEANUP_TIMER_BIT, &pp->flags);
> +
> + pp->weight = MVNETA_RX_POLL_WEIGHT;
> + pp->clk = clk;
Rename this to clk_freq to make it less ambiguous, because this is not a proper
struct clk pointer.
> +
> + pp->base = of_iomap(dn, 0);
> + if (pp->base == NULL) {
> + err = -ENOMEM;
> + goto err_node;
> + }
> +
> + pp->tx_done_timer.data = (unsigned long)dev;
> + pp->cleanup_timer.data = (unsigned long)dev;
> +
> + pp->tx_ring_size = MVNETA_MAX_TXD;
> + pp->rx_ring_size = MVNETA_MAX_RXD;
> +
> + pp->dev = dev;
> +
> + if (mvneta_init(pp, phy_addr)) {
> + dev_err(&pdev->dev, "can't init eth hal\n");
> + err = -ENODEV;
> + goto err_base;
> + }
> + mvneta_port_power_up(pp, phy_mode);
> +
> + dram_target_info = mv_mbus_dram_info();
> + if (dram_target_info)
> + mvneta_conf_mbus_windows(pp, dram_target_info);
> +
> + netif_napi_add(dev, &pp->napi, mvneta_poll, pp->weight);
> +
> + SET_NETDEV_DEV(dev, &pdev->dev);
> +
> + if (register_netdev(dev)) {
> + dev_err(&pdev->dev, "failed to register\n");
> + err = ENOMEM;
> + goto err_base;
> + }
> +
> + dev->features = NETIF_F_SG;
> + dev->hw_features = NETIF_F_SG;
> + dev->priv_flags |= IFF_UNICAST_FLT;
> +
> + if (dev->mtu <= MVNETA_TX_CSUM_MAX_SIZE) {
> + dev->features |= NETIF_F_IP_CSUM;
> + dev->hw_features |= NETIF_F_IP_CSUM;
> + }
At this point, the condition is always true, so just set these features and
update them when the MTU changes.
> +
> + dev_info(&pdev->dev, "%s, mac: %pM pp->base=%p\n", dev->name,
> + dev->dev_addr, pp->base);
> +
> + platform_set_drvdata(pdev, pp->dev);
> +
> + return 0;
> +err_base:
> + iounmap(pp->base);
> +err_node:
> + irq_dispose_mapping(dev->irq);
> +err_irq:
> + free_netdev(dev);
> + return err;
> +}
> +
> +/* Device removal routine */
> +static int __devexit mvneta_remove(struct platform_device *pdev)
> +{
> + struct net_device *dev = platform_get_drvdata(pdev);
> + struct mvneta_port *pp = netdev_priv(dev);
> +
> + dev_info(&pdev->dev, "Removing Marvell Ethernet Driver\n");
I would remove this message.
^ permalink raw reply
* Re: sctp_close/sk_free: kernel BUG at arch/x86/mm/physaddr.c:18!
From: Eric Dumazet @ 2012-09-05 15:30 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: Fengguang Wu, H.K. Jerry Chu, Eric W. Biederman, networking,
linux-can
In-Reply-To: <50476931.20100@pengutronix.de>
On Wed, 2012-09-05 at 17:01 +0200, Marc Kleine-Budde wrote:
> On 09/05/2012 04:55 PM, Fengguang Wu wrote:
> >>> This in turn means the problem doesn't come from the CAN patches, as
> >>> both trees have different CAN patches. I'm adding Eric W. Biederman on
> >>> Cc as he contributed some sctp patches between v3.6 and net-next/master.
> >>
> >> Anything is possible, but this seems unlikely as I don't think I touched
> >> anything close to that part of the code.
> >
> > You are both right. The bad commit turns out to be one of:
> >
> > 1bed966cc3bd4042110129f0fc51aeeb59c5b200 Merge branch 'tcp_fastopen_server'
> > 168a8f58059a22feb9e9a2dcc1b8053dbbbc12ef tcp: TCP Fast Open Server - main code path
> > 8336886f786fdacbc19b719c1f7ea91eb70706d4 tcp: TCP Fast Open Server - support TFO listeners
> >
> > Thanks,
> > Fengguang
>
> Thanks for your work Fengguang.
>
> Marc
>
OK I have a good idea how to fix the bug, I will send a patch ASAP
Thanks
^ permalink raw reply
* Re: sctp_close/sk_free: kernel BUG at arch/x86/mm/physaddr.c:18!
From: Eric Dumazet @ 2012-09-05 15:40 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: Fengguang Wu, H.K. Jerry Chu, Eric W. Biederman, networking,
linux-can
In-Reply-To: <1346859046.13121.144.camel@edumazet-glaptop>
On Wed, 2012-09-05 at 17:30 +0200, Eric Dumazet wrote:
> On Wed, 2012-09-05 at 17:01 +0200, Marc Kleine-Budde wrote:
> > On 09/05/2012 04:55 PM, Fengguang Wu wrote:
> > >>> This in turn means the problem doesn't come from the CAN patches, as
> > >>> both trees have different CAN patches. I'm adding Eric W. Biederman on
> > >>> Cc as he contributed some sctp patches between v3.6 and net-next/master.
> > >>
> > >> Anything is possible, but this seems unlikely as I don't think I touched
> > >> anything close to that part of the code.
> > >
> > > You are both right. The bad commit turns out to be one of:
> > >
> > > 1bed966cc3bd4042110129f0fc51aeeb59c5b200 Merge branch 'tcp_fastopen_server'
> > > 168a8f58059a22feb9e9a2dcc1b8053dbbbc12ef tcp: TCP Fast Open Server - main code path
> > > 8336886f786fdacbc19b719c1f7ea91eb70706d4 tcp: TCP Fast Open Server - support TFO listeners
> > >
> > > Thanks,
> > > Fengguang
> >
> > Thanks for your work Fengguang.
> >
> > Marc
> >
>
> OK I have a good idea how to fix the bug, I will send a patch ASAP
Could you test the following patch please ?
(Not sure why sctp doesn't memset/bzero its whole socket, by the way...)
Thanks
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4f70ef0..845372b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -149,11 +149,8 @@ void inet_sock_destruct(struct sock *sk)
pr_err("Attempt to release alive inet socket %p\n", sk);
return;
}
- if (sk->sk_type == SOCK_STREAM) {
- struct fastopen_queue *fastopenq =
- inet_csk(sk)->icsk_accept_queue.fastopenq;
- kfree(fastopenq);
- }
+ if (sk->sk_protocol == IPPROTO_TCP)
+ kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
^ permalink raw reply related
* NULL pointer dereference in xt_register_target()
From: Cong Wang @ 2012-09-05 15:43 UTC (permalink / raw)
To: netfilter-devel; +Cc: Linux Kernel Network Developers
Hi, folks,
The latest net-next tree can't boot due to a NULL pointer dereference
bug in the kernel; the full backtrace is:
http://img1.douban.com/view/photo/photo/public/p1697139550.jpg
the kernel .config file is:
http://pastebin.com/9YTnkqKN
I don't have time to look into the issue. If you need other info,
please let me know.
Thanks.
^ permalink raw reply
* Re: [PATCH] ceph: [PATCH] ceph: use list_move_tail instead of list_del/list_add_tail
From: Sage Weil @ 2012-09-05 15:48 UTC (permalink / raw)
To: Wei Yongjun; +Cc: davem, yongjun_wei, ceph-devel, netdev
In-Reply-To: <CAPgLHd-+oONn90yq4yy6-NKDcSwTduqu44ZUFbSCDP8bci84qw@mail.gmail.com>
Applied to the ceph queue. Thanks!
sage
On Wed, 5 Sep 2012, Wei Yongjun wrote:
> From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
>
> Using list_move_tail() instead of list_del() + list_add_tail().
>
> Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
> ---
> net/ceph/pagelist.c | 5 ++---
> 1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
> index 665cd23..92866be 100644
> --- a/net/ceph/pagelist.c
> +++ b/net/ceph/pagelist.c
> @@ -1,4 +1,3 @@
> -
> #include <linux/module.h>
> #include <linux/gfp.h>
> #include <linux/pagemap.h>
> @@ -134,8 +133,8 @@ int ceph_pagelist_truncate(struct ceph_pagelist *pl,
> ceph_pagelist_unmap_tail(pl);
> while (pl->head.prev != c->page_lru) {
> page = list_entry(pl->head.prev, struct page, lru);
> - list_del(&page->lru); /* remove from pagelist */
> - list_add_tail(&page->lru, &pl->free_list); /* add to reserve */
> + /* move from pagelist to reserve */
> + list_move_tail(&page->lru, &pl->free_list);
> ++pl->num_pages_free;
> }
> pl->room = c->room;
>
>
^ permalink raw reply
* Re: [net-next.git 2/7] stmmac: manage tx clean out of rx_poll
From: Ben Hutchings @ 2012-09-05 15:51 UTC (permalink / raw)
To: Giuseppe CAVALLARO; +Cc: netdev, davem
In-Reply-To: <1346857432-24657-3-git-send-email-peppe.cavallaro@st.com>
On Wed, 2012-09-05 at 17:03 +0200, Giuseppe CAVALLARO wrote:
> This patch is to invoke the stmmac_tx (tx handler)
> out of the NAPI poll method.
> This will make easier the next step to add the new
> mitigation schema.
> Also the patch enhances the ethtool to report some
> stats for normal TX and RX IRQs.
[...]
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -824,16 +824,27 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
> netif_wake_queue(priv->dev);
> }
>
> +static inline void stmmac_rx_schedule(struct stmmac_priv *priv)
> +{
> + if (likely(napi_schedule_prep(&priv->napi))) {
> + stmmac_disable_irq(priv);
> + __napi_schedule(&priv->napi);
> + }
> +}
>
> static void stmmac_dma_interrupt(struct stmmac_priv *priv)
> {
> int status;
>
> status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
> - if (likely(status == handle_tx_rx))
> - _stmmac_schedule(priv);
> -
> - else if (unlikely(status == tx_hard_error_bump_tc)) {
> + if (likely(status == handle_rx)) {
Surely 'status & handle_rx'?
> + priv->xstats.rx_normal_irq_n++;
> + stmmac_rx_schedule(priv);
> + }
> + if (likely(status == handle_tx)) {
'status & handle_tx'?
> + priv->xstats.tx_normal_irq_n++;
> + stmmac_tx(priv);
> + } else if (unlikely(status == tx_hard_error_bump_tc)) {
> /* Try to bump up the dma threshold on this failure */
> if (unlikely(tc != SF_DMA_MODE) && (tc <= 256)) {
> tc += 64;
> @@ -1443,8 +1454,7 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
> struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi);
> int work_done = 0;
>
> - priv->xstats.poll_n++;
> - stmmac_tx(priv);
> + priv->xstats.rx_napi_poll++;
> work_done = stmmac_rx(priv, budget);
>
> if (work_done < budget) {
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: NULL pointer dereference in xt_register_target()
From: Eric Dumazet @ 2012-09-05 15:55 UTC (permalink / raw)
To: Cong Wang; +Cc: netfilter-devel, Linux Kernel Network Developers
In-Reply-To: <CAM_iQpVUZ50RYLUgEHbcm7ua996hgE7Gj-P-bWNJutuvm1nUoQ@mail.gmail.com>
On Wed, 2012-09-05 at 23:43 +0800, Cong Wang wrote:
> Hi, folks,
>
> The latest net-next tree can't boot due to a NULL ptr def
> bug in the kernel, the full backtrack is:
>
> http://img1.douban.com/view/photo/photo/public/p1697139550.jpg
>
> the kernel .config file is:
>
> http://pastebin.com/9YTnkqKN
>
> I don't have time to look into the issue. If you need other info,
> please let me know.
It seems xt_nat_init() is called before xt_init(), so the xt array is not
yet set up.
^ permalink raw reply
* О чём думаешь
From: Варенникова Томуля @ 2012-09-04 20:16 UTC (permalink / raw)
To: netdev
Хай а ты красивый, если скучно и не против подружиться заходи ко мне на страничку http://pics.lancespinnie.com/media.php
^ permalink raw reply
* Re: kernel BUG at kernel/timer.c:748!
From: Lin Ming @ 2012-09-05 16:04 UTC (permalink / raw)
To: Dave Jones; +Cc: netdev
In-Reply-To: <20120905043523.GA12988@redhat.com>
On Wed, Sep 5, 2012 at 12:35 PM, Dave Jones <davej@redhat.com> wrote:
> Just hit this bug on 3.6-rc4.
>
> The BUG is..
>
> BUG_ON(!timer->function);
TCP keepalive timer is setup when the socket is created.
__sock_create
inet_create
tcp_v4_init_sock
tcp_init_sock
tcp_init_xmit_timers
inet_csk_init_xmit_timers
timer->function should not be NULL when the keepalive option is set.
Strange... there must be a bug somewhere.
Lin Ming
>
>
> Not much to go on... Any thoughts on what I could add to get
> more debug info on which protocol etc this was ?
>
> Dave
>
>
> kernel BUG at kernel/timer.c:748!
> invalid opcode: 0000 [#1] SMP
> Modules linked in: tun fuse ipt_ULOG binfmt_misc nfnetlink nfc caif_socket caif phonet can llc2 pppoe pppox ppp_generic slhc irda crc_ccitt rds af_key decnet rose x25 atm netrom appletalk ipx p8023 psnap p8022 llc ax25 nfsv3 nfs_acl nfs fscache lockd sunrpc bluetooth rfkill ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode pcspkr i2c_i801 e1000e uinput i915 video i2c_algo_bit drm_kms_helper drm i2c_core
> CPU 3
> Pid: 12330, comm: trinity-child3 Not tainted 3.6.0-rc4+ #36
> RIP: 0010:[<ffffffff810813f5>] [<ffffffff810813f5>] mod_timer+0x2c5/0x2f0
> RSP: 0018:ffff88000dfd7e08 EFLAGS: 00010246
> RAX: 000000000000001a RBX: ffff880122d62948 RCX: 000000000000001a
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88000dfd7e10
> RBP: ffff88000dfd7e48 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000001517000 R11: 0000000000000246 R12: 000000016c000000
> R13: 000000016c12bcb1 R14: ffff8801236cee00 R15: 00000000ffffff01
> FS: 00007fa96745f740(0000) GS:ffff880148200000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000100ff000 CR3: 0000000099344000 CR4: 00000000001407e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process trinity-child3 (pid: 12330, threadinfo ffff88000dfd6000, task ffff880090890000)
> Stack:
> ffffffff8154cb6d 0000000007b5edf7 ffff88000dfd7e28 ffff880122d62520
> 0000000000000009 0000000000000004 ffff8801236cee00 00000000ffffff01
> ffff88000dfd7e68 ffffffff8154c79c ffffffff81550e6c ffff880122d62520
> Call Trace:
> [<ffffffff8154cb6d>] ? lock_sock_nested+0x8d/0xa0
> [<ffffffff8154c79c>] sk_reset_timer+0x1c/0x30
> [<ffffffff81550e6c>] ? sock_setsockopt+0x8c/0x960
> [<ffffffff815a84a0>] inet_csk_reset_keepalive_timer+0x20/0x30
> [<ffffffff815c018d>] tcp_set_keepalive+0x3d/0x50
> [<ffffffff81551703>] sock_setsockopt+0x923/0x960
> [<ffffffff810ddf76>] ? trace_hardirqs_on_caller+0x16/0x1e0
> [<ffffffff811db0ac>] ? fget_light+0x24c/0x520
> [<ffffffff8154af86>] sys_setsockopt+0xc6/0xe0
> [<ffffffff816a50ed>] system_call_fastpath+0x1a/0x1f
> Code: 00 74 43 9c 58 0f 1f 44 00 00 f6 c4 02 0f 84 14 ff ff ff eb 93 48 c7 c7 20 48 c3 81 e8 f5 70 05 00 85 c0 0f 85 fe fe ff ff eb b7 <0f> 0b 48 8b 75 08 48 89 df e8 3d f6 ff ff e9 b2 fd ff ff 4d 89
> RIP [<ffffffff810813f5>] mod_timer+0x2c5/0x2f0
> RSP <ffff88000dfd7e08>
> ---[ end trace 7e7b5910138e49a3 ]---
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [net-next.git 3/7] stmmac: add the initial tx coalesce schema
From: Ben Hutchings @ 2012-09-05 16:11 UTC (permalink / raw)
To: Giuseppe CAVALLARO; +Cc: netdev, davem
In-Reply-To: <1346857432-24657-4-git-send-email-peppe.cavallaro@st.com>
On Wed, 2012-09-05 at 17:03 +0200, Giuseppe CAVALLARO wrote:
> This patch adds a new schema used for mitigating the
> number of transmit interrupts.
> It is based on a sw timer and a threshold value.
> The timer is used to periodically call the stmmac_tx
> function that can be invoked by the ISR but only for
> the descriptors where the interrupt on completion
> field has been set. This is tuned by a threshold.
>
> Next step is to add the ability to tune these coalesce
> values by ethtool.
>
> Till now I have put a default that showed a real gain
> on all the platforms ARM/SH4 where I performed benchmarks.
>
> Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
> ---
> drivers/net/ethernet/stmicro/stmmac/common.h | 8 +-
> drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 +
> .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 9 +-
> drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 86 +++++++++++++-------
> 4 files changed, 72 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
> index bd32fe6..1d6bd3e 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/common.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h
> @@ -95,11 +95,13 @@ struct stmmac_extra_stats {
> unsigned long threshold;
> unsigned long tx_pkt_n;
> unsigned long rx_pkt_n;
> - unsigned long rx_napi_poll;
> + unsigned long normal_irq_n;
> unsigned long rx_normal_irq_n;
> + unsigned long rx_napi_poll;
> unsigned long tx_normal_irq_n;
> - unsigned long sched_timer_n;
> - unsigned long normal_irq_n;
> + unsigned long txtimer;
> + unsigned long tx_clean;
> + unsigned long tx_reset_ic_bit;
> unsigned long mmc_tx_irq_n;
> unsigned long mmc_rx_irq_n;
> unsigned long mmc_rx_csum_offload_irq_n;
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index 9f35769..0f5ab28 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -88,6 +88,10 @@ struct stmmac_priv {
> int eee_enabled;
> int eee_active;
> int tx_lpi_timer;
> + struct timer_list txtimer;
> + u32 tx_count_frames;
> + u32 tx_coal_frames;
> + u32 tx_coal_timer;
> };
>
> extern int phyaddr;
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
> index 505fe71..48ad0bc 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
> @@ -90,12 +90,13 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
> STMMAC_STAT(threshold),
> STMMAC_STAT(tx_pkt_n),
> STMMAC_STAT(rx_pkt_n),
> - STMMAC_STAT(rx_napi_poll),
> + STMMAC_STAT(normal_irq_n),
> STMMAC_STAT(rx_normal_irq_n),
> + STMMAC_STAT(rx_napi_poll),
> STMMAC_STAT(tx_normal_irq_n),
> - STMMAC_STAT(sched_timer_n),
> - STMMAC_STAT(normal_irq_n),
> - STMMAC_STAT(normal_irq_n),
> + STMMAC_STAT(txtimer),
> + STMMAC_STAT(tx_clean),
> + STMMAC_STAT(tx_reset_ic_bit),
> STMMAC_STAT(mmc_tx_irq_n),
> STMMAC_STAT(mmc_rx_irq_n),
> STMMAC_STAT(mmc_rx_csum_offload_irq_n),
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index b247c39..d7f5482 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -77,6 +77,8 @@
>
> #define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x)
> #define JUMBO_LEN 9000
> +#define STMMAC_TX_TM 40000
> +#define STMMAC_TX_MAX_FRAMES 64 /* Max coalesced frame */
>
> /* Module parameters */
> #define TX_TIMEO 5000 /* default 5 seconds */
> @@ -695,8 +697,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
> static void stmmac_tx(struct stmmac_priv *priv)
> {
> unsigned int txsize = priv->dma_tx_size;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&priv->tx_lock, flags);
>
> - spin_lock(&priv->tx_lock);
> + priv->xstats.tx_clean++;
>
> while (priv->dirty_tx != priv->cur_tx) {
> int last;
> @@ -765,7 +770,7 @@ static void stmmac_tx(struct stmmac_priv *priv)
> stmmac_enable_eee_mode(priv);
> mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_TIMER(eee_timer));
> }
> - spin_unlock(&priv->tx_lock);
> + spin_unlock_irqrestore(&priv->tx_lock, flags);
> }
>
> static inline void stmmac_enable_irq(struct stmmac_priv *priv)
> @@ -778,29 +783,16 @@ static inline void stmmac_disable_irq(struct stmmac_priv *priv)
> priv->hw->dma->disable_dma_irq(priv->ioaddr);
> }
>
> -static int stmmac_has_work(struct stmmac_priv *priv)
> +static void stmmac_txtimer(unsigned long data)
> {
> - unsigned int has_work = 0;
> - int rxret, tx_work = 0;
> + struct stmmac_priv *priv = (struct stmmac_priv *)data;
>
> - rxret = priv->hw->desc->get_rx_owner(priv->dma_rx +
> - (priv->cur_rx % priv->dma_rx_size));
> + priv->xstats.txtimer++;
>
> if (priv->dirty_tx != priv->cur_tx)
> - tx_work = 1;
> -
> - if (likely(!rxret || tx_work))
> - has_work = 1;
> + stmmac_tx(priv);
>
> - return has_work;
> -}
> -
> -static inline void _stmmac_schedule(struct stmmac_priv *priv)
> -{
> - if (likely(stmmac_has_work(priv))) {
> - stmmac_disable_irq(priv);
> - napi_schedule(&priv->napi);
> - }
> + return;
> }
>
> /**
> @@ -824,7 +816,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
> netif_wake_queue(priv->dev);
> }
>
> -static inline void stmmac_rx_schedule(struct stmmac_priv *priv)
> +static void stmmac_rx_schedule(struct stmmac_priv *priv)
> {
> if (likely(napi_schedule_prep(&priv->napi))) {
> stmmac_disable_irq(priv);
> @@ -1001,6 +993,36 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
> priv->dma_rx_phy);
> }
>
> +static int stmmac_check_coal(int size, int max_coal_frames)
> +{
> + int ret = 0;
> +
> + if (max_coal_frames >= size)
> + return ret;
> +
> + return max_coal_frames;
> +}
Not sure this makes sense. You're limiting the number of frames to be
coalesced to the size of the TX DMA ring. But each frame can consume
multiple DMA descriptors, so it doesn't make sense to compare these
numbers.
It looks like patch 6 reverts this, but why do it in the first place?
[...]
> @@ -1213,10 +1240,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
> return NETDEV_TX_BUSY;
> }
>
> - spin_lock(&priv->tx_lock);
> -
> - if (priv->tx_path_in_lpi_mode)
> - stmmac_disable_eee_mode(priv);
Why are you removing the call to stmmac_disable_eee_mode()?
> + spin_lock_irqsave(&priv->tx_lock, flags);
>
> entry = priv->cur_tx % txsize;
>
> @@ -1272,7 +1296,14 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
> /* Interrupt on completition only for the latest segment */
> priv->hw->desc->close_tx_desc(desc);
Comment above this is now incorrect.
> - wmb();
Why are you removing this barrier?
> + /* Do not set the IC according to the coalesce patameters */
> + priv->tx_count_frames++;
> + if (priv->tx_coal_frames > priv->tx_count_frames) {
> + priv->hw->desc->clear_tx_ic(desc);
> + priv->xstats.tx_reset_ic_bit++;
> + mod_timer(&priv->txtimer, priv->tx_coal_timer);
> + } else
> + priv->tx_count_frames = 0;
>
> /* To avoid raise condition */
> priv->hw->desc->set_tx_owner(first);
[...]
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: NULL pointer dereference in xt_register_target()
From: Eric Dumazet @ 2012-09-05 16:12 UTC (permalink / raw)
To: Cong Wang; +Cc: netfilter-devel, Linux Kernel Network Developers
In-Reply-To: <1346860506.13121.148.camel@edumazet-glaptop>
On Wed, 2012-09-05 at 17:55 +0200, Eric Dumazet wrote:
> On Wed, 2012-09-05 at 23:43 +0800, Cong Wang wrote:
> > Hi, folks,
> >
> > The latest net-next tree can't boot due to a NULL ptr def
> > bug in the kernel, the full backtrack is:
> >
> > http://img1.douban.com/view/photo/photo/public/p1697139550.jpg
> >
> > the kernel .config file is:
> >
> > http://pastebin.com/9YTnkqKN
> >
> > I don't have time to look into the issue. If you need other info,
> > please let me know.
>
> It seems xt_nat_init() is called before xt_init(), so xt array is not
> yet setup.
>
>
Seems the following patch should help, I have to try it ;)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8d987c3..afcea11 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1390,6 +1390,6 @@ static void __exit xt_fini(void)
kfree(xt);
}
-module_init(xt_init);
+core_initcall(xt_init);
module_exit(xt_fini);
^ permalink raw reply related
* Re: [net-next.git 4/7] stmmac: add Rx watchdog optimization to mitigate the DMA irqs
From: Ben Hutchings @ 2012-09-05 16:14 UTC (permalink / raw)
To: Giuseppe CAVALLARO; +Cc: netdev, davem
In-Reply-To: <1346857432-24657-5-git-send-email-peppe.cavallaro@st.com>
On Wed, 2012-09-05 at 17:03 +0200, Giuseppe CAVALLARO wrote:
> GMAC devices newer than databook 3.50 has an embedded timer
> that can be used for mitigating the number of interrupts.
> So this patch adds this optimizations.
> Old MAC will continue to use NAPI.
[...]
Interrupt moderation is *not* a substitute for NAPI; you should continue
using NAPI as well.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: [V2 PATCH 1/9] csiostor: Chelsio FCoE offload driver submission (sources part 1).
From: Stephen Hemminger @ 2012-09-05 16:23 UTC (permalink / raw)
To: Naresh Kumar Inna; +Cc: JBottomley, linux-scsi, dm, leedom, netdev, chethan
In-Reply-To: <1346848442-4573-2-git-send-email-naresh@chelsio.com>
On Wed, 5 Sep 2012 18:03:54 +0530
Naresh Kumar Inna <naresh@chelsio.com> wrote:
> +
> +/* FCoE Adapter types & its description */
> +static struct csio_adap_desc csio_fcoe_adapters[] = {
Tables like this should be const.
^ permalink raw reply
* Re: [V2 PATCH 2/9] csiostor: Chelsio FCoE offload driver submission (sources part 2).
From: Stephen Hemminger @ 2012-09-05 16:29 UTC (permalink / raw)
To: Naresh Kumar Inna; +Cc: JBottomley, linux-scsi, dm, leedom, netdev, chethan
In-Reply-To: <1346848442-4573-3-git-send-email-naresh@chelsio.com>
On Wed, 5 Sep 2012 18:03:55 +0530
Naresh Kumar Inna <naresh@chelsio.com> wrote:
> This patch contains code for driver initialization, driver resource
> allocation and the Work Request module functionality. Driver initialization
> includes module entry/exit points, registration with PCI, FC transport and
> SCSI mid layer subsystems. The Work Request module provides services for
> allocation of DMA queues, posting Work Requests on them and processing
> completions.
>
> Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
Although the comments say you are using proc fs, there is no
code here related to that.
Any use of debugfs must be conditional on the DEBUG_FS kernel configuration
parameter. Your code probably will break if DEBUG_FS is not
enabled. For a possible alternative see how a sub-config parameter
was added in sky2 driver.
^ permalink raw reply
* Re: [V2 PATCH 6/9] csiostor: Chelsio FCoE offload driver submission (headers part 1).
From: Stephen Hemminger @ 2012-09-05 16:31 UTC (permalink / raw)
To: Naresh Kumar Inna; +Cc: JBottomley, linux-scsi, dm, leedom, netdev, chethan
In-Reply-To: <1346848442-4573-7-git-send-email-naresh@chelsio.com>
On Wed, 5 Sep 2012 18:03:59 +0530
Naresh Kumar Inna <naresh@chelsio.com> wrote:
> +#define CSIO_ROUNDUP(__v, __r) (((__v) + (__r) - 1) / (__r))
This is similar to the existing round_up() in kernel.h; could you use that?
^ permalink raw reply
* Re: [V2 PATCH 6/9] csiostor: Chelsio FCoE offload driver submission (headers part 1).
From: Stephen Hemminger @ 2012-09-05 16:33 UTC (permalink / raw)
To: Naresh Kumar Inna; +Cc: JBottomley, linux-scsi, dm, leedom, netdev, chethan
In-Reply-To: <1346848442-4573-7-git-send-email-naresh@chelsio.com>
On Wed, 5 Sep 2012 18:03:59 +0530
Naresh Kumar Inna <naresh@chelsio.com> wrote:
> +
> +#define CSIO_ASSERT(cond) \
> +do { \
> + if (unlikely(!((cond)))) \
> + BUG(); \
> +} while (0)
> +
Why is this not just BUG_ON(!(cond)) ?
^ permalink raw reply
* Re: kernel BUG at kernel/timer.c:748!
From: Yuchung Cheng @ 2012-09-05 16:37 UTC (permalink / raw)
To: Lin Ming; +Cc: Dave Jones, netdev
In-Reply-To: <CAF1ivSauxzNhrm9c==_xFpuh9Lo3KrUNLNRb_62fLZxMfTuU1w@mail.gmail.com>
On Wed, Sep 5, 2012 at 9:04 AM, Lin Ming <mlin@ss.pku.edu.cn> wrote:
> On Wed, Sep 5, 2012 at 12:35 PM, Dave Jones <davej@redhat.com> wrote:
>> Just hit this bug on 3.6-rc4.
>>
>> The BUG is..
>>
>> BUG_ON(!timer->function);
>
> TCP keepalive timer is setup when the socket is created.
>
> __sock_create
> inet_create
> tcp_v4_init_sock
> tcp_init_sock
> tcp_init_xmit_timers
> inet_csk_init_xmit_timers
>
> timer->function should not be NULL when set keepalive option.
>
> Strange...have bug somewhere.
Is this a passively opened socket or an actively opened one?
>
> Lin Ming
>
>>
>>
>> Not much to go on... Any thoughts on what I could add to get
>> more debug info on which protocol etc this was ?
>>
>> Dave
>>
>>
>> kernel BUG at kernel/timer.c:748!
>> invalid opcode: 0000 [#1] SMP
>> Modules linked in: tun fuse ipt_ULOG binfmt_misc nfnetlink nfc caif_socket caif phonet can llc2 pppoe pppox ppp_generic slhc irda crc_ccitt rds af_key decnet rose x25 atm netrom appletalk ipx p8023 psnap p8022 llc ax25 nfsv3 nfs_acl nfs fscache lockd sunrpc bluetooth rfkill ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode pcspkr i2c_i801 e1000e uinput i915 video i2c_algo_bit drm_kms_helper drm i2c_core
>> CPU 3
>> Pid: 12330, comm: trinity-child3 Not tainted 3.6.0-rc4+ #36
>> RIP: 0010:[<ffffffff810813f5>] [<ffffffff810813f5>] mod_timer+0x2c5/0x2f0
>> RSP: 0018:ffff88000dfd7e08 EFLAGS: 00010246
>> RAX: 000000000000001a RBX: ffff880122d62948 RCX: 000000000000001a
>> RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88000dfd7e10
>> RBP: ffff88000dfd7e48 R08: 0000000000000000 R09: 0000000000000000
>> R10: 0000000001517000 R11: 0000000000000246 R12: 000000016c000000
>> R13: 000000016c12bcb1 R14: ffff8801236cee00 R15: 00000000ffffff01
>> FS: 00007fa96745f740(0000) GS:ffff880148200000(0000) knlGS:0000000000000000
>> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> CR2: 00000000100ff000 CR3: 0000000099344000 CR4: 00000000001407e0
>> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>> Process trinity-child3 (pid: 12330, threadinfo ffff88000dfd6000, task ffff880090890000)
>> Stack:
>> ffffffff8154cb6d 0000000007b5edf7 ffff88000dfd7e28 ffff880122d62520
>> 0000000000000009 0000000000000004 ffff8801236cee00 00000000ffffff01
>> ffff88000dfd7e68 ffffffff8154c79c ffffffff81550e6c ffff880122d62520
>> Call Trace:
>> [<ffffffff8154cb6d>] ? lock_sock_nested+0x8d/0xa0
>> [<ffffffff8154c79c>] sk_reset_timer+0x1c/0x30
>> [<ffffffff81550e6c>] ? sock_setsockopt+0x8c/0x960
>> [<ffffffff815a84a0>] inet_csk_reset_keepalive_timer+0x20/0x30
>> [<ffffffff815c018d>] tcp_set_keepalive+0x3d/0x50
>> [<ffffffff81551703>] sock_setsockopt+0x923/0x960
>> [<ffffffff810ddf76>] ? trace_hardirqs_on_caller+0x16/0x1e0
>> [<ffffffff811db0ac>] ? fget_light+0x24c/0x520
>> [<ffffffff8154af86>] sys_setsockopt+0xc6/0xe0
>> [<ffffffff816a50ed>] system_call_fastpath+0x1a/0x1f
>> Code: 00 74 43 9c 58 0f 1f 44 00 00 f6 c4 02 0f 84 14 ff ff ff eb 93 48 c7 c7 20 48 c3 81 e8 f5 70 05 00 85 c0 0f 85 fe fe ff ff eb b7 <0f> 0b 48 8b 75 08 48 89 df e8 3d f6 ff ff e9 b2 fd ff ff 4d 89
>> RIP [<ffffffff810813f5>] mod_timer+0x2c5/0x2f0
>> RSP <ffff88000dfd7e08>
>> ---[ end trace 7e7b5910138e49a3 ]---
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH net-next] netfilter: x_tables: xt_init() should run earlier
From: Eric Dumazet @ 2012-09-05 16:37 UTC (permalink / raw)
To: Cong Wang, Pablo Neira Ayuso, Patrick McHardy
Cc: netfilter-devel, Linux Kernel Network Developers
In-Reply-To: <1346861569.13121.149.camel@edumazet-glaptop>
From: Eric Dumazet <edumazet@google.com>
Cong Wang reported a NULL dereference in xt_register_target().
It turns out xt_nat_init() was called before xt_init(), so the xt array
was not yet set up.
xt_init() should be marked core_initcall() to solve this problem.
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/netfilter/x_tables.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8d987c3..afcea11 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1390,6 +1390,6 @@ static void __exit xt_fini(void)
kfree(xt);
}
-module_init(xt_init);
+core_initcall(xt_init);
module_exit(xt_fini);
^ permalink raw reply related
* Re: NULL pointer dereference in xt_register_target()
From: Pablo Neira Ayuso @ 2012-09-05 16:48 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Cong Wang, netfilter-devel, Linux Kernel Network Developers
In-Reply-To: <1346860506.13121.148.camel@edumazet-glaptop>
On Wed, Sep 05, 2012 at 05:55:06PM +0200, Eric Dumazet wrote:
> On Wed, 2012-09-05 at 23:43 +0800, Cong Wang wrote:
> > Hi, folks,
> >
> > The latest net-next tree can't boot due to a NULL ptr def
> > bug in the kernel, the full backtrack is:
> >
> > http://img1.douban.com/view/photo/photo/public/p1697139550.jpg
> >
> > the kernel .config file is:
> >
> > http://pastebin.com/9YTnkqKN
> >
> > I don't have time to look into the issue. If you need other info,
> > please let me know.
>
> It seems xt_nat_init() is called before xt_init(), so xt array is not
> yet setup.
I have enqueued the following patch to fix this:
http://1984.lsi.us.es/git/nf-next/commit/?id=00545bec9412d130c77f72a08d6c8b6ad21d4a1e
commit 00545bec9412d130c77f72a08d6c8b6ad21d4a1e
Author: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed Sep 5 18:24:55 2012 +0200
netfilter: fix crash during boot if NAT has been compiled built-in
Thanks.
^ permalink raw reply
* Re: [PATCH] usbnet: drop unneeded check for NULL
From: David Miller @ 2012-09-05 16:50 UTC (permalink / raw)
To: oneukum; +Cc: richardcochran, netdev
In-Reply-To: <2037108.ZemStJh9zr@linux-lqwf.site>
From: Oliver Neukum <oneukum@suse.de>
Date: Wed, 05 Sep 2012 08:24:25 +0200
> On Wednesday 05 September 2012 06:47:12 Richard Cochran wrote:
>> and so I think the problem that the test addresses is still present,
>> or am I missing something?
>
> No,
>
> you are right. Thank you.
>
> Dave, for now, please don't apply this patch. In the long run, this crap
> in cdc-ncm needs to go. I am starting rewriting this driver right now.
I already applied it several days ago; someone please send me a revert with a
verbose commit message explaining the situation.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox