Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH net-next 2/6] be2net: add be_cmd_set_port_speed_v1 to set port speed
From: David Miller @ 2011-08-08  5:08 UTC (permalink / raw)
  To: ajit.khaparde; +Cc: netdev
In-Reply-To: <20110805195958.GA13539@akhaparde-VBox>

From: Ajit Khaparde <ajit.khaparde@Emulex.Com>
Date: Fri, 5 Aug 2011 14:59:58 -0500

> diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
> index 8d178d2..863ae67 100644
> --- a/drivers/net/benet/be_cmds.c
> +++ b/drivers/net/benet/be_cmds.c
> @@ -2367,3 +2367,38 @@ err:
>  	mutex_unlock(&adapter->mbox_lock);
 ...
> +	status = be_mcc_notify_wait(adapter);
> +err:
> +	spin_unlock_bh(&adapter->mcc_lock);
> +	return status;
> +}
> +

Please do not add trailing empty lines to source files, GIT complains
about this and will abort when I try to apply your patch.

^ permalink raw reply

* [RFC PATCH v2 9/9] sfc: Support for byte queue limits
From: Tom Herbert @ 2011-08-08  4:53 UTC (permalink / raw)
  To: davem, netdev

Changes to sfc to use byte queue limits.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 drivers/net/sfc/tx.c |   27 +++++++++++++++++++++------
 1 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
index 84eb99e..9aa4339 100644
--- a/drivers/net/sfc/tx.c
+++ b/drivers/net/sfc/tx.c
@@ -31,7 +31,9 @@
 #define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
 
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
-			       struct efx_tx_buffer *buffer)
+			       struct efx_tx_buffer *buffer,
+			       unsigned int *pkts_compl,
+			       unsigned int *bytes_compl)
 {
 	if (buffer->unmap_len) {
 		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
@@ -48,6 +50,8 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 	}
 
 	if (buffer->skb) {
+		(*pkts_compl)++;
+		(*bytes_compl) += buffer->skb->len;
 		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
 		buffer->skb = NULL;
 		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
@@ -254,6 +258,8 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	buffer->skb = skb;
 	buffer->continuation = false;
 
+	netdev_tx_sent_queue(tx_queue->core_txq, 1, skb->len);
+
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
@@ -271,10 +277,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
  unwind:
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
+		unsigned int pkts_compl = 0, bytes_compl = 0;
 		--tx_queue->insert_count;
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
-		efx_dequeue_buffer(tx_queue, buffer);
+		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
 		buffer->len = 0;
 	}
 
@@ -297,7 +304,9 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
  * specified index.
  */
 static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
-				unsigned int index)
+				unsigned int index,
+				unsigned int *pkts_compl,
+				unsigned int *bytes_compl)
 {
 	struct efx_nic *efx = tx_queue->efx;
 	unsigned int stop_index, read_ptr;
@@ -315,7 +324,7 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
 			return;
 		}
 
-		efx_dequeue_buffer(tx_queue, buffer);
+		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
 		buffer->continuation = true;
 		buffer->len = 0;
 
@@ -426,10 +435,12 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 {
 	unsigned fill_level;
 	struct efx_nic *efx = tx_queue->efx;
+	unsigned int pkts_compl = 0, bytes_compl = 0;
 
 	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
 
-	efx_dequeue_buffers(tx_queue, index);
+	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+	netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);
 
 	/* See if we need to restart the netif queue.  This barrier
 	 * separates the update of read_count from the test of the
@@ -519,13 +530,15 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
 
 	/* Free any buffers left in the ring */
 	while (tx_queue->read_count != tx_queue->write_count) {
+		unsigned int pkts_compl = 0, bytes_compl = 0;
 		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
-		efx_dequeue_buffer(tx_queue, buffer);
+		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
 		buffer->continuation = true;
 		buffer->len = 0;
 
 		++tx_queue->read_count;
 	}
+	netdev_tx_reset_queue(tx_queue->core_txq);
 }
 
 void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
@@ -1168,6 +1181,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
+	netdev_tx_sent_queue(tx_queue->core_txq, 1, skb->len);
+
 	tx_queue->tso_bursts++;
 	return NETDEV_TX_OK;
 
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 8/9] bnx2x: Support for byte queue limits
From: Tom Herbert @ 2011-08-08  4:53 UTC (permalink / raw)
  To: davem, netdev

Changes to bnx2x to use byte queue limits.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 drivers/net/bnx2x/bnx2x_cmn.c |   26 ++++++++++++++++++++++----
 1 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 5b0dba6..d4f921a 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -97,7 +97,8 @@ int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */
  * return idx of last bd freed
  */
 static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
-			     u16 idx)
+			     u16 idx, unsigned int *pkts_compl,
+			     unsigned int *bytes_compl)
 {
 	struct sw_tx_bd *tx_buf = &txdata->tx_buf_ring[idx];
 	struct eth_tx_start_bd *tx_start_bd;
@@ -154,6 +155,10 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata,
 
 	/* release skb */
 	WARN_ON(!skb);
+	if (skb) {
+		(*pkts_compl)++;
+		(*bytes_compl) += skb->len;
+	}
 	dev_kfree_skb_any(skb);
 	tx_buf->first_bd = 0;
 	tx_buf->skb = NULL;
@@ -165,6 +170,7 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
 {
 	struct netdev_queue *txq;
 	u16 hw_cons, sw_cons, bd_cons = txdata->tx_bd_cons;
+	unsigned int pkts_compl = 0, bytes_compl = 0;
 
 #ifdef BNX2X_STOP_ON_ERROR
 	if (unlikely(bp->panic))
@@ -184,10 +190,13 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
 				      " pkt_cons %u\n",
 		   txdata->txq_index, hw_cons, sw_cons, pkt_cons);
 
-		bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons);
+		bd_cons = bnx2x_free_tx_pkt(bp, txdata, pkt_cons,
+		    &pkts_compl, &bytes_compl);
 		sw_cons++;
 	}
 
+	netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
+
 	txdata->tx_pkt_cons = sw_cons;
 	txdata->tx_bd_cons = bd_cons;
 
@@ -1088,6 +1097,7 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp)
 		struct bnx2x_fastpath *fp = &bp->fp[i];
 		for_each_cos_in_tx_queue(fp, cos) {
 			struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
+			unsigned pkts_compl = 0, bytes_compl = 0;
 
 			u16 bd_cons = txdata->tx_bd_cons;
 			u16 sw_prod = txdata->tx_pkt_prod;
@@ -1095,9 +1105,13 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp)
 
 			while (sw_cons != sw_prod) {
 				bd_cons = bnx2x_free_tx_pkt(bp, txdata,
-							    TX_BD(sw_cons));
+							    TX_BD(sw_cons),
+							    &pkts_compl,
+							    &bytes_compl);
 				sw_cons++;
 			}
+			netdev_tx_reset_queue(
+			    netdev_get_tx_queue(bp->dev, txdata->txq_index));
 		}
 	}
 }
@@ -2771,6 +2785,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 				       frag->page_offset, frag->size,
 				       DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
+			unsigned int pkts_compl = 0, bytes_compl = 0;
 
 			DP(NETIF_MSG_TX_QUEUED, "Unable to map page - "
 						"dropping packet...\n");
@@ -2782,7 +2797,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			 */
 			first_bd->nbd = cpu_to_le16(nbd);
 			bnx2x_free_tx_pkt(bp, txdata,
-					  TX_BD(txdata->tx_pkt_prod));
+					  TX_BD(txdata->tx_pkt_prod),
+					  &pkts_compl, &bytes_compl);
 			return NETDEV_TX_OK;
 		}
 
@@ -2843,6 +2859,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		   pbd_e2->parsing_data);
 	DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d  bd %u\n", nbd, bd_prod);
 
+	netdev_tx_sent_queue(txq, 1, skb->len);
+
 	txdata->tx_pkt_prod++;
 	/*
 	 * Make sure that the BD data is updated before updating the producer
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 7/9] tg3: Support for byte queue limits
From: Tom Herbert @ 2011-08-08  4:53 UTC (permalink / raw)
  To: davem, netdev

Changes to tg3 to use byte queue limits.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 drivers/net/tg3.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index dc3fbf6..ad06c40 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -4818,6 +4818,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
 	u32 sw_idx = tnapi->tx_cons;
 	struct netdev_queue *txq;
 	int index = tnapi - tp->napi;
+	unsigned int pkts_compl = 0, bytes_compl = 0;
 
 	if (tg3_flag(tp, ENABLE_TSS))
 		index--;
@@ -4868,6 +4869,9 @@ static void tg3_tx(struct tg3_napi *tnapi)
 			sw_idx = NEXT_TX(sw_idx);
 		}
 
+		pkts_compl++;
+		bytes_compl += skb->len;
+
 		dev_kfree_skb(skb);
 
 		if (unlikely(tx_bug)) {
@@ -4876,6 +4880,8 @@ static void tg3_tx(struct tg3_napi *tnapi)
 		}
 	}
 
+	netdev_completed_queue(tp->dev, pkts_compl, bytes_compl);
+
 	tnapi->tx_cons = sw_idx;
 
 	/* Need to make the tx_cons update visible to tg3_start_xmit()
@@ -6313,6 +6319,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	skb_tx_timestamp(skb);
+	netdev_sent_queue(tp->dev, 1, skb->len);
 
 	/* Packets are ready, update Tx producer idx local and on card. */
 	tw32_tx_mbox(tnapi->prodmbox, entry);
@@ -6680,6 +6687,7 @@ static void tg3_free_rings(struct tg3 *tp)
 
 			dev_kfree_skb_any(skb);
 		}
+		netdev_reset_queue(tp->dev);
 	}
 }
 
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 6/9] forcedeth: Support for byte queue limits
From: Tom Herbert @ 2011-08-08  4:51 UTC (permalink / raw)
  To: davem, netdev

Changes to forcedeth to use byte queue limits.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 drivers/net/forcedeth.c |   18 ++++++++++++++++++
 1 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index e55df30..fcd664a 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1924,6 +1924,7 @@ static void nv_drain_tx(struct net_device *dev)
 		np->tx_skb[i].first_tx_desc = NULL;
 		np->tx_skb[i].next_tx_ctx = NULL;
 	}
+	netdev_reset_queue(np->dev);
 	np->tx_pkts_in_progress = 0;
 	np->tx_change_owner = NULL;
 	np->tx_end_flip = NULL;
@@ -2178,6 +2179,9 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* set tx flags */
 	start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+
+	netdev_sent_queue(np->dev, 1, skb->len);
+
 	np->put_tx.orig = put_tx;
 
 	spin_unlock_irqrestore(&np->lock, flags);
@@ -2317,6 +2321,9 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 
 	/* set tx flags */
 	start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+
+	netdev_sent_queue(np->dev, 1, skb->len);
+
 	np->put_tx.ex = put_tx;
 
 	spin_unlock_irqrestore(&np->lock, flags);
@@ -2354,6 +2361,7 @@ static int nv_tx_done(struct net_device *dev, int limit)
 	u32 flags;
 	int tx_work = 0;
 	struct ring_desc *orig_get_tx = np->get_tx.orig;
+	unsigned int bytes_compl = 0;
 
 	while ((np->get_tx.orig != np->put_tx.orig) &&
 	       !((flags = le32_to_cpu(np->get_tx.orig->flaglen)) & NV_TX_VALID) &&
@@ -2375,6 +2383,7 @@ static int nv_tx_done(struct net_device *dev, int limit)
 					dev->stats.tx_packets++;
 					dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
 				}
+				bytes_compl += np->get_tx_ctx->skb->len;
 				dev_kfree_skb_any(np->get_tx_ctx->skb);
 				np->get_tx_ctx->skb = NULL;
 				tx_work++;
@@ -2393,6 +2402,7 @@ static int nv_tx_done(struct net_device *dev, int limit)
 					dev->stats.tx_packets++;
 					dev->stats.tx_bytes += np->get_tx_ctx->skb->len;
 				}
+				bytes_compl += np->get_tx_ctx->skb->len;
 				dev_kfree_skb_any(np->get_tx_ctx->skb);
 				np->get_tx_ctx->skb = NULL;
 				tx_work++;
@@ -2403,6 +2413,9 @@ static int nv_tx_done(struct net_device *dev, int limit)
 		if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
 			np->get_tx_ctx = np->first_tx_ctx;
 	}
+
+	netdev_completed_queue(np->dev, tx_work, bytes_compl);
+
 	if (unlikely((np->tx_stop == 1) && (np->get_tx.orig != orig_get_tx))) {
 		np->tx_stop = 0;
 		netif_wake_queue(dev);
@@ -2416,6 +2429,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
 	u32 flags;
 	int tx_work = 0;
 	struct ring_desc_ex *orig_get_tx = np->get_tx.ex;
+	unsigned long bytes_cleaned = 0;
 
 	while ((np->get_tx.ex != np->put_tx.ex) &&
 	       !((flags = le32_to_cpu(np->get_tx.ex->flaglen)) & NV_TX2_VALID) &&
@@ -2435,6 +2449,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
 				}
 			}
 
+			bytes_cleaned += np->get_tx_ctx->skb->len;
 			dev_kfree_skb_any(np->get_tx_ctx->skb);
 			np->get_tx_ctx->skb = NULL;
 			tx_work++;
@@ -2447,6 +2462,9 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
 		if (unlikely(np->get_tx_ctx++ == np->last_tx_ctx))
 			np->get_tx_ctx = np->first_tx_ctx;
 	}
+
+	netdev_completed_queue(np->dev, tx_work, bytes_cleaned);
+
 	if (unlikely((np->tx_stop == 1) && (np->get_tx.ex != orig_get_tx))) {
 		np->tx_stop = 0;
 		netif_wake_queue(dev);
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 5/9] e1000e: Support for byte queue limits
From: Tom Herbert @ 2011-08-08  4:49 UTC (permalink / raw)
  To: davem, netdev

Changes to e1000e to use byte queue limits.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 drivers/net/e1000e/netdev.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 4353ad5..4ce114c 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -998,6 +998,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
 	unsigned int i, eop;
 	unsigned int count = 0;
 	unsigned int total_tx_bytes = 0, total_tx_packets = 0;
+	unsigned int bytes_compl = 0, pkts_compl = 0;
 
 	i = tx_ring->next_to_clean;
 	eop = tx_ring->buffer_info[i].next_to_watch;
@@ -1015,6 +1016,11 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
 			if (cleaned) {
 				total_tx_packets += buffer_info->segs;
 				total_tx_bytes += buffer_info->bytecount;
+				if (buffer_info->skb) {
+					bytes_compl += buffer_info->skb->len;
+					pkts_compl++;
+				}
+
 			}
 
 			e1000_put_txbuf(adapter, buffer_info);
@@ -1033,6 +1039,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
 
 	tx_ring->next_to_clean = i;
 
+	netdev_completed_queue(netdev, pkts_compl, bytes_compl);
+
 #define TX_WAKE_THRESHOLD 32
 	if (count && netif_carrier_ok(netdev) &&
 	    e1000_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD) {
@@ -2164,6 +2172,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter)
 		e1000_put_txbuf(adapter, buffer_info);
 	}
 
+	netdev_reset_queue(adapter->netdev);
 	size = sizeof(struct e1000_buffer) * tx_ring->count;
 	memset(tx_ring->buffer_info, 0, size);
 
@@ -4882,6 +4891,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 	/* if count is 0 then mapping error has occurred */
 	count = e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss);
 	if (count) {
+		netdev_sent_queue(netdev, 1, skb->len);
 		e1000_tx_queue(adapter, tx_flags, count);
 		/* Make sure there is space in the ring for the next send. */
 		e1000_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 2);
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 4/9] bql: Byte queue limits
From: Tom Herbert @ 2011-08-08  4:48 UTC (permalink / raw)
  To: davem, netdev

Networking stack support for byte queue limits, using the dynamic queue
limits library.  Byte queue limits are maintained per transmit queue,
and a dql structure has been added to the netdev_queue structure for
this purpose.

Configuration of bql is in the tx-<n> sysfs directory for the queue
under the byte_queue_limits directory.  Configuration includes:
limit_min, bql minimum limit
limit_max, bql maximum limit
hold_time, bql slack hold time

Also under the directory are:
limit, current byte limit
inflight, current number of bytes on the queue

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/netdevice.h |   16 +++
 net/core/dev.c            |    1 +
 net/core/net-sysfs.c      |  230 ++++++++++++++++++++++++++++++++++-----------
 3 files changed, 192 insertions(+), 55 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 74e8862..d49265b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,6 +43,7 @@
 #include <linux/rculist.h>
 #include <linux/dmaengine.h>
 #include <linux/workqueue.h>
+#include <linux/dynamic_queue_limits.h>
 
 #include <linux/ethtool.h>
 #include <net/net_namespace.h>
@@ -536,6 +537,7 @@ struct netdev_queue {
 #if defined(CONFIG_RPS) || defined(CONFIG_XPS)
 	struct kobject		kobj;
 #endif
+	struct dql		dql;
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
 	int			numa_node;
 #endif
@@ -1913,29 +1915,43 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu
 static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int pkts, unsigned int bytes)
 {
+	dql_queued(&dev_queue->dql, bytes);
+	if (dql_avail(&dev_queue->dql) < 0)
+		set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
 }
 
 static inline void netdev_sent_queue(struct net_device *dev,
 				     unsigned int pkts, unsigned int bytes)
 {
+	netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
 }
 
 static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
 					     unsigned pkts, unsigned bytes)
 {
+	if (bytes) {
+		dql_completed(&dev_queue->dql, bytes);
+		if (dql_avail(&dev_queue->dql) >= 0 &&
+		    test_and_clear_bit(__QUEUE_STATE_STACK_XOFF,
+		     &dev_queue->state))
+			netif_schedule_queue(dev_queue);
+	}
 }
 
 static inline void netdev_completed_queue(struct net_device *dev,
 					  unsigned pkts, unsigned bytes)
 {
+	netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
 }
 
 static inline void netdev_tx_reset_queue(struct netdev_queue *q)
 {
+	dql_reset(&q->dql);
 }
 
 static inline void netdev_reset_queue(struct net_device *dev_queue)
 {
+	netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0));
 }
 
 /**
diff --git a/net/core/dev.c b/net/core/dev.c
index a7f8c38..bd5cd15 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5395,6 +5395,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 	queue->xmit_lock_owner = -1;
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
+	dql_init(&queue->dql, 1000);
 }
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1683e5d..eca8684 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -20,6 +20,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
 #include <linux/vmalloc.h>
+#include <linux/jiffies.h>
 #include <net/wext.h>
 
 #include "net-sysfs.h"
@@ -779,7 +780,6 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 #endif
 }
 
-#ifdef CONFIG_XPS
 /*
  * netdev_queue sysfs structures and functions.
  */
@@ -839,7 +839,121 @@ static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 	return i;
 }
 
+static ssize_t bql_show(char *buf, unsigned long value)
+{
+	int p = 0;
+
+	p = sprintf(buf, "%lu\n", value);
+	return p;
+}
+
+static ssize_t bql_set(const char *buf, const size_t count,
+		       unsigned long *pvalue)
+{
+	unsigned long value;
+	int err;
+
+	if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+		value = DQL_MAX_LIMIT;
+	else {
+		err = kstrtoul(buf, 10, &value);
+		if (err < 0)
+			return err;
+		if (value > DQL_MAX_LIMIT)
+			return -EINVAL;
+	}
+
+	*pvalue = value;
+
+	return count;
+}
+
+static ssize_t bql_show_hold_time(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attr,
+				  char *buf)
+{
+	struct dql *dql = &queue->dql;
+	int p = 0;
+
+	p = sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
+
+	return p;
+}
+
+static ssize_t bql_set_hold_time(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attribute,
+				 const char *buf, size_t len)
+{
+	struct dql *dql = &queue->dql;
+	unsigned value;
+	int err;
+
+	err = kstrtouint(buf, 10, &value);
+	if (err < 0)
+		return err;
+
+	dql->slack_hold_time = msecs_to_jiffies(value);
+
+	return len;
+}
+
+static struct netdev_queue_attribute bql_hold_time_attribute =
+	__ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
+	    bql_set_hold_time);
+
+static ssize_t bql_show_inflight(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attr,
+				 char *buf)
+{
+	struct dql *dql = &queue->dql;
+	int p = 0;
+
+	p = sprintf(buf, "%lu\n", dql->num_queued - dql->num_completed);
+
+	return p;
+}
+
+static struct netdev_queue_attribute bql_inflight_attribute =
+	__ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL);
+
+#define BQL_ATTR(NAME, FIELD)						\
+static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
+				 struct netdev_queue_attribute *attr,	\
+				 char *buf)				\
+{									\
+	return bql_show(buf, queue->dql.FIELD);				\
+}									\
+									\
+static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
+				struct netdev_queue_attribute *attr,	\
+				const char *buf, size_t len)		\
+{									\
+	return bql_set(buf, len, &queue->dql.FIELD);			\
+}									\
+									\
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute =	\
+	__ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME,		\
+	    bql_set_ ## NAME);
+
+BQL_ATTR(limit, limit)
+BQL_ATTR(limit_max, max_limit)
+BQL_ATTR(limit_min, min_limit)
+
+static struct attribute *dql_attrs[] = {
+	&bql_limit_attribute.attr,
+	&bql_limit_max_attribute.attr,
+	&bql_limit_min_attribute.attr,
+	&bql_hold_time_attribute.attr,
+	&bql_inflight_attribute.attr,
+	NULL
+};
+
+static struct attribute_group dql_group = {
+	.name  = "byte_queue_limits",
+	.attrs  = dql_attrs,
+};
 
+#ifdef CONFIG_XPS
 static ssize_t show_xps_map(struct netdev_queue *queue,
 			    struct netdev_queue_attribute *attribute, char *buf)
 {
@@ -889,6 +1003,51 @@ static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
+static void xps_queue_release(struct netdev_queue *queue)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	unsigned long index;
+	int i, pos, nonempty = 0;
+
+	index = get_netdev_queue_index(queue);
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			map = xmap_dereference(dev_maps->cpu_map[i]);
+			if (!map)
+				continue;
+
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+
+			if (pos < map->len) {
+				if (map->len > 1)
+					map->queues[pos] =
+					    map->queues[--map->len];
+				else {
+					RCU_INIT_POINTER(dev_maps->cpu_map[i],
+					    NULL);
+					kfree_rcu(map, rcu);
+					map = NULL;
+				}
+			}
+			if (map)
+				nonempty = 1;
+		}
+
+		if (!nonempty) {
+			RCU_INIT_POINTER(dev->xps_maps, NULL);
+			kfree_rcu(dev_maps, rcu);
+		}
+	}
+}
+
 static ssize_t store_xps_map(struct netdev_queue *queue,
 		      struct netdev_queue_attribute *attribute,
 		      const char *buf, size_t len)
@@ -1024,53 +1183,13 @@ static struct attribute *netdev_queue_default_attrs[] = {
 	&xps_cpus_attribute.attr,
 	NULL
 };
+#endif
 
 static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
-	struct net_device *dev = queue->dev;
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
-	unsigned long index;
-	int i, pos, nonempty = 0;
-
-	index = get_netdev_queue_index(queue);
-
-	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
-
-	if (dev_maps) {
-		for_each_possible_cpu(i) {
-			map = xmap_dereference(dev_maps->cpu_map[i]);
-			if (!map)
-				continue;
 
-			for (pos = 0; pos < map->len; pos++)
-				if (map->queues[pos] == index)
-					break;
-
-			if (pos < map->len) {
-				if (map->len > 1)
-					map->queues[pos] =
-					    map->queues[--map->len];
-				else {
-					RCU_INIT_POINTER(dev_maps->cpu_map[i],
-					    NULL);
-					kfree_rcu(map, rcu);
-					map = NULL;
-				}
-			}
-			if (map)
-				nonempty = 1;
-		}
-
-		if (!nonempty) {
-			RCU_INIT_POINTER(dev->xps_maps, NULL);
-			kfree_rcu(dev_maps, rcu);
-		}
-	}
-
-	mutex_unlock(&xps_map_mutex);
+	xps_queue_release(queue);
 
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
@@ -1091,22 +1210,26 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
 	kobj->kset = net->queues_kset;
 	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
 	    "tx-%u", index);
+	if (error)
+		goto exit;
+
+	error = sysfs_create_group(kobj, &dql_group);
 	if (error) {
 		kobject_put(kobj);
-		return error;
+		goto exit;
 	}
 
 	kobject_uevent(kobj, KOBJ_ADD);
 	dev_hold(queue->dev);
 
+	return 0;
+exit:
 	return error;
 }
-#endif /* CONFIG_XPS */
 
 int
 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
-#ifdef CONFIG_XPS
 	int i;
 	int error = 0;
 
@@ -1118,25 +1241,24 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		}
 	}
 
-	while (--i >= new_num)
-		kobject_put(&net->_tx[i].kobj);
+	while (--i >= new_num) {
+		struct netdev_queue *queue = net->_tx + i;
+
+		sysfs_remove_group(&queue->kobj, &dql_group);
+		kobject_put(&queue->kobj);
+	}
 
 	return error;
-#else
-	return 0;
-#endif
 }
 
 static int register_queue_kobjects(struct net_device *net)
 {
 	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
 
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
 	net->queues_kset = kset_create_and_add("queues",
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
 		return -ENOMEM;
-#endif
 
 #ifdef CONFIG_RPS
 	real_rx = net->real_num_rx_queues;
@@ -1172,9 +1294,7 @@ static void remove_queue_kobjects(struct net_device *net)
 
 	net_rx_queue_update_kobjects(net, real_rx, 0);
 	netdev_queue_update_kobjects(net, real_tx, 0);
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
 	kset_unregister(net->queues_kset);
-#endif
 }
 
 static void *net_grab_current_ns(void)
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 3/9] net: Add netdev interfaces for recording sends and completions
From: Tom Herbert @ 2011-08-08  4:48 UTC (permalink / raw)
  To: davem, netdev

Add interfaces for drivers to call for recording the number of packets
and bytes at send time and at transmit completion.  Also add a function
to "reset" a queue.  These will be used by Byte Queue Limits.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/netdevice.h |   26 ++++++++++++++++++++++++++
 1 files changed, 26 insertions(+), 0 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4115b4d..74e8862 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1910,6 +1910,32 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu
 	return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
 }
 
+static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+					unsigned int pkts, unsigned int bytes)
+{
+}
+
+static inline void netdev_sent_queue(struct net_device *dev,
+				     unsigned int pkts, unsigned int bytes)
+{
+}
+
+static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
+					     unsigned pkts, unsigned bytes)
+{
+}
+
+static inline void netdev_completed_queue(struct net_device *dev,
+					  unsigned pkts, unsigned bytes)
+{
+}
+
+static inline void netdev_tx_reset_queue(struct netdev_queue *q)
+{
+}
+
+static inline void netdev_reset_queue(struct net_device *dev_queue)
+{
 }
 
 /**
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 2/9] net: Add queue state xoff flag for stack
From: Tom Herbert @ 2011-08-08  4:44 UTC (permalink / raw)
  To: davem, netdev

>From c3a8c0ace2322f9ccf78089936a504af9c9e0c7f Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Thu, 14 Jul 2011 22:08:27 -0700
Subject: [PATCH 2/9] net: Add queue state xoff flag for stack

Create separate queue state flags so that either the stack or drivers
can turn on XOFF.  Add a set of functions used in the stack to determine
whether a queue is really stopped (either by the stack or by the driver).

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/netdevice.h |   32 +++++++++++++++++++++-----------
 net/core/dev.c            |    4 ++--
 net/core/netpoll.c        |    4 ++--
 net/core/pktgen.c         |    2 +-
 net/sched/sch_generic.c   |    8 ++++----
 net/sched/sch_multiq.c    |    6 ++++--
 net/sched/sch_teql.c      |    6 +++---
 7 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ddee79b..4115b4d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -516,10 +516,13 @@ static inline void napi_synchronize(const struct napi_struct *n)
 #endif
 
 enum netdev_queue_state_t {
-	__QUEUE_STATE_XOFF,
+	__QUEUE_STATE_DRV_XOFF,
+	__QUEUE_STATE_STACK_XOFF,
 	__QUEUE_STATE_FROZEN,
-#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF)		| \
-				    (1 << __QUEUE_STATE_FROZEN))
+#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF)		| \
+			      (1 << __QUEUE_STATE_STACK_XOFF))
+#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF		| \
+					(1 << __QUEUE_STATE_FROZEN))
 };
 
 struct netdev_queue {
@@ -1778,7 +1781,7 @@ extern void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
-	if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
+	if (!(txq->state & QUEUE_STATE_ANY_XOFF))
 		__netif_schedule(txq->qdisc);
 }
 
@@ -1792,7 +1795,7 @@ static inline void netif_tx_schedule_all(struct net_device *dev)
 
 static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
 {
-	clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1824,7 +1827,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 		return;
 	}
 #endif
-	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state))
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
 		__netif_schedule(dev_queue->qdisc);
 }
 
@@ -1856,7 +1859,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
 		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
 		return;
 	}
-	set_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1883,7 +1886,7 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev)
 
 static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
 {
-	return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1897,9 +1900,16 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
-static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue)
+static inline int netif_xmit_stopped(const struct netdev_queue *dev_queue)
 {
-	return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN;
+	return dev_queue->state & QUEUE_STATE_ANY_XOFF;
+}
+
+static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue)
+{
+	return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
+}
+
 }
 
 /**
@@ -1986,7 +1996,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 	if (netpoll_trap())
 		return;
 #endif
-	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state))
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
 		__netif_schedule(txq->qdisc);
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 17d67b5..a7f8c38 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2190,7 +2190,7 @@ gso:
 			return rc;
 		}
 		txq_trans_update(txq);
-		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+		if (unlikely(netif_xmit_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
@@ -2464,7 +2464,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 			HARD_TX_LOCK(dev, txq, cpu);
 
-			if (!netif_tx_queue_stopped(txq)) {
+			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
 				rc = dev_hard_start_xmit(skb, dev, txq);
 				__this_cpu_dec(xmit_recursion);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index adf84dd..9c71328 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -75,7 +75,7 @@ static void queue_process(struct work_struct *work)
 
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
-		if (netif_tx_queue_frozen_or_stopped(txq) ||
+		if (netif_xmit_frozen_or_stopped(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
@@ -316,7 +316,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
-				if (!netif_tx_queue_stopped(txq)) {
+				if (!netif_xmit_stopped(txq)) {
 					status = ops->ndo_start_xmit(skb, dev);
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e35a6fb..5c481c5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3342,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	__netif_tx_lock_bh(txq);
 
-	if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
+	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
 		ret = NETDEV_TX_BUSY;
 		pkt_dev->last_ok = 0;
 		goto unlock;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca27..7c84f08 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_frozen_or_stopped(txq)) {
+		if (!netif_xmit_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
 		} else
@@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	spin_unlock(root_lock);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_tx_queue_frozen_or_stopped(txq))
+	if (!netif_xmit_frozen_or_stopped(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 
 	HARD_TX_UNLOCK(dev, txq);
@@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		ret = dev_requeue_skb(skb, q);
 	}
 
-	if (ret && netif_tx_queue_frozen_or_stopped(txq))
+	if (ret && netif_xmit_frozen_or_stopped(txq))
 		ret = 0;
 
 	return ret;
@@ -242,7 +242,7 @@ static void dev_watchdog(unsigned long arg)
 				 * old device drivers set dev->trans_start
 				 */
 				trans_start = txq->trans_start ? : dev->trans_start;
-				if (netif_tx_queue_stopped(txq) &&
+				if (netif_xmit_stopped(txq) &&
 				    time_after(jiffies, (trans_start +
 							 dev->watchdog_timeo))) {
 					some_queue_timedout = 1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index edc1950..49131d7 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
 			qdisc = q->queues[q->curband];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
@@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), curband))) {
 			qdisc = q->queues[curband];
 			skb = qdisc->ops->peek(qdisc);
 			if (skb)
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index a3b7120..283bfe3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -301,7 +301,7 @@ restart:
 
 		if (slave_txq->qdisc_sleeping != q)
 			continue;
-		if (__netif_subqueue_stopped(slave, subq) ||
+		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 		    !netif_running(slave)) {
 			busy = 1;
 			continue;
@@ -312,7 +312,7 @@ restart:
 			if (__netif_tx_trylock(slave_txq)) {
 				unsigned int length = qdisc_pkt_len(skb);
 
-				if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
+				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
@@ -324,7 +324,7 @@ restart:
 				}
 				__netif_tx_unlock(slave_txq);
 			}
-			if (netif_queue_stopped(dev))
+			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 				busy = 1;
 			break;
 		case 1:
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 1/9] dql: Dynamic queue limits
From: Tom Herbert @ 2011-08-08  4:43 UTC (permalink / raw)
  To: davem, netdev

Implementation of dynamic queue limits (dql).  This is a library which
allows a queue limit to be dynamically managed.  The goal of dql is
to minimize the queue limit (the number of objects allowed in the queue)
without allowing the queue to be starved.

dql would be used with a queue whose use has these properties:

1) Objects are queued up to some limit which can be expressed as a
   count of objects.
2) Periodically a completion process executes which retires consumed
   objects.
3) Starvation occurs when limit has been reached, all queued data has
   actually been consumed but completion processing has not yet run,
   so queuing new data is blocked.
4) Minimizing the amount of queued data is desirable.

A canonical example of such a queue would be a NIC HW transmit queue.

The queue limit is dynamic, it will increase or decrease over time
depending on the workload.  The queue limit is recalculated each time
completion processing is done.  Increases occur when the queue is
starved and can exponentially increase over successive intervals.
Decreases occur when more data is being maintained in the queue than
needed to prevent starvation.  The number of extra objects, or "slack",
is measured over successive intervals, and to avoid hysteresis the
limit is only reduced by the minimum slack seen over a configurable
time period.

dql API provides routines to manage the queue:
- dql_init is called to initialize the dql structure
- dql_reset is called to reset dynamic structures
- dql_queued is called when objects are being enqueued
- dql_avail returns availability in the queue
- dql_completed is called when objects have been consumed in the queue

Configuration consists of:
- max_limit, maximum limit
- min_limit, minimum limit
- slack_hold_time, time to measure instances of slack before reducing
  queue limit.

Signed-off-by: Tom Herbert <therbert@google.com>
---
 include/linux/dynamic_queue_limits.h |   80 ++++++++++++++++++++
 lib/Makefile                         |    2 +-
 lib/dynamic_queue_limits.c           |  132 ++++++++++++++++++++++++++++++++++
 3 files changed, 213 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/dynamic_queue_limits.h
 create mode 100644 lib/dynamic_queue_limits.c

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
new file mode 100644
index 0000000..3ffc591
--- /dev/null
+++ b/include/linux/dynamic_queue_limits.h
@@ -0,0 +1,80 @@
+/*
+ * Dynamic queue limits (dql) - Definitions
+ *
+ * Author: Tom Herbert (therbert@google.com)
+ *
+ * This header file contains the definitions for dynamic queue limits (dql).
+ * dql would be used in conjunction with a producer/consumer type queue
+ * (possibly a HW queue).  Such a queue would have these general properties:
+ *
+ *   1) Objects are queued up to some limit.
+ *   2) Periodically a completion process executes which retires consumed
+ *      objects.
+ *   3) Starvation occurs when limit has been reached, all queued data has
+ *      actually been consumed but completion processing has not yet run
+ *      so queuing new data is blocked.
+ *   4) Minimizing the amount of queued data is desirable.
+ *
+ * The goal of dql is to calculate the limit as the minimum number of objects
+ * needed to prevent starvation.
+ *
+ * The dql implementation does not implement any locking for the dql data
+ * structures, the higher layer should provide this.
+ */
+
+#ifndef _LINUX_DQL_H
+#define _LINUX_DQL_H
+
+#ifdef __KERNEL__
+
+struct dql {
+	unsigned long	limit;			/* Current limit */
+	unsigned long	prev_ovlimit;		/* Previous over limit */
+
+	unsigned long	num_queued;		/* Total ever queued */
+	unsigned long	prev_num_queued;	/* Previous queue total */
+	unsigned long	num_completed;		/* Total ever completed */
+
+	unsigned long	last_obj_cnt;		/* Count at last queuing */
+	unsigned long	prev_last_obj_cnt;	/* Previous queuing cnt */
+
+	unsigned long	lowest_slack;		/* Lowest slack found */
+	unsigned long	slack_start_time;	/* Time slacks seen */
+
+	unsigned long	max_limit;		/* Maximum limit */
+	unsigned long	min_limit;		/* Minimum limit */
+	unsigned	slack_hold_time;	/* Time to measure slack */
+};
+
+/* Set some static maximums */
+#define	DQL_MAX_OBJECT (-1UL / 16)
+#define	DQL_MAX_LIMIT ((-1UL / 2) - DQL_MAX_OBJECT)
+
+/* Record number of objects queued. */
+static inline void dql_queued(struct dql *dql, unsigned long count)
+{
+	BUG_ON(count > DQL_MAX_OBJECT);
+	BUG_ON(dql->num_queued - dql->num_completed > DQL_MAX_LIMIT);
+
+	dql->num_queued += count;
+	dql->last_obj_cnt = count;
+}
+
+/* Returns how many objects can be queued, < 0 indicates over limit.  */
+static inline long dql_avail(struct dql *dql)
+{
+	return dql->limit - (dql->num_queued - dql->num_completed);
+}
+
+/* Record number of completed objects and recalculate the limit. */
+extern void dql_completed(struct dql *dql, unsigned long count);
+
+/* Reset dql state */
+extern void dql_reset(struct dql *dql);
+
+/* Initialize dql state */
+extern int dql_init(struct dql *dql, unsigned hold_time);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DQL_H */
diff --git a/lib/Makefile b/lib/Makefile
index 892f4e2..c008661 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@ lib-y	+= kobject.o kref.o klist.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
-	 bsearch.o find_last_bit.o
+	 bsearch.o find_last_bit.o dynamic_queue_limits.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
new file mode 100644
index 0000000..6a1f5b9
--- /dev/null
+++ b/lib/dynamic_queue_limits.c
@@ -0,0 +1,132 @@
+/*
+ * Dynamic byte queue limits.  See include/linux/dynamic_queue_limits.h
+ *
+ * Author: Tom Herbert (therbert@google.com)
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/dynamic_queue_limits.h>
+
+#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
+
+/* Records completed count and recalculates the queue limit */
+void dql_completed(struct dql *dql, unsigned long count)
+{
+	unsigned long inprogress, prev_inprogress, limit;
+	unsigned long ovlimit, all_prev_completed, completed;
+
+	/* Can't complete more than what's in queue */
+	BUG_ON(count > dql->num_queued - dql->num_completed);
+
+	completed = dql->num_completed + count;
+	limit = dql->limit;
+	ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
+	inprogress = dql->num_queued - completed;
+	prev_inprogress = dql->prev_num_queued - dql->num_completed;
+	all_prev_completed = POSDIFF(completed, dql->prev_num_queued);
+
+	if ((ovlimit && !inprogress) ||
+	    (dql->prev_ovlimit && all_prev_completed)) {
+		/*
+		 * Queue considered starved if:
+		 *   - The queue was over-limit in the last interval,
+		 *     and there is no more data in the queue.
+		 *  OR
+		 *   - The queue was over-limit in the previous interval and
+		 *     when enqueuing it was possible that all queued data
+		 *     had been consumed.  This covers the case when queue
+		 *     may have become starved between completion processing
+		 *     running and next time enqueue was scheduled.
+		 *
+		 *     When queue is starved increase the limit by the amount
+		 *     of bytes both sent and completed in the last interval,
+		 *     plus any previous over-limit.
+		 */
+		limit += POSDIFF(completed, dql->prev_num_queued) +
+		     dql->prev_ovlimit;
+		dql->slack_start_time = jiffies;
+		dql->lowest_slack = -1UL;
+	} else if (inprogress && prev_inprogress && !all_prev_completed) {
+		/*
+		 * Queue was not starved, check if the limit can be decreased.
+		 * A decrease is only considered if the queue has been busy in
+		 * the whole interval (the check above).
+		 *
+		 * If there is slack, the amount of excess data queued above
+		 * the amount needed to prevent starvation, the queue limit can
+		 * be decreased.  To avoid hysteresis we consider the
+		 * minimum amount of slack found over several iterations of the
+		 * completion routine.
+		 */
+		unsigned long slack, slack_last_objs;
+
+		/*
+		 * Slack is the maximum of
+		 *   - The queue limit plus previous over-limit minus twice
+		 *     the number of objects completed.  Note that two times
+		 *     number of completed bytes is basis for upper bound
+		 *     of the limit.
+		 *   - Portion of objects in the last queuing operation that
+		 *     was not part of non-zero previous over-limit.  That is
+		 *     "round down" by non-overlimit portion of the last
+		 *     queueing operation.
+		 */
+		slack = POSDIFF(limit + dql->prev_ovlimit,
+		    2 * (completed - dql->num_completed));
+		slack_last_objs = dql->prev_ovlimit ?
+		    POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
+
+		slack = max(slack, slack_last_objs);
+
+		if (slack < dql->lowest_slack)
+			dql->lowest_slack = slack;
+
+		if (time_after(jiffies,
+			       dql->slack_start_time + dql->slack_hold_time)) {
+			limit = POSDIFF(limit, dql->lowest_slack);
+			dql->slack_start_time = jiffies;
+			dql->lowest_slack = -1UL;
+		}
+	}
+
+	/* Enforce bounds on limit */
+	limit = clamp(limit, dql->min_limit, dql->max_limit);
+
+	if (limit != dql->limit) {
+		dql->limit = limit;
+		ovlimit = 0;
+	}
+
+	dql->prev_ovlimit = ovlimit;
+	dql->prev_last_obj_cnt = dql->last_obj_cnt;
+	dql->num_completed = completed;
+	dql->prev_num_queued = dql->num_queued;
+}
+EXPORT_SYMBOL(dql_completed);
+
+void dql_reset(struct dql *dql)
+{
+	/* Reset all dynamic values */
+	dql->limit = 0;
+	dql->num_queued = 0;
+	dql->num_completed = 0;
+	dql->last_obj_cnt = 0;
+	dql->prev_num_queued = 0;
+	dql->prev_last_obj_cnt = 0;
+	dql->prev_ovlimit = 0;
+	dql->lowest_slack = -1UL;
+	dql->slack_start_time = jiffies;
+}
+EXPORT_SYMBOL(dql_reset);
+
+int dql_init(struct dql *dql, unsigned hold_time)
+{
+	dql->max_limit = DQL_MAX_LIMIT;
+	dql->min_limit = 0;
+	dql->slack_hold_time = hold_time;
+	dql_reset(dql);
+	return 0;
+}
+EXPORT_SYMBOL(dql_init);
-- 
1.7.3.1


^ permalink raw reply related

* [RFC PATCH v2 0/9] bql: Byte Queue Limits
From: Tom Herbert @ 2011-08-08  4:43 UTC (permalink / raw)
  To: davem, netdev

Changes from last version:
- Simplified and generalized driver interface.  Drivers need to
  implement two functions:
    netdev_tx_completed_queue: Called at end of transmit completion
      to inform stack of number of bytes and packets processed.
    netdev_tx_sent_queue: Called to inform stack when packets are
      queued.

    netdev_tx_reset_queue: is optional to reset state in the stack

- Added new per queue flags that allow stack to stop a queue
  separately from driver doing this.  Drivers continue using the
  same functions to stop queues, but there are two functions that
  the stack calls to check if a queue has been stopped by the driver
  or the stack:

  netif_xmit_stopped,netif_xmit_frozen_or_stopped

- Added example support for bnx2x and sfc (demonstrates operation over
  multi-queue)

- Removed BQL being under CONFIG_RPS (didn't add CONFIG_BQL)

- Still needs some more testing, including showing benefits to high
  priority packets in QoS.
----

This patch series implements byte queue limits (bql) for NIC TX queues.

Byte queue limits are a mechanism to limit the size of the transmit
hardware queue on a NIC by number of bytes. The goal of these byte
limits is to reduce latency caused by excessive queuing in hardware
without sacrificing throughput.

Hardware queuing limits are typically specified in terms of a number
of hardware descriptors, each of which has a variable size. The variability
of the size of individual queued items can have a very wide range. For
instance with the e1000 NIC the size could range from 64 bytes to 4K
(with TSO enabled). This variability makes it next to impossible to
choose a single queue limit that prevents starvation and provides lowest
possible latency.

The objective of byte queue limits is to set the limit to be the
minimum needed to prevent starvation between successive transmissions to
the hardware. The latency between two transmissions can be variable in a
system. It is dependent on interrupt frequency, NAPI polling latencies,
scheduling of the queuing discipline, lock contention, etc. Therefore we
propose that byte queue limits should be dynamic and change in
accordance with networking stack latencies a system encounters.

Patches to implement this:
Patch 1: Dynamic queue limits (dql) library.  This provides the general
queuing algorithm.
Patch 2: netdev changes that use dql to support byte queue limits.
Patch 3: Support in forcedeth driver for byte queue limits.

The effects of BQL are demonstrated in the benchmark results below.
These were made running 200 streams of netperf RR tests:

140000 rr size
BQL: 80-215K bytes in queue, 856 tps, 3.26%
No BQL: 2700-2930K bytes in queue, 854 tps, 3.71% cpu

14000 rr size
BQL: 25-55K bytes in queue, 8500 tps
No BQL: 1500-1622K bytes in queue,  8523 tps, 4.53% cpu

1400 rr size
BQL: 20-38K bytes in queue, 86582 tps,  7.38% cpu
No BQL: 29-117K 85738 tps, 7.67% cpu

140 rr size
BQL: 1-10K bytes in queue, 320540 tps, 34.6% cpu
No BQL: 1-13K bytes in queue, 323158, 37.16% cpu

1 rr size
BQL: 0-3K in queue, 338811 tps, 41.41% cpu
No BQL: 0-3K in queue, 339947 42.36% cpu

The amount of queuing in the NIC is reduced up to 90%, and I haven't
yet seen a consistent negative impact in terms of throughput or
CPU utilization.

^ permalink raw reply

* Re: [Bug 40542] overflow/panic on KVM hipervizor
From: Brad Campbell @ 2011-08-08  1:40 UTC (permalink / raw)
  To: Avi Kivity; +Cc: bugzilla-daemon, kvm, slawek, netdev
In-Reply-To: <4E3EAA84.7040708@redhat.com>

On 07/08/11 23:08, Avi Kivity wrote:
> On 08/07/2011 04:39 PM, Brad Campbell wrote:
>>
>> This looks like the bug I've been fighting with on and off.
>
> What's the bugzilla number for that?
>
> (unfortunately, no great insight except for "CLOSED DUPLICATE")
>
> hopefully someone from netdev can take a look, DNAT is seriously broken.
>
I can reproduce it at will, but it's on a live production machine. I've just ordered a second 
machine which I can use to reproduce and test against. From a bisection standpoint I'm about half 
way between 2.6.35 & 2.6.36, but until the second machine arrives I'm just unable to chase it any 
further.

Brad

^ permalink raw reply

* Re: include/linux/netlink.h: problem when included by an application
From: Michel Machado @ 2011-08-07 22:14 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev
In-Reply-To: <1312679707.2591.987.camel@deadeye>

> >    The simplest solution that I came up was replacing sa_family_t in
> > include/linux/netlink.h to 'unsigned short' as header
> > include/linux/socket.h does for struct __kernel_sockaddr_storage
> > available to applications.
> 
> Maybe we should do something like this in <linux/socket.h>:
> 
> typedef unsigned short __kernel_sa_family_t;
> #ifdef __KERNEL__
> typedef __kernel_sa_family_t sa_family_t;
> #endif
> 
> and then use __kernel_sa_family_t in <linux/netlink.h>.
> 
> Ben.

   I like this solution, it solves both struct __kernel_sockaddr_storage
in include/linux/socket.h, and struct sockaddr_nl in
include/linux/netlink.h.

[ ]'s
Michel Machado


^ permalink raw reply

* [PATCH] ipv4: use dst with ref during bcast/mcast loopback
From: Julian Anastasov @ 2011-08-07 20:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


	Make sure skb dst has reference when moving to
another context. Currently, I don't see protocols that can
hit it when sending broadcasts/multicasts to loopback using
noref dsts, so it is just a precaution.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---

	Please, review and apply if needed...

diff -urp v3.0/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c
--- v3.0/linux/net/ipv4/ip_output.c	2011-07-22 09:43:32.000000000 +0300
+++ linux/net/ipv4/ip_output.c	2011-08-07 22:21:23.909347184 +0300
@@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct s
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	WARN_ON(!skb_dst(newskb));
+	skb_dst_force(newskb);
 	netif_rx_ni(newskb);
 	return 0;
 }

^ permalink raw reply

* 802.3ad bonding brain damaged?
From: Phillip Susi @ 2011-08-07 19:52 UTC (permalink / raw)
  To: netdev

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

- From Documentation/networking/bonding.txt:

	Additionally, the linux bonding 802.3ad implementation
	distributes traffic by peer (using an XOR of MAC addresses),

This is counter to the entire point of 802.3ad.  Distributing traffic by
hash of the destination address is poor mans load balancing for systems
not supporting 802.3ad.  When in 802.3ad mode, packets are supposed to
be queued to whichever interface has the shortest tx length so a single
stream to a single host can be balanced across all links instead of
being restricted to one, while the other is idle.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iEYEARECAAYFAk4+7PMACgkQJ4UciIs+XuKJtwCgrubCy9NgiS3HppxpRRtx4W7l
aFkAnR1uLW+4aM/TOSQgYZVsf/4yXGvE
=Yetx
-----END PGP SIGNATURE-----

^ permalink raw reply

* [PATCH] ipv4: route non-local sources for raw socket
From: Julian Anastasov @ 2011-08-07 19:16 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


	The raw sockets can provide source address for
routing but their privileges are not considered. We
can provide non-local source address, make sure the
FLOWI_FLAG_ANYSRC flag is set if socket has privileges
for this, i.e. based on hdrincl (IP_HDRINCL) and
transparent flags.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---

diff -urp v3.0/linux/include/net/inet_sock.h linux/include/net/inet_sock.h
--- v3.0/linux/include/net/inet_sock.h	2011-07-22 09:43:31.000000000 +0300
+++ linux/include/net/inet_sock.h	2011-08-07 19:09:37.365347358 +0300
@@ -238,7 +238,7 @@ static inline __u8 inet_sk_flowi_flags(c
 {
 	__u8 flags = 0;
 
-	if (inet_sk(sk)->transparent)
+	if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
 		flags |= FLOWI_FLAG_ANYSRC;
 	if (sk->sk_protocol == IPPROTO_TCP)
 		flags |= FLOWI_FLAG_PRECOW_METRICS;
diff -urp v3.0/linux/net/ipv4/raw.c linux/net/ipv4/raw.c
--- v3.0/linux/net/ipv4/raw.c	2011-07-22 09:43:33.000000000 +0300
+++ linux/net/ipv4/raw.c	2011-08-06 20:38:48.493063515 +0300
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *ioc
 	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
 			   RT_SCOPE_UNIVERSE,
 			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
-			   FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
+			   inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
+			   daddr, saddr, 0, 0);
 
 	if (!inet->hdrincl) {
 		err = raw_probe_proto_opt(&fl4, msg);

^ permalink raw reply

* [PATCH] netfilter: TCP and raw fix for ip_route_me_harder
From: Julian Anastasov @ 2011-08-07 19:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, netfilter-devel


	TCP in some cases uses different global (raw) socket
to send RST and ACK. The transparent flag is not set there.
Currently, it is a problem for rerouting after the previous
change.

	Fix it by simplifying the checks in ip_route_me_harder
and use FLOWI_FLAG_ANYSRC even for sockets. It looks safe
because the initial routing allowed this source address to
be used and now we just have to make sure the packet is rerouted.

	As a side effect this also allows rerouting for normal
raw sockets that use spoofed source addresses which was not possible
even before we eliminated the ip_route_input call.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---

diff -urp v3.0/linux/net/ipv4/netfilter.c linux/net/ipv4/netfilter.c
--- v3.0/linux/net/ipv4/netfilter.c	2011-07-22 09:43:32.862081622 +0300
+++ linux/net/ipv4/netfilter.c	2011-08-07 19:22:05.772347388 +0300
@@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *s
 	struct rtable *rt;
 	struct flowi4 fl4 = {};
 	__be32 saddr = iph->saddr;
-	__u8 flags = 0;
+	__u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
 	unsigned int hh_len;
 
-	if (!skb->sk && addr_type != RTN_LOCAL) {
-		if (addr_type == RTN_UNSPEC)
-			addr_type = inet_addr_type(net, saddr);
-		if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
-			flags |= FLOWI_FLAG_ANYSRC;
-		else
-			saddr = 0;
-	}
+	if (addr_type == RTN_UNSPEC)
+		addr_type = inet_addr_type(net, saddr);
+	if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
+		flags |= FLOWI_FLAG_ANYSRC;
+	else
+		saddr = 0;
 
 	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
 	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
@@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *s
 	fl4.flowi4_tos = RT_TOS(iph->tos);
 	fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
 	fl4.flowi4_mark = skb->mark;
-	fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags;
+	fl4.flowi4_flags = flags;
 	rt = ip_route_output_key(net, &fl4);
 	if (IS_ERR(rt))
 		return -1;

^ permalink raw reply

* Re: [Bug 40542] overflow/panic on KVM hipervizor
From: Avi Kivity @ 2011-08-07 15:08 UTC (permalink / raw)
  To: Brad Campbell; +Cc: bugzilla-daemon, kvm, slawek, netdev
In-Reply-To: <4E3E9596.7090908@fnarfbargle.com>

On 08/07/2011 04:39 PM, Brad Campbell wrote:
>
> This looks like the bug I've been fighting with on and off.

What's the bugzilla number for that?

(unfortunately, no great insight except for "CLOSED DUPLICATE")

hopefully someone from netdev can take a look, DNAT is seriously broken.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply

* ping6: echo reply ok in tcpdump, but not captured in ping (Depending on address type)
From: Jakub Muszynski @ 2011-08-07 14:13 UTC (permalink / raw)
  To: netdev

Hello, 

I have a problem:
I'm writing my own IPv6 stack, and I faced an interesting issue (probably
my mistake).
Whether I receive ping6 output depends on the address I probe (ff02:x or
fe80:x). The echo reply is the same in both cases.

I do ping:
$ ping6 -I eth0 fe80::21e:33ff:fe0b:872e 
PING fe80::21e:33ff:fe0b:872e(fe80::21e:33ff:fe0b:872e) from
fe80::21e:33ff:fe0b:872f eth0: 56 data bytes
^C
--- fe80::21e:33ff:fe0b:872e ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 999ms

On my device, the ping is answered and the reply is sent back.
I can capture it via tcpdump, but can't register in ping6:

tcpdump:
00:1e:33:0b:87:2f > 00:1e:33:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 64, next-header ICMPv6 (58) payload length: 64)
fe80::21e:33ff:fe0b:872f > fe80::21e:33ff:fe0b:872e: [icmp6 sum ok]
ICMP6, echo request, length 64, seq 2
	0x0000:  6000 0000 0040 3a40 fe80 0000 0000 0000
	0x0010:  021e 33ff fe0b 872f fe80 0000 0000 0000
	0x0020:  021e 33ff fe0b 872e 8000 b3da 0c69 0002
	0x0030:  365b 3e4e e4e0 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637
00:1e:33:0b:87:2f > 00:1e:33:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 255, next-header ICMPv6 (58) payload length: 64)
fe80::21e:33ff:fe0b:872e > fe80::21e:33ff:fe0b:872f: [icmp6 sum ok]
ICMP6, echo reply, length 64, seq 2
	0x0000:  6000 0000 0040 3aff fe80 0000 0000 0000
	0x0010:  021e 33ff fe0b 872e fe80 0000 0000 0000
	0x0020:  021e 33ff fe0b 872f 8100 b2da 0c69 0002
	0x0030:  365b 3e4e e4e0 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637


>>> Curious case: 
If I ping an ff02:xxxx address, it works:

ping6 -I eth0 ff02::21e:33ff:fe0b:872e 
PING ff02::21e:33ff:fe0b:872e(ff02::21e:33ff:fe0b:872e) from
fe80::21e:33ff:fe0b:872f eth0: 56 data bytes
64 bytes from ff02::21e:33ff:fe0b:872e: icmp_seq=1 ttl=255 time=125 ms
64 bytes from ff02::21e:33ff:fe0b:872e: icmp_seq=2 ttl=255 time=125 ms
^C
--- ff02::21e:33ff:fe0b:872e ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1001ms
rtt min/avg/max/mdev = 125.374/125.423/125.473/0.357 ms


tcpdump:
00:1e:33:0b:87:2f > 33:33:fe:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 1, next-header ICMPv6 (58) payload length: 64)
fe80::21e:33ff:fe0b:872f > ff02::21e:33ff:fe0b:872e: [icmp6 sum ok]
ICMP6, echo request, length 64, seq 2
	0x0000:  6000 0000 0040 3a01 fe80 0000 0000 0000
	0x0010:  021e 33ff fe0b 872f ff02 0000 0000 0000
	0x0020:  021e 33ff fe0b 872e 8000 b262 0cb4 0002
	0x0030:  bb5c 3e4e 608a 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637
00:1e:33:0b:87:2f > 33:33:fe:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 255, next-header ICMPv6 (58) payload length: 64)
ff02::21e:33ff:fe0b:872e > fe80::21e:33ff:fe0b:872f: [icmp6 sum ok]
ICMP6, echo reply, length 64, seq 2
	0x0000:  6000 0000 0040 3aff ff02 0000 0000 0000
	0x0010:  021e 33ff fe0b 872e fe80 0000 0000 0000
	0x0020:  021e 33ff fe0b 872f 8100 b162 0cb4 0002
	0x0030:  bb5c 3e4e 608a 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637


Well - all seems quite similar.
I did try to understand strace ping6, it says:
recvmsg(3, 0xbfc64358, 0) = -1 EAGAIN (Resource temporarily unavailable)
But why?
Some specification (RFC) issues?
Do you have any idea why the local-multicast address works and the other does not?

Greetings
Kuba

-------------------------------
the tcpdump command:
tcpdump -t -n -i eth0 -e -x -vv

Interface settings:
ip -6 a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qlen 1000
    inet6 fe80::21e:33ff:fe0b:872f/64 scope link 
       valid_lft forever preferred_lft forever

---------------------------

strace :

NOT WORKING: (fe80:xx)

recvmsg(3, 0xbfc64358, 0)               = -1 EAGAIN (Resource
temporarily unavailable)
gettimeofday({1312710414, 968503}, NULL) = 0
poll([{fd=3, events=POLLIN|POLLERR}], 1, 10) = 0 (Timeout)
gettimeofday({1312710414, 978868}, NULL) = 0
gettimeofday({1312710414, 978996}, NULL) = 0
sendmsg(3, {msg_name(28)={sa_family=AF_INET6, sin6_port=htons(58),
inet_pton(AF_INET6, "fe80::2aa:ff:fe28:9c5a", &sin6_addr),
sin6_flowinfo=0, sin6_scope_id=0}, msg_iov(1)=[{"\200\0\0\0\f\365\0\7
\16_>N4\360\16\0\10\t\n\v\f\r\16\17\20\21\22\23\24\25\26\27"..., 64}],
msg_controllen=32, {cmsg_len=32, cmsg_level=SOL_IPV6, cmsg_type=, ...},
msg_flags=MSG_OOB}, 0) = 64
recvmsg(3, 0xbfc64358, 0)               = -1 EAGAIN (Resource
temporarily unavailable)
gettimeofday({1312710415, 976589}, NULL) = 0
poll([{fd=3, events=POLLIN|POLLERR}], 1, 10) = 0 (Timeout)



WORKING: (ff02:xx)

poll([{fd=3, events=POLLIN|POLLERR}], 1, 870) = 0 (Timeout)
gettimeofday({1312710444, 919737}, NULL) = 0
gettimeofday({1312710444, 919854}, NULL) = 0
sendmsg(3, {msg_name(28)={sa_family=AF_INET6, sin6_port=htons(58),
inet_pton(AF_INET6, "ff02::2aa:ff:fe28:9c5a", &sin6_addr),
sin6_flowinfo=0, sin6_scope_id=0}, msg_iov(1)=[{"\200\0\0\0\f\376\0
\2,_>N.\t\16\0\10\t\n\v\f\r\16\17\20\21\22\23\24\25\26\27"..., 64}],
msg_controllen=32, {cmsg_len=32, cmsg_level=SOL_IPV6, cmsg_type=, ...},
msg_flags=MSG_OOB}, MSG_CONFIRM) = 64
recvmsg(3, {msg_name(28)={sa_family=AF_INET6, sin6_port=htons(0),
inet_pton(AF_INET6, "ff02::2aa:ff:fe28:9c5a", &sin6_addr),
sin6_flowinfo=0, sin6_scope_id=if_nametoindex("eth0")},
msg_iov(1)=[{"\201\0\210\302\f\376\0\2,_>N.\t\16\0\10\t\n\v\f\r\16\17\20
\21\22\23\24\25\26\27"..., 4208}], msg_controllen=36, {cmsg_len=20,
cmsg_level=SOL_SOCKET, cmsg_type=0x1d /* SCM_??? */, ...}, msg_flags=0},
0) = 64
write(1, "64 bytes from ff02::2aa:ff:fe28:"..., 6964 bytes from
ff02::2aa:ff:fe28:9c5a: icmp_seq=2 ttl=255 time=125 ms
) = 69
gettimeofday({1312710445, 46448}, NULL) = 0
poll([{fd=3, events=POLLIN|POLLERR}], 1, 873) = 0 (Timeout)




^ permalink raw reply

* ping6: echo reply ok in tcpdump, but not captured in ping (Depending on address type)
From: Jakub Muszynski @ 2011-08-07  9:56 UTC (permalink / raw)
  To: netdev

Hello, 

I have a problem:
I'm writing my own IPv6 stack, and I faced an interesting issue (probably
my mistake).
Whether I receive ping6 output depends on the address I probe (ff02:x or
fe80:x). The echo reply is the same in both cases.

I do ping:
$ ping6 -I eth0 fe80::21e:33ff:fe0b:872e 
PING fe80::21e:33ff:fe0b:872e(fe80::21e:33ff:fe0b:872e) from
fe80::21e:33ff:fe0b:872f eth0: 56 data bytes
^C
--- fe80::21e:33ff:fe0b:872e ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 999ms

On my device, the ping is answered and the reply is sent back.
I can capture it via tcpdump, but can't register in ping6:

tcpdump:
00:1e:33:0b:87:2f > 00:1e:33:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 64, next-header ICMPv6 (58) payload length: 64)
fe80::21e:33ff:fe0b:872f > fe80::21e:33ff:fe0b:872e: [icmp6 sum ok]
ICMP6, echo request, length 64, seq 2
	0x0000:  6000 0000 0040 3a40 fe80 0000 0000 0000
	0x0010:  021e 33ff fe0b 872f fe80 0000 0000 0000
	0x0020:  021e 33ff fe0b 872e 8000 b3da 0c69 0002
	0x0030:  365b 3e4e e4e0 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637
00:1e:33:0b:87:2f > 00:1e:33:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 255, next-header ICMPv6 (58) payload length: 64)
fe80::21e:33ff:fe0b:872e > fe80::21e:33ff:fe0b:872f: [icmp6 sum ok]
ICMP6, echo reply, length 64, seq 2
	0x0000:  6000 0000 0040 3aff fe80 0000 0000 0000
	0x0010:  021e 33ff fe0b 872e fe80 0000 0000 0000
	0x0020:  021e 33ff fe0b 872f 8100 b2da 0c69 0002
	0x0030:  365b 3e4e e4e0 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637


>>> Curious case: 
If I ping an ff02:xxxx address, it works:

ping6 -I eth0 ff02::21e:33ff:fe0b:872e 
PING ff02::21e:33ff:fe0b:872e(ff02::21e:33ff:fe0b:872e) from
fe80::21e:33ff:fe0b:872f eth0: 56 data bytes
64 bytes from ff02::21e:33ff:fe0b:872e: icmp_seq=1 ttl=255 time=125 ms
64 bytes from ff02::21e:33ff:fe0b:872e: icmp_seq=2 ttl=255 time=125 ms
^C
--- ff02::21e:33ff:fe0b:872e ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1001ms
rtt min/avg/max/mdev = 125.374/125.423/125.473/0.357 ms


tcpdump:
00:1e:33:0b:87:2f > 33:33:fe:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 1, next-header ICMPv6 (58) payload length: 64)
fe80::21e:33ff:fe0b:872f > ff02::21e:33ff:fe0b:872e: [icmp6 sum ok]
ICMP6, echo request, length 64, seq 2
	0x0000:  6000 0000 0040 3a01 fe80 0000 0000 0000
	0x0010:  021e 33ff fe0b 872f ff02 0000 0000 0000
	0x0020:  021e 33ff fe0b 872e 8000 b262 0cb4 0002
	0x0030:  bb5c 3e4e 608a 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637
00:1e:33:0b:87:2f > 33:33:fe:0b:87:2e, ethertype IPv6 (0x86dd), length
118: (hlim 255, next-header ICMPv6 (58) payload length: 64)
ff02::21e:33ff:fe0b:872e > fe80::21e:33ff:fe0b:872f: [icmp6 sum ok]
ICMP6, echo reply, length 64, seq 2
	0x0000:  6000 0000 0040 3aff ff02 0000 0000 0000
	0x0010:  021e 33ff fe0b 872e fe80 0000 0000 0000
	0x0020:  021e 33ff fe0b 872f 8100 b162 0cb4 0002
	0x0030:  bb5c 3e4e 608a 0700 0809 0a0b 0c0d 0e0f
	0x0040:  1011 1213 1415 1617 1819 1a1b 1c1d 1e1f
	0x0050:  2021 2223 2425 2627 2829 2a2b 2c2d 2e2f
	0x0060:  3031 3233 3435 3637


Well - all seems quite similar.
I did try to understand strace ping6, it says:
recvmsg(3, 0xbfc64358, 0) = -1 EAGAIN (Resource temporarily unavailable)
But why?
Some specification (RFC) issues?
Do you have any idea why the local-multicast address works and the other does not?

Greetings
Kuba

-------------------------------
the tcpdump command:
tcpdump -t -n -i eth0 -e -x -vv

Interface settings:
ip -6 a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qlen 1000
    inet6 fe80::21e:33ff:fe0b:872f/64 scope link 
       valid_lft forever preferred_lft forever

---------------------------

strace :

NOT WORKING: (fe80:xx)

recvmsg(3, 0xbfc64358, 0)               = -1 EAGAIN (Resource
temporarily unavailable)
gettimeofday({1312710414, 968503}, NULL) = 0
poll([{fd=3, events=POLLIN|POLLERR}], 1, 10) = 0 (Timeout)
gettimeofday({1312710414, 978868}, NULL) = 0
gettimeofday({1312710414, 978996}, NULL) = 0
sendmsg(3, {msg_name(28)={sa_family=AF_INET6, sin6_port=htons(58),
inet_pton(AF_INET6, "fe80::2aa:ff:fe28:9c5a", &sin6_addr),
sin6_flowinfo=0, sin6_scope_id=0}, msg_iov(1)=[{"\200\0\0\0\f\365\0\7
\16_>N4\360\16\0\10\t\n\v\f\r\16\17\20\21\22\23\24\25\26\27"..., 64}],
msg_controllen=32, {cmsg_len=32, cmsg_level=SOL_IPV6, cmsg_type=, ...},
msg_flags=MSG_OOB}, 0) = 64
recvmsg(3, 0xbfc64358, 0)               = -1 EAGAIN (Resource
temporarily unavailable)
gettimeofday({1312710415, 976589}, NULL) = 0
poll([{fd=3, events=POLLIN|POLLERR}], 1, 10) = 0 (Timeout)



WORKING: (ff02:xx)

poll([{fd=3, events=POLLIN|POLLERR}], 1, 870) = 0 (Timeout)
gettimeofday({1312710444, 919737}, NULL) = 0
gettimeofday({1312710444, 919854}, NULL) = 0
sendmsg(3, {msg_name(28)={sa_family=AF_INET6, sin6_port=htons(58),
inet_pton(AF_INET6, "ff02::2aa:ff:fe28:9c5a", &sin6_addr),
sin6_flowinfo=0, sin6_scope_id=0}, msg_iov(1)=[{"\200\0\0\0\f\376\0
\2,_>N.\t\16\0\10\t\n\v\f\r\16\17\20\21\22\23\24\25\26\27"..., 64}],
msg_controllen=32, {cmsg_len=32, cmsg_level=SOL_IPV6, cmsg_type=, ...},
msg_flags=MSG_OOB}, MSG_CONFIRM) = 64
recvmsg(3, {msg_name(28)={sa_family=AF_INET6, sin6_port=htons(0),
inet_pton(AF_INET6, "ff02::2aa:ff:fe28:9c5a", &sin6_addr),
sin6_flowinfo=0, sin6_scope_id=if_nametoindex("eth0")},
msg_iov(1)=[{"\201\0\210\302\f\376\0\2,_>N.\t\16\0\10\t\n\v\f\r\16\17\20
\21\22\23\24\25\26\27"..., 4208}], msg_controllen=36, {cmsg_len=20,
cmsg_level=SOL_SOCKET, cmsg_type=0x1d /* SCM_??? */, ...}, msg_flags=0},
0) = 64
write(1, "64 bytes from ff02::2aa:ff:fe28:"..., 6964 bytes from
ff02::2aa:ff:fe28:9c5a: icmp_seq=2 ttl=255 time=125 ms
) = 69
gettimeofday({1312710445, 46448}, NULL) = 0
poll([{fd=3, events=POLLIN|POLLERR}], 1, 873) = 0 (Timeout)



^ permalink raw reply

* rtl8150: rtl8150_disconnect(...) does not need tasklet_disable(...)
From: Huajun Li @ 2011-08-07 13:03 UTC (permalink / raw)
  To: David Miller, petkan-Rn4VEauK+AKRv+LV9MX5uipxlwaOVQ5f
  Cc: netdev, lud, Huajun Li

Executing 'rmmod rtl8150' does not return (if the device is connected
to the host). The root cause is that tasklet_disable() causes
tasklet_kill() to block, so remove it from rtl8150_disconnect().

Signed-off-by: Huajun Li <huajun.li.lee-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
---
 drivers/net/usb/rtl8150.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 041fb7d..ef3b236 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -977,7 +977,6 @@ static void rtl8150_disconnect(struct usb_interface *intf)
 	usb_set_intfdata(intf, NULL);
 	if (dev) {
 		set_bit(RTL8150_UNPLUG, &dev->flags);
-		tasklet_disable(&dev->tl);
 		tasklet_kill(&dev->tl);
 		unregister_netdev(dev->netdev);
 		unlink_all_urbs(dev);
-- 
1.7.4.1
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Bonding problem
From: Eduard Sinelnikov @ 2011-08-07 12:00 UTC (permalink / raw)
  To: majordomo, netdev, Jay Vosburgh, Andy Gospodarek

Hi,

In kernel 2.6.39.3 (/drivers/net/bond/bond_main.c),
in the function ‘bond_xmit_roundrobin’,
the code checks if the bond is active via the
‘bond_is_active_slave(slave)’ function call,
which actually checks if the slave is backup or active.
What is the meaning of a slave being backup in round robin mode?
Correct me if I am wrong, but in round robin every slave should send a
packet, regardless of being active or backup.

Thank you,
           Eduard

^ permalink raw reply

* Pick Up
From: WESTERN UNION OFFICE @ 2011-08-07 11:26 UTC (permalink / raw)


How are you today?


I write to inform you that we have already sent you $5,000.00USD
through Western union as we have been given the mandate to transfer
your full compensation payment of  $1.800,000.00USD via western union
by this government.

I called to give you the information through phone as internet hackers
were many but i cannot reach you yesterday even this morning,So I
decided to email you the (MTCN) and sender name so that you can pick
up this $5,000.00USD to enable us send another $5,000.00USD by
tomorrow as you knows we will be sending you only $5,000.00USD per
day.Please pick up this information and run to any western union
(OUTLET) in your country and pick up this $5,000.00USD and send us an
email back,so that we can send another $5,000.00USD by tomorrow.

Manager: Mr Frank Amos
email me on:western-money71@xnmsn.com
call us on: +234-7031908911
once you picked up this $5000.00USD today.

Here is the western union information to pick up the $5000.00USD,

MTCN :___________  MTCN 9500834460
first name: ________Appoline
Second Name: ____Ouedraoge
Text Question: ____When
Answer: ____________2Hours ago
Amount:__________ $5,000.00 United State Dollars

I am waiting for your E-mail once you pick up $5000.00USD,

Thanks
Mr Frank Amos.

^ permalink raw reply

* [PATCH 1/2] gianfar: fix fiper alignment after resetting the time
From: Richard Cochran @ 2011-08-07  7:03 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, David Miller, stable
In-Reply-To: <cover.1312699693.git.richard.cochran@omicron.at>

After resetting the time, the PPS signals on the FIPER output channels
are incorrectly offset from the clock time, as can be readily verified
by looping back the FIPER to the external time stamp input.

Despite its name, setting the "Fiper Realignment Disable" bit seems to
fix the problem, at least on the P2020.

Also, following the example code from the Freescale BSP, it is not really
necessary to disable and re-enable the timer in order to reprogram the
FIPER. (The documentation is rather unclear on this point. It seems that
writing to the alarm register also disables the FIPER.)

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Cc: <stable@kernel.org>
---
 drivers/net/gianfar_ptp.c |    9 ++-------
 1 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/gianfar_ptp.c b/drivers/net/gianfar_ptp.c
index 1c97861..f67b8ae 100644
--- a/drivers/net/gianfar_ptp.c
+++ b/drivers/net/gianfar_ptp.c
@@ -193,14 +193,9 @@ static void set_alarm(struct etsects *etsects)
 /* Caller must hold etsects->lock. */
 static void set_fipers(struct etsects *etsects)
 {
-	u32 tmr_ctrl = gfar_read(&etsects->regs->tmr_ctrl);
-
-	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl & (~TE));
-	gfar_write(&etsects->regs->tmr_prsc,   etsects->tmr_prsc);
+	set_alarm(etsects);
 	gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
 	gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
-	set_alarm(etsects);
-	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|TE);
 }
 
 /*
@@ -511,7 +506,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
 	gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
 	gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
 	set_alarm(etsects);
-	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FS|RTPE|TE);
+	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FS|RTPE|TE|FRD);
 
 	spin_unlock_irqrestore(&etsects->lock, flags);
 
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 2/2] dp83640: increase receive time stamp buffer size
From: Richard Cochran @ 2011-08-07  7:03 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, David Miller, stable
In-Reply-To: <cover.1312699693.git.richard.cochran@omicron.at>

The dp83640 buffers receive time stamps from special PHY status frames,
matching them to received PTP packets in a work queue. Because the timeout
for orphaned time stamps is so long and the buffer is so small, the driver
can drop time stamps under moderate PTP traffic.

This commit fixes the issue by decreasing the timeout to (at least) one
timer tick and increasing the buffer size.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Cc: <stable@kernel.org>
---
 drivers/net/phy/dp83640.c |    5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 2cd8dc5..cb6e0b4 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -34,8 +34,7 @@
 #define PAGESEL		0x13
 #define LAYER4		0x02
 #define LAYER2		0x01
-#define MAX_RXTS	4
-#define MAX_TXTS	4
+#define MAX_RXTS	64
 #define N_EXT_TS	1
 #define PSF_PTPVER	2
 #define PSF_EVNT	0x4000
@@ -218,7 +217,7 @@ static void phy2rxts(struct phy_rxts *p, struct rxts *rxts)
 	rxts->seqid = p->seqid;
 	rxts->msgtype = (p->msgtype >> 12) & 0xf;
 	rxts->hash = p->msgtype & 0x0fff;
-	rxts->tmo = jiffies + HZ;
+	rxts->tmo = jiffies + 2;
 }
 
 static u64 phy2txts(struct phy_txts *p)
-- 
1.7.0.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox