All of lore.kernel.org
 help / color / mirror / Atom feed
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
To: Jimmy Perchet <jimmy.perchet@parrot.com>
Cc: <netdev@vger.kernel.org>
Subject: Re: [PATCH RFC 5/5] net:stmmac: asynchronous tx_clean
Date: Mon, 21 Oct 2013 15:52:12 +0200	[thread overview]
Message-ID: <5265318C.5050307@st.com> (raw)
In-Reply-To: <1381937052-8999-6-git-send-email-jimmy.perchet@parrot.com>

Hello Jimmy

On 10/16/2013 5:24 PM, Jimmy Perchet wrote:
> Tx descriptor's cleanup and preparation are serialized, which is not necessary
> and decrease performance.
> In addition TX descriptor's cleanup is performed on NET_RX softirq, this is
> confusing.

hmm, here you are changing the logic behind the tx/rx processes.

As done in many drivers, the stmmac cleans the tx resources in
NAPI context and this is not a confuse approach ;-).

It gave me some performance improvements especially on TCP benchmarks.

> This patch unserialize tx descriptor's cleanup and preparation
> and defer cleanup in workqueue.

So you decide to use workqueue and I kindly ask you to give me more
details about the performance improvements (UDP/TCP) and cpu usage.

I can try to do some tests on my side too. This could take a while
unfortunately.

peppe

>
> Signed-off-by: Jimmy Perchet <jimmy.perchet@parrot.com>
> ---
>   drivers/net/ethernet/stmicro/stmmac/chain_mode.c  |  6 +--
>   drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  6 ++-
>   drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 52 +++++++++++++----------
>   3 files changed, 37 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
> index d6ed0ce..8ab83cb 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
> @@ -120,7 +120,7 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
>   		 * to keep explicit chaining in the descriptor.
>   		 */
>   		p->des3 = (unsigned int)(priv->dma_rx_phy +
> -					 (((priv->dirty_rx) + 1) %
> +					 ((priv->dirty_rx + 1) %
>   					  priv->dma_rx_size) *
>   					 sizeof(struct dma_desc));
>   }
> @@ -135,9 +135,9 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
>   		 * to keep explicit chaining in the descriptor.
>   		 */
>   		p->des3 = (unsigned int)(priv->dma_tx_phy +
> -					 (((priv->dirty_tx + 1) %
> +					 ((atomic_read(&priv->dirty_tx) + 1) %
>   					   priv->dma_tx_size) *
> -					  sizeof(struct dma_desc)));
> +					  sizeof(struct dma_desc));
>   }
>
>   const struct stmmac_chain_mode_ops chain_mode_ops = {
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index f16a9bd..d5b8906 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -38,8 +38,8 @@ struct stmmac_priv {
>   	struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
>   	struct dma_desc *dma_tx;
>   	struct sk_buff **tx_skbuff;
> -	unsigned int cur_tx;
> -	unsigned int dirty_tx;
> +	atomic_t cur_tx;
> +	atomic_t dirty_tx;
>   	unsigned int dma_tx_size;
>   	u32 tx_count_frames;
>   	u32 tx_coal_frames;
> @@ -106,6 +106,8 @@ struct stmmac_priv {
>   	u32 adv_ts;
>   	int use_riwt;
>   	spinlock_t ptp_lock;
> +	struct work_struct txclean_work;
> +	struct workqueue_struct *wq;
>   };
>
>   extern int phyaddr;
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 5873246..de614ad 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -48,6 +48,7 @@
>   #include <linux/seq_file.h>
>   #endif /* CONFIG_STMMAC_DEBUG_FS */
>   #include <linux/net_tstamp.h>
> +#include <linux/workqueue.h>
>   #include "stmmac_ptp.h"
>   #include "stmmac.h"
>
> @@ -205,7 +206,8 @@ static void print_pkt(unsigned char *buf, int len)
>
>   static inline u32 stmmac_tx_avail(struct stmmac_priv *priv)
>   {
> -	return priv->dirty_tx + priv->dma_tx_size - priv->cur_tx - 1;
> +	return atomic_read(&priv->dirty_tx) + priv->dma_tx_size
> +					 - atomic_read(&priv->cur_tx) - 1;
>   }
>
>   /**
> @@ -230,7 +232,7 @@ static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv)
>   static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
>   {
>   	/* Check and enter in LPI mode */
> -	if ((priv->dirty_tx == priv->cur_tx) &&
> +	if ((atomic_read(&priv->dirty_tx) == atomic_read(&priv->cur_tx)) &&
>   	    (priv->tx_path_in_lpi_mode == false))
>   		priv->hw->mac->set_eee_mode(priv->ioaddr);
>   }
> @@ -1118,8 +1120,8 @@ static int init_dma_desc_rings(struct net_device *dev)
>   		priv->tx_skbuff[i] = NULL;
>   	}
>
> -	priv->dirty_tx = 0;
> -	priv->cur_tx = 0;
> +	atomic_set(&priv->dirty_tx, 0);
> +	atomic_set(&priv->cur_tx, 0);
>
>   	stmmac_clear_descriptors(priv);
>
> @@ -1264,17 +1266,17 @@ static void stmmac_dma_operation_mode(int mtu, struct stmmac_priv *priv)
>    * @priv: driver private structure
>    * Description: it reclaims resources after transmission completes.
>    */
> -static void stmmac_tx_clean(struct stmmac_priv *priv)
> +static void stmmac_tx_clean(struct work_struct *work)
>   {
> +	struct stmmac_priv *priv =
> +			container_of(work, struct stmmac_priv, txclean_work);
>   	unsigned int txsize = priv->dma_tx_size;
>
> -	spin_lock(&priv->tx_lock);
> -
>   	priv->xstats.tx_clean++;
>
> -	while (priv->dirty_tx != priv->cur_tx) {
> +	while (atomic_read(&priv->dirty_tx) != atomic_read(&priv->cur_tx)) {
>   		int last;
> -		unsigned int entry = priv->dirty_tx % txsize;
> +		unsigned int entry = atomic_read(&priv->dirty_tx) % txsize;
>   		struct sk_buff *skb = priv->tx_skbuff[entry];
>   		struct dma_desc *p;
>
> @@ -1308,7 +1310,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
>   		}
>   		if (netif_msg_tx_done(priv))
>   			pr_debug("%s: curr %d, dirty %d\n", __func__,
> -				 priv->cur_tx, priv->dirty_tx);
> +				 atomic_read(&priv->cur_tx),
> +				 atomic_read(&priv->dirty_tx));
>
>   		if (likely(priv->tx_skbuff_dma[entry])) {
>   			dma_unmap_single(priv->device,
> @@ -1326,7 +1329,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
>
>   		priv->hw->desc->release_tx_desc(p, priv->mode);
>
> -		priv->dirty_tx++;
> +		wmb();
> +		atomic_inc(&priv->dirty_tx);
>   	}
>   	if (unlikely(netif_queue_stopped(priv->dev) &&
>   		     stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv))) {
> @@ -1344,7 +1348,6 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
>   		stmmac_enable_eee_mode(priv);
>   		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
>   	}
> -	spin_unlock(&priv->tx_lock);
>   }
>
>   static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
> @@ -1380,8 +1383,8 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
>   			priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
>   						     priv->mode,
>   						     (i == txsize - 1));
> -	priv->dirty_tx = 0;
> -	priv->cur_tx = 0;
> +	atomic_set(&priv->dirty_tx, 0);
> +	atomic_set(&priv->cur_tx, 0);
>   	priv->hw->dma->start_tx(priv->ioaddr);
>
>   	priv->dev->stats.tx_errors++;
> @@ -1401,12 +1404,16 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
>   	int status;
>
>   	status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
> -	if (likely((status & handle_rx)) || (status & handle_tx)) {
> +	if (likely(status & handle_rx)) {
>   		if (likely(napi_schedule_prep(&priv->napi))) {
>   			stmmac_disable_dma_irq(priv);
>   			__napi_schedule(&priv->napi);
>   		}
>   	}
> +	if (likely(status & handle_tx))
> +		queue_work(priv->wq, &priv->txclean_work);
> +
> +
>   	if (unlikely(status & tx_hard_error_bump_tc)) {
>   		/* Try to bump up the dma threshold on this failure */
>   		if (unlikely(tc != SF_DMA_MODE) && (tc <= 256)) {
> @@ -1596,8 +1603,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
>   static void stmmac_tx_timer(unsigned long data)
>   {
>   	struct stmmac_priv *priv = (struct stmmac_priv *)data;
> -
> -	stmmac_tx_clean(priv);
> +	queue_work(priv->wq, &priv->txclean_work);
>   }
>
>   /**
> @@ -1876,7 +1882,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>   	if (priv->tx_path_in_lpi_mode)
>   		stmmac_disable_eee_mode(priv);
>
> -	first_entry = entry = priv->cur_tx % txsize;
> +	first_entry = entry = atomic_read(&priv->cur_tx) % txsize;
>
>   	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
>   	/* To program the descriptors according to the size of the frame */
> @@ -1944,12 +1950,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>   	priv->hw->desc->set_tx_owner(first);
>   	wmb();
>
> -	priv->cur_tx += nb_desc;
> +	atomic_add(nb_desc, &priv->cur_tx);
>
>   	if (netif_msg_pktdata(priv)) {
>   		pr_debug("%s: curr %d dirty=%d entry=%d, first=%p, nfrags=%d",
> -			__func__, (priv->cur_tx % txsize),
> -			(priv->dirty_tx % txsize), entry, first, nfrags);
> +			__func__, (atomic_read(&priv->cur_tx) % txsize),
> +			(atomic_read(&priv->dirty_tx) % txsize), entry,
> +			first, nfrags);
>
>   		if (priv->extend_desc)
>   			stmmac_display_ring((void *)priv->dma_etx, txsize, 1);
> @@ -2171,7 +2178,6 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
>   	int work_done = 0;
>
>   	priv->xstats.napi_poll++;
> -	stmmac_tx_clean(priv);
>
>   	work_done = stmmac_rx(priv, budget);
>   	if (work_done < budget) {
> @@ -2747,6 +2753,8 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
>
>   	spin_lock_init(&priv->lock);
>   	spin_lock_init(&priv->tx_lock);
> +	priv->wq = create_singlethread_workqueue("stmmac_wq");
> +	INIT_WORK(&priv->txclean_work, stmmac_tx_clean);
>
>   	ret = register_netdev(ndev);
>   	if (ret) {
>

  reply	other threads:[~2013-10-21 13:52 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-16 15:24 [PATCH RFC 0/5] net:stmmac: fix jumbo frames handling and optimisation Jimmy Perchet
2013-10-16 15:24 ` [PATCH RFC 1/5] net:stmmac: set threshold/store and forward mode according to mtu size Jimmy Perchet
2013-10-21  8:47   ` Giuseppe CAVALLARO
2013-10-21  9:58     ` Rayagond K
2013-10-21 13:49       ` Giuseppe CAVALLARO
2013-10-16 15:24 ` [PATCH RFC 2/5] net:stmmac: fix rx buffer allocation Jimmy Perchet
2013-10-21  8:54   ` Giuseppe CAVALLARO
2013-10-16 15:24 ` [PATCH RFC 3/5] net:stmmac: ensure we reclaim all dirty descriptors Jimmy Perchet
2013-10-16 17:46   ` Sergei Shtylyov
2013-10-18  8:32     ` Jimmy PERCHET
2013-10-21  9:07   ` Giuseppe CAVALLARO
2013-10-21 13:10     ` Jimmy PERCHET
2013-10-21 18:32       ` Eric Dumazet
2013-10-21 18:49         ` Eric Dumazet
2013-10-22 13:33           ` Giuseppe CAVALLARO
2013-10-16 15:24 ` [PATCH RFC 4/5] net:stmmac: fix jumbo frame handling Jimmy Perchet
2013-10-21 13:40   ` Giuseppe CAVALLARO
2013-10-21 16:28     ` Jimmy PERCHET
2013-10-22 13:24       ` Giuseppe CAVALLARO
2013-10-16 15:24 ` [PATCH RFC 5/5] net:stmmac: asynchronous tx_clean Jimmy Perchet
2013-10-21 13:52   ` Giuseppe CAVALLARO [this message]
2013-10-21 16:30     ` Eric Dumazet
2013-10-21 18:05       ` Jimmy PERCHET
2013-10-21 18:24         ` Eric Dumazet
2013-10-16 20:37 ` [PATCH RFC 0/5] net:stmmac: fix jumbo frames handling and optimisation Giuseppe CAVALLARO
2013-10-18 16:24   ` Jimmy PERCHET

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5265318C.5050307@st.com \
    --to=peppe.cavallaro@st.com \
    --cc=jimmy.perchet@parrot.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.