From mboxrd@z Thu Jan 1 00:00:00 1970 From: Florian Fainelli Subject: Re: [PATCH net-next 1/2] net: systemport: Implement adaptive interrupt coalescing Date: Mon, 26 Mar 2018 14:36:02 -0700 Message-ID: <0056b0e3-7672-16ad-725b-8b2bc22b97fd@gmail.com> References: <20180323011933.29748-1-f.fainelli@gmail.com> <20180323011933.29748-2-f.fainelli@gmail.com> <58becbf1-baea-a679-ed32-b58fa5fd24bd@mellanox.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Cc: davem@davemloft.net, jaedon.shin@gmail.com, pgynther@google.com, opendmb@gmail.com, michal.chan@broadcom.com, gospo@broadcom.com, saeedm@mellanox.com To: Tal Gilboa , netdev@vger.kernel.org Return-path: Received: from mail-qk0-f196.google.com ([209.85.220.196]:46129 "EHLO mail-qk0-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750983AbeCZVgT (ORCPT ); Mon, 26 Mar 2018 17:36:19 -0400 Received: by mail-qk0-f196.google.com with SMTP id o184so21768672qkd.13 for ; Mon, 26 Mar 2018 14:36:19 -0700 (PDT) In-Reply-To: <58becbf1-baea-a679-ed32-b58fa5fd24bd@mellanox.com> Content-Language: en-US Sender: netdev-owner@vger.kernel.org List-ID: On 03/26/2018 02:22 PM, Tal Gilboa wrote: > On 3/23/2018 4:19 AM, Florian Fainelli wrote: >> Implement support for adaptive RX and TX interrupt coalescing using >> net_dim. We have each of our TX ring and our single RX ring implement a >> bcm_sysport_net_dim structure which holds an interrupt counter, number >> of packets, bytes, and a container for a net_dim instance. >> >> Signed-off-by: Florian Fainelli >> --- >>   drivers/net/ethernet/broadcom/bcmsysport.c | 141 >> ++++++++++++++++++++++++++--- >>   drivers/net/ethernet/broadcom/bcmsysport.h |  14 +++ >>   2 files changed, 140 insertions(+), 15 deletions(-) >> >> diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c >> b/drivers/net/ethernet/broadcom/bcmsysport.c >> index f15a8fc6dfc9..5a5a726bafa4 100644 >> --- a/drivers/net/ethernet/broadcom/bcmsysport.c >> +++ b/drivers/net/ethernet/broadcom/bcmsysport.c >> @@ -15,6 +15,7 @@ >>   #include >>   #include >>   #include >> +#include > > I don't think you need this include. You already include net_dim in > bcmsysport.h and include the bcmsysport.h here. Indeed. > >>   #include >>   #include >>   #include >> @@ -574,21 +575,55 @@ static int bcm_sysport_set_wol(struct net_device >> *dev, >>       return 0; >>   } >>   +static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv) >> +{ >> +    u32 reg; >> + >> +    reg = rdma_readl(priv, RDMA_MBDONE_INTR); >> +    reg &= ~(RDMA_INTR_THRESH_MASK | >> +         RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT); >> +    reg |= priv->dim.coal_pkts; >> +    reg |= DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) << >> +                RDMA_TIMEOUT_SHIFT; >> +    rdma_writel(priv, reg, RDMA_MBDONE_INTR); >> +} >> + >> +static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring >> *ring) >> +{ >> +    struct bcm_sysport_priv *priv = ring->priv; >> +    u32 reg; >> + >> +    reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index)); >> +    reg &= ~(RING_INTR_THRESH_MASK | >> +         RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT); >> +    reg |= ring->dim.coal_pkts; >> +    reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) << >> +                RING_TIMEOUT_SHIFT; >> +    tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(ring->index)); >> +} >> + > > I wouldn't couple these functions with dim. This implies dim is always > used. IMO, would be more clear to use a generic method which takes usecs > and packets as an argument. I did not want to create an additional structure for storing coalescing parameters, but if you prefer I make this function take two parameters, that sounds entirely reasonable. > >>   static int bcm_sysport_get_coalesce(struct net_device *dev, >>                       struct ethtool_coalesce *ec) >>   { >>       struct bcm_sysport_priv *priv = netdev_priv(dev); >> +    struct bcm_sysport_tx_ring *ring; >> +    unsigned int i; >>       u32 reg; >>         reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0)); >>         ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / >> 1000; >>       ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK; >> +    for (i = 0; i < dev->num_tx_queues; i++) { >> +        ring = &priv->tx_rings[i]; >> +        ec->use_adaptive_tx_coalesce |= ring->dim.use_dim; >> +    } >>         reg = rdma_readl(priv, RDMA_MBDONE_INTR); >>         ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 / >> 1000; >>       ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK; >> +    ec->use_adaptive_rx_coalesce = priv->dim.use_dim; >>         return 0; >>   } >> @@ -597,8 +632,8 @@ static int bcm_sysport_set_coalesce(struct >> net_device *dev, >>                       struct ethtool_coalesce *ec) >>   { >>       struct bcm_sysport_priv *priv = netdev_priv(dev); >> +    struct bcm_sysport_tx_ring *ring; >>       unsigned int i; >> -    u32 reg; >>         /* Base system clock is 125Mhz, DMA timeout is this reference >> clock >>        * divided by 1024, which yield roughly 8.192 us, our maximum >> value has >> @@ -615,22 +650,26 @@ static int bcm_sysport_set_coalesce(struct >> net_device *dev, >>           return -EINVAL; >>         for (i = 0; i < dev->num_tx_queues; i++) { >> -        reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i)); >> -        reg &= ~(RING_INTR_THRESH_MASK | >> -             RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT); >> -        reg |= ec->tx_max_coalesced_frames; >> -        reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) << >> -             RING_TIMEOUT_SHIFT; >> -        tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i)); >> +        ring = &priv->tx_rings[i]; >> +        ring->dim.coal_pkts = ec->tx_max_coalesced_frames; >> +        ring->dim.coal_usecs = ec->tx_coalesce_usecs; >> +        if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) { >> +            ring->dim.coal_pkts = 1; >> +            ring->dim.coal_usecs = 0; >> +        } >> +        ring->dim.use_dim = ec->use_adaptive_tx_coalesce; >> +        bcm_sysport_set_tx_coalesce(ring); >>       } > > If I understand correctly, if I disable dim, moderation is set to > {usecs,packets}={0,1} regardless of the input from ethtool right? Correct, these are the default coalescing parameters that the driver sets. As mentioned before, since I am not storing any coalescing parameters other than these two, there is no copy of what an user might have previously provided, falling back to the defaults seemed reasonable. > Doesn't this break the wanted behavior? As mentioned above, I would > decouple dim from the set_tx/rx_coalesce() function. Also, when dim is > enabled, why change dim.coal_pkts/usecs? They would just be overwritten > in the next iteration of net_dim. Indeed, that is not necessary. > >>   -    reg = rdma_readl(priv, RDMA_MBDONE_INTR); >> -    reg &= ~(RDMA_INTR_THRESH_MASK | >> -         RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT); >> -    reg |= ec->rx_max_coalesced_frames; >> -    reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) << >> -                RDMA_TIMEOUT_SHIFT; >> -    rdma_writel(priv, reg, RDMA_MBDONE_INTR); >> +    priv->dim.coal_usecs = ec->rx_coalesce_usecs; >> +    priv->dim.coal_pkts = ec->rx_max_coalesced_frames; >> + >> +    if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) { >> +        priv->dim.coal_pkts = 1; >> +        priv->dim.coal_usecs = 0; >> +    } >> +    priv->dim.use_dim = ec->use_adaptive_rx_coalesce; >> +    bcm_sysport_set_rx_coalesce(priv); > > Same comment as above. > >>         return 0; >>   } >> @@ -709,6 +748,7 @@ static unsigned int bcm_sysport_desc_rx(struct >> bcm_sysport_priv *priv, >>       struct bcm_sysport_stats64 *stats64 = &priv->stats64; >>       struct net_device *ndev = priv->netdev; >>       unsigned int processed = 0, to_process; >> +    unsigned int processed_bytes = 0; >>       struct bcm_sysport_cb *cb; >>       struct sk_buff *skb; >>       unsigned int p_index; >> @@ -800,6 +840,7 @@ static unsigned int bcm_sysport_desc_rx(struct >> bcm_sysport_priv *priv, >>            */ >>           skb_pull(skb, sizeof(*rsb) + 2); >>           len -= (sizeof(*rsb) + 2); >> +        processed_bytes += len; >>             /* UniMAC may forward CRC */ >>           if (priv->crc_fwd) { >> @@ -824,6 +865,9 @@ static unsigned int bcm_sysport_desc_rx(struct >> bcm_sysport_priv *priv, >>               priv->rx_read_ptr = 0; >>       } >>   +    priv->dim.packets = processed; >> +    priv->dim.bytes = processed_bytes; >> + >>       return processed; >>   } >>   @@ -900,6 +944,8 @@ static unsigned int >> __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv, >>       ring->packets += pkts_compl; >>       ring->bytes += bytes_compl; >>       u64_stats_update_end(&priv->syncp); >> +    ring->dim.packets = pkts_compl; >> +    ring->dim.bytes = bytes_compl; >>         ring->c_index = c_index; >>   @@ -945,6 +991,7 @@ static int bcm_sysport_tx_poll(struct >> napi_struct *napi, int budget) >>   { >>       struct bcm_sysport_tx_ring *ring = >>           container_of(napi, struct bcm_sysport_tx_ring, napi); >> +    struct net_dim_sample dim_sample; >>       unsigned int work_done = 0; >>         work_done = bcm_sysport_tx_reclaim(ring->priv, ring); >> @@ -961,6 +1008,12 @@ static int bcm_sysport_tx_poll(struct >> napi_struct *napi, int budget) >>           return 0; >>       } >>   +    if (ring->dim.use_dim) { >> +        net_dim_sample(ring->dim.event_ctr, ring->dim.packets, >> +                   ring->dim.bytes, &dim_sample); >> +        net_dim(&ring->dim.dim, dim_sample); >> +    } >> + >>       return budget; >>   } >>   @@ -976,6 +1029,7 @@ static int bcm_sysport_poll(struct napi_struct >> *napi, int budget) >>   { >>       struct bcm_sysport_priv *priv = >>           container_of(napi, struct bcm_sysport_priv, napi); >> +    struct net_dim_sample dim_sample; >>       unsigned int work_done = 0; >>         work_done = bcm_sysport_desc_rx(priv, budget); >> @@ -998,6 +1052,12 @@ static int bcm_sysport_poll(struct napi_struct >> *napi, int budget) >>           intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE); >>       } >>   +    if (priv->dim.use_dim) { >> +        net_dim_sample(priv->dim.event_ctr, priv->dim.packets, >> +                   priv->dim.bytes, &dim_sample); >> +        net_dim(&priv->dim.dim, dim_sample); >> +    } >> + >>       return work_done; >>   } >>   @@ -1016,6 +1076,40 @@ static void >> bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) >>       netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); >>   } >>   +static void bcm_sysport_dim_work(struct work_struct *work) >> +{ >> +    struct net_dim *dim = container_of(work, struct net_dim, work); >> +    struct bcm_sysport_net_dim *ndim = >> +            container_of(dim, struct bcm_sysport_net_dim, dim); >> +    struct bcm_sysport_priv *priv = >> +            container_of(ndim, struct bcm_sysport_priv, dim); >> +    struct net_dim_cq_moder cur_profile = >> +                net_dim_get_profile(dim->mode, dim->profile_ix); >> + >> +    priv->dim.coal_usecs = cur_profile.usec; >> +    priv->dim.coal_pkts = cur_profile.pkts; >> + >> +    bcm_sysport_set_rx_coalesce(priv); >> +    dim->state = NET_DIM_START_MEASURE; >> +} >> + >> +static void bcm_sysport_dim_tx_work(struct work_struct *work) >> +{ >> +    struct net_dim *dim = container_of(work, struct net_dim, work); >> +    struct bcm_sysport_net_dim *ndim = >> +            container_of(dim, struct bcm_sysport_net_dim, dim); >> +    struct bcm_sysport_tx_ring *ring = >> +            container_of(ndim, struct bcm_sysport_tx_ring, dim); >> +    struct net_dim_cq_moder cur_profile = >> +                net_dim_get_profile(dim->mode, dim->profile_ix); >> + >> +    ring->dim.coal_usecs = cur_profile.usec; >> +    ring->dim.coal_pkts = cur_profile.pkts; >> + >> +    bcm_sysport_set_tx_coalesce(ring); >> +    dim->state = NET_DIM_START_MEASURE; >> +} >> + >>   /* RX and misc interrupt routine */ >>   static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) >>   { >> @@ -1034,6 +1128,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, >> void *dev_id) >>       } >>         if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) { >> +        priv->dim.event_ctr++; >>           if (likely(napi_schedule_prep(&priv->napi))) { >>               /* disable RX interrupts */ >>               intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE); >> @@ -1061,6 +1156,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, >> void *dev_id) >>               continue; >>             txr = &priv->tx_rings[ring]; >> +        txr->dim.event_ctr++; >>             if (likely(napi_schedule_prep(&txr->napi))) { >>               intrl2_0_mask_set(priv, ring_bit); >> @@ -1093,6 +1189,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, >> void *dev_id) >>               continue; >>             txr = &priv->tx_rings[ring]; >> +        txr->dim.event_ctr++; >>             if (likely(napi_schedule_prep(&txr->napi))) { >>               intrl2_1_mask_set(priv, BIT(ring)); >> @@ -1358,6 +1455,16 @@ static void bcm_sysport_adj_link(struct >> net_device *dev) >>           phy_print_status(phydev); >>   } >>   +static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim, >> +                 void (*cb)(struct work_struct *work)) >> +{ >> +    INIT_WORK(&dim->dim.work, cb); >> +    dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; >> +    dim->event_ctr = 0; >> +    dim->packets = 0; >> +    dim->bytes = 0; >> +} > > What about default values for coal_usecs/pkts? dim supports it through > net_dim_get_def_profile(mode) function. OK, thanks I did not know that. -- Florian