Netdev List
 help / color / mirror / Atom feed
* Re: [net-next PATCH v4 1/7] net: Refactor XPS for CPUs and Rx queues
From: Nambiar, Amritha @ 2018-06-28  0:47 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Kernel Network Developers, David S. Miller, Alexander Duyck,
	Willem de Bruijn, Sridhar Samudrala, Alexander Duyck,
	Eric Dumazet, Hannes Frederic Sowa
In-Reply-To: <CALx6S36b54XVuuEkd6JoDJYb+mzJ1K0X7G512EJZk_D3V2b0_w@mail.gmail.com>

On 6/26/2018 3:53 PM, Tom Herbert wrote:
> On Mon, Jun 25, 2018 at 11:04 AM, Amritha Nambiar
> <amritha.nambiar@intel.com> wrote:
>> Refactor XPS code to support Tx queue selection based on
>> CPU(s) map or Rx queue(s) map.
>>
>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>> ---
>>  include/linux/cpumask.h   |   11 ++
>>  include/linux/netdevice.h |  100 +++++++++++++++++++++
>>  net/core/dev.c            |  211 ++++++++++++++++++++++++++++++---------------
>>  net/core/net-sysfs.c      |    4 -
>>  4 files changed, 246 insertions(+), 80 deletions(-)
>>
>> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
>> index bf53d89..57f20a0 100644
>> --- a/include/linux/cpumask.h
>> +++ b/include/linux/cpumask.h
>> @@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
>>  #define cpu_active(cpu)                ((cpu) == 0)
>>  #endif
>>
>> -/* verify cpu argument to cpumask_* operators */
>> -static inline unsigned int cpumask_check(unsigned int cpu)
>> +static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
>>  {
>>  #ifdef CONFIG_DEBUG_PER_CPU_MAPS
>> -       WARN_ON_ONCE(cpu >= nr_cpumask_bits);
>> +       WARN_ON_ONCE(cpu >= bits);
>>  #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
>> +}
>> +
>> +/* verify cpu argument to cpumask_* operators */
>> +static inline unsigned int cpumask_check(unsigned int cpu)
>> +{
>> +       cpu_max_bits_warn(cpu, nr_cpumask_bits);
>>         return cpu;
>>  }
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 3ec9850..c534f03 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -730,10 +730,15 @@ struct xps_map {
>>   */
>>  struct xps_dev_maps {
>>         struct rcu_head rcu;
>> -       struct xps_map __rcu *cpu_map[0];
>> +       struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */
>>  };
>> -#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
>> +
>> +#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +     \
>>         (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
>> +
>> +#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
>> +       (_rxqs * (_tcs) * sizeof(struct xps_map *)))
>> +
>>  #endif /* CONFIG_XPS */
>>
>>  #define TC_MAX_QUEUE   16
>> @@ -1909,7 +1914,8 @@ struct net_device {
>>         int                     watchdog_timeo;
>>
>>  #ifdef CONFIG_XPS
>> -       struct xps_dev_maps __rcu *xps_maps;
>> +       struct xps_dev_maps __rcu *xps_cpus_map;
>> +       struct xps_dev_maps __rcu *xps_rxqs_map;
>>  #endif
>>  #ifdef CONFIG_NET_CLS_ACT
>>         struct mini_Qdisc __rcu *miniq_egress;
>> @@ -3258,6 +3264,94 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
>>  #ifdef CONFIG_XPS
>>  int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>                         u16 index);
>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>> +                         u16 index, bool is_rxqs_map);
>> +
>> +/**
>> + *     attr_test_mask - Test a CPU or Rx queue set in a cpumask/rx queues mask
>> + *     @j: CPU/Rx queue index
>> + *     @mask: bitmask of all cpus/rx queues
>> + *     @nr_bits: number of bits in the bitmask
>> + *
>> + * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
>> + */
>> +static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
>> +                                 unsigned int nr_bits)
>> +{
>> +       cpu_max_bits_warn(j, nr_bits);
>> +       return test_bit(j, mask);
>> +}
>> +
>> +/**
>> + *     attr_test_online - Test for online CPU/Rx queue
>> + *     @j: CPU/Rx queue index
>> + *     @online_mask: bitmask for CPUs/Rx queues that are online
>> + *     @nr_bits: number of bits in the bitmask
>> + *
>> + * Returns true if a CPU/Rx queue is online.
>> + */
>> +static inline bool attr_test_online(unsigned long j,
>> +                                   const unsigned long *online_mask,
>> +                                   unsigned int nr_bits)
>> +{
>> +       cpu_max_bits_warn(j, nr_bits);
>> +
>> +       if (online_mask)
>> +               return test_bit(j, online_mask);
>> +
>> +       if (j >= 0 && j < nr_bits)
> 
> j is unsigned so j >= 0 is superfluous.
> 
>> +               return true;
>> +
>> +       return false;
> 
> Could just do:
> 
> return (j < nr_bits);

Will fix.

> 
>> +}
>> +
>> +/**
>> + *     attrmask_next - get the next CPU/Rx queue in a cpumask/Rx queues mask
>> + *     @n: CPU/Rx queue index
>> + *     @srcp: the cpumask/Rx queue mask pointer
>> + *     @nr_bits: number of bits in the bitmask
>> + *
>> + * Returns >= nr_bits if no further CPUs/Rx queues set.
>> + */
>> +static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
>> +                                        unsigned int nr_bits)
>> +{
>> +       /* -1 is a legal arg here. */
>> +       if (n != -1)
>> +               cpu_max_bits_warn(n, nr_bits);
>> +
>> +       if (srcp)
>> +               return find_next_bit(srcp, nr_bits, n + 1);
>> +
>> +       return n + 1;
>> +}
>> +
>> +/**
>> + *     attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p
>> + *     @n: CPU/Rx queue index
>> + *     @src1p: the first CPUs/Rx queues mask pointer
>> + *     @src2p: the second CPUs/Rx queues mask pointer
>> + *     @nr_bits: number of bits in the bitmask
>> + *
>> + * Returns >= nr_bits if no further CPUs/Rx queues set in both.
>> + */
>> +static inline int attrmask_next_and(int n, const unsigned long *src1p,
>> +                                   const unsigned long *src2p,
>> +                                   unsigned int nr_bits)
>> +{
>> +       /* -1 is a legal arg here. */
>> +       if (n != -1)
>> +               cpu_max_bits_warn(n, nr_bits);
>> +
>> +       if (src1p && src2p)
>> +               return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
>> +       else if (src1p)
>> +               return find_next_bit(src1p, nr_bits, n + 1);
>> +       else if (src2p)
>> +               return find_next_bit(src2p, nr_bits, n + 1);
>> +
>> +       return n + 1;
>> +}
>>  #else
>>  static inline int netif_set_xps_queue(struct net_device *dev,
>>                                       const struct cpumask *mask,
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index a5aa1c7..2552556 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>         int pos;
>>
>>         if (dev_maps)
>> -               map = xmap_dereference(dev_maps->cpu_map[tci]);
>> +               map = xmap_dereference(dev_maps->attr_map[tci]);
>>         if (!map)
>>                 return false;
>>
>> @@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
>>                         break;
>>                 }
>>
>> -               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
>> +               RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
>>                 kfree_rcu(map, rcu);
>>                 return false;
>>         }
>> @@ -2135,31 +2135,58 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
>>         return active;
>>  }
>>
>> +static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
>> +                          struct xps_dev_maps *dev_maps, unsigned int nr_ids,
>> +                          u16 offset, u16 count, bool is_rxqs_map)
>> +{
>> +       bool active = false;
>> +       int i, j;
>> +
>> +       for (j = -1; j = attrmask_next(j, mask, nr_ids),
>> +            j < nr_ids;)
>> +               active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
>> +                                              count);
>> +       if (!active) {
>> +               if (is_rxqs_map) {
>> +                       RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
>> +               } else {
>> +                       RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
>> +
>> +                       for (i = offset + (count - 1); count--; i--)
>> +                               netdev_queue_numa_node_write(
>> +                                       netdev_get_tx_queue(dev, i),
>> +                                                       NUMA_NO_NODE);
>> +               }
>> +               kfree_rcu(dev_maps, rcu);
>> +       }
>> +}
>> +
>>  static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
>>                                    u16 count)
>>  {
>> +       const unsigned long *possible_mask = NULL;
>>         struct xps_dev_maps *dev_maps;
>> -       int cpu, i;
>> -       bool active = false;
>> +       unsigned int nr_ids;
>>
>>         mutex_lock(&xps_map_mutex);
>> -       dev_maps = xmap_dereference(dev->xps_maps);
>>
>> -       if (!dev_maps)
>> -               goto out_no_maps;
>> -
>> -       for_each_possible_cpu(cpu)
>> -               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
>> -                                              offset, count);
>> +       dev_maps = xmap_dereference(dev->xps_rxqs_map);
>> +       if (dev_maps) {
>> +               nr_ids = dev->num_rx_queues;
>> +               clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset,
>> +                              count, true);
>>
>> -       if (!active) {
>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>> -               kfree_rcu(dev_maps, rcu);
>>         }
>>
>> -       for (i = offset + (count - 1); count--; i--)
>> -               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
>> -                                            NUMA_NO_NODE);
>> +       dev_maps = xmap_dereference(dev->xps_cpus_map);
>> +       if (!dev_maps)
>> +               goto out_no_maps;
>> +
>> +       if (num_possible_cpus() > 1)
>> +               possible_mask = cpumask_bits(cpu_possible_mask);
>> +       nr_ids = nr_cpu_ids;
>> +       clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
>> +                      false);
>>
>>  out_no_maps:
>>         mutex_unlock(&xps_map_mutex);
>> @@ -2170,8 +2197,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
>>         netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
>>  }
>>
>> -static struct xps_map *expand_xps_map(struct xps_map *map,
>> -                                     int cpu, u16 index)
>> +static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
>> +                                     u16 index, bool is_rxqs_map)
>>  {
>>         struct xps_map *new_map;
>>         int alloc_len = XPS_MIN_MAP_ALLOC;
>> @@ -2183,7 +2210,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>                 return map;
>>         }
>>
>> -       /* Need to add queue to this CPU's existing map */
>> +       /* Need to add tx-queue to this CPU's/rx-queue's existing map */
>>         if (map) {
>>                 if (pos < map->alloc_len)
>>                         return map;
>> @@ -2191,9 +2218,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>                 alloc_len = map->alloc_len * 2;
>>         }
>>
>> -       /* Need to allocate new map to store queue on this CPU's map */
>> -       new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>> -                              cpu_to_node(cpu));
>> +       /* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
>> +        *  map
>> +        */
>> +       if (is_rxqs_map)
>> +               new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
>> +       else
>> +               new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
>> +                                      cpu_to_node(attr_index));
>>         if (!new_map)
>>                 return NULL;
>>
>> @@ -2205,14 +2237,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
>>         return new_map;
>>  }
>>
>> -int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>> -                       u16 index)
>> +int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
>> +                         u16 index, bool is_rxqs_map)
>>  {
>> +       const unsigned long *online_mask = NULL, *possible_mask = NULL;
>>         struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
>> -       int i, cpu, tci, numa_node_id = -2;
>> +       int i, j, tci, numa_node_id = -2;
>>         int maps_sz, num_tc = 1, tc = 0;
>>         struct xps_map *map, *new_map;
>>         bool active = false;
>> +       unsigned int nr_ids;
>>
>>         if (dev->num_tc) {
>>                 num_tc = dev->num_tc;
>> @@ -2221,16 +2255,27 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>                         return -EINVAL;
>>         }
>>
>> -       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
>> -       if (maps_sz < L1_CACHE_BYTES)
>> -               maps_sz = L1_CACHE_BYTES;
>> -
>>         mutex_lock(&xps_map_mutex);
>> +       if (is_rxqs_map) {
>> +               maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
>> +               dev_maps = xmap_dereference(dev->xps_rxqs_map);
>> +               nr_ids = dev->num_rx_queues;
>> +       } else {
>> +               maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
>> +               if (num_possible_cpus() > 1) {
>> +                       online_mask = cpumask_bits(cpu_online_mask);
>> +                       possible_mask = cpumask_bits(cpu_possible_mask);
>> +               }
>> +               dev_maps = xmap_dereference(dev->xps_cpus_map);
>> +               nr_ids = nr_cpu_ids;
>> +       }
>>
>> -       dev_maps = xmap_dereference(dev->xps_maps);
>> +       if (maps_sz < L1_CACHE_BYTES)
>> +               maps_sz = L1_CACHE_BYTES;
>>
>>         /* allocate memory for queue storage */
>> -       for_each_cpu_and(cpu, cpu_online_mask, mask) {
>> +       for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
>> +            j < nr_ids;) {
>>                 if (!new_dev_maps)
>>                         new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
>>                 if (!new_dev_maps) {
>> @@ -2238,73 +2283,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>                         return -ENOMEM;
>>                 }
>>
>> -               tci = cpu * num_tc + tc;
>> -               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
>> +               tci = j * num_tc + tc;
>> +               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
>>                                  NULL;
>>
>> -               map = expand_xps_map(map, cpu, index);
>> +               map = expand_xps_map(map, j, index, is_rxqs_map);
>>                 if (!map)
>>                         goto error;
>>
>> -               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>         }
>>
>>         if (!new_dev_maps)
>>                 goto out_no_new_maps;
>>
>> -       for_each_possible_cpu(cpu) {
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>>                 /* copy maps belonging to foreign traffic classes */
>> -               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
>> +               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
>>                         /* fill in the new device map from the old device map */
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>                 }
>>
>>                 /* We need to explicitly update tci as prevous loop
>>                  * could break out early if dev_maps is NULL.
>>                  */
>> -               tci = cpu * num_tc + tc;
>> +               tci = j * num_tc + tc;
>>
>> -               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
>> -                       /* add queue to CPU maps */
>> +               if (attr_test_mask(j, mask, nr_ids) &&
>> +                   attr_test_online(j, online_mask, nr_ids)) {
>> +                       /* add tx-queue to CPU/rx-queue maps */
>>                         int pos = 0;
>>
>> -                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>> +                       map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>                         while ((pos < map->len) && (map->queues[pos] != index))
>>                                 pos++;
>>
>>                         if (pos == map->len)
>>                                 map->queues[map->len++] = index;
>>  #ifdef CONFIG_NUMA
>> -                       if (numa_node_id == -2)
>> -                               numa_node_id = cpu_to_node(cpu);
>> -                       else if (numa_node_id != cpu_to_node(cpu))
>> -                               numa_node_id = -1;
> 
> Seems like there should be a comment here about the meaning of -2 and -1
> for numa_node_id. Better yet, seems like there should be constants defined
> for these special values. Maybe something to clean up in the future.

Will have a separate patch (not part of this series) for this.

> 
>> +                       if (!is_rxqs_map) {
>> +                               if (numa_node_id == -2)
>> +                                       numa_node_id = cpu_to_node(j);
>> +                               else if (numa_node_id != cpu_to_node(j))
>> +                                       numa_node_id = -1;
>> +                       }
>>  #endif
>>                 } else if (dev_maps) {
>>                         /* fill in the new device map from the old device map */
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>                 }
>>
>>                 /* copy maps belonging to foreign traffic classes */
>>                 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
>>                         /* fill in the new device map from the old device map */
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> -                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>> +                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
>>                 }
>>         }
>>
>> -       rcu_assign_pointer(dev->xps_maps, new_dev_maps);
>> +       if (is_rxqs_map)
>> +               rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
>> +       else
>> +               rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
>>
>>         /* Cleanup old maps */
>>         if (!dev_maps)
>>                 goto out_no_old_maps;
>>
>> -       for_each_possible_cpu(cpu) {
>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>> -                       map = xmap_dereference(dev_maps->cpu_map[tci]);
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>> +                       map = xmap_dereference(dev_maps->attr_map[tci]);
>>                         if (map && map != new_map)
>>                                 kfree_rcu(map, rcu);
>>                 }
>> @@ -2317,19 +2370,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>         active = true;
>>
>>  out_no_new_maps:
>> -       /* update Tx queue numa node */
>> -       netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>> -                                    (numa_node_id >= 0) ? numa_node_id :
>> -                                    NUMA_NO_NODE);
>> +       if (!is_rxqs_map) {
>> +               /* update Tx queue numa node */
>> +               netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
>> +                                            (numa_node_id >= 0) ?
>> +                                            numa_node_id : NUMA_NO_NODE);
>> +       }
>>
>>         if (!dev_maps)
>>                 goto out_no_maps;
>>
>> -       /* removes queue from unused CPUs */
>> -       for_each_possible_cpu(cpu) {
>> -               for (i = tc, tci = cpu * num_tc; i--; tci++)
>> +       /* removes tx-queue from unused CPUs/rx-queues */
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>> +               for (i = tc, tci = j * num_tc; i--; tci++)
>>                         active |= remove_xps_queue(dev_maps, tci, index);
>> -               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
>> +               if (!attr_test_mask(j, mask, nr_ids) ||
>> +                   !attr_test_online(j, online_mask, nr_ids))
>>                         active |= remove_xps_queue(dev_maps, tci, index);
>>                 for (i = num_tc - tc, tci++; --i; tci++)
>>                         active |= remove_xps_queue(dev_maps, tci, index);
>> @@ -2337,7 +2394,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>
>>         /* free map if not active */
>>         if (!active) {
>> -               RCU_INIT_POINTER(dev->xps_maps, NULL);
>> +               if (is_rxqs_map)
>> +                       RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
>> +               else
>> +                       RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
>>                 kfree_rcu(dev_maps, rcu);
>>         }
>>
>> @@ -2347,11 +2407,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>         return 0;
>>  error:
>>         /* remove any maps that we added */
>> -       for_each_possible_cpu(cpu) {
>> -               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
>> -                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
>> +       for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
>> +            j < nr_ids;) {
>> +               for (i = num_tc, tci = j * num_tc; i--; tci++) {
>> +                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
>>                         map = dev_maps ?
>> -                             xmap_dereference(dev_maps->cpu_map[tci]) :
>> +                             xmap_dereference(dev_maps->attr_map[tci]) :
>>                               NULL;
>>                         if (new_map && new_map != map)
>>                                 kfree(new_map);
>> @@ -2363,6 +2424,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>>         kfree(new_dev_maps);
>>         return -ENOMEM;
>>  }
>> +
>> +int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
>> +                       u16 index)
>> +{
>> +       return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
>> +}
>>  EXPORT_SYMBOL(netif_set_xps_queue);
>>
>>  #endif
>> @@ -3384,7 +3451,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>         int queue_index = -1;
>>
>>         rcu_read_lock();
>> -       dev_maps = rcu_dereference(dev->xps_maps);
>> +       dev_maps = rcu_dereference(dev->xps_cpus_map);
>>         if (dev_maps) {
>>                 unsigned int tci = skb->sender_cpu - 1;
>>
>> @@ -3393,7 +3460,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>                         tci += netdev_get_prio_tc_map(dev, skb->priority);
>>                 }
>>
>> -               map = rcu_dereference(dev_maps->cpu_map[tci]);
>> +               map = rcu_dereference(dev_maps->attr_map[tci]);
>>                 if (map) {
>>                         if (map->len == 1)
>>                                 queue_index = map->queues[0];
>> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
>> index bb7e80f..b39987c 100644
>> --- a/net/core/net-sysfs.c
>> +++ b/net/core/net-sysfs.c
>> @@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
>>                 return -ENOMEM;
>>
>>         rcu_read_lock();
>> -       dev_maps = rcu_dereference(dev->xps_maps);
>> +       dev_maps = rcu_dereference(dev->xps_cpus_map);
>>         if (dev_maps) {
>>                 for_each_possible_cpu(cpu) {
>>                         int i, tci = cpu * num_tc + tc;
>>                         struct xps_map *map;
>>
>> -                       map = rcu_dereference(dev_maps->cpu_map[tci]);
>> +                       map = rcu_dereference(dev_maps->attr_map[tci]);
>>                         if (!map)
>>                                 continue;
>>
>>
> 
> Acked-by: Tom Herbert <tom@quantonium.net>
> 

^ permalink raw reply

* Re: [net-next PATCH v4 5/7] net: Enable Tx queue selection based on Rx queues
From: Nambiar, Amritha @ 2018-06-28  0:48 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, David Miller, Alexander Duyck,
	Samudrala, Sridhar, Alexander Duyck, Eric Dumazet,
	Hannes Frederic Sowa, Tom Herbert
In-Reply-To: <CAF=yD-Kd4DkC6DWydwiwEZytE9NsnOYZg2KFFxQ8xgAA=DUiXw@mail.gmail.com>

On 6/27/2018 3:47 AM, Willem de Bruijn wrote:
>>>> +static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>>>>  {
>>>>  #ifdef CONFIG_XPS
>>>>         struct xps_dev_maps *dev_maps;
>>>> -       struct xps_map *map;
>>>> +       struct sock *sk = skb->sk;
>>>>         int queue_index = -1;
>>>>
>>>>         if (!static_key_false(&xps_needed))
>>>>                 return -1;
>>>>
>>>>         rcu_read_lock();
>>>> -       dev_maps = rcu_dereference(dev->xps_cpus_map);
>>>> +       if (!static_key_false(&xps_rxqs_needed))
>>>> +               goto get_cpus_map;
>>>> +
>>>> +       dev_maps = rcu_dereference(dev->xps_rxqs_map);
>>>>         if (dev_maps) {
>>>> -               unsigned int tci = skb->sender_cpu - 1;
>>>> +               int tci = sk_rx_queue_get(sk);
>>>
>>> What if the rx device differs from the tx device?
>>>
>> I think I have 3 options here:
>> 1. Cache the ifindex in sock_common which will introduce a new
>> additional field in sock_common.
>> 2. Use dev_get_by_napi_id to get the device id. This could be expensive:
>> if the rxqs_map is set, this will be done on every packet and involves
>> walking through the hashlist for napi_id lookup.
> 
> The tx queue mapping is cached in the sk for connected sockets, but
> indeed this would be expensive for many workloads.
> 
>> 3. Remove the device id validation, similar to skb_tx_hash, where the
>> recorded rx_queue is used if there is one and otherwise it falls through
>> to the flow hash calculation.
>> What do you think is suitable here?
> 
> Alternatively, just accept the misprediction in this rare case. But do
> make the caveat explicit in the documentation.
> 
Okay, I will add this in the documentation.

^ permalink raw reply

* Re: [BISECTED] [4.17.0-rc6] IPv6 link-local address not getting added
From: David Ahern @ 2018-06-28  1:19 UTC (permalink / raw)
  To: Sowmini Varadhan; +Cc: netdev
In-Reply-To: <20180628003515.GD985@oracle.com>

On 6/27/18 6:35 PM, Sowmini Varadhan wrote:
> 
> Hi David,
> 
> An IPv6 regression has been introduced in 4.17.0-rc6 by
>   8308f3f net/ipv6: Add support for specifying metric of connected routes
> 
> The regression is that some interfaces on my test machine come
> up with link-local addrs but the fe80 prefix is missing.
> After this bug, I cannot send any packets to anyone onlink 
> (including my routers). 
> 
> Here are the symptoms:
> 
> When everything is fine, "ip -6 route|grep eno" shows
> 
> 2606:b400:400:18c8::/64 dev eno1 proto ra metric 100  pref medium
> fe80::5:73ff:fea0:52d dev eno1 proto static metric 100  pref medium
> fe80::/64 dev eno1 proto kernel metric 256  pref medium
> fe80::/64 dev eno3 proto kernel metric 256  pref medium
> fe80::/64 dev eno4 proto kernel metric 256  pref medium
> default via fe80::5:73ff:fea0:52d dev eno1 proto static metric 100  pref medium
> 
> But after 8308f3f, I only find
> 
> # ip -6 route|grep eno
> 2606:b400:400:18c8::/64 dev eno1 proto ra metric 100  pref medium
> fe80::5:73ff:fea0:52d dev eno1 proto static metric 100  pref medium
> fe80::/64 dev eno1 proto kernel metric 256  pref medium
> default via fe80::5:73ff:fea0:52d dev eno1 proto static metric 100  pref medium
> 
> (note that eno2 is not enabled in my config, so its absence is expected)
> 
> Please have a look, thanks.
> --Sowmini
> 

Interesting. I am not seeing that. Using your static LL routes:

# ip -6 ro ls
::1 dev lo proto kernel metric 256 pref medium
2001:db8:2::/120 dev eth2 proto kernel metric 256 pref medium
fe80::5:73ff:fea0:52d dev eth4 metric 100 pref medium
fe80::/64 dev eth4 proto kernel metric 256 pref medium
fe80::/64 dev eth5 proto kernel metric 256 pref medium
fe80::/64 dev eth6 proto kernel metric 256 pref medium
fe80::/64 dev eth7 proto kernel metric 256 pref medium
fe80::/64 dev eth8 proto kernel metric 256 pref medium
fe80::/64 dev eth2 proto kernel metric 256 pref medium
default via fe80::5:73ff:fea0:52d dev eth4 metric 100 pref medium

Can you send me the network config files (off-list is fine)? Which
interface manager and routing daemon (if any) are in use?

^ permalink raw reply

* [PATCH net] net: fib_rules: add protocol check in rule_find
From: Roopa Prabhu @ 2018-06-28  1:27 UTC (permalink / raw)
  To: davem; +Cc: netdev

From: Roopa Prabhu <roopa@cumulusnetworks.com>

After commit f9d4b0c1e969 ("fib_rules: move common handling of newrule
delrule msgs into fib_nl2rule"), rule_find is strict about checking
for an existing rule. rule_find must check against all
user-given attributes; otherwise it may match against a subset
of attributes and return an existing rule.

In the below case, without support for protocol match, rule_find
will match only against 'table main' and return an existing rule.

$ip -4 rule add table main protocol boot
RTNETLINK answers: File exists

This patch adds protocol support to rule_find, forcing it to
check protocol match if given by the user.

Fixes: f9d4b0c1e969 ("fib_rules: move common handling of newrule delrule msgs into fib_nl2rule")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
I spent some time looking at all match keys today and protocol
was the only missing one (protocol is not in a released kernel yet).
The only way this could be avoided is to move back to the old loose
rule_find. I am worried about this new strict checking surprising users,
but going back to the previous loose checking does not seem right either.
If there is a reason to believe that users did rely on the previous
behaviour, I will be happy to revert. Here is another example of old and
new behaviour.

old rule_find behaviour:
$ip -4 rule add table main protocol boot
$ip -4 rule add table main protocol boot
$ip -4 rule add table main protocol boot
$ip rule show
0:      from all lookup local 
32763:  from all lookup main  proto boot 
32764:  from all lookup main  proto boot 
32765:  from all lookup main  proto boot 
32766:  from all lookup main 
32767:  from all lookup default 

new rule_find behaviour (after this patch):
$ip -4 rule add table main protocol boot
$ip -4 rule add table main protocol boot
RTNETLINK answers: File exists

 net/core/fib_rules.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bc8425d..5905567 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -444,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
 		if (rule->ip_proto && r->ip_proto != rule->ip_proto)
 			continue;
 
+		if (rule->proto && r->proto != rule->proto)
+			continue;
+
 		if (fib_rule_port_range_set(&rule->sport_range) &&
 		    !fib_rule_port_range_compare(&r->sport_range,
 						 &rule->sport_range))
-- 
2.1.4

^ permalink raw reply related

* [PATCH] bnx2x: Mark expected switch fall-throughs
From: Gustavo A. R. Silva @ 2018-06-28  1:32 UTC (permalink / raw)
  To: Ariel Elior, everest-linux-l2, David S. Miller, Michael Chan
  Cc: netdev, linux-kernel, Gustavo A. R. Silva

In preparation for enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c  | 3 +++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c    | 4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         | 2 +-
 5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 22243c4..98d4c5a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -6339,6 +6339,7 @@ int bnx2x_set_led(struct link_params *params,
 		 */
 		if (!vars->link_up)
 			break;
+		/* else: fall through */
 	case LED_MODE_ON:
 		if (((params->phy[EXT_PHY1].type ==
 			  PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727) ||
@@ -12521,11 +12522,13 @@ static void bnx2x_phy_def_cfg(struct link_params *params,
 	switch (link_config  & PORT_FEATURE_LINK_SPEED_MASK) {
 	case PORT_FEATURE_LINK_SPEED_10M_HALF:
 		phy->req_duplex = DUPLEX_HALF;
+		/* fall through */
 	case PORT_FEATURE_LINK_SPEED_10M_FULL:
 		phy->req_line_speed = SPEED_10;
 		break;
 	case PORT_FEATURE_LINK_SPEED_100M_HALF:
 		phy->req_duplex = DUPLEX_HALF;
+		/* fall through */
 	case PORT_FEATURE_LINK_SPEED_100M_FULL:
 		phy->req_line_speed = SPEED_100;
 		break;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 5b1ed24..44a6f28 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -8561,11 +8561,11 @@ int bnx2x_set_int_mode(struct bnx2x *bp)
 			       bp->num_queues,
 			       1 + bp->num_cnic_queues);
 
-		/* falling through... */
+		/* fall through */
 	case BNX2X_INT_MODE_MSI:
 		bnx2x_enable_msi(bp);
 
-		/* falling through... */
+		/* fall through */
 	case BNX2X_INT_MODE_INTX:
 		bp->num_ethernet_queues = 1;
 		bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 8baf9d3..3f4d2c8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -3258,7 +3258,7 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp,
 	/* DEL command deletes all currently configured MACs */
 	case BNX2X_MCAST_CMD_DEL:
 		o->set_registry_size(o, 0);
-		/* Don't break */
+		/* fall through */
 
 	/* RESTORE command will restore the entire multicast configuration */
 	case BNX2X_MCAST_CMD_RESTORE:
@@ -3592,7 +3592,7 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp,
 	/* DEL command deletes all currently configured MACs */
 	case BNX2X_MCAST_CMD_DEL:
 		o->set_registry_size(o, 0);
-		/* Don't break */
+		/* fall through */
 
 	/* RESTORE command will restore the entire multicast configuration */
 	case BNX2X_MCAST_CMD_RESTORE:
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index dc77bfd..62da465 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1827,6 +1827,7 @@ int bnx2x_iov_eq_sp_event(struct bnx2x *bp, union event_ring_elem *elem)
 		DP(BNX2X_MSG_IOV, "got VF [%d:%d] RSS update ramrod\n",
 		   vf->abs_vfid, qidx);
 		bnx2x_vf_handle_rss_update_eqe(bp, vf);
+		/* fall through */
 	case EVENT_RING_OPCODE_VF_FLR:
 		/* Do nothing for now */
 		return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b5fc641..d2dadad 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1727,7 +1727,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
 					    speed);
 		}
 		set_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, &bp->sp_event);
-		/* fall thru */
+		/* fall through */
 	}
 	case ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE:
 		set_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event);
-- 
2.7.4

^ permalink raw reply related

* [PATCH v4] net: ethernet: stmmac: dwmac-rk: Add GMAC support for px30
From: David Wu @ 2018-06-28  1:33 UTC (permalink / raw)
  To: davem, heiko, robh+dt
  Cc: mark.rutland, huangtao, netdev, linux-arm-kernel, linux-rockchip,
	linux-kernel, David Wu

Add constants and callback functions for the dwmac on the PX30 SoC.
The base structure is the same, but the registers and the bits in
them are moved slightly. Also add the clk_mac_speed clock for
selecting the MAC speed.

Signed-off-by: David Wu <david.wu@rock-chips.com>
---
Change in v4:
- Fix the patch failing to apply on net-next.

Change in v3:
- Add the clock enable/disable for clk_mac_speed.

Change in v2:
- Fix some errors in the commit title and message.

 .../devicetree/bindings/net/rockchip-dwmac.txt     |  1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c     | 69 ++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index 9c16ee2..3b71da7 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -4,6 +4,7 @@ The device node has following properties.
 
 Required properties:
  - compatible: should be "rockchip,<name>-gamc"
+   "rockchip,px30-gmac":   found on PX30 SoCs
    "rockchip,rk3128-gmac": found on RK312x SoCs
    "rockchip,rk3228-gmac": found on RK322x SoCs
    "rockchip,rk3288-gmac": found on RK3288 SoCs
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index f08625a..7b92336 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -61,6 +61,7 @@ struct rk_priv_data {
 	struct clk *mac_clk_tx;
 	struct clk *clk_mac_ref;
 	struct clk *clk_mac_refout;
+	struct clk *clk_mac_speed;
 	struct clk *aclk_mac;
 	struct clk *pclk_mac;
 	struct clk *clk_phy;
@@ -83,6 +84,64 @@ struct rk_priv_data {
 	(((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \
 	 ((rx) ? soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE))
 
+#define PX30_GRF_GMAC_CON1		0x0904
+
+/* PX30_GRF_GMAC_CON1 */
+#define PX30_GMAC_PHY_INTF_SEL_RMII	(GRF_CLR_BIT(4) | GRF_CLR_BIT(5) | \
+					 GRF_BIT(6))
+#define PX30_GMAC_SPEED_10M		GRF_CLR_BIT(2)
+#define PX30_GMAC_SPEED_100M		GRF_BIT(2)
+
+static void px30_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+		     PX30_GMAC_PHY_INTF_SEL_RMII);
+}
+
+static void px30_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+	int ret;
+
+	if (IS_ERR(bsp_priv->clk_mac_speed)) {
+		dev_err(dev, "%s: Missing clk_mac_speed clock\n", __func__);
+		return;
+	}
+
+	if (speed == 10) {
+		regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+			     PX30_GMAC_SPEED_10M);
+
+		ret = clk_set_rate(bsp_priv->clk_mac_speed, 2500000);
+		if (ret)
+			dev_err(dev, "%s: set clk_mac_speed rate 2500000 failed: %d\n",
+				__func__, ret);
+	} else if (speed == 100) {
+		regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+			     PX30_GMAC_SPEED_100M);
+
+		ret = clk_set_rate(bsp_priv->clk_mac_speed, 25000000);
+		if (ret)
+			dev_err(dev, "%s: set clk_mac_speed rate 25000000 failed: %d\n",
+				__func__, ret);
+
+	} else {
+		dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+	}
+}
+
+static const struct rk_gmac_ops px30_ops = {
+	.set_to_rmii = px30_set_to_rmii,
+	.set_rmii_speed = px30_set_rmii_speed,
+};
+
 #define RK3128_GRF_MAC_CON0	0x0168
 #define RK3128_GRF_MAC_CON1	0x016c
 
@@ -1042,6 +1101,10 @@ static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat)
 		}
 	}
 
+	bsp_priv->clk_mac_speed = devm_clk_get(dev, "clk_mac_speed");
+	if (IS_ERR(bsp_priv->clk_mac_speed))
+		dev_err(dev, "cannot get clock %s\n", "clk_mac_speed");
+
 	if (bsp_priv->clock_input) {
 		dev_info(dev, "clock input from PHY\n");
 	} else {
@@ -1094,6 +1157,9 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
 			if (!IS_ERR(bsp_priv->mac_clk_tx))
 				clk_prepare_enable(bsp_priv->mac_clk_tx);
 
+			if (!IS_ERR(bsp_priv->clk_mac_speed))
+				clk_prepare_enable(bsp_priv->clk_mac_speed);
+
 			/**
 			 * if (!IS_ERR(bsp_priv->clk_mac))
 			 *	clk_prepare_enable(bsp_priv->clk_mac);
@@ -1118,6 +1184,8 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
 			clk_disable_unprepare(bsp_priv->pclk_mac);
 
 			clk_disable_unprepare(bsp_priv->mac_clk_tx);
+
+			clk_disable_unprepare(bsp_priv->clk_mac_speed);
 			/**
 			 * if (!IS_ERR(bsp_priv->clk_mac))
 			 *	clk_disable_unprepare(bsp_priv->clk_mac);
@@ -1414,6 +1482,7 @@ static int rk_gmac_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume);
 
 static const struct of_device_id rk_gmac_dwmac_match[] = {
+	{ .compatible = "rockchip,px30-gmac",	.data = &px30_ops   },
 	{ .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops },
 	{ .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
 	{ .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
-- 
2.7.4

^ permalink raw reply related

* [PATCH] tg3: Mark expected switch fall-throughs
From: Gustavo A. R. Silva @ 2018-06-28  1:45 UTC (permalink / raw)
  To: Siva Reddy Kallam, Prashant Sreedharan, Michael Chan,
	David S. Miller
  Cc: netdev, linux-kernel, Gustavo A. R. Silva

In preparation for enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 drivers/net/ethernet/broadcom/tg3.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 3be87ef..0a796d5 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -721,6 +721,7 @@ static int tg3_ape_lock(struct tg3 *tp, int locknum)
 	case TG3_APE_LOCK_GPIO:
 		if (tg3_asic_rev(tp) == ASIC_REV_5761)
 			return 0;
+		/* else: fall through */
 	case TG3_APE_LOCK_GRC:
 	case TG3_APE_LOCK_MEM:
 		if (!tp->pci_fn)
@@ -781,6 +782,7 @@ static void tg3_ape_unlock(struct tg3 *tp, int locknum)
 	case TG3_APE_LOCK_GPIO:
 		if (tg3_asic_rev(tp) == ASIC_REV_5761)
 			return;
+		/* else: fall through */
 	case TG3_APE_LOCK_GRC:
 	case TG3_APE_LOCK_MEM:
 		if (!tp->pci_fn)
@@ -10706,28 +10708,40 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
 	switch (limit) {
 	case 16:
 		tw32(MAC_RCV_RULE_15,  0); tw32(MAC_RCV_VALUE_15,  0);
+		/* fall through */
 	case 15:
 		tw32(MAC_RCV_RULE_14,  0); tw32(MAC_RCV_VALUE_14,  0);
+		/* fall through */
 	case 14:
 		tw32(MAC_RCV_RULE_13,  0); tw32(MAC_RCV_VALUE_13,  0);
+		/* fall through */
 	case 13:
 		tw32(MAC_RCV_RULE_12,  0); tw32(MAC_RCV_VALUE_12,  0);
+		/* fall through */
 	case 12:
 		tw32(MAC_RCV_RULE_11,  0); tw32(MAC_RCV_VALUE_11,  0);
+		/* fall through */
 	case 11:
 		tw32(MAC_RCV_RULE_10,  0); tw32(MAC_RCV_VALUE_10,  0);
+		/* fall through */
 	case 10:
 		tw32(MAC_RCV_RULE_9,  0); tw32(MAC_RCV_VALUE_9,  0);
+		/* fall through */
 	case 9:
 		tw32(MAC_RCV_RULE_8,  0); tw32(MAC_RCV_VALUE_8,  0);
+		/* fall through */
 	case 8:
 		tw32(MAC_RCV_RULE_7,  0); tw32(MAC_RCV_VALUE_7,  0);
+		/* fall through */
 	case 7:
 		tw32(MAC_RCV_RULE_6,  0); tw32(MAC_RCV_VALUE_6,  0);
+		/* fall through */
 	case 6:
 		tw32(MAC_RCV_RULE_5,  0); tw32(MAC_RCV_VALUE_5,  0);
+		/* fall through */
 	case 5:
 		tw32(MAC_RCV_RULE_4,  0); tw32(MAC_RCV_VALUE_4,  0);
+		/* fall through */
 	case 4:
 		/* tw32(MAC_RCV_RULE_3,  0); tw32(MAC_RCV_VALUE_3,  0); */
 	case 3:
-- 
2.7.4

^ permalink raw reply related

* Re: [RESEND PATCH] bpfilter: check compiler capability in Kconfig
From: Alexei Starovoitov @ 2018-06-28  2:05 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: David S . Miller, netdev, Matteo Croce, Arnd Bergmann,
	linux-kbuild, Alexei Starovoitov, linux-kernel, Michal Marek,
	Daniel Borkmann
In-Reply-To: <1529985336-27522-1-git-send-email-yamada.masahiro@socionext.com>

On Tue, Jun 26, 2018 at 12:55:35PM +0900, Masahiro Yamada wrote:
> With the brand-new syntax extension of Kconfig, we can directly
> check the compiler capability in the configuration phase.
> 
> If the cc-can-link.sh fails, the BPFILTER_UMH is automatically
> hidden by the dependency.
> 
> I also deleted 'default n', which is no-op.
> 
> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>

I haven't tested it yet, but looks good to me as well.
Thank you for the follow up.
Acked-by: Alexei Starovoitov <ast@kernel.org>

^ permalink raw reply

* Re: [PATCH v1 net-next 02/14] net: Add a new socket option for a future transmit time.
From: kbuild test robot @ 2018-06-28  2:16 UTC (permalink / raw)
  To: Jesus Sanchez-Palencia
  Cc: kbuild-all, netdev, tglx, jan.altenberg, vinicius.gomes,
	kurt.kanzenbach, henrik, richardcochran, levi.pearson,
	ilias.apalodimas, ivan.khoronzhuk, mlichvar, willemb, jhs,
	xiyou.wangcong, jiri, Richard Cochran, Jesus Sanchez-Palencia
In-Reply-To: <20180627215950.6719-3-jesus.sanchez-palencia@intel.com>

[-- Attachment #1: Type: text/plain, Size: 20657 bytes --]

Hi Richard,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Jesus-Sanchez-Palencia/Scheduled-packet-Transmission-ETF/20180628-061119
reproduce: make htmldocs

All warnings (new ones prefixed by >>):

   include/net/mac80211.h:955: warning: Function parameter or member 'status.ampdu_ack_len' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'status.ampdu_len' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'status.antenna' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'status.tx_time' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'status.is_valid_ack_signal' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'status.status_driver_data' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'driver_rates' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'pad' not described in 'ieee80211_tx_info'
   include/net/mac80211.h:955: warning: Function parameter or member 'rate_driver_data' not described in 'ieee80211_tx_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'rx_stats_avg' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'rx_stats_avg.signal' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'rx_stats_avg.chain_signal' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.filtered' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.retry_failed' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.retry_count' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.lost_packets' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.last_tdls_pkt_time' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.msdu_retries' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.msdu_failed' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.last_ack' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.last_ack_signal' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.ack_signal_filled' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'status_stats.avg_ack_signal' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.packets' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.bytes' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.last_rate' not described in 'sta_info'
   net/mac80211/sta_info.h:588: warning: Function parameter or member 'tx_stats.msdu' not described in 'sta_info'
   kernel/sched/fair.c:3760: warning: Function parameter or member 'flags' not described in 'attach_entity_load_avg'
   include/linux/device.h:93: warning: bad line: this bus.
   include/linux/dma-buf.h:307: warning: Function parameter or member 'cb_excl.cb' not described in 'dma_buf'
   include/linux/dma-buf.h:307: warning: Function parameter or member 'cb_excl.poll' not described in 'dma_buf'
   include/linux/dma-buf.h:307: warning: Function parameter or member 'cb_excl.active' not described in 'dma_buf'
   include/linux/dma-buf.h:307: warning: Function parameter or member 'cb_shared.cb' not described in 'dma_buf'
   include/linux/dma-buf.h:307: warning: Function parameter or member 'cb_shared.poll' not described in 'dma_buf'
   include/linux/dma-buf.h:307: warning: Function parameter or member 'cb_shared.active' not described in 'dma_buf'
   include/linux/dma-fence-array.h:54: warning: Function parameter or member 'work' not described in 'dma_fence_array'
   include/linux/gpio/driver.h:142: warning: Function parameter or member 'request_key' not described in 'gpio_irq_chip'
   include/linux/iio/hw-consumer.h:1: warning: no structured comments found
   include/linux/device.h:94: warning: bad line: this bus.
   include/linux/input/sparse-keymap.h:46: warning: Function parameter or member 'sw' not described in 'key_entry'
   include/linux/regulator/driver.h:227: warning: Function parameter or member 'resume_early' not described in 'regulator_ops'
   drivers/regulator/core.c:4465: warning: Excess function parameter 'state' description in 'regulator_suspend_late'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw0' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw1' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw2' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.esw3' not described in 'irb'
   arch/s390/include/asm/cio.h:245: warning: Function parameter or member 'esw.eadm' not described in 'irb'
   drivers/usb/dwc3/gadget.c:510: warning: Excess function parameter 'dwc' description in 'dwc3_gadget_start_config'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_pin' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_unpin' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_res_obj' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_get_sg_table' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_import_sg_table' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_vmap' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_vunmap' not described in 'drm_driver'
   include/drm/drm_drv.h:610: warning: Function parameter or member 'gem_prime_mmap' not described in 'drm_driver'
   drivers/gpu/drm/i915/i915_vma.h:48: warning: cannot understand function prototype: 'struct i915_vma '
   drivers/gpu/drm/i915/i915_vma.h:1: warning: no structured comments found
   include/drm/tinydrm/tinydrm.h:34: warning: Function parameter or member 'fb_dirty' not described in 'tinydrm_device'
   drivers/gpu/drm/tinydrm/mipi-dbi.c:272: warning: Function parameter or member 'crtc_state' not described in 'mipi_dbi_enable_flush'
   drivers/gpu/drm/tinydrm/mipi-dbi.c:272: warning: Function parameter or member 'plane_state' not described in 'mipi_dbi_enable_flush'
   include/linux/skbuff.h:853: warning: Function parameter or member 'dev_scratch' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'ip_defrag_offset' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'list' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'skb_mstamp' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member '__cloned_offset' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'head_frag' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member '__unused' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member '__pkt_type_offset' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'pfmemalloc' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'encapsulation' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'encap_hdr_csum' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'csum_valid' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'csum_complete_sw' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'csum_level' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'inner_protocol_type' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'remcsum_offload' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'offload_fwd_mark' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'offload_mr_fwd_mark' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'sender_cpu' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'reserved_tailroom' not described in 'sk_buff'
   include/linux/skbuff.h:853: warning: Function parameter or member 'inner_ipproto' not described in 'sk_buff'
   include/net/sock.h:234: warning: Function parameter or member 'skc_addrpair' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_portpair' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_ipv6only' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_net_refcnt' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_v6_daddr' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_v6_rcv_saddr' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_cookie' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_listener' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_tw_dr' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_rcv_wnd' not described in 'sock_common'
   include/net/sock.h:234: warning: Function parameter or member 'skc_tw_rcv_nxt' not described in 'sock_common'
   include/net/sock.h:499: warning: Function parameter or member 'sk_backlog.rmem_alloc' not described in 'sock'
   include/net/sock.h:499: warning: Function parameter or member 'sk_backlog.len' not described in 'sock'
   include/net/sock.h:499: warning: Function parameter or member 'sk_backlog.head' not described in 'sock'
   include/net/sock.h:499: warning: Function parameter or member 'sk_backlog.tail' not described in 'sock'
   include/net/sock.h:499: warning: Function parameter or member 'sk_wq_raw' not described in 'sock'
   include/net/sock.h:499: warning: Function parameter or member 'tcp_rtx_queue' not described in 'sock'
   include/net/sock.h:499: warning: Function parameter or member 'sk_route_forced_caps' not described in 'sock'
>> include/net/sock.h:499: warning: Function parameter or member 'sk_clockid' not described in 'sock'
>> include/net/sock.h:499: warning: Function parameter or member 'sk_txtime_flags' not described in 'sock'
   include/net/sock.h:499: warning: Function parameter or member 'sk_validate_xmit_skb' not described in 'sock'
   net/core/datagram.c:835: warning: Function parameter or member 'events' not described in 'datagram_poll_mask'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'adj_list.upper' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'adj_list.lower' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'gso_partial_features' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'switchdev_ops' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'l3mdev_ops' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'xfrmdev_ops' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'tlsdev_ops' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'name_assign_type' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'ieee802154_ptr' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'mpls_ptr' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'xdp_prog' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'gro_flush_timeout' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'nf_hooks_ingress' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member '____cacheline_aligned_in_smp' not described in 'net_device'
   include/linux/netdevice.h:1998: warning: Function parameter or member 'qdisc_hash' not described in 'net_device'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising' not described in 'phylink_link_state'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising' not described in 'phylink_link_state'
   sound/soc/soc-core.c:2787: warning: Excess function parameter 'legacy_dai_naming' description in 'snd_soc_register_dais'
   include/linux/rcupdate.h:571: ERROR: Unexpected indentation.
   include/linux/rcupdate.h:575: ERROR: Unexpected indentation.
   include/linux/rcupdate.h:579: WARNING: Block quote ends without a blank line; unexpected unindent.
   include/linux/rcupdate.h:581: WARNING: Block quote ends without a blank line; unexpected unindent.
   include/linux/rcupdate.h:581: WARNING: Inline literal start-string without end-string.
   lib/reed_solomon/reed_solomon.c:287: ERROR: Unknown target name: "gfp".
   include/linux/wait.h:110: WARNING: Block quote ends without a blank line; unexpected unindent.
   include/linux/wait.h:113: ERROR: Unexpected indentation.
   include/linux/wait.h:115: WARNING: Block quote ends without a blank line; unexpected unindent.
   kernel/time/hrtimer.c:1129: WARNING: Block quote ends without a blank line; unexpected unindent.
   kernel/signal.c:327: WARNING: Inline literal start-string without end-string.
   drivers/video/fbdev/core/modedb.c:647: WARNING: Inline strong start-string without end-string.
   drivers/video/fbdev/core/modedb.c:647: WARNING: Inline strong start-string without end-string.
   drivers/video/fbdev/core/modedb.c:647: WARNING: Inline strong start-string without end-string.
   drivers/video/fbdev/core/modedb.c:647: WARNING: Inline strong start-string without end-string.
   drivers/ata/libata-core.c:5943: ERROR: Unknown target name: "hw".
   drivers/message/fusion/mptbase.c:5054: WARNING: Definition list ends without a blank line; unexpected unindent.
   drivers/tty/serial/serial_core.c:1892: WARNING: Definition list ends without a blank line; unexpected unindent.
   include/linux/mtd/rawnand.h:1446: WARNING: Inline strong start-string without end-string.
   include/linux/mtd/rawnand.h:1448: WARNING: Inline strong start-string without end-string.
   include/linux/regulator/driver.h:279: ERROR: Unknown target name: "regulator_regmap_x_voltage".
   Documentation/driver-api/soundwire/locking.rst:50: ERROR: Inconsistent literal block quoting.
   Documentation/driver-api/soundwire/locking.rst:51: WARNING: Line block ends without a blank line.
   Documentation/driver-api/soundwire/locking.rst:55: WARNING: Inline substitution_reference start-string without end-string.
   Documentation/driver-api/soundwire/locking.rst:56: WARNING: Line block ends without a blank line.
   Documentation/driver-api/soundwire/stream.rst:177: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/driver-api/soundwire/stream.rst:203: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/driver-api/soundwire/stream.rst:248: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/driver-api/soundwire/stream.rst:277: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/driver-api/soundwire/stream.rst:304: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/driver-api/soundwire/stream.rst:328: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/driver-api/soundwire/stream.rst:352: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/driver-api/soundwire/stream.rst:364: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   include/linux/spi/spi.h:373: ERROR: Unexpected indentation.
   Documentation/gpu/drivers.rst:5: WARNING: toctree contains reference to nonexisting document u'gpu/v3d'
   Documentation/misc-devices/ibmvmc.rst:2: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/networking/e100.rst:57: WARNING: Literal block expected; none found.
   Documentation/networking/e100.rst:68: WARNING: Literal block expected; none found.
   Documentation/networking/e100.rst:75: WARNING: Literal block expected; none found.
   Documentation/networking/e100.rst:84: WARNING: Literal block expected; none found.
   Documentation/networking/e100.rst:93: WARNING: Inline emphasis start-string without end-string.
   Documentation/networking/e1000.rst:83: ERROR: Unexpected indentation.
   Documentation/networking/e1000.rst:84: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/networking/e1000.rst:173: WARNING: Definition list ends without a blank line; unexpected unindent.
   Documentation/networking/e1000.rst:236: WARNING: Definition list ends without a blank line; unexpected unindent.
   net/core/dev.c:4650: ERROR: Unknown target name: "page_is".
   Documentation/networking/net_failover.rst:48: WARNING: Definition list ends without a blank line; unexpected unindent.
   Documentation/networking/net_failover.rst:50: ERROR: Unexpected indentation.
   Documentation/networking/net_failover.rst:52: ERROR: Unexpected indentation.
   Documentation/networking/net_failover.rst:53: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/networking/net_failover.rst:55: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/networking/net_failover.rst:63: ERROR: Unexpected indentation.
   Documentation/networking/net_failover.rst:64: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/networking/net_failover.rst:86: ERROR: Unexpected indentation.
   Documentation/networking/net_failover.rst:88: ERROR: Unexpected indentation.
   Documentation/networking/net_failover.rst:89: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/networking/net_failover.rst:91: WARNING: Block quote ends without a blank line; unexpected unindent.
   Documentation/process/2.Process.rst:131: ERROR: Malformed table.
   Bottom/header table border does not match top border.

vim +499 include/net/sock.h

^1da177e Linus Torvalds 2005-04-16 @499  

:::::: The code at line 499 was first introduced by commit
:::::: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2

:::::: TO: Linus Torvalds <torvalds@ppc970.osdl.org>
:::::: CC: Linus Torvalds <torvalds@ppc970.osdl.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 6443 bytes --]

^ permalink raw reply

* Re: [PATCH] tg3: Mark expected switch fall-throughs
From: Michael Chan @ 2018-06-28  2:51 UTC (permalink / raw)
  To: Gustavo A. R. Silva
  Cc: Siva Reddy Kallam, Prashant Sreedharan, Michael Chan,
	David S. Miller, Netdev, open list
In-Reply-To: <20180628014524.GA26061@embeddedor.com>

On Wed, Jun 27, 2018 at 6:45 PM, Gustavo A. R. Silva
<gustavo@embeddedor.com> wrote:
> In preparation to enabling -Wimplicit-fallthrough, mark switch cases
> where we are expecting to fall through.
>
> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>

Acked-by: Michael Chan <michael.chan@broadcom.com>

^ permalink raw reply

* Re: [iovisor-dev] [RFC PATCH 00/11] OVS eBPF datapath.
From: Alexei Starovoitov @ 2018-06-28  3:00 UTC (permalink / raw)
  To: William Tu; +Cc: dev, iovisor-dev, netdev
In-Reply-To: <1529756203-77067-1-git-send-email-u9012063@gmail.com>

On Sat, Jun 23, 2018 at 05:16:32AM -0700, William Tu wrote:
> 
> Discussion
> ==========
> We are still actively working on finishing the feature, currently
> the basic forwarding and tunnel feature work, but still under
> heavy debugging and development.  The purpose of this RFC is to
> get some early feedbacks and direction for finishing the complete
> features in existing kernel's OVS datapath (the net/openvswitch/*).

Thank you for sharing the patches.

> Three major issues we are worried:
>   a. Megaflow support in BPF.
>   b. Connection Tracking support in BPF.

my opinion on the above two didn't change.
To recap:
A. A non-scalable megaflow map is a no-go. I'd like to see a packet
classification algorithm like hicuts or efficuts implemented instead, since
it can be shared by generic bpf, bpfilter, ovs and likely others.
B. Instead of helpers to interface with conntrack the way ovs did, I prefer
a generic conntrack mechanism that can also be used from xdp.

>   c. Verifier limitation.

Not sure what limitations you're concerned about.

^ permalink raw reply

* [net-next PATCH v5 0/7] Symmetric queue selection using XPS for Rx queues
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom

This patch series implements support for Tx queue selection based on
Rx queue(s) map. This is done by configuring Rx queue(s) map per Tx-queue
using a sysfs attribute. If the user configuration for Rx queues does
not apply, then the Tx queue selection falls back to XPS using CPUs and
finally to hashing.

XPS is refactored to support Tx queue selection based on either the
CPUs map or the Rx-queues map. The config option CONFIG_XPS needs to be
enabled. By default no receive queues are configured for the Tx queue.

- /sys/class/net/<dev>/queues/tx-*/xps_rxqs

A set of receive queues can be mapped to a set of transmit queues (many:many),
although the common use case is a 1:1 mapping. This enables sending packets
on the same Tx-Rx queue association, which is useful for busy-polling
multi-threaded workloads where it is not possible to pin the threads to
a CPU. This is a rework of Sridhar's patch for symmetric queueing via
socket option:
https://www.spinics.net/lists/netdev/msg453106.html

Testing Hints:
Kernel:  Linux 4.17.0-rc7+
Interface:
driver: ixgbe
version: 5.1.0-k
firmware-version: 0x00015e0b

Configuration:
ethtool -L $iface combined 16
ethtool -C $iface rx-usecs 1000
sysctl net.core.busy_poll=1000
ATR disabled:
ethtool -K $iface ntuple on

Workload:
Modified memcached that changes the thread selection policy to be based
on the incoming rx-queue of a connection using SO_INCOMING_NAPI_ID socket
option. The default is round-robin.

Default: No rxqs_map configured
Symmetric queues: Enable rxqs_map for all queues 1:1 mapped to Tx queue

System:
Architecture:          x86_64
CPU(s):                72
Model name:            Intel(R) Xeon(R) CPU E5-2699 v3 @ 2.30GHz

16 threads  400K requests/sec
=============================
-------------------------------------------------------------------------------
                                Default                 Symmetric queues
-------------------------------------------------------------------------------
RTT min/avg/max                 4/51/2215               2/30/5163
(usec)


intr/sec                        26655                   18606

contextswitch/sec               5145                    4044

insn per cycle                  0.43                    0.72

cache-misses                    6.919                   4.310
(% of all cache refs)

L1-dcache-load-                 4.49                    3.29
-misses
(% of all L1-dcache hits)

LLC-load-misses                 13.26                   8.96
(% of all LL-cache hits)

-------------------------------------------------------------------------------

32 threads  400K requests/sec
=============================
-------------------------------------------------------------------------------
                                Default                 Symmetric queues
-------------------------------------------------------------------------------
RTT min/avg/max                 10/112/5562             9/46/4637
(usec)


intr/sec                        30456                   27666

contextswitch/sec               7552                    5133

insn per cycle                  0.41                    0.49

cache-misses                    9.357                   2.769
(% of all cache refs)

L1-dcache-load-                 4.09                    3.98
-misses
(% of all L1-dcache hits)

LLC-load-misses                 12.96                   3.96
(% of all LL-cache hits)

-------------------------------------------------------------------------------

16 threads  800K requests/sec
=============================
-------------------------------------------------------------------------------
                                Default                 Symmetric queues
-------------------------------------------------------------------------------
RTT min/avg/max                  5/151/4989             9/69/2611
(usec)


intr/sec                        35686                   22907

contextswitch/sec               25522                   12281

insn per cycle                  0.67                    0.74

cache-misses                    8.652                   6.38
(% of all cache refs)

L1-dcache-load-                 3.19                    2.86
-misses
(% of all L1-dcache hits)

LLC-load-misses                 16.53                   11.99
(% of all LL-cache hits)

-------------------------------------------------------------------------------
32 threads  800K requests/sec
=============================
-------------------------------------------------------------------------------
                                Default                 Symmetric queues
-------------------------------------------------------------------------------
RTT min/avg/max                  6/163/6152             8/88/4209
(usec)


intr/sec                        47079                   26548

contextswitch/sec               42190                   39168

insn per cycle                  0.45                    0.54

cache-misses                    8.798                   4.668
(% of all cache refs)

L1-dcache-load-                 6.55                    6.29
-misses
(% of all L1-dcache hits)

LLC-load-misses                 13.91                   10.44
(% of all LL-cache hits)

-------------------------------------------------------------------------------

v5:
- Clean sk_tx_queue_mapping set and get functions, initialize it to USHRT_MAX.
- Similarly clean sk_rx_queue_mapping set and get functions.
- Use ns_capable in place of capable(), reorganize/properly free pointer
  in xps_rxqs_show.
- Add a note in the documentation about not validating the transmit device
  against the receive device, to avoid an expensive lookup in the datapath.

---

Amritha Nambiar (7):
      net: Refactor XPS for CPUs and Rx queues
      net: Use static_key for XPS maps
      net: sock: Change tx_queue_mapping in sock_common to unsigned short
      net: Record receive queue number for a connection
      net: Enable Tx queue selection based on Rx queues
      net-sysfs: Add interface for Rx queue(s) map per Tx queue
      Documentation: Add explanation for XPS using Rx-queue(s) map


 Documentation/ABI/testing/sysfs-class-net-queues |   11 +
 Documentation/networking/scaling.txt             |   61 ++++-
 include/linux/cpumask.h                          |   11 +
 include/linux/netdevice.h                        |   97 +++++++-
 include/net/busy_poll.h                          |    1 
 include/net/sock.h                               |   38 +++
 net/core/dev.c                                   |  283 +++++++++++++++-------
 net/core/net-sysfs.c                             |   85 ++++++-
 net/core/sock.c                                  |    4 
 net/ipv4/tcp_input.c                             |    3 
 10 files changed, 485 insertions(+), 109 deletions(-)

^ permalink raw reply

* [net-next PATCH v5 1/7] net: Refactor XPS for CPUs and Rx queues
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom
In-Reply-To: <153013824922.4959.14633065530326138344.stgit@anamhost.jf.intel.com>

Refactor XPS code to support Tx queue selection based on
CPU(s) map or Rx queue(s) map.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 include/linux/cpumask.h   |   11 ++
 include/linux/netdevice.h |   97 ++++++++++++++++++++-
 net/core/dev.c            |  211 ++++++++++++++++++++++++++++++---------------
 net/core/net-sysfs.c      |    4 -
 4 files changed, 243 insertions(+), 80 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bf53d89..57f20a0 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_active(cpu)		((cpu) == 0)
 #endif
 
-/* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	WARN_ON_ONCE(cpu >= nr_cpumask_bits);
+	WARN_ON_ONCE(cpu >= bits);
 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+}
+
+/* verify cpu argument to cpumask_* operators */
+static inline unsigned int cpumask_check(unsigned int cpu)
+{
+	cpu_max_bits_warn(cpu, nr_cpumask_bits);
 	return cpu;
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c6b377a..3790ac9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -731,10 +731,15 @@ struct xps_map {
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
-	struct xps_map __rcu *cpu_map[0];
+	struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */
 };
-#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +		\
+
+#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +	\
 	(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
+
+#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
+	(_rxqs * (_tcs) * sizeof(struct xps_map *)))
+
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE	16
@@ -1910,7 +1915,8 @@ struct net_device {
 	int			watchdog_timeo;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps __rcu *xps_maps;
+	struct xps_dev_maps __rcu *xps_cpus_map;
+	struct xps_dev_maps __rcu *xps_rxqs_map;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -3259,6 +3265,91 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index);
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, bool is_rxqs_map);
+
+/**
+ *	attr_test_mask - Test a CPU or Rx queue set in a cpumask/rx queues mask
+ *	@j: CPU/Rx queue index
+ *	@mask: bitmask of all cpus/rx queues
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
+ */
+static inline bool attr_test_mask(unsigned long j, const unsigned long *mask,
+				  unsigned int nr_bits)
+{
+	cpu_max_bits_warn(j, nr_bits);
+	return test_bit(j, mask);
+}
+
+/**
+ *	attr_test_online - Test for online CPU/Rx queue
+ *	@j: CPU/Rx queue index
+ *	@online_mask: bitmask for CPUs/Rx queues that are online
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns true if a CPU/Rx queue is online.
+ */
+static inline bool attr_test_online(unsigned long j,
+				    const unsigned long *online_mask,
+				    unsigned int nr_bits)
+{
+	cpu_max_bits_warn(j, nr_bits);
+
+	if (online_mask)
+		return test_bit(j, online_mask);
+
+	return (j < nr_bits);
+}
+
+/**
+ *	attrmask_next - get the next CPU/Rx queue in a cpumask/Rx queues mask
+ *	@n: CPU/Rx queue index
+ *	@srcp: the cpumask/Rx queue mask pointer
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set.
+ */
+static inline unsigned int attrmask_next(int n, const unsigned long *srcp,
+					 unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
+
+	if (srcp)
+		return find_next_bit(srcp, nr_bits, n + 1);
+
+	return n + 1;
+}
+
+/**
+ *	attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p
+ *	@n: CPU/Rx queue index
+ *	@src1p: the first CPUs/Rx queues mask pointer
+ *	@src2p: the second CPUs/Rx queues mask pointer
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set in both.
+ */
+static inline int attrmask_next_and(int n, const unsigned long *src1p,
+				    const unsigned long *src2p,
+				    unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
+
+	if (src1p && src2p)
+		return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
+	else if (src1p)
+		return find_next_bit(src1p, nr_bits, n + 1);
+	else if (src2p)
+		return find_next_bit(src2p, nr_bits, n + 1);
+
+	return n + 1;
+}
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
 				      const struct cpumask *mask,
diff --git a/net/core/dev.c b/net/core/dev.c
index dffed64..6ca62df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 	int pos;
 
 	if (dev_maps)
-		map = xmap_dereference(dev_maps->cpu_map[tci]);
+		map = xmap_dereference(dev_maps->attr_map[tci]);
 	if (!map)
 		return false;
 
@@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 			break;
 		}
 
-		RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+		RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 		kfree_rcu(map, rcu);
 		return false;
 	}
@@ -2135,31 +2135,58 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 	return active;
 }
 
+static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+			   u16 offset, u16 count, bool is_rxqs_map)
+{
+	bool active = false;
+	int i, j;
+
+	for (j = -1; j = attrmask_next(j, mask, nr_ids),
+	     j < nr_ids;)
+		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+					       count);
+	if (!active) {
+		if (is_rxqs_map) {
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		} else {
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+
+			for (i = offset + (count - 1); count--; i--)
+				netdev_queue_numa_node_write(
+					netdev_get_tx_queue(dev, i),
+							NUMA_NO_NODE);
+		}
+		kfree_rcu(dev_maps, rcu);
+	}
+}
+
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
+	const unsigned long *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps;
-	int cpu, i;
-	bool active = false;
+	unsigned int nr_ids;
 
 	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
 
-	if (!dev_maps)
-		goto out_no_maps;
-
-	for_each_possible_cpu(cpu)
-		active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
-					       offset, count);
+	dev_maps = xmap_dereference(dev->xps_rxqs_map);
+	if (dev_maps) {
+		nr_ids = dev->num_rx_queues;
+		clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset,
+			       count, true);
 
-	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-		kfree_rcu(dev_maps, rcu);
 	}
 
-	for (i = offset + (count - 1); count--; i--)
-		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
-					     NUMA_NO_NODE);
+	dev_maps = xmap_dereference(dev->xps_cpus_map);
+	if (!dev_maps)
+		goto out_no_maps;
+
+	if (num_possible_cpus() > 1)
+		possible_mask = cpumask_bits(cpu_possible_mask);
+	nr_ids = nr_cpu_ids;
+	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
+		       false);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
@@ -2170,8 +2197,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 	netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
 }
 
-static struct xps_map *expand_xps_map(struct xps_map *map,
-				      int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+				      u16 index, bool is_rxqs_map)
 {
 	struct xps_map *new_map;
 	int alloc_len = XPS_MIN_MAP_ALLOC;
@@ -2183,7 +2210,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		return map;
 	}
 
-	/* Need to add queue to this CPU's existing map */
+	/* Need to add tx-queue to this CPU's/rx-queue's existing map */
 	if (map) {
 		if (pos < map->alloc_len)
 			return map;
@@ -2191,9 +2218,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		alloc_len = map->alloc_len * 2;
 	}
 
-	/* Need to allocate new map to store queue on this CPU's map */
-	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
-			       cpu_to_node(cpu));
+	/* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
+	 *  map
+	 */
+	if (is_rxqs_map)
+		new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+	else
+		new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+				       cpu_to_node(attr_index));
 	if (!new_map)
 		return NULL;
 
@@ -2205,14 +2237,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 	return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-			u16 index)
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, bool is_rxqs_map)
 {
+	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-	int i, cpu, tci, numa_node_id = -2;
+	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
 	bool active = false;
+	unsigned int nr_ids;
 
 	if (dev->num_tc) {
 		num_tc = dev->num_tc;
@@ -2221,16 +2255,27 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -EINVAL;
 	}
 
-	maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
-	if (maps_sz < L1_CACHE_BYTES)
-		maps_sz = L1_CACHE_BYTES;
-
 	mutex_lock(&xps_map_mutex);
+	if (is_rxqs_map) {
+		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+		dev_maps = xmap_dereference(dev->xps_rxqs_map);
+		nr_ids = dev->num_rx_queues;
+	} else {
+		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
+		if (num_possible_cpus() > 1) {
+			online_mask = cpumask_bits(cpu_online_mask);
+			possible_mask = cpumask_bits(cpu_possible_mask);
+		}
+		dev_maps = xmap_dereference(dev->xps_cpus_map);
+		nr_ids = nr_cpu_ids;
+	}
 
-	dev_maps = xmap_dereference(dev->xps_maps);
+	if (maps_sz < L1_CACHE_BYTES)
+		maps_sz = L1_CACHE_BYTES;
 
 	/* allocate memory for queue storage */
-	for_each_cpu_and(cpu, cpu_online_mask, mask) {
+	for (j = -1; j = attrmask_next_and(j, online_mask, mask, nr_ids),
+	     j < nr_ids;) {
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
@@ -2238,73 +2283,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -ENOMEM;
 		}
 
-		tci = cpu * num_tc + tc;
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
+		tci = j * num_tc + tc;
+		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
 				 NULL;
 
-		map = expand_xps_map(map, cpu, index);
+		map = expand_xps_map(map, j, index, is_rxqs_map);
 		if (!map)
 			goto error;
 
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 	}
 
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
-	for_each_possible_cpu(cpu) {
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
 		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* We need to explicitly update tci as prevous loop
 		 * could break out early if dev_maps is NULL.
 		 */
-		tci = cpu * num_tc + tc;
+		tci = j * num_tc + tc;
 
-		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
-			/* add queue to CPU maps */
+		if (attr_test_mask(j, mask, nr_ids) &&
+		    attr_test_online(j, online_mask, nr_ids)) {
+			/* add tx-queue to CPU/rx-queue maps */
 			int pos = 0;
 
-			map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
 
 			if (pos == map->len)
 				map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
+			if (!is_rxqs_map) {
+				if (numa_node_id == -2)
+					numa_node_id = cpu_to_node(j);
+				else if (numa_node_id != cpu_to_node(j))
+					numa_node_id = -1;
+			}
 #endif
 		} else if (dev_maps) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* copy maps belonging to foreign traffic classes */
 		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 	}
 
-	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	if (is_rxqs_map)
+		rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
+	else
+		rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
 
 	/* Cleanup old maps */
 	if (!dev_maps)
 		goto out_no_old_maps;
 
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
 			if (map && map != new_map)
 				kfree_rcu(map, rcu);
 		}
@@ -2317,19 +2370,23 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	active = true;
 
 out_no_new_maps:
-	/* update Tx queue numa node */
-	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-				     (numa_node_id >= 0) ? numa_node_id :
-				     NUMA_NO_NODE);
+	if (!is_rxqs_map) {
+		/* update Tx queue numa node */
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+					     (numa_node_id >= 0) ?
+					     numa_node_id : NUMA_NO_NODE);
+	}
 
 	if (!dev_maps)
 		goto out_no_maps;
 
-	/* removes queue from unused CPUs */
-	for_each_possible_cpu(cpu) {
-		for (i = tc, tci = cpu * num_tc; i--; tci++)
+	/* removes tx-queue from unused CPUs/rx-queues */
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = tc, tci = j * num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+		if (!attr_test_mask(j, mask, nr_ids) ||
+		    !attr_test_online(j, online_mask, nr_ids))
 			active |= remove_xps_queue(dev_maps, tci, index);
 		for (i = num_tc - tc, tci++; --i; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
@@ -2337,7 +2394,10 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 
 	/* free map if not active */
 	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		if (is_rxqs_map)
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		else
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
 		kfree_rcu(dev_maps, rcu);
 	}
 
@@ -2347,11 +2407,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	return 0;
 error:
 	/* remove any maps that we added */
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			map = dev_maps ?
-			      xmap_dereference(dev_maps->cpu_map[tci]) :
+			      xmap_dereference(dev_maps->attr_map[tci]) :
 			      NULL;
 			if (new_map && new_map != map)
 				kfree(new_map);
@@ -2363,6 +2424,12 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	kfree(new_dev_maps);
 	return -ENOMEM;
 }
+
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+			u16 index)
+{
+	return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+}
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
@@ -3384,7 +3451,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	int queue_index = -1;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {
 		unsigned int tci = skb->sender_cpu - 1;
 
@@ -3393,7 +3460,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 			tci += netdev_get_prio_tc_map(dev, skb->priority);
 		}
 
-		map = rcu_dereference(dev_maps->cpu_map[tci]);
+		map = rcu_dereference(dev_maps->attr_map[tci]);
 		if (map) {
 			if (map->len == 1)
 				queue_index = map->queues[0];
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bb7e80f..b39987c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 		return -ENOMEM;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {
 		for_each_possible_cpu(cpu) {
 			int i, tci = cpu * num_tc + tc;
 			struct xps_map *map;
 
-			map = rcu_dereference(dev_maps->cpu_map[tci]);
+			map = rcu_dereference(dev_maps->attr_map[tci]);
 			if (!map)
 				continue;
 

^ permalink raw reply related

* [net-next PATCH v5 3/7] net: sock: Change tx_queue_mapping in sock_common to unsigned short
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom
In-Reply-To: <153013824922.4959.14633065530326138344.stgit@anamhost.jf.intel.com>

Change 'skc_tx_queue_mapping' field in sock_common structure from
'int' to 'unsigned short' type with ~0 indicating unset and
any other value being a valid queue index. This will accommodate adding
a new 'unsigned short' field in sock_common in the next patch for
rx_queue_mapping.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 include/net/sock.h |   14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index b3b7541..0a7d57b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -214,7 +214,7 @@ struct sock_common {
 		struct hlist_node	skc_node;
 		struct hlist_nulls_node skc_nulls_node;
 	};
-	int			skc_tx_queue_mapping;
+	unsigned short		skc_tx_queue_mapping;
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -1681,17 +1681,25 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 
 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
 {
+	/* sk_tx_queue_mapping accept only upto a 16-bit value */
+	if (WARN_ON_ONCE((unsigned short)tx_queue > USHRT_MAX))
+		return;
 	sk->sk_tx_queue_mapping = tx_queue;
 }
 
+#define NO_QUEUE_MAPPING	USHRT_MAX
+
 static inline void sk_tx_queue_clear(struct sock *sk)
 {
-	sk->sk_tx_queue_mapping = -1;
+	sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
 }
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-	return sk ? sk->sk_tx_queue_mapping : -1;
+	if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING)
+		return sk->sk_tx_queue_mapping;
+
+	return -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)

^ permalink raw reply related

* [net-next PATCH v5 4/7] net: Record receive queue number for a connection
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom
In-Reply-To: <153013824922.4959.14633065530326138344.stgit@anamhost.jf.intel.com>

This patch adds a new field to sock_common 'skc_rx_queue_mapping'
which holds the receive queue number for the connection. The Rx queue
is marked in tcp_finish_connect() to allow a client app to do
SO_INCOMING_NAPI_ID after a connect() call to get the right queue
association for a socket. Rx queue is also marked in tcp_conn_request()
to allow syn-ack to go on the right tx-queue associated with
the queue on which syn is received.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 include/net/busy_poll.h |    1 +
 include/net/sock.h      |   14 ++++++++++++++
 net/core/sock.c         |    4 ++++
 net/ipv4/tcp_input.c    |    3 +++
 4 files changed, 22 insertions(+)

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c518743..9e36fda6 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -151,6 +151,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id = skb->napi_id;
 #endif
+	sk_rx_queue_set(sk, skb);
 }
 
 /* variant used for unconnected sockets */
diff --git a/include/net/sock.h b/include/net/sock.h
index 0a7d57b..f73dbca 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -139,6 +139,7 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_node: main hash linkage for various protocol lookup tables
  *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_rx_queue_mapping: rx queue number for this connection
  *	@skc_flags: place holder for sk_flags
  *		%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -215,6 +216,9 @@ struct sock_common {
 		struct hlist_nulls_node skc_nulls_node;
 	};
 	unsigned short		skc_tx_queue_mapping;
+#ifdef CONFIG_XPS
+	unsigned short		skc_rx_queue_mapping;
+#endif
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -326,6 +330,9 @@ struct sock {
 #define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
+#ifdef CONFIG_XPS
+#define sk_rx_queue_mapping	__sk_common.skc_rx_queue_mapping
+#endif
 
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end		__sk_common.skc_dontcopy_end
@@ -1702,6 +1709,13 @@ static inline int sk_tx_queue_get(const struct sock *sk)
 	return -1;
 }
 
+static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
+#endif
+}
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index bcc4182..fe8cb25c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2818,6 +2818,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_pacing_rate = ~0U;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
+
+#ifdef CONFIG_XPS
+	sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING;
+#endif
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c5b341..b3b5aef 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,6 +78,7 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -5588,6 +5589,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 		security_inet_conn_established(sk, skb);
+		sk_mark_napi_id(sk, skb);
 	}
 
 	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
@@ -6416,6 +6418,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
+	sk_rx_queue_set(req_to_sk(req), skb);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);

^ permalink raw reply related

* [net-next PATCH v5 2/7] net: Use static_key for XPS maps
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom
In-Reply-To: <153013824922.4959.14633065530326138344.stgit@anamhost.jf.intel.com>

Use static_key for XPS maps to reduce the cost of extra map checks,
similar to how it is used for RPS and RFS. This adds the static_key
'xps_needed' for XPS and a second one, 'xps_rxqs_needed', for XPS
using the Rx queues map.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 net/core/dev.c |   26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 6ca62df..09cba23 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2081,6 +2081,10 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 EXPORT_SYMBOL(netdev_txq_to_tc);
 
 #ifdef CONFIG_XPS
+struct static_key xps_needed __read_mostly;
+EXPORT_SYMBOL(xps_needed);
+struct static_key xps_rxqs_needed __read_mostly;
+EXPORT_SYMBOL(xps_rxqs_needed);
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
@@ -2170,12 +2174,14 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 
 	mutex_lock(&xps_map_mutex);
 
-	dev_maps = xmap_dereference(dev->xps_rxqs_map);
-	if (dev_maps) {
-		nr_ids = dev->num_rx_queues;
-		clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset,
-			       count, true);
-
+	if (static_key_false(&xps_rxqs_needed)) {
+		dev_maps = xmap_dereference(dev->xps_rxqs_map);
+		if (dev_maps) {
+			nr_ids = dev->num_rx_queues;
+			clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
+				       offset, count, true);
+		}
+		static_key_slow_dec(&xps_rxqs_needed);
 	}
 
 	dev_maps = xmap_dereference(dev->xps_cpus_map);
@@ -2189,6 +2195,7 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 		       false);
 
 out_no_maps:
+	static_key_slow_dec(&xps_needed);
 	mutex_unlock(&xps_map_mutex);
 }
 
@@ -2297,6 +2304,10 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
+	static_key_slow_inc(&xps_needed);
+	if (is_rxqs_map)
+		static_key_slow_inc(&xps_rxqs_needed);
+
 	for (j = -1; j = attrmask_next(j, possible_mask, nr_ids),
 	     j < nr_ids;) {
 		/* copy maps belonging to foreign traffic classes */
@@ -3450,6 +3461,9 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	struct xps_map *map;
 	int queue_index = -1;
 
+	if (!static_key_false(&xps_needed))
+		return -1;
+
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {

^ permalink raw reply related

* [net-next PATCH v5 6/7] net-sysfs: Add interface for Rx queue(s) map per Tx queue
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom
In-Reply-To: <153013824922.4959.14633065530326138344.stgit@anamhost.jf.intel.com>

Extend transmit queue sysfs attribute to configure Rx queue(s) map
per Tx queue. By default no receive queues are configured for the
Tx queue.

- /sys/class/net/eth0/queues/tx-*/xps_rxqs

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 net/core/net-sysfs.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b39987c..f4800c5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1283,6 +1283,86 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 
 static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
 	= __ATTR_RW(xps_cpus);
+
+static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	unsigned long *mask, index;
+	int j, len, num_tc = 1, tc = 0;
+
+	index = get_netdev_queue_index(queue);
+
+	if (dev->num_tc) {
+		num_tc = dev->num_tc;
+		tc = netdev_txq_to_tc(dev, index);
+		if (tc < 0)
+			return -EINVAL;
+	}
+	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+		       GFP_KERNEL);
+	if (!mask)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	if (dev_maps) {
+		for (j = -1; j = attrmask_next(j, NULL, dev->num_rx_queues),
+		     j < dev->num_rx_queues;) {
+			int i, tci = j * num_tc + tc;
+			struct xps_map *map;
+
+			map = rcu_dereference(dev_maps->attr_map[tci]);
+			if (!map)
+				continue;
+
+			for (i = map->len; i--;) {
+				if (map->queues[i] == index) {
+					set_bit(j, mask);
+					break;
+				}
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
+	kfree(mask);
+
+	return len < PAGE_SIZE ? len : -EINVAL;
+}
+
+static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
+			      size_t len)
+{
+	struct net_device *dev = queue->dev;
+	struct net *net = dev_net(dev);
+	unsigned long *mask, index;
+	int err;
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+		       GFP_KERNEL);
+	if (!mask)
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
+	if (err) {
+		kfree(mask);
+		return err;
+	}
+
+	err = __netif_set_xps_queue(dev, mask, index, true);
+	kfree(mask);
+	return err ? : len;
+}
+
+static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
+	= __ATTR_RW(xps_rxqs);
 #endif /* CONFIG_XPS */
 
 static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
@@ -1290,6 +1370,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
 	&queue_traffic_class.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
+	&xps_rxqs_attribute.attr,
 	&queue_tx_maxrate.attr,
 #endif
 	NULL

^ permalink raw reply related

* [net-next PATCH v5 5/7] net: Enable Tx queue selection based on Rx queues
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom
In-Reply-To: <153013824922.4959.14633065530326138344.stgit@anamhost.jf.intel.com>

This patch adds support to pick Tx queue based on the Rx queue(s) map
configuration set by the admin through the sysfs attribute
for each Tx queue. If the user configuration for receive queue(s) map
does not apply, then the Tx queue selection falls back to CPU(s) map
based selection and finally to hashing.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 include/net/sock.h |   10 ++++++++
 net/core/dev.c     |   62 ++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index f73dbca..3b22782 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1716,6 +1716,16 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
 #endif
 }
 
+#ifdef CONFIG_XPS
+static inline int sk_rx_queue_get(const struct sock *sk)
+{
+	if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING)
+		return sk->sk_rx_queue_mapping;
+
+	return -1;
+}
+#endif
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index 09cba23..1122f68 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3454,35 +3454,63 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+#ifdef CONFIG_XPS
+static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
+			       struct xps_dev_maps *dev_maps, unsigned int tci)
+{
+	struct xps_map *map;
+	int queue_index = -1;
+
+	if (dev->num_tc) {
+		tci *= dev->num_tc;
+		tci += netdev_get_prio_tc_map(dev, skb->priority);
+	}
+
+	map = rcu_dereference(dev_maps->attr_map[tci]);
+	if (map) {
+		if (map->len == 1)
+			queue_index = map->queues[0];
+		else
+			queue_index = map->queues[reciprocal_scale(
+						skb_get_hash(skb), map->len)];
+		if (unlikely(queue_index >= dev->real_num_tx_queues))
+			queue_index = -1;
+	}
+	return queue_index;
+}
+#endif
+
+static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
+	struct sock *sk = skb->sk;
 	int queue_index = -1;
 
 	if (!static_key_false(&xps_needed))
 		return -1;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_cpus_map);
+	if (!static_key_false(&xps_rxqs_needed))
+		goto get_cpus_map;
+
+	dev_maps = rcu_dereference(dev->xps_rxqs_map);
 	if (dev_maps) {
-		unsigned int tci = skb->sender_cpu - 1;
+		int tci = sk_rx_queue_get(sk);
 
-		if (dev->num_tc) {
-			tci *= dev->num_tc;
-			tci += netdev_get_prio_tc_map(dev, skb->priority);
-		}
+		if (tci >= 0 && tci < dev->num_rx_queues)
+			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+							  tci);
+	}
 
-		map = rcu_dereference(dev_maps->attr_map[tci]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else
-				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
-									   map->len)];
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
+get_cpus_map:
+	if (queue_index < 0) {
+		dev_maps = rcu_dereference(dev->xps_cpus_map);
+		if (dev_maps) {
+			unsigned int tci = skb->sender_cpu - 1;
+
+			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+							  tci);
 		}
 	}
 	rcu_read_unlock();

^ permalink raw reply related

* [net-next PATCH v5 7/7] Documentation: Add explanation for XPS using Rx-queue(s) map
From: Amritha Nambiar @ 2018-06-27 22:31 UTC (permalink / raw)
  To: netdev, davem
  Cc: alexander.h.duyck, willemdebruijn.kernel, amritha.nambiar,
	sridhar.samudrala, alexander.duyck, edumazet, hannes, tom, tom
In-Reply-To: <153013824922.4959.14633065530326138344.stgit@anamhost.jf.intel.com>

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
---
 Documentation/ABI/testing/sysfs-class-net-queues |   11 ++++
 Documentation/networking/scaling.txt             |   61 ++++++++++++++++++----
 2 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
index 0c0df91..978b763 100644
--- a/Documentation/ABI/testing/sysfs-class-net-queues
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -42,6 +42,17 @@ Description:
 		network device transmit queue. Possible vaules depend on the
 		number of available CPU(s) in the system.
 
+What:		/sys/class/<iface>/queues/tx-<queue>/xps_rxqs
+Date:		June 2018
+KernelVersion:	4.18.0
+Contact:	netdev@vger.kernel.org
+Description:
+		Mask of the receive queue(s) currently enabled to participate
+		in the Transmit Packet Steering packet processing flow for this
+		network device transmit queue. Possible values depend on the
+		number of available receive queue(s) in the network device.
+		Default is disabled.
+
 What:		/sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
 Date:		November 2011
 KernelVersion:	3.3
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index f55639d..b7056a8 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -366,8 +366,13 @@ XPS: Transmit Packet Steering
 
 Transmit Packet Steering is a mechanism for intelligently selecting
 which transmit queue to use when transmitting a packet on a multi-queue
-device. To accomplish this, a mapping from CPU to hardware queue(s) is
-recorded. The goal of this mapping is usually to assign queues
+device. This can be accomplished by recording two kinds of maps, either
+a mapping of CPU to hardware queue(s) or a mapping of receive queue(s)
+to hardware transmit queue(s).
+
+1. XPS using CPUs map
+
+The goal of this mapping is usually to assign queues
 exclusively to a subset of CPUs, where the transmit completions for
 these queues are processed on a CPU within this set. This choice
 provides two benefits. First, contention on the device queue lock is
@@ -377,15 +382,40 @@ transmit queue). Secondly, cache miss rate on transmit completion is
 reduced, in particular for data cache lines that hold the sk_buff
 structures.
 
-XPS is configured per transmit queue by setting a bitmap of CPUs that
-may use that queue to transmit. The reverse mapping, from CPUs to
-transmit queues, is computed and maintained for each network device.
-When transmitting the first packet in a flow, the function
-get_xps_queue() is called to select a queue. This function uses the ID
-of the running CPU as a key into the CPU-to-queue lookup table. If the
+2. XPS using receive queues map
+
+This mapping is used to pick the transmit queue based on the receive
+queue(s) map configuration set by the administrator. A set of receive
+queues can be mapped to a set of transmit queues (many:many), although
+the common use case is a 1:1 mapping. This will enable sending packets
+on the same queue associations for transmit and receive. This is useful for
+busy polling multi-threaded workloads where there are challenges in
+associating a given CPU to a given application thread. The application
+threads are not pinned to CPUs and each thread handles packets
+received on a single queue. The receive queue number is cached in the
+socket for the connection. In this model, sending the packets on the same
+transmit queue corresponding to the associated receive queue has benefits
+in keeping the CPU overhead low. Transmit completion work is locked into
+the same queue-association that a given application is polling on. This
+avoids the overhead of triggering an interrupt on another CPU. When the
+application cleans up the packets during the busy poll, transmit completion
+may be processed along with it in the same thread context and so result in
+reduced latency.
+
+XPS is configured per transmit queue by setting a bitmap of
+CPUs/receive-queues that may use that queue to transmit. The reverse
+mapping, from CPUs to transmit queues or from receive-queues to transmit
+queues, is computed and maintained for each network device. When
+transmitting the first packet in a flow, the function get_xps_queue() is
+called to select a queue. This function uses the ID of the receive queue
+for the socket connection for a match in the receive queue-to-transmit queue
+lookup table. Alternatively, this function can also use the ID of the
+running CPU as a key into the CPU-to-queue lookup table. If the
 ID matches a single queue, that is used for transmission. If multiple
 queues match, one is selected by using the flow hash to compute an index
-into the set.
+into the set. When selecting the transmit queue based on the receive queue(s)
+map, the transmit device is not validated against the receive device, as that
+would require an expensive lookup operation in the datapath.
 
 The queue chosen for transmitting a particular flow is saved in the
 corresponding socket structure for the flow (e.g. a TCP connection).
@@ -404,11 +434,15 @@ acknowledged.
 
 XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
 default for SMP). The functionality remains disabled until explicitly
-configured. To enable XPS, the bitmap of CPUs that may use a transmit
-queue is configured using the sysfs file entry:
+configured. To enable XPS, the bitmap of CPUs/receive-queues that may
+use a transmit queue is configured using the sysfs file entry:
 
+For selection based on CPUs map:
 /sys/class/net/<dev>/queues/tx-<n>/xps_cpus
 
+For selection based on receive-queues map:
+/sys/class/net/<dev>/queues/tx-<n>/xps_rxqs
+
 == Suggested Configuration
 
 For a network device with a single transmission queue, XPS configuration
@@ -421,6 +455,11 @@ best CPUs to share a given queue are probably those that share the cache
 with the CPU that processes transmit completions for that queue
 (transmit interrupts).
 
+For transmit queue selection based on receive queue(s), XPS has to be
+explicitly configured by mapping receive-queue(s) to transmit queue(s). If the
+user configuration for the receive-queue map does not apply, then the transmit
+queue is selected based on the CPUs map.
+
 Per TX Queue rate limitation:
 =============================
 

^ permalink raw reply related

* [PATCH net-next 10/11] net: hns3: remove back in struct hclge_hw
From: Peng Li @ 2018-06-28  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321
In-Reply-To: <1530159149-122284-1-git-send-email-lipeng321@huawei.com>

From: Huazhong Tan <tanhuazhong@huawei.com>

hclge_hw is embedded in hclge_dev, so use container_of() instead of the
'back' pointer to get the enclosing hclge_dev, and remove 'back'.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c  | 4 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 -
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index c36d647..7049d0b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -152,7 +152,7 @@ static void hclge_cmd_init_regs(struct hclge_hw *hw)
 
 static int hclge_cmd_csq_clean(struct hclge_hw *hw)
 {
-	struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
+	struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw);
 	struct hclge_cmq_ring *csq = &hw->cmq.csq;
 	u16 ntc = csq->next_to_clean;
 	struct hclge_desc *desc;
@@ -216,7 +216,7 @@ static bool hclge_is_special_opcode(u16 opcode)
  **/
 int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 {
-	struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
+	struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw);
 	struct hclge_desc *desc_to_use;
 	bool complete = false;
 	u32 timeout = 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 805c780..14a6991 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5537,7 +5537,6 @@ static int hclge_pci_init(struct hclge_dev *hdev)
 
 	pci_set_master(pdev);
 	hw = &hdev->hw;
-	hw->back = hdev;
 	hw->io_base = pcim_iomap(pdev, 2, 0);
 	if (!hw->io_base) {
 		dev_err(&pdev->dev, "Can't map configuration register space\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7488534..71d38b8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -190,7 +190,6 @@ struct hclge_hw {
 	int num_vec;
 	struct hclge_cmq cmq;
 	struct hclge_caps caps;
-	void *back;
 };
 
 /* TQP stats */
-- 
2.9.3

^ permalink raw reply related

* Re: [PATCH 0/6] offload Linux LAG devices to the TC datapath
From: Or Gerlitz @ 2018-06-28  3:50 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Or Gerlitz, John Hurley, Jiri Pirko, Linux Netdev List,
	ASAP_Direct_Dev, Simon Horman, Andy Gospodarek
In-Reply-To: <20180627160811.57250c26@cakuba.netronome.com>

On Thu, Jun 28, 2018 at 2:08 AM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> On Wed, 27 Jun 2018 23:07:29 +0300, Or Gerlitz wrote:
>> On Wed, Jun 27, 2018 at 1:31 AM, Jakub Kicinski
>> <jakub.kicinski@netronome.com> wrote:
>> > On Tue, 26 Jun 2018 17:57:08 +0300, Or Gerlitz wrote:
>>
>> >> 2. re the egress side of things. Some NIC HWs can't just use LAG
>> >> as the egress port destination of an ACL (tc rule) and the HW rule
>> >> needs to be duplicated to both HW ports. So... in that case, you
>> >> see the HW driver doing the duplication (:() or we can somehow
>> >> make it happen from user-space?
>>
>> > It's the TC core that does the duplication.  Drivers which don't need
>> > the duplication (e.g. mlxsw) will not register a new callback for each
>> > port on which shared block is bound.  They will keep one list of rules,
>> > and a list of ports that those rules apply to.
>>
>> [snip]
>>
>> > Drivers which need duplication (multiplication) (all NICs?) have to
>> > register a new callback for each port bound to a shared block.  And TC
>> > will call those drivers as many times as they have callbacks registered
>> > == as many times as they have ports bound to the block.  Each time
>> > callback is invoked the driver will figure out the ingress port based
>> > on the cb_priv and use <ingress, cookie> as the key in its rule table
>> > (or have a separate rule table per ingress port).
>>
>> [snip snip]
>>
>> > I may be wrong, but I think you split the rules tables per port for mlx5
>>
>> correct,  currently I have a rule table per physical port.
>>
>> > So again you just register a callback every time shared block is bound,
>> > and then TC core will send add/remove rule commands down to the driver,
>> > relaying existing rules as well if needed.
>>
>> Let's see, the NIC uplink rep port devices were bounded (say) by ovs to
>> a shared-block because they are the lower devices (hate the slavish jargon)
>> of a bond device.
>>
>> Next, the TC stack will invoke the callback over these ports, when ingress
>> rule is added on the bond.
>>
>> But we are talking on ingress rule set on a non-uplink rep (VF rep) port,
>> where bonding is the egress of the rule. I guess the callback which you probably
>> refer to (you hinted there below) is the egdev one, correct? you are suggesting
>> that bonding will do egdev registration... I am a bit confused.
>
> Ah, you really meant egress.  We don't have this problem, but yes, I

so how does it works for you -- the rule is:

<ingress=vfrep netdev, egress=bond netdev>

so from here, your driver logic does what inorder
to allow offloading into the lagged uplinks? can you
point the code please..

the bond BTW doesn't have the same switchdev id as
the vfrep in case you keep different switchdev id's
for the uplink reps under bonding -- do you unite them?

^ permalink raw reply

* Re: [PATCH 0/6] offload Linux LAG devices to the TC datapath
From: Jakub Kicinski @ 2018-06-28  4:02 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Or Gerlitz, John Hurley, Jiri Pirko, Linux Netdev List,
	ASAP_Direct_Dev, Simon Horman, Andy Gospodarek
In-Reply-To: <CAJ3xEMhZ8uF4ohrQUGUHtVxzKUeBU7ejZqYAHw+HtkQ5voeGVg@mail.gmail.com>

On Thu, 28 Jun 2018 06:50:32 +0300, Or Gerlitz wrote:
> On Thu, Jun 28, 2018 at 2:08 AM, Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
> > On Wed, 27 Jun 2018 23:07:29 +0300, Or Gerlitz wrote:  
> >> On Wed, Jun 27, 2018 at 1:31 AM, Jakub Kicinski
> >> <jakub.kicinski@netronome.com> wrote:  
> >> > On Tue, 26 Jun 2018 17:57:08 +0300, Or Gerlitz wrote:  
> >>  
> >> >> 2. re the egress side of things. Some NIC HWs can't just use LAG
> >> >> as the egress port destination of an ACL (tc rule) and the HW rule
> >> >> needs to be duplicated to both HW ports. So... in that case, you
> >> >> see the HW driver doing the duplication (:() or we can somehow
> >> >> make it happen from user-space?  
> >>  
> >> > It's the TC core that does the duplication.  Drivers which don't need
> >> > the duplication (e.g. mlxsw) will not register a new callback for each
> >> > port on which shared block is bound.  They will keep one list of rules,
> >> > and a list of ports that those rules apply to.  
> >>
> >> [snip]
> >>  
> >> > Drivers which need duplication (multiplication) (all NICs?) have to
> >> > register a new callback for each port bound to a shared block.  And TC
> >> > will call those drivers as many times as they have callbacks registered
> >> > == as many times as they have ports bound to the block.  Each time
> >> > callback is invoked the driver will figure out the ingress port based
> >> > on the cb_priv and use <ingress, cookie> as the key in its rule table
> >> > (or have a separate rule table per ingress port).  
> >>
> >> [snip snip]
> >>  
> >> > I may be wrong, but I think you split the rules tables per port for mlx5  
> >>
> >> correct,  currently I have a rule table per physical port.
> >>  
> >> > So again you just register a callback every time shared block is bound,
> >> > and then TC core will send add/remove rule commands down to the driver,
> >> > relaying existing rules as well if needed.  
> >>
> >> Let's see, the NIC uplink rep port devices were bounded (say) by ovs to
> >> a shared-block because they are the lower devices (hate the slavish jargon)
> >> of a bond device.
> >>
> >> Next, the TC stack will invoke the callback over these ports, when ingress
> >> rule is added on the bond.
> >>
> >> But we are talking on ingress rule set on a non-uplink rep (VF rep) port,
> >> where bonding is the egress of the rule. I guess the callback which you probably
> >> refer to (you hinted there below) is the egdev one, correct? you are suggesting
> >> that bonding will do egdev registration... I am a bit confused.  
> >
> > Ah, you really meant egress.  We don't have this problem, but yes, I  
> 
> so how does it works for you -- the rule is:
> 
> <ingress=vfrep netdev, egress=bond netdev>
> 
> so from here, your driver logic does what inorder
> to allow offloading into the lagged uplinks? can you
> point the code please..

static int
nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
...
	if (tun_type) {
		/* Verify the egress netdev matches the tunnel type. */
		if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type))
			return -EOPNOTSUPP;

		if (*tun_out_cnt)
			return -EOPNOTSUPP;
		(*tun_out_cnt)++;

		output->flags = cpu_to_be16(tmp_flags |
					    NFP_FL_OUT_FLAGS_USE_TUN);
		output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
	} else if (netif_is_lag_master(out_dev) &&
		   priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
		int gid;

		output->flags = cpu_to_be16(tmp_flags);
		gid = nfp_flower_lag_get_output_id(app, out_dev);
		if (gid < 0)
			return gid;
		output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid);
	} else {
		/* Set action output parameters. */
		output->flags = cpu_to_be16(tmp_flags);

		/* Only offload if egress ports are on the same device as the
		 * ingress port.
		 */
		if (!switchdev_port_same_parent_id(in_dev, out_dev))
			return -EOPNOTSUPP;
		if (!nfp_netdev_is_nfp_repr(out_dev))
			return -EOPNOTSUPP;

		output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
		if (!output->port)
			return -EOPNOTSUPP;
	}

> the bond BTW doesn't have the same switchdev id as
> the vfrep in case you keep different switchdev id's
> for the uplink reps under bonding -- do you unite them?

^ permalink raw reply

* [PATCH net-next 00/11] net: hns3: a few code improvements
From: Peng Li @ 2018-06-28  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321

This patchset fixes a few code style issues found during
concentrated review; no functional changes are introduced.

Huazhong Tan (2):
  net: hns3: remove back in struct hclge_hw
  net: hns3: use lower_32_bits and upper_32_bits

Peng Li (9):
  net: hns3: remove hclge_get_vector_index from
    hclge_bind_ring_with_vector
  net: hns3: rename the interface for init_client_instance and
    uninit_client_instance
  net: hns3: add vector status check before free vector
  net: hns3: add l4_type check for both ipv4 and ipv6
  net: hns3: add unlikely for error check
  net: hns3: remove unused head file in hnae3.c
  net: hns3: extraction an interface for state init|uninit
  net: hns3: print the ret value in error information
  net: hns3: remove the Redundant put_vector in hns3_client_uninit

 drivers/net/ethernet/hisilicon/hns3/hnae3.c        | 18 ++++---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 18 +++----
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 12 ++---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 49 +++++++++++------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  1 -
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 61 ++++++++++------------
 6 files changed, 85 insertions(+), 74 deletions(-)

-- 
2.9.3

^ permalink raw reply

* [PATCH net-next 01/11] net: hns3: remove hclge_get_vector_index from hclge_bind_ring_with_vector
From: Peng Li @ 2018-06-28  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321
In-Reply-To: <1530159149-122284-1-git-send-email-lipeng321@huawei.com>

In hclge_unmap_ring_frm_vector, there are 2 steps:
step 1: get vector index.
step 2: unbind ring with vector.

But the step 2 interface gets the vector id again. This patch
removes hclge_get_vector_index from hclge_bind_ring_with_vector,
and makes the steps the same as in the hns3 PF driver.

Signed-off-by: Peng Li <lipeng321@huawei.com>
---
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 24 +++++++++++++---------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index a17872a..b3d8237 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -547,24 +547,18 @@ static int hclgevf_get_tc_size(struct hnae3_handle *handle)
 }
 
 static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
-				       int vector,
+				       int vector_id,
 				       struct hnae3_ring_chain_node *ring_chain)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	struct hnae3_ring_chain_node *node;
 	struct hclge_mbx_vf_to_pf_cmd *req;
 	struct hclgevf_desc desc;
-	int i = 0, vector_id;
+	int i = 0;
 	int status;
 	u8 type;
 
 	req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data;
-	vector_id = hclgevf_get_vector_index(hdev, vector);
-	if (vector_id < 0) {
-		dev_err(&handle->pdev->dev,
-			"Get vector index fail. ret =%d\n", vector_id);
-		return vector_id;
-	}
 
 	for (node = ring_chain; node; node = node->next) {
 		int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
@@ -617,7 +611,17 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
 static int hclgevf_map_ring_to_vector(struct hnae3_handle *handle, int vector,
 				      struct hnae3_ring_chain_node *ring_chain)
 {
-	return hclgevf_bind_ring_to_vector(handle, true, vector, ring_chain);
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	int vector_id;
+
+	vector_id = hclgevf_get_vector_index(hdev, vector);
+	if (vector_id < 0) {
+		dev_err(&handle->pdev->dev,
+			"Get vector index fail. ret =%d\n", vector_id);
+		return vector_id;
+	}
+
+	return hclgevf_bind_ring_to_vector(handle, true, vector_id, ring_chain);
 }
 
 static int hclgevf_unmap_ring_from_vector(
@@ -635,7 +639,7 @@ static int hclgevf_unmap_ring_from_vector(
 		return vector_id;
 	}
 
-	ret = hclgevf_bind_ring_to_vector(handle, false, vector, ring_chain);
+	ret = hclgevf_bind_ring_to_vector(handle, false, vector_id, ring_chain);
 	if (ret)
 		dev_err(&handle->pdev->dev,
 			"Unmap ring from vector fail. vector=%d, ret =%d\n",
-- 
2.9.3

^ permalink raw reply related

* [PATCH net-next 02/11] net: hns3: rename the interface for init_client_instance and uninit_client_instance
From: Peng Li @ 2018-06-28  4:12 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, yisen.zhuang, salil.mehta,
	lipeng321
In-Reply-To: <1530159149-122284-1-git-send-email-lipeng321@huawei.com>

The interfaces init_client_instance and uninit_client_instance
do not register anything; they only initialize the client instance.
This patch renames the related interfaces so that the function
names indicate their purpose.

Signed-off-by: Peng Li <lipeng321@huawei.com>
---
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 31 +++++++---------------
 1 file changed, 9 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index b3d8237..3a8d7e0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1586,9 +1586,10 @@ static void hclgevf_misc_irq_uninit(struct hclgevf_dev *hdev)
 	hclgevf_free_vector(hdev, 0);
 }
 
-static int hclgevf_init_instance(struct hclgevf_dev *hdev,
-				 struct hnae3_client *client)
+static int hclgevf_init_client_instance(struct hnae3_client *client,
+					struct hnae3_ae_dev *ae_dev)
 {
+	struct hclgevf_dev *hdev = ae_dev->priv;
 	int ret;
 
 	switch (client->type) {
@@ -1639,9 +1640,11 @@ static int hclgevf_init_instance(struct hclgevf_dev *hdev,
 	return 0;
 }
 
-static void hclgevf_uninit_instance(struct hclgevf_dev *hdev,
-				    struct hnae3_client *client)
+static void hclgevf_uninit_client_instance(struct hnae3_client *client,
+					   struct hnae3_ae_dev *ae_dev)
 {
+	struct hclgevf_dev *hdev = ae_dev->priv;
+
 	/* un-init roce, if it exists */
 	if (hdev->roce_client)
 		hdev->roce_client->ops->uninit_instance(&hdev->roce, 0);
@@ -1652,22 +1655,6 @@ static void hclgevf_uninit_instance(struct hclgevf_dev *hdev,
 		client->ops->uninit_instance(&hdev->nic, 0);
 }
 
-static int hclgevf_register_client(struct hnae3_client *client,
-				   struct hnae3_ae_dev *ae_dev)
-{
-	struct hclgevf_dev *hdev = ae_dev->priv;
-
-	return hclgevf_init_instance(hdev, client);
-}
-
-static void hclgevf_unregister_client(struct hnae3_client *client,
-				      struct hnae3_ae_dev *ae_dev)
-{
-	struct hclgevf_dev *hdev = ae_dev->priv;
-
-	hclgevf_uninit_instance(hdev, client);
-}
-
 static int hclgevf_pci_init(struct hclgevf_dev *hdev)
 {
 	struct pci_dev *pdev = hdev->pdev;
@@ -1928,8 +1915,8 @@ void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
 static const struct hnae3_ae_ops hclgevf_ops = {
 	.init_ae_dev = hclgevf_init_ae_dev,
 	.uninit_ae_dev = hclgevf_uninit_ae_dev,
-	.init_client_instance = hclgevf_register_client,
-	.uninit_client_instance = hclgevf_unregister_client,
+	.init_client_instance = hclgevf_init_client_instance,
+	.uninit_client_instance = hclgevf_uninit_client_instance,
 	.start = hclgevf_ae_start,
 	.stop = hclgevf_ae_stop,
 	.map_ring_to_vector = hclgevf_map_ring_to_vector,
-- 
2.9.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox