public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: "Eelco Chaudron" <echaudro@redhat.com>
To: "Ilya Maximets" <i.maximets@ovn.org>
Cc: netdev@vger.kernel.org, dev@openvswitch.org,
	bigeasy@linutronix.de, jlelli@redhat.com, kuba@kernel.org,
	pabeni@redhat.com, davem@davemloft.net
Subject: Re: [ovs-dev] [PATCH net v2] net: openvswitch: fix to make sure flow_lookup() is not preempted
Date: Thu, 15 Oct 2020 14:42:52 +0200	[thread overview]
Message-ID: <A93D105E-E051-4DF2-B449-509CF1CA5E76@redhat.com> (raw)
In-Reply-To: <12246eee-e235-c10b-96e7-fffc5dda18a2@ovn.org>



On 15 Oct 2020, at 13:22, Ilya Maximets wrote:

> On 10/15/20 1:04 PM, Ilya Maximets wrote:
>> On 10/15/20 12:54 PM, Eelco Chaudron wrote:
>>>
>>>
>>> On 15 Oct 2020, at 12:27, Ilya Maximets wrote:
>>>
>>>> On 10/15/20 11:46 AM, Eelco Chaudron wrote:
>>>>> The flow_lookup() function uses per CPU variables, which must not 
>>>>> be
>>>>> preempted. However, this is fine in the general napi use case 
>>>>> where
>>>>> the local BH is disabled. But, it's also called in the netlink
>>>>> context, which is preemptible. The below patch makes sure that 
>>>>> even
>>>>> in the netlink path, preemption is disabled.
>>>>>
>>>>> In addition, the u64_stats_update_begin() sync point was not 
>>>>> protected,
>>>>> making the sync point part of the per CPU variable fixed this.
>>>>>
>>>>> Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based 
>>>>> on usage")
>>>>> Reported-by: Juri Lelli <jlelli@redhat.com>
>>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>>> ---
>>>>> v2: - Add u64_stats_update_begin() sync point protection
>>>>>     - Moved patch to net from net-next branch
>>>>>
>>>>>  net/openvswitch/flow_table.c |   56 
>>>>> +++++++++++++++++++++++++-----------------
>>>>>  net/openvswitch/flow_table.h |    8 +++++-
>>>>>  2 files changed, 39 insertions(+), 25 deletions(-)
>>>>>
>>>>> diff --git a/net/openvswitch/flow_table.c 
>>>>> b/net/openvswitch/flow_table.c
>>>>> index e2235849a57e..d90b4af6f539 100644
>>>>> --- a/net/openvswitch/flow_table.c
>>>>> +++ b/net/openvswitch/flow_table.c
>>>>> @@ -172,7 +172,7 @@ static struct table_instance 
>>>>> *table_instance_alloc(int new_size)
>>>>>
>>>>>  static void __mask_array_destroy(struct mask_array *ma)
>>>>>  {
>>>>> -    free_percpu(ma->masks_usage_cntr);
>>>>> +    free_percpu(ma->masks_usage_stats);
>>>>>      kfree(ma);
>>>>>  }
>>>>>
>>>>> @@ -196,15 +196,15 @@ static void 
>>>>> tbl_mask_array_reset_counters(struct mask_array *ma)
>>>>>          ma->masks_usage_zero_cntr[i] = 0;
>>>>>
>>>>>          for_each_possible_cpu(cpu) {
>>>>> -            u64 *usage_counters = 
>>>>> per_cpu_ptr(ma->masks_usage_cntr,
>>>>> -                              cpu);
>>>>> +            struct mask_array_stats *stats;
>>>>>              unsigned int start;
>>>>>              u64 counter;
>>>>>
>>>>> +            stats = per_cpu_ptr(ma->masks_usage_stats, 
>>>>> cpu);
>>>>>              do {
>>>>> -                start = 
>>>>> u64_stats_fetch_begin_irq(&ma->syncp);
>>>>> -                counter = usage_counters[i];
>>>>> -            } while 
>>>>> (u64_stats_fetch_retry_irq(&ma->syncp, start));
>>>>> +                start = 
>>>>> u64_stats_fetch_begin_irq(&stats->syncp);
>>>>> +                counter = stats->usage_cntrs[i];
>>>>> +            } while 
>>>>> (u64_stats_fetch_retry_irq(&stats->syncp, start));
>>>>>
>>>>>              ma->masks_usage_zero_cntr[i] += counter;
>>>>>          }
>>>>> @@ -227,9 +227,10 @@ static struct mask_array 
>>>>> *tbl_mask_array_alloc(int size)
>>>>>                           sizeof(struct 
>>>>> sw_flow_mask *) *
>>>>>                           size);
>>>>>
>>>>> -    new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
>>>>> -                           
>>>>> __alignof__(u64));
>>>>> -    if (!new->masks_usage_cntr) {
>>>>> +    new->masks_usage_stats = __alloc_percpu(sizeof(struct 
>>>>> mask_array_stats) +
>>>>> +                        sizeof(u64) * 
>>>>> size,
>>>>> +                        __alignof__(u64));
>>>>> +    if (!new->masks_usage_stats) {
>>>>>          kfree(new);
>>>>>          return NULL;
>>>>>      }
>>>>> @@ -732,7 +733,7 @@ static struct sw_flow *flow_lookup(struct 
>>>>> flow_table *tbl,
>>>>>                     u32 *n_cache_hit,
>>>>>                     u32 *index)
>>>>>  {
>>>>> -    u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
>>>>> +    struct mask_array_stats *stats = 
>>>>> this_cpu_ptr(ma->masks_usage_stats);
>>>>>      struct sw_flow *flow;
>>>>>      struct sw_flow_mask *mask;
>>>>>      int i;
>>>>> @@ -742,9 +743,9 @@ static struct sw_flow *flow_lookup(struct 
>>>>> flow_table *tbl,
>>>>>          if (mask) {
>>>>>              flow = masked_flow_lookup(ti, key, mask, 
>>>>> n_mask_hit);
>>>>>              if (flow) {
>>>>> -                
>>>>> u64_stats_update_begin(&ma->syncp);
>>>>> -                usage_counters[*index]++;
>>>>> -                u64_stats_update_end(&ma->syncp);
>>>>> +                
>>>>> u64_stats_update_begin(&stats->syncp);
>>>>> +                stats->usage_cntrs[*index]++;
>>>>> +                
>>>>> u64_stats_update_end(&stats->syncp);
>>>>>                  (*n_cache_hit)++;
>>>>>                  return flow;
>>>>>              }
>>>>> @@ -763,9 +764,9 @@ static struct sw_flow *flow_lookup(struct 
>>>>> flow_table *tbl,
>>>>>          flow = masked_flow_lookup(ti, key, mask, 
>>>>> n_mask_hit);
>>>>>          if (flow) { /* Found */
>>>>>              *index = i;
>>>>> -            u64_stats_update_begin(&ma->syncp);
>>>>> -            usage_counters[*index]++;
>>>>> -            u64_stats_update_end(&ma->syncp);
>>>>> +            u64_stats_update_begin(&stats->syncp);
>>>>> +            stats->usage_cntrs[*index]++;
>>>>> +            u64_stats_update_end(&stats->syncp);
>>>>>              return flow;
>>>>>          }
>>>>>      }
>>>>> @@ -851,9 +852,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct 
>>>>> flow_table *tbl,
>>>>>      struct mask_array *ma = 
>>>>> rcu_dereference_ovsl(tbl->mask_array);
>>>>>      u32 __always_unused n_mask_hit;
>>>>>      u32 __always_unused n_cache_hit;
>>>>> +    struct sw_flow *flow;
>>>>>      u32 index = 0;
>>>>>
>>>>> -    return flow_lookup(tbl, ti, ma, key, &n_mask_hit, 
>>>>> &n_cache_hit, &index);
>>>>> +    /* This function gets called trough the netlink interface 
>>>>> and therefore
>>>>> +     * is preemptible. However, flow_lookup() function needs 
>>>>> to be called
>>>>> +     * with preemption disabled due to CPU specific 
>>>>> variables.
>>>>
>>>> Is it possible to add some kind of assertion inside flow_lookup() 
>>>> to avoid
>>>> this kind of issues in the future?
>>>
>>> We could do something like WARN_ON_ONCE(preemptible()) but do not 
>>> think such check should be added to the fast path.
>>
>> I meant something like lockdep_assert_preemption_disabled().  This 
>> will not
>> impact fast path if CONFIG_PROVE_LOCKING disabled, but will allow to 
>> catch
>> issues during development.
>
> To be clear, I just mean that this could be compiled conditionally 
> under some
> debugging config.  Do not know which of existing configs might be 
> used, though.

Looked at the debug flags, but there is not really one that makes sense 
to use. Also using a general one like DEBUG_MISC will probably no help, 
as none would probably run OVS tests with this flag enabled. So for now 
I would leave it as is unless someone has a better idea…

>>>
>>>> It might be also good to update the comment for flow_lookup() 
>>>> function itself.
>>>
>>> Good idea, will do this in a v3
>>>
>>>>> +     */
>>>>> +    local_bh_disable();
>>>>> +    flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, 
>>>>> &n_cache_hit, &index);
>>>>> +    local_bh_enable();
>>>>> +    return flow;
>>>>>  }
>>>>>
>>>>>  struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table 
>>>>> *tbl,
>>>>> @@ -1109,7 +1118,6 @@ void ovs_flow_masks_rebalance(struct 
>>>>> flow_table *table)
>>>>>
>>>>>      for (i = 0; i < ma->max; i++)  {
>>>>>          struct sw_flow_mask *mask;
>>>>> -        unsigned int start;
>>>>>          int cpu;
>>>>>
>>>>>          mask = rcu_dereference_ovsl(ma->masks[i]);
>>>>> @@ -1120,14 +1128,16 @@ void ovs_flow_masks_rebalance(struct 
>>>>> flow_table *table)
>>>>>          masks_and_count[i].counter = 0;
>>>>>
>>>>>          for_each_possible_cpu(cpu) {
>>>>> -            u64 *usage_counters = 
>>>>> per_cpu_ptr(ma->masks_usage_cntr,
>>>>> -                              cpu);
>>>>> +            struct mask_array_stats *stats;
>>>>> +            unsigned int start;
>>>>>              u64 counter;
>>>>>
>>>>> +            stats = per_cpu_ptr(ma->masks_usage_stats, 
>>>>> cpu);
>>>>>              do {
>>>>> -                start = 
>>>>> u64_stats_fetch_begin_irq(&ma->syncp);
>>>>> -                counter = usage_counters[i];
>>>>> -            } while 
>>>>> (u64_stats_fetch_retry_irq(&ma->syncp, start));
>>>>> +                start = 
>>>>> u64_stats_fetch_begin_irq(&stats->syncp);
>>>>> +                counter = stats->usage_cntrs[i];
>>>>> +            } while 
>>>>> (u64_stats_fetch_retry_irq(&stats->syncp,
>>>>> +                               
>>>>> start));
>>>>>
>>>>>              masks_and_count[i].counter += counter;
>>>>>          }
>>>>> diff --git a/net/openvswitch/flow_table.h 
>>>>> b/net/openvswitch/flow_table.h
>>>>> index 6e7d4ac59353..43144396e192 100644
>>>>> --- a/net/openvswitch/flow_table.h
>>>>> +++ b/net/openvswitch/flow_table.h
>>>>> @@ -38,12 +38,16 @@ struct mask_count {
>>>>>      u64 counter;
>>>>>  };
>>>>>
>>>>> +struct mask_array_stats {
>>>>> +    struct u64_stats_sync syncp;
>>>>> +    u64 usage_cntrs[];
>>>>> +};
>>>>> +
>>>>>  struct mask_array {
>>>>>      struct rcu_head rcu;
>>>>>      int count, max;
>>>>> -    u64 __percpu *masks_usage_cntr;
>>>>> +    struct mask_array_stats __percpu *masks_usage_stats;
>>>>>      u64 *masks_usage_zero_cntr;
>>>>> -    struct u64_stats_sync syncp;
>>>>>      struct sw_flow_mask __rcu *masks[];
>>>>>  };
>>>>>
>>>>>
>>>>> _______________________________________________
>>>>> dev mailing list
>>>>> dev@openvswitch.org
>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>>>
>>>
>>


  reply	other threads:[~2020-10-15 12:43 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-15  9:46 [PATCH net v2] net: openvswitch: fix to make sure flow_lookup() is not preempted Eelco Chaudron
2020-10-15 10:27 ` [ovs-dev] " Ilya Maximets
2020-10-15 10:54   ` Eelco Chaudron
2020-10-15 11:04     ` Ilya Maximets
2020-10-15 11:22       ` Ilya Maximets
2020-10-15 12:42         ` Eelco Chaudron [this message]
2020-10-15 12:34 ` Sebastian Andrzej Siewior
2020-10-15 17:06   ` Eelco Chaudron

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=A93D105E-E051-4DF2-B449-509CF1CA5E76@redhat.com \
    --to=echaudro@redhat.com \
    --cc=bigeasy@linutronix.de \
    --cc=davem@davemloft.net \
    --cc=dev@openvswitch.org \
    --cc=i.maximets@ovn.org \
    --cc=jlelli@redhat.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox