From: Ilya Maximets <i.maximets@ovn.org>
To: Eelco Chaudron <echaudro@redhat.com>, netdev@vger.kernel.org
Cc: i.maximets@ovn.org, dev@openvswitch.org, bigeasy@linutronix.de,
jlelli@redhat.com, kuba@kernel.org, pabeni@redhat.com,
davem@davemloft.net
Subject: Re: [ovs-dev] [PATCH net v2] net: openvswitch: fix to make sure flow_lookup() is not preempted
Date: Thu, 15 Oct 2020 12:27:11 +0200 [thread overview]
Message-ID: <7212d390-ecfd-bfa1-97fc-1819c0c1ee1d@ovn.org> (raw)
In-Reply-To: <160275519174.566500.6537031776378218151.stgit@ebuild>
On 10/15/20 11:46 AM, Eelco Chaudron wrote:
> The flow_lookup() function uses per CPU variables, which must not be
> preempted. However, this is fine in the general napi use case where
> the local BH is disabled. But, it's also called in the netlink
> context, which is preemptible. The below patch makes sure that even
> in the netlink path, preemption is disabled.
>
> In addition, the u64_stats_update_begin() sync point was not protected,
> making the sync point part of the per CPU variable fixed this.
>
> Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based on usage")
> Reported-by: Juri Lelli <jlelli@redhat.com>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
> v2: - Add u64_stats_update_begin() sync point protection
> - Moved patch to net from net-next branch
>
> net/openvswitch/flow_table.c | 56 +++++++++++++++++++++++++-----------------
> net/openvswitch/flow_table.h | 8 +++++-
> 2 files changed, 39 insertions(+), 25 deletions(-)
>
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index e2235849a57e..d90b4af6f539 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -172,7 +172,7 @@ static struct table_instance *table_instance_alloc(int new_size)
>
> static void __mask_array_destroy(struct mask_array *ma)
> {
> - free_percpu(ma->masks_usage_cntr);
> + free_percpu(ma->masks_usage_stats);
> kfree(ma);
> }
>
> @@ -196,15 +196,15 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma)
> ma->masks_usage_zero_cntr[i] = 0;
>
> for_each_possible_cpu(cpu) {
> - u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
> - cpu);
> + struct mask_array_stats *stats;
> unsigned int start;
> u64 counter;
>
> + stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
> do {
> - start = u64_stats_fetch_begin_irq(&ma->syncp);
> - counter = usage_counters[i];
> - } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
> + start = u64_stats_fetch_begin_irq(&stats->syncp);
> + counter = stats->usage_cntrs[i];
> + } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
>
> ma->masks_usage_zero_cntr[i] += counter;
> }
> @@ -227,9 +227,10 @@ static struct mask_array *tbl_mask_array_alloc(int size)
> sizeof(struct sw_flow_mask *) *
> size);
>
> - new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
> - __alignof__(u64));
> - if (!new->masks_usage_cntr) {
> + new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
> + sizeof(u64) * size,
> + __alignof__(u64));
> + if (!new->masks_usage_stats) {
> kfree(new);
> return NULL;
> }
> @@ -732,7 +733,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> u32 *n_cache_hit,
> u32 *index)
> {
> - u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
> + struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
> struct sw_flow *flow;
> struct sw_flow_mask *mask;
> int i;
> @@ -742,9 +743,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> if (mask) {
> flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> if (flow) {
> - u64_stats_update_begin(&ma->syncp);
> - usage_counters[*index]++;
> - u64_stats_update_end(&ma->syncp);
> + u64_stats_update_begin(&stats->syncp);
> + stats->usage_cntrs[*index]++;
> + u64_stats_update_end(&stats->syncp);
> (*n_cache_hit)++;
> return flow;
> }
> @@ -763,9 +764,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> if (flow) { /* Found */
> *index = i;
> - u64_stats_update_begin(&ma->syncp);
> - usage_counters[*index]++;
> - u64_stats_update_end(&ma->syncp);
> + u64_stats_update_begin(&stats->syncp);
> + stats->usage_cntrs[*index]++;
> + u64_stats_update_end(&stats->syncp);
> return flow;
> }
> }
> @@ -851,9 +852,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
> struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
> u32 __always_unused n_mask_hit;
> u32 __always_unused n_cache_hit;
> + struct sw_flow *flow;
> u32 index = 0;
>
> - return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
> + /* This function gets called trough the netlink interface and therefore
> + * is preemptible. However, flow_lookup() function needs to be called
> + * with preemption disabled due to CPU specific variables.
Is it possible to add some kind of assertion inside flow_lookup() to avoid
this kind of issues in the future?
It might be also good to update the comment for flow_lookup() function itself.
> + */
> + local_bh_disable();
> + flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
> + local_bh_enable();
> + return flow;
> }
>
> struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
> @@ -1109,7 +1118,6 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>
> for (i = 0; i < ma->max; i++) {
> struct sw_flow_mask *mask;
> - unsigned int start;
> int cpu;
>
> mask = rcu_dereference_ovsl(ma->masks[i]);
> @@ -1120,14 +1128,16 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
> masks_and_count[i].counter = 0;
>
> for_each_possible_cpu(cpu) {
> - u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
> - cpu);
> + struct mask_array_stats *stats;
> + unsigned int start;
> u64 counter;
>
> + stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
> do {
> - start = u64_stats_fetch_begin_irq(&ma->syncp);
> - counter = usage_counters[i];
> - } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
> + start = u64_stats_fetch_begin_irq(&stats->syncp);
> + counter = stats->usage_cntrs[i];
> + } while (u64_stats_fetch_retry_irq(&stats->syncp,
> + start));
>
> masks_and_count[i].counter += counter;
> }
> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
> index 6e7d4ac59353..43144396e192 100644
> --- a/net/openvswitch/flow_table.h
> +++ b/net/openvswitch/flow_table.h
> @@ -38,12 +38,16 @@ struct mask_count {
> u64 counter;
> };
>
> +struct mask_array_stats {
> + struct u64_stats_sync syncp;
> + u64 usage_cntrs[];
> +};
> +
> struct mask_array {
> struct rcu_head rcu;
> int count, max;
> - u64 __percpu *masks_usage_cntr;
> + struct mask_array_stats __percpu *masks_usage_stats;
> u64 *masks_usage_zero_cntr;
> - struct u64_stats_sync syncp;
> struct sw_flow_mask __rcu *masks[];
> };
>
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
next prev parent reply other threads:[~2020-10-15 10:46 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-15 9:46 [PATCH net v2] net: openvswitch: fix to make sure flow_lookup() is not preempted Eelco Chaudron
2020-10-15 10:27 ` Ilya Maximets [this message]
2020-10-15 10:54 ` [ovs-dev] " Eelco Chaudron
2020-10-15 11:04 ` Ilya Maximets
2020-10-15 11:22 ` Ilya Maximets
2020-10-15 12:42 ` Eelco Chaudron
2020-10-15 12:34 ` Sebastian Andrzej Siewior
2020-10-15 17:06 ` Eelco Chaudron
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7212d390-ecfd-bfa1-97fc-1819c0c1ee1d@ovn.org \
--to=i.maximets@ovn.org \
--cc=bigeasy@linutronix.de \
--cc=davem@davemloft.net \
--cc=dev@openvswitch.org \
--cc=echaudro@redhat.com \
--cc=jlelli@redhat.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox