From: Martin KaFai Lau <martin.lau@linux.dev>
To: Aditi Ghag <aditi.ghag@isovalent.com>
Cc: kafai@fb.com, sdf@google.com, edumazet@google.com,
Martin KaFai Lau <martin.lau@kernel.org>,
bpf@vger.kernel.org
Subject: Re: [PATCH v2 bpf-next 1/3] bpf: Implement batching in UDP iterator
Date: Tue, 28 Feb 2023 11:58:46 -0800 [thread overview]
Message-ID: <22705f9d-9b94-a4ec-3202-270fef1ed657@linux.dev> (raw)
In-Reply-To: <20230223215311.926899-2-aditi.ghag@isovalent.com>
On 2/23/23 1:53 PM, Aditi Ghag wrote:
> +struct bpf_udp_iter_state {
> + struct udp_iter_state state;
> + unsigned int cur_sk;
> + unsigned int end_sk;
> + unsigned int max_sk;
> + struct sock **batch;
> + bool st_bucket_done;
> +};
> +
> +static unsigned short seq_file_family(const struct seq_file *seq);
> +static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
> + unsigned int new_batch_sz);
> +
> +static inline bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
> +{
> + unsigned short family = seq_file_family(seq);
> +
> + /* AF_UNSPEC is used as a match all */
> + return ((family == AF_UNSPEC || family == sk->sk_family) &&
> + net_eq(sock_net(sk), seq_file_net(seq)));
> +}
> +
> +static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
> +{
> + struct bpf_udp_iter_state *iter = seq->private;
> + struct udp_iter_state *state = &iter->state;
> + struct net *net = seq_file_net(seq);
> + struct udp_seq_afinfo *afinfo = state->bpf_seq_afinfo;
> + struct udp_table *udptable;
> + struct sock *first_sk = NULL;
> + struct sock *sk;
> + unsigned int bucket_sks = 0;
> + bool first;
> + bool resized = false;
> +
> + /* The current batch is done, so advance the bucket. */
> + if (iter->st_bucket_done)
> + state->bucket++;
> +
> + udptable = udp_get_table_afinfo(afinfo, net);
> +
> +again:
> + /* New batch for the next bucket.
> + * Iterate over the hash table to find a bucket with sockets matching
> + * the iterator attributes, and return the first matching socket from
> + * the bucket. The remaining matched sockets from the bucket are batched
> + * before releasing the bucket lock. This allows BPF programs that are
> + * called in seq_show to acquire the bucket lock if needed.
> + */
> + iter->cur_sk = 0;
> + iter->end_sk = 0;
> + iter->st_bucket_done = false;
> + first = true;
> +
> + for (; state->bucket <= udptable->mask; state->bucket++) {
> + struct udp_hslot *hslot = &udptable->hash[state->bucket];
Since it is mostly separated from the proc's udp-seq-file now, may as well
iterate the udptable->hash"2" which is hashed by both addr and port such that
each batch should be smaller.
> +
> + if (hlist_empty(&hslot->head))
> + continue;
> +
> + spin_lock_bh(&hslot->lock);
> + sk_for_each(sk, &hslot->head) {
> + if (seq_sk_match(seq, sk)) {
> + if (first) {
> + first_sk = sk;
> + first = false;
> + }
> + if (iter->end_sk < iter->max_sk) {
> + sock_hold(sk);
> + iter->batch[iter->end_sk++] = sk;
> + }
> + bucket_sks++;
> + }
> + }
> + spin_unlock_bh(&hslot->lock);
> + if (first_sk)
> + break;
> + }
> +
> + /* All done: no batch made. */
> + if (!first_sk)
> + return NULL;
I think first_sk and bucket_sks need to be reset on the "again" case also?
If bpf_iter_udp_seq_stop() is called before a batch has been fully processed by
the bpf prog in ".show", how does the next bpf_iter_udp_seq_start() continue
from where it left off? The bpf_tcp_iter remembers the bucket and the
offset-in-this-bucket. I think bpf_udp_iter can do something similar.
> +
> + if (iter->end_sk == bucket_sks) {
> + /* Batching is done for the current bucket; return the first
> + * socket to be iterated from the batch.
> + */
> + iter->st_bucket_done = true;
> + return first_sk;
> + }
> + if (!resized && !bpf_iter_udp_realloc_batch(iter, bucket_sks * 3 / 2)) {
> + resized = true;
> + /* Go back to the previous bucket to resize its batch. */
> + state->bucket--;
> + goto again;
> + }
> + return first_sk;
> +}
> +
[ ... ]
> static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
> {
> - struct udp_iter_state *st = priv_data;
> + struct bpf_udp_iter_state *iter = priv_data;
> + struct udp_iter_state *st = &iter->state;
> struct udp_seq_afinfo *afinfo;
> int ret;
>
> @@ -3427,24 +3623,34 @@ static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
> afinfo->udp_table = NULL;
> st->bpf_seq_afinfo = afinfo;
Is bpf_seq_afinfo still needed in 'struct udp_iter_state'? Can it be removed?
> ret = bpf_iter_init_seq_net(priv_data, aux);
> - if (ret)
> + if (ret) {
> kfree(afinfo);
> + return ret;
> + }
> + ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ);
> + if (ret) {
> + bpf_iter_fini_seq_net(priv_data);
> + return ret;
> + }
> + iter->cur_sk = 0;
> + iter->end_sk = 0;
> +
> return ret;
> }
>
> static void bpf_iter_fini_udp(void *priv_data)
> {
> - struct udp_iter_state *st = priv_data;
> + struct bpf_udp_iter_state *iter = priv_data;
>
> - kfree(st->bpf_seq_afinfo);
> bpf_iter_fini_seq_net(priv_data);
> + kfree(iter->batch);
kvfree
next prev parent reply other threads:[~2023-02-28 19:58 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-23 21:53 [PATCH v2 bpf-next 0/3]: Add socket destroy capability Aditi Ghag
2023-02-23 21:53 ` [PATCH v2 bpf-next 1/3] bpf: Implement batching in UDP iterator Aditi Ghag
2023-02-24 22:32 ` Stanislav Fomichev
2023-02-28 20:32 ` Martin KaFai Lau
2023-02-28 20:52 ` Stanislav Fomichev
2023-02-28 19:58 ` Martin KaFai Lau [this message]
2023-03-01 2:40 ` Aditi Ghag
2023-03-02 6:43 ` Martin KaFai Lau
2023-02-23 21:53 ` [PATCH v2 bpf-next 2/3] bpf: Add bpf_sock_destroy kfunc Aditi Ghag
2023-02-24 22:35 ` Stanislav Fomichev
2023-02-27 14:56 ` Aditi Ghag
2023-02-27 15:32 ` Aditi Ghag
2023-02-27 17:30 ` Stanislav Fomichev
2023-02-28 22:55 ` Martin KaFai Lau
2023-03-16 22:37 ` Aditi Ghag
2023-03-21 18:49 ` Aditi Ghag
2023-02-23 21:53 ` [PATCH v2 bpf-next 3/3] selftests/bpf: Add tests for bpf_sock_destroy Aditi Ghag
2023-02-24 22:40 ` Stanislav Fomichev
2023-02-27 19:37 ` Andrii Nakryiko
2023-03-03 16:00 ` Aditi Ghag
2023-02-28 23:08 ` Martin KaFai Lau
2023-03-01 2:17 ` Aditi Ghag
2023-03-02 7:06 ` Martin KaFai Lau
2023-03-02 20:52 ` Aditi Ghag
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=22705f9d-9b94-a4ec-3202-270fef1ed657@linux.dev \
--to=martin.lau@linux.dev \
--cc=aditi.ghag@isovalent.com \
--cc=bpf@vger.kernel.org \
--cc=edumazet@google.com \
--cc=kafai@fb.com \
--cc=martin.lau@kernel.org \
--cc=sdf@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.