All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fabiano Rosas <farosas@suse.de>
To: Hao Xiang <hao.xiang@bytedance.com>,
	peter.maydell@linaro.org, quintela@redhat.com, peterx@redhat.com,
	marcandre.lureau@redhat.com, bryan.zhang@bytedance.com,
	qemu-devel@nongnu.org
Cc: Hao Xiang <hao.xiang@bytedance.com>
Subject: Re: [PATCH v2 08/20] util/dsa: Implement DSA task enqueue and dequeue.
Date: Tue, 12 Dec 2023 13:10:40 -0300	[thread overview]
Message-ID: <87msufwge7.fsf@suse.de> (raw)
In-Reply-To: <20231114054032.1192027-9-hao.xiang@bytedance.com>

Hao Xiang <hao.xiang@bytedance.com> writes:

> * Use a safe thread queue for DSA task enqueue/dequeue.
> * Implement DSA task submission.
> * Implement DSA batch task submission.
>
> Signed-off-by: Hao Xiang <hao.xiang@bytedance.com>
> ---
>  include/qemu/dsa.h |  35 ++++++++
>  util/dsa.c         | 196 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 231 insertions(+)
>
> diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
> index 30246b507e..23f55185be 100644
> --- a/include/qemu/dsa.h
> +++ b/include/qemu/dsa.h
> @@ -12,6 +12,41 @@
>  #include <linux/idxd.h>
>  #include "x86intrin.h"
>  
> +enum dsa_task_type {

Our coding style requires CamelCase for enums and typedef'ed structures.

> +    DSA_TASK = 0,
> +    DSA_BATCH_TASK
> +};
> +
> +enum dsa_task_status {
> +    DSA_TASK_READY = 0,
> +    DSA_TASK_PROCESSING,
> +    DSA_TASK_COMPLETION
> +};
> +
> +typedef void (*buffer_zero_dsa_completion_fn)(void *);

We don't really need the "buffer_zero" mention in any of this
code. Simply dsa_batch_task or batch_task would suffice.

> +
> +typedef struct buffer_zero_batch_task {
> +    struct dsa_hw_desc batch_descriptor;
> +    struct dsa_hw_desc *descriptors;
> +    struct dsa_completion_record batch_completion __attribute__((aligned(32)));
> +    struct dsa_completion_record *completions;
> +    struct dsa_device_group *group;
> +    struct dsa_device *device;
> +    buffer_zero_dsa_completion_fn completion_callback;
> +    QemuSemaphore sem_task_complete;
> +    enum dsa_task_type task_type;
> +    enum dsa_task_status status;
> +    bool *results;
> +    int batch_size;
> +    QSIMPLEQ_ENTRY(buffer_zero_batch_task) entry;
> +} buffer_zero_batch_task;

I see data specific to this implementation and data coming from the
library, maybe these would be better organized in two separate
structures with the qemu-specific having a pointer to the generic
one. Looking ahead in the series, there seems to be migration data
coming into this as well.

> +
> +#else
> +
> +struct buffer_zero_batch_task {
> +    bool *results;
> +};
> +
>  #endif
>  
>  /**
> diff --git a/util/dsa.c b/util/dsa.c
> index 8edaa892ec..f82282ce99 100644
> --- a/util/dsa.c
> +++ b/util/dsa.c
> @@ -245,6 +245,200 @@ dsa_device_group_get_next_device(struct dsa_device_group *group)
>      return &group->dsa_devices[current];
>  }
>  
> +/**
> + * @brief Empties out the DSA task queue.
> + *
> + * @param group A pointer to the DSA device group.
> + */
> +static void
> +dsa_empty_task_queue(struct dsa_device_group *group)
> +{
> +    qemu_mutex_lock(&group->task_queue_lock);
> +    dsa_task_queue *task_queue = &group->task_queue;
> +    while (!QSIMPLEQ_EMPTY(task_queue)) {
> +        QSIMPLEQ_REMOVE_HEAD(task_queue, entry);
> +    }
> +    qemu_mutex_unlock(&group->task_queue_lock);
> +}
> +
> +/**
> + * @brief Adds a task to the DSA task queue.
> + *
> + * @param group A pointer to the DSA device group.
> + * @param context A pointer to the DSA task to enqueue.
> + *
> + * @return int Zero if successful, otherwise a proper error code.
> + */
> +static int
> +dsa_task_enqueue(struct dsa_device_group *group,
> +                 struct buffer_zero_batch_task *task)
> +{
> +    dsa_task_queue *task_queue = &group->task_queue;
> +    QemuMutex *task_queue_lock = &group->task_queue_lock;
> +    QemuCond *task_queue_cond = &group->task_queue_cond;
> +
> +    bool notify = false;
> +
> +    qemu_mutex_lock(task_queue_lock);
> +
> +    if (!group->running) {
> +        fprintf(stderr, "DSA: Tried to queue task to stopped device queue\n");
> +        qemu_mutex_unlock(task_queue_lock);
> +        return -1;
> +    }
> +
> +    // The queue is empty. This enqueue operation is a 0->1 transition.
> +    if (QSIMPLEQ_EMPTY(task_queue))
> +        notify = true;
> +
> +    QSIMPLEQ_INSERT_TAIL(task_queue, task, entry);
> +
> +    // We need to notify the waiter for 0->1 transitions.
> +    if (notify)
> +        qemu_cond_signal(task_queue_cond);
> +
> +    qemu_mutex_unlock(task_queue_lock);
> +
> +    return 0;
> +}
> +
> +/**
> + * @brief Takes a DSA task out of the task queue.
> + *
> + * @param group A pointer to the DSA device group.
> + * @return buffer_zero_batch_task* The DSA task being dequeued.
> + */
> +__attribute__((unused))
> +static struct buffer_zero_batch_task *
> +dsa_task_dequeue(struct dsa_device_group *group)
> +{
> +    struct buffer_zero_batch_task *task = NULL;
> +    dsa_task_queue *task_queue = &group->task_queue;
> +    QemuMutex *task_queue_lock = &group->task_queue_lock;
> +    QemuCond *task_queue_cond = &group->task_queue_cond;
> +
> +    qemu_mutex_lock(task_queue_lock);
> +
> +    while (true) {
> +        if (!group->running)
> +            goto exit;
> +        task = QSIMPLEQ_FIRST(task_queue);
> +        if (task != NULL) {
> +            break;
> +        }
> +        qemu_cond_wait(task_queue_cond, task_queue_lock);
> +    }
> +
> +    QSIMPLEQ_REMOVE_HEAD(task_queue, entry);
> +
> +exit:
> +    qemu_mutex_unlock(task_queue_lock);
> +    return task;
> +}
> +
> +/**
> + * @brief Submits a DSA work item to the device work queue.
> + *
> + * @param wq A pointer to the DSA work queue's device memory.
> + * @param descriptor A pointer to the DSA work item descriptor.
> + *
> + * @return Zero if successful, non-zero otherwise.
> + */
> +static int
> +submit_wi_int(void *wq, struct dsa_hw_desc *descriptor)
> +{
> +    uint64_t retry = 0;
> +
> +    _mm_sfence();
> +
> +    while (true) {
> +        if (_enqcmd(wq, descriptor) == 0) {
> +            break;
> +        }
> +        retry++;
> +        if (retry > max_retry_count) {

'max_retry_count' is UINT64_MAX so 'retry' will wrap around.

> +            fprintf(stderr, "Submit work retry %lu times.\n", retry);
> +            exit(1);

Is this not the case where we'd fallback to the CPU?

You should not exit() here, but return non-zero as the documentation
mentions and the callers expect.

> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * @brief Synchronously submits a DSA work item to the
> + *        device work queue.
> + *
> + * @param wq A pointer to the DSA worjk queue's device memory.
> + * @param descriptor A pointer to the DSA work item descriptor.
> + *
> + * @return int Zero if successful, non-zero otherwise.
> + */
> +__attribute__((unused))
> +static int
> +submit_wi(void *wq, struct dsa_hw_desc *descriptor)
> +{
> +    return submit_wi_int(wq, descriptor);
> +}
> +
> +/**
> + * @brief Asynchronously submits a DSA work item to the
> + *        device work queue.
> + *
> + * @param task A pointer to the buffer zero task.
> + *
> + * @return int Zero if successful, non-zero otherwise.
> + */
> +__attribute__((unused))
> +static int
> +submit_wi_async(struct buffer_zero_batch_task *task)
> +{
> +    struct dsa_device_group *device_group = task->group;
> +    struct dsa_device *device_instance = task->device;
> +    int ret;
> +
> +    assert(task->task_type == DSA_TASK);
> +
> +    task->status = DSA_TASK_PROCESSING;
> +
> +    ret = submit_wi_int(device_instance->work_queue,
> +                        &task->descriptors[0]);
> +    if (ret != 0)
> +        return ret;
> +
> +    return dsa_task_enqueue(device_group, task);
> +}
> +
> +/**
> + * @brief Asynchronously submits a DSA batch work item to the
> + *        device work queue.
> + *
> + * @param batch_task A pointer to the batch buffer zero task.
> + *
> + * @return int Zero if successful, non-zero otherwise.
> + */
> +__attribute__((unused))
> +static int
> +submit_batch_wi_async(struct buffer_zero_batch_task *batch_task)
> +{
> +    struct dsa_device_group *device_group = batch_task->group;
> +    struct dsa_device *device_instance = batch_task->device;
> +    int ret;
> +
> +    assert(batch_task->task_type == DSA_BATCH_TASK);
> +    assert(batch_task->batch_descriptor.desc_count <= batch_task->batch_size);
> +    assert(batch_task->status == DSA_TASK_READY);
> +
> +    batch_task->status = DSA_TASK_PROCESSING;
> +
> +    ret = submit_wi_int(device_instance->work_queue,
> +                        &batch_task->batch_descriptor);
> +    if (ret != 0)
> +        return ret;
> +
> +    return dsa_task_enqueue(device_group, batch_task);
> +}

At this point in the series submit_wi_async() and
submit_batch_wi_async() look the same to me without the asserts. Can't
we consolidate them?

There's also the fact that both functions receive a _batch_ task but one
is supposed to work in batches and the other is not. That could be
solved by renaming the structure I guess.

> +
>  /**
>   * @brief Check if DSA is running.
>   *
> @@ -301,6 +495,8 @@ void dsa_stop(void)
>      if (!group->running) {
>          return;
>      }
> +
> +    dsa_empty_task_queue(group);
>  }
>  
>  /**


  reply	other threads:[~2023-12-12 16:12 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-14  5:40 [PATCH v2 00/20] Use Intel DSA accelerator to offload zero page checking in multifd live migration Hao Xiang
2023-11-14  5:40 ` [PATCH v2 01/20] multifd: Add capability to enable/disable zero_page Hao Xiang
2023-11-16 15:15   ` Fabiano Rosas
2023-11-14  5:40 ` [PATCH v2 02/20] multifd: Support for zero pages transmission Hao Xiang
2023-11-14  5:40 ` [PATCH v2 03/20] multifd: Zero " Hao Xiang
2023-12-18  2:43   ` Wang, Lei
2023-11-14  5:40 ` [PATCH v2 04/20] So we use multifd to transmit zero pages Hao Xiang
2023-11-16 15:14   ` Fabiano Rosas
2024-01-23  4:28     ` [External] " Hao Xiang
2024-01-25 21:55       ` Hao Xiang
2024-01-25 23:14         ` Fabiano Rosas
2024-01-25 23:46           ` Hao Xiang
2023-11-14  5:40 ` [PATCH v2 05/20] meson: Introduce new instruction set enqcmd to the build system Hao Xiang
2023-12-11 15:41   ` Fabiano Rosas
2023-12-16  0:26     ` [External] " Hao Xiang
2023-11-14  5:40 ` [PATCH v2 06/20] util/dsa: Add dependency idxd Hao Xiang
2023-11-14  5:40 ` [PATCH v2 07/20] util/dsa: Implement DSA device start and stop logic Hao Xiang
2023-12-11 21:28   ` Fabiano Rosas
2023-12-19  6:41     ` [External] " Hao Xiang
2023-12-19 13:18       ` Fabiano Rosas
2023-12-27  6:00         ` Hao Xiang
2023-11-14  5:40 ` [PATCH v2 08/20] util/dsa: Implement DSA task enqueue and dequeue Hao Xiang
2023-12-12 16:10   ` Fabiano Rosas [this message]
2023-12-27  0:07     ` [External] " Hao Xiang
2023-11-14  5:40 ` [PATCH v2 09/20] util/dsa: Implement DSA task asynchronous completion thread model Hao Xiang
2023-12-12 19:36   ` Fabiano Rosas
2023-12-18  3:11   ` Wang, Lei
2023-12-18 18:57     ` [External] " Hao Xiang
2023-12-19  1:33       ` Wang, Lei
2023-12-19  5:12         ` Hao Xiang
2023-11-14  5:40 ` [PATCH v2 10/20] util/dsa: Implement zero page checking in DSA task Hao Xiang
2023-11-14  5:40 ` [PATCH v2 11/20] util/dsa: Implement DSA task asynchronous submission and wait for completion Hao Xiang
2023-12-13 14:01   ` Fabiano Rosas
2023-12-27  6:26     ` [External] " Hao Xiang
2023-11-14  5:40 ` [PATCH v2 12/20] migration/multifd: Add new migration option for multifd DSA offloading Hao Xiang
2023-12-11 19:44   ` Fabiano Rosas
2023-12-18 18:34     ` [External] " Hao Xiang
2023-12-18  3:12   ` Wang, Lei
2023-11-14  5:40 ` [PATCH v2 13/20] migration/multifd: Prepare to introduce DSA acceleration on the multifd path Hao Xiang
2023-12-18  3:20   ` Wang, Lei
2023-11-14  5:40 ` [PATCH v2 14/20] migration/multifd: Enable DSA offloading in multifd sender path Hao Xiang
2023-11-14  5:40 ` [PATCH v2 15/20] migration/multifd: Add test hook to set normal page ratio Hao Xiang
2023-11-14  5:40 ` [PATCH v2 16/20] migration/multifd: Enable set normal page ratio test hook in multifd Hao Xiang
2023-11-14  5:40 ` [PATCH v2 17/20] migration/multifd: Add migration option set packet size Hao Xiang
2023-11-14  5:40 ` [PATCH v2 18/20] migration/multifd: Enable set packet size migration option Hao Xiang
2023-12-13 17:33   ` Fabiano Rosas
2024-01-03 20:04     ` [External] " Hao Xiang
2023-11-14  5:40 ` [PATCH v2 19/20] util/dsa: Add unit test coverage for Intel DSA task submission and completion Hao Xiang
2023-11-14  5:40 ` [PATCH v2 20/20] migration/multifd: Add integration tests for multifd with Intel DSA offloading Hao Xiang
2023-11-15 17:43 ` [PATCH v2 00/20] Use Intel DSA accelerator to offload zero page checking in multifd live migration Elena Ufimtseva
2023-11-15 19:37   ` [External] " Hao Xiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87msufwge7.fsf@suse.de \
    --to=farosas@suse.de \
    --cc=bryan.zhang@bytedance.com \
    --cc=hao.xiang@bytedance.com \
    --cc=marcandre.lureau@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.