From: Paolo Bonzini <pbonzini@redhat.com>
To: mrhines@linux.vnet.ibm.com
Cc: aliguori@us.ibm.com, quintela@redhat.com, qemu-devel@nongnu.org,
owasserm@redhat.com, abali@us.ibm.com, mrhines@us.ibm.com,
gokul@us.ibm.com, chegu_vinod@hp.com, knoel@redhat.com
Subject: Re: [Qemu-devel] [PATCH v2 4/8] rdma: unpin support
Date: Mon, 01 Jul 2013 14:04:24 +0200 [thread overview]
Message-ID: <51D17048.7080609@redhat.com> (raw)
In-Reply-To: <1372449603-20431-5-git-send-email-mrhines@linux.vnet.ibm.com>
Il 28/06/2013 21:59, mrhines@linux.vnet.ibm.com ha scritto:
> +/*
> + * Perform a non-optimized memory unregistration after every transfer
> + * for demonstration purposes, only if pin-all is not requested.
> + *
> + * Potential optimizations:
> + * 1. Start a new thread to run this function continuously
> + - for bit clearing
> + - and for receipt of unregister messages
> + * 2. Use an LRU.
> + * 3. Use workload hints.
> + */
> +#ifdef RDMA_UNREGISTRATION_EXAMPLE
> +static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
> +{
> + while (rdma->unregistrations[rdma->unregister_current]) {
> + int ret;
> + uint64_t wr_id = rdma->unregistrations[rdma->unregister_current];
> + uint64_t chunk =
> + (wr_id & RDMA_WRID_CHUNK_MASK) >> RDMA_WRID_CHUNK_SHIFT;
> + uint64_t index =
> + (wr_id & RDMA_WRID_BLOCK_MASK) >> RDMA_WRID_BLOCK_SHIFT;
> + RDMALocalBlock *block =
> + &(rdma->local_ram_blocks.block[index]);
> + RDMARegister reg = { .current_index = index };
> + RDMAControlHeader resp = { .type = RDMA_CONTROL_UNREGISTER_FINISHED,
> + };
> + RDMAControlHeader head = { .len = sizeof(RDMARegister),
> + .type = RDMA_CONTROL_UNREGISTER_REQUEST,
> + .repeat = 1,
> + };
> +
> + DDPRINTF("Processing unregister for chunk: %" PRIu64 " at position %d\n",
> + chunk, rdma->unregister_current);
> +
> + rdma->unregistrations[rdma->unregister_current] = 0;
> + rdma->unregister_current++;
> +
> + if (rdma->unregister_current == RDMA_SIGNALED_SEND_MAX) {
> + rdma->unregister_current = 0;
> + }
> +
> + DDPRINTF("Sending unregister for chunk: %" PRIu64 "\n", chunk);
> +
> + clear_bit(chunk, block->unregister_bitmap);
The chunk is still registered at this point, shouldn't it be after
the ibv_dereg_mr or something like that?
> + if (test_bit(chunk, block->transit_bitmap)) {
> + DDPRINTF("Cannot unregister inflight chunk: %" PRIu64 "\n", chunk);
> + continue;
> + }
This was not clear from your answer: who exactly will unregister this
chunk? Why not call the 15 lines below this one also at this point:
+ if (wr_id == RDMA_WRID_RDMA_WRITE) {
+ uint64_t chunk =
+ (wc.wr_id & RDMA_WRID_CHUNK_MASK) >> RDMA_WRID_CHUNK_SHIFT;
+ uint64_t index =
+ (wc.wr_id & RDMA_WRID_BLOCK_MASK) >> RDMA_WRID_BLOCK_SHIFT;
+ RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]);
+
+ DDDPRINTF("completions %s (%" PRId64 ") left %d, "
+ "block %" PRIu64 ", chunk: %" PRIu64 "\n",
+ print_wrid(wr_id), wr_id, rdma->nb_sent, index, chunk);
+
+ clear_bit(chunk, block->transit_bitmap);
+
+ if (rdma->nb_sent > 0) {
+ rdma->nb_sent--;
+ }
?
> +
> + ret = ibv_dereg_mr(block->pmr[chunk]);
> + block->pmr[chunk] = NULL;
> + block->remote_keys[chunk] = 0;
> +
> + if (ret != 0) {
> + perror("unregistration chunk failed");
> + return -ret;
> + }
> + rdma->total_registrations--;
> +
> + reg.key.chunk = chunk;
> + register_to_network(&reg);
> + ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
> + &resp, NULL, NULL);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + DDPRINTF("Unregister for chunk: %" PRIu64 " complete.\n", chunk);
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Set bit for unregistration in the next iteration.
> + * We cannot transmit right here, but will unpin later.
> + */
> +static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
> + uint64_t chunk, uint64_t wr_id)
> +{
> + if (rdma->unregistrations[rdma->unregister_next] != 0) {
> + fprintf(stderr, "rdma migration: queue is full!\n");
> + } else {
> + RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]);
> +
> + if (!test_and_set_bit(chunk, block->unregister_bitmap)) {
> + DDPRINTF("Appending unregister chunk %" PRIu64
> + " at position %d\n", chunk, rdma->unregister_next);
> +
> + rdma->unregistrations[rdma->unregister_next++] = wr_id;
> +
> + if (rdma->unregister_next == RDMA_SIGNALED_SEND_MAX) {
> + rdma->unregister_next = 0;
> + }
> + } else {
> + DDPRINTF("Unregister chunk %" PRIu64 " already in queue.\n",
> + chunk);
> + }
> + }
> +}
> +#endif
> static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head,
> uint8_t *data, RDMAControlHeader *resp,
> int *resp_idx,
> @@ -1006,6 +1132,17 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out)
> if (rdma->nb_sent > 0) {
> rdma->nb_sent--;
> }
> + if (!rdma->pin_all) {
> + /*
> + * FYI: If one wanted to signal a specific chunk to be unregistered
> + * using LRU or workload-specific information, this is the function
> + * you would call to do so. That chunk would then get asynchronously
> + * unregistered later.
> + */
> +#ifdef RDMA_UNREGISTRATION_EXAMPLE
> + qemu_rdma_signal_unregister(rdma, index, chunk, wc.wr_id);
> +#endif
> + }
> } else {
> DDPRINTF("other completion %s (%" PRId64 ") received left %d\n",
> print_wrid(wr_id), wr_id, rdma->nb_sent);
> @@ -1423,6 +1560,12 @@ retry:
> chunk_start = ram_chunk_start(block, chunk);
> chunk_end = ram_chunk_end(block, chunk);
>
> + if (!rdma->pin_all) {
> +#ifdef RDMA_UNREGISTRATION_EXAMPLE
> + qemu_rdma_unregister_waiting(rdma);
> +#endif
> + }
> +
> while (test_bit(chunk, block->transit_bitmap)) {
> (void)count;
> DDPRINTF("(%d) Not clobbering: block: %d chunk %" PRIu64
>
next prev parent reply other threads:[~2013-07-01 12:04 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-06-28 19:59 [Qemu-devel] [PATCH v2 0/8] rdma: core logic w/ unpin example mrhines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 1/8] rdma: update documentation to reflect new unpin support mrhines
2013-06-28 20:14 ` Eric Blake
2013-06-28 20:17 ` Michael R. Hines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 2/8] rdma: introduce ram_handle_compressed() mrhines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 3/8] rdma: core logic mrhines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 4/8] rdma: unpin support mrhines
2013-07-01 12:04 ` Paolo Bonzini [this message]
2013-07-01 14:23 ` Michael R. Hines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 5/8] rdma: send pc.ram mrhines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 6/8] rdma: allow state transitions between other states besides ACTIVE mrhines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 7/8] rdma: introduce MIG_STATE_NONE and change MIG_STATE_SETUP state transition mrhines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 8/8] rdma: account for the time spent in MIG_STATE_SETUP through QMP mrhines
2013-06-28 23:28 ` [Qemu-devel] [PATCH v2 0/8] rdma: core logic w/ unpin example Michael R. Hines
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=51D17048.7080609@redhat.com \
--to=pbonzini@redhat.com \
--cc=abali@us.ibm.com \
--cc=aliguori@us.ibm.com \
--cc=chegu_vinod@hp.com \
--cc=gokul@us.ibm.com \
--cc=knoel@redhat.com \
--cc=mrhines@linux.vnet.ibm.com \
--cc=mrhines@us.ibm.com \
--cc=owasserm@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=quintela@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).