All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: mrhines@linux.vnet.ibm.com
Cc: aliguori@us.ibm.com, quintela@redhat.com, qemu-devel@nongnu.org,
	owasserm@redhat.com, abali@us.ibm.com, mrhines@us.ibm.com,
	gokul@us.ibm.com, chegu_vinod@hp.com, knoel@redhat.com
Subject: Re: [Qemu-devel] [PATCH v2 4/8] rdma: unpin support
Date: Mon, 01 Jul 2013 14:04:24 +0200	[thread overview]
Message-ID: <51D17048.7080609@redhat.com> (raw)
In-Reply-To: <1372449603-20431-5-git-send-email-mrhines@linux.vnet.ibm.com>

Il 28/06/2013 21:59, mrhines@linux.vnet.ibm.com ha scritto:
> +/*
> + * Perform a non-optimized memory unregistration after every transfer
> + * for demonstration purposes, only if pin-all is not requested.
> + *
> + * Potential optimizations:
> + * 1. Start a new thread to run this function continuously
> +        - for bit clearing
> +        - and for receipt of unregister messages
> + * 2. Use an LRU.
> + * 3. Use workload hints.
> + */
> +#ifdef RDMA_UNREGISTRATION_EXAMPLE
> +static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
> +{
> +    while (rdma->unregistrations[rdma->unregister_current]) {
> +        int ret;
> +        uint64_t wr_id = rdma->unregistrations[rdma->unregister_current];
> +        uint64_t chunk =
> +            (wr_id & RDMA_WRID_CHUNK_MASK) >> RDMA_WRID_CHUNK_SHIFT;
> +        uint64_t index =
> +            (wr_id & RDMA_WRID_BLOCK_MASK) >> RDMA_WRID_BLOCK_SHIFT;
> +        RDMALocalBlock *block =
> +            &(rdma->local_ram_blocks.block[index]);
> +        RDMARegister reg = { .current_index = index };
> +        RDMAControlHeader resp = { .type = RDMA_CONTROL_UNREGISTER_FINISHED,
> +                                 };
> +        RDMAControlHeader head = { .len = sizeof(RDMARegister),
> +                                   .type = RDMA_CONTROL_UNREGISTER_REQUEST,
> +                                   .repeat = 1,
> +                                 };
> +
> +        DDPRINTF("Processing unregister for chunk: %" PRIu64 " at position %d\n",
> +                    chunk, rdma->unregister_current);
> +
> +        rdma->unregistrations[rdma->unregister_current] = 0;
> +        rdma->unregister_current++;
> +
> +        if (rdma->unregister_current == RDMA_SIGNALED_SEND_MAX) {
> +            rdma->unregister_current = 0;
> +        }
> +
> +        DDPRINTF("Sending unregister for chunk: %" PRIu64 "\n", chunk);
> +
> +        clear_bit(chunk, block->unregister_bitmap);

The chunk is still registered at this point; shouldn't the bit be cleared
after the ibv_dereg_mr, or something like that?

> +        if (test_bit(chunk, block->transit_bitmap)) {
> +            DDPRINTF("Cannot unregister inflight chunk: %" PRIu64 "\n", chunk);
> +            continue;
> +        }

This was not clear from your answer: who exactly will unregister this
chunk?  Why not also run the 15 lines below at this point:

+    if (wr_id == RDMA_WRID_RDMA_WRITE) {
+        uint64_t chunk =
+            (wc.wr_id & RDMA_WRID_CHUNK_MASK) >> RDMA_WRID_CHUNK_SHIFT;
+        uint64_t index =
+            (wc.wr_id & RDMA_WRID_BLOCK_MASK) >> RDMA_WRID_BLOCK_SHIFT;
+        RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]);
+
+        DDDPRINTF("completions %s (%" PRId64 ") left %d, "
+                 "block %" PRIu64 ", chunk: %" PRIu64 "\n",
+                 print_wrid(wr_id), wr_id, rdma->nb_sent, index, chunk);
+
+        clear_bit(chunk, block->transit_bitmap);
+
+        if (rdma->nb_sent > 0) {
+            rdma->nb_sent--;
+        }

?

> +
> +        ret = ibv_dereg_mr(block->pmr[chunk]);
> +        block->pmr[chunk] = NULL;
> +        block->remote_keys[chunk] = 0;
> +
> +        if (ret != 0) {
> +            perror("unregistration chunk failed");
> +            return -ret;
> +        }
> +        rdma->total_registrations--;
> +
> +        reg.key.chunk = chunk;
> +        register_to_network(&reg);
> +        ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
> +                                &resp, NULL, NULL);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        DDPRINTF("Unregister for chunk: %" PRIu64 " complete.\n", chunk);
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * Set bit for unregistration in the next iteration.
> + * We cannot transmit right here, but will unpin later.
> + */
> +static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
> +                                        uint64_t chunk, uint64_t wr_id)
> +{
> +    if (rdma->unregistrations[rdma->unregister_next] != 0) {
> +        fprintf(stderr, "rdma migration: queue is full!\n");
> +    } else {
> +        RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]);
> +
> +        if (!test_and_set_bit(chunk, block->unregister_bitmap)) {
> +            DDPRINTF("Appending unregister chunk %" PRIu64
> +                    " at position %d\n", chunk, rdma->unregister_next);
> +
> +            rdma->unregistrations[rdma->unregister_next++] = wr_id;
> +
> +            if (rdma->unregister_next == RDMA_SIGNALED_SEND_MAX) {
> +                rdma->unregister_next = 0;
> +            }
> +        } else {
> +            DDPRINTF("Unregister chunk %" PRIu64 " already in queue.\n",
> +                    chunk);
> +        }
> +    }
> +}
> +#endif
>  static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head,
>                                     uint8_t *data, RDMAControlHeader *resp,
>                                     int *resp_idx,
> @@ -1006,6 +1132,17 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out)
>          if (rdma->nb_sent > 0) {
>              rdma->nb_sent--;
>          }
> +        if (!rdma->pin_all) {
> +            /*
> +             * FYI: If one wanted to signal a specific chunk to be unregistered
> +             * using LRU or workload-specific information, this is the function
> +             * you would call to do so. That chunk would then get asynchronously
> +             * unregistered later.
> +             */
> +#ifdef RDMA_UNREGISTRATION_EXAMPLE
> +            qemu_rdma_signal_unregister(rdma, index, chunk, wc.wr_id);
> +#endif
> +        }
>      } else {
>          DDPRINTF("other completion %s (%" PRId64 ") received left %d\n",
>              print_wrid(wr_id), wr_id, rdma->nb_sent);
> @@ -1423,6 +1560,12 @@ retry:
>      chunk_start = ram_chunk_start(block, chunk);
>      chunk_end = ram_chunk_end(block, chunk);
>  
> +    if (!rdma->pin_all) {
> +#ifdef RDMA_UNREGISTRATION_EXAMPLE
> +        qemu_rdma_unregister_waiting(rdma);
> +#endif
> +    }
> +
>      while (test_bit(chunk, block->transit_bitmap)) {
>          (void)count;
>          DDPRINTF("(%d) Not clobbering: block: %d chunk %" PRIu64
> 

  reply	other threads:[~2013-07-01 12:04 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-28 19:59 [Qemu-devel] [PATCH v2 0/8] rdma: core logic w/ unpin example mrhines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 1/8] rdma: update documentation to reflect new unpin support mrhines
2013-06-28 20:14   ` Eric Blake
2013-06-28 20:17     ` Michael R. Hines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 2/8] rdma: introduce ram_handle_compressed() mrhines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 3/8] rdma: core logic mrhines
2013-06-28 19:59 ` [Qemu-devel] [PATCH v2 4/8] rdma: unpin support mrhines
2013-07-01 12:04   ` Paolo Bonzini [this message]
2013-07-01 14:23     ` Michael R. Hines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 5/8] rdma: send pc.ram mrhines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 6/8] rdma: allow state transitions between other states besides ACTIVE mrhines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 7/8] rdma: introduce MIG_STATE_NONE and change MIG_STATE_SETUP state transition mrhines
2013-06-28 20:00 ` [Qemu-devel] [PATCH v2 8/8] rdma: account for the time spent in MIG_STATE_SETUP through QMP mrhines
2013-06-28 23:28 ` [Qemu-devel] [PATCH v2 0/8] rdma: core logic w/ unpin example Michael R. Hines

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51D17048.7080609@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=abali@us.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=chegu_vinod@hp.com \
    --cc=gokul@us.ibm.com \
    --cc=knoel@redhat.com \
    --cc=mrhines@linux.vnet.ibm.com \
    --cc=mrhines@us.ibm.com \
    --cc=owasserm@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.