qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Chenyi Qiang <chenyi.qiang@intel.com>
To: "Alexey Kardashevskiy" <aik@amd.com>,
	"David Hildenbrand" <david@redhat.com>,
	"Peter Xu" <peterx@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Philippe Mathieu-Daudé" <philmd@linaro.org>,
	"Michael Roth" <michael.roth@amd.com>
Cc: <qemu-devel@nongnu.org>, <kvm@vger.kernel.org>,
	Williams Dan J <dan.j.williams@intel.com>,
	Peng Chao P <chao.p.peng@intel.com>,
	Gao Chao <chao.gao@intel.com>, Xu Yilun <yilun.xu@intel.com>,
	Li Xiaoyao <xiaoyao.li@intel.com>
Subject: Re: [PATCH v2 3/6] memory-attribute-manager: Introduce MemoryAttributeManager to manage RAMBLock with guest_memfd
Date: Wed, 19 Feb 2025 09:20:41 +0800	[thread overview]
Message-ID: <c5682028-b84c-4b4c-8c4d-f3b43d412e83@intel.com> (raw)
In-Reply-To: <60c9ddb7-7f3e-4066-a165-c583af2411ea@amd.com>



On 2/18/2025 5:19 PM, Alexey Kardashevskiy wrote:
> 
> 

[..]

>> diff --git a/include/system/memory-attribute-manager.h b/include/
>> system/memory-attribute-manager.h
>> new file mode 100644
>> index 0000000000..72adc0028e
>> --- /dev/null
>> +++ b/include/system/memory-attribute-manager.h
>> @@ -0,0 +1,42 @@
>> +/*
>> + * QEMU memory attribute manager
>> + *
>> + * Copyright Intel
>> + *
>> + * Author:
>> + *      Chenyi Qiang <chenyi.qiang@intel.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory
>> + *
>> + */
>> +
>> +#ifndef SYSTEM_MEMORY_ATTRIBUTE_MANAGER_H
>> +#define SYSTEM_MEMORY_ATTRIBUTE_MANAGER_H
>> +
>> +#include "system/hostmem.h"
>> +
>> +#define TYPE_MEMORY_ATTRIBUTE_MANAGER "memory-attribute-manager"
>> +
>> +OBJECT_DECLARE_TYPE(MemoryAttributeManager,
>> MemoryAttributeManagerClass, MEMORY_ATTRIBUTE_MANAGER)
>> +
>> +struct MemoryAttributeManager {
>> +    Object parent;
>> +
>> +    MemoryRegion *mr;
>> +
>> +    /* 1-setting of the bit represents the memory is populated
>> (shared) */
>> +    int32_t bitmap_size;
> 
> unsigned.
> 
> Also, do either s/bitmap_size/shared_bitmap_size/ or
> s/shared_bitmap/bitmap/

Will change it. Thanks.

> 
> 
> 
>> +    unsigned long *shared_bitmap;
>> +
>> +    QLIST_HEAD(, RamDiscardListener) rdl_list;
>> +};
>> +
>> +struct MemoryAttributeManagerClass {
>> +    ObjectClass parent_class;
>> +};
>> +
>> +int memory_attribute_manager_realize(MemoryAttributeManager *mgr,
>> MemoryRegion *mr);
>> +void memory_attribute_manager_unrealize(MemoryAttributeManager *mgr);
>> +
>> +#endif
>> diff --git a/system/memory-attribute-manager.c b/system/memory-
>> attribute-manager.c
>> new file mode 100644
>> index 0000000000..ed97e43dd0
>> --- /dev/null
>> +++ b/system/memory-attribute-manager.c
>> @@ -0,0 +1,292 @@
>> +/*
>> + * QEMU memory attribute manager
>> + *
>> + * Copyright Intel
>> + *
>> + * Author:
>> + *      Chenyi Qiang <chenyi.qiang@intel.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/error-report.h"
>> +#include "system/memory-attribute-manager.h"
>> +
>> +OBJECT_DEFINE_TYPE_WITH_INTERFACES(MemoryAttributeManager,
>> +                                   memory_attribute_manager,
>> +                                   MEMORY_ATTRIBUTE_MANAGER,
>> +                                   OBJECT,
>> +                                   { TYPE_RAM_DISCARD_MANAGER },
>> +                                   { })
>> +
>> +static int memory_attribute_manager_get_block_size(const
>> MemoryAttributeManager *mgr)
>> +{
>> +    /*
>> +     * Because page conversion could be manipulated in the size of at
>> least 4K or 4K aligned,
>> +     * Use the host page size as the granularity to track the memory
>> attribute.
>> +     * TODO: if necessary, switch to get the page_size from RAMBlock.
>> +     * i.e. mgr->mr->ram_block->page_size.
> 
> I'd assume it is rather necessary already.

OK, Will return the page_size of RAMBlock directly.

> 
>> +     */
>> +    return qemu_real_host_page_size();
>> +}
>> +
>> +
>> +static bool memory_attribute_rdm_is_populated(const RamDiscardManager
>> *rdm,
>> +                                              const
>> MemoryRegionSection *section)
>> +{
>> +    const MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    int block_size = memory_attribute_manager_get_block_size(mgr);
>> +    uint64_t first_bit = section->offset_within_region / block_size;
>> +    uint64_t last_bit = first_bit + int128_get64(section->size) /
>> block_size - 1;
>> +    unsigned long first_discard_bit;
>> +
>> +    first_discard_bit = find_next_zero_bit(mgr->shared_bitmap,
>> last_bit + 1, first_bit);
>> +    return first_discard_bit > last_bit;
>> +}
>> +
>> +typedef int (*memory_attribute_section_cb)(MemoryRegionSection *s,
>> void *arg);
>> +
>> +static int memory_attribute_notify_populate_cb(MemoryRegionSection
>> *section, void *arg)
>> +{
>> +    RamDiscardListener *rdl = arg;
>> +
>> +    return rdl->notify_populate(rdl, section);
>> +}
>> +
>> +static int memory_attribute_notify_discard_cb(MemoryRegionSection
>> *section, void *arg)
>> +{
>> +    RamDiscardListener *rdl = arg;
>> +
>> +    rdl->notify_discard(rdl, section);
>> +
>> +    return 0;
>> +}
>> +
>> +static int memory_attribute_for_each_populated_section(const
>> MemoryAttributeManager *mgr,
>> +                                                      
>> MemoryRegionSection *section,
>> +                                                       void *arg,
>> +                                                      
>> memory_attribute_section_cb cb)
>> +{
>> +    unsigned long first_one_bit, last_one_bit;
>> +    uint64_t offset, size;
>> +    int block_size = memory_attribute_manager_get_block_size(mgr);
>> +    int ret = 0;
>> +
>> +    first_one_bit = section->offset_within_region / block_size;
>> +    first_one_bit = find_next_bit(mgr->shared_bitmap, mgr-
>> >bitmap_size, first_one_bit);
>> +
>> +    while (first_one_bit < mgr->bitmap_size) {
>> +        MemoryRegionSection tmp = *section;
>> +
>> +        offset = first_one_bit * block_size;
>> +        last_one_bit = find_next_zero_bit(mgr->shared_bitmap, mgr-
>> >bitmap_size,
>> +                                          first_one_bit + 1) - 1;
>> +        size = (last_one_bit - first_one_bit + 1) * block_size;
> 
> 
> What all this math is for if we stuck with VFIO doing 1 page at the
> time? (I think I commented on this)

Sorry, I missed your previous comment. IMHO, as we track the status in
bitmap and we want to call the cb() on the shared part within
MemoryRegionSection. Here we do the calculation to find the expected
sub-range.

> 
>> +
>> +        if (!memory_region_section_intersect_range(&tmp, offset,
>> size)) {
>> +            break;
>> +        }
>> +
>> +        ret = cb(&tmp, arg);
>> +        if (ret) {
>> +            error_report("%s: Failed to notify RAM discard listener:
>> %s", __func__,
>> +                         strerror(-ret));
>> +            break;
>> +        }
>> +
>> +        first_one_bit = find_next_bit(mgr->shared_bitmap, mgr-
>> >bitmap_size,
>> +                                      last_one_bit + 2);
>> +    }
>> +
>> +    return ret;
>> +}
>> +

[..]

>> +
>> +static void
>> memory_attribute_rdm_unregister_listener(RamDiscardManager *rdm,
>> +                                                    
>> RamDiscardListener *rdl)
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    int ret;
>> +
>> +    g_assert(rdl->section);
>> +    g_assert(rdl->section->mr == mgr->mr);
>> +
>> +    ret = memory_attribute_for_each_populated_section(mgr, rdl-
>> >section, rdl,
>> +                                                     
>> memory_attribute_notify_discard_cb);
>> +    if (ret) {
>> +        error_report("%s: Failed to unregister RAM discard listener:
>> %s", __func__,
>> +                     strerror(-ret));
>> +    }
>> +
>> +    memory_region_section_free_copy(rdl->section);
>> +    rdl->section = NULL;
>> +    QLIST_REMOVE(rdl, next);
>> +
>> +}
>> +
>> +typedef struct MemoryAttributeReplayData {
>> +    void *fn;
> 
> ReplayRamDiscard *fn, not void*.

We could cast the void *fn either to ReplayRamPopulate or
ReplayRamDiscard (see below).

> 
>> +    void *opaque;
>> +} MemoryAttributeReplayData;
>> +
>> +static int
>> memory_attribute_rdm_replay_populated_cb(MemoryRegionSection *section,
>> void *arg)
>> +{
>> +    MemoryAttributeReplayData *data = arg;
>> +
>> +    return ((ReplayRamPopulate)data->fn)(section, data->opaque);
>> +}
>> +
>> +static int memory_attribute_rdm_replay_populated(const
>> RamDiscardManager *rdm,
>> +                                                 MemoryRegionSection
>> *section,
>> +                                                 ReplayRamPopulate
>> replay_fn,
>> +                                                 void *opaque)
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    MemoryAttributeReplayData data = { .fn = replay_fn, .opaque =
>> opaque };
>> +
>> +    g_assert(section->mr == mgr->mr);
>> +    return memory_attribute_for_each_populated_section(mgr, section,
>> &data,
>> +                                                      
>> memory_attribute_rdm_replay_populated_cb);
>> +}
>> +
>> +static int
>> memory_attribute_rdm_replay_discarded_cb(MemoryRegionSection *section,
>> void *arg)
>> +{
>> +    MemoryAttributeReplayData *data = arg;
>> +
>> +    ((ReplayRamDiscard)data->fn)(section, data->opaque);
>> +    return 0;
>> +}
>> +
>> +static void memory_attribute_rdm_replay_discarded(const
>> RamDiscardManager *rdm,
>> +                                                  MemoryRegionSection
>> *section,
>> +                                                  ReplayRamDiscard
>> replay_fn,
>> +                                                  void *opaque)
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(rdm);
>> +    MemoryAttributeReplayData data = { .fn = replay_fn, .opaque =
>> opaque };
>> +
>> +    g_assert(section->mr == mgr->mr);
>> +    memory_attribute_for_each_discarded_section(mgr, section, &data,
>> +                                               
>> memory_attribute_rdm_replay_discarded_cb);
>> +}
>> +
>> +int memory_attribute_manager_realize(MemoryAttributeManager *mgr,
>> MemoryRegion *mr)
>> +{
>> +    uint64_t bitmap_size;
>> +    int block_size = memory_attribute_manager_get_block_size(mgr);
>> +    int ret;
>> +
>> +    bitmap_size = ROUND_UP(mr->size, block_size) / block_size;
>> +
>> +    mgr->mr = mr;
>> +    mgr->bitmap_size = bitmap_size;
>> +    mgr->shared_bitmap = bitmap_new(bitmap_size);
>> +
>> +    ret = memory_region_set_ram_discard_manager(mgr->mr,
>> RAM_DISCARD_MANAGER(mgr));
> 
> Move it 3 lines up and avoid stale data in mgr->mr/bitmap_size/
> shared_bitmap and avoid g_free below?

Make sense. I will move it up the same as patch 02 before bitmap_new().

> 
>> +    if (ret) {
>> +        g_free(mgr->shared_bitmap);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +void memory_attribute_manager_unrealize(MemoryAttributeManager *mgr)
>> +{
>> +    memory_region_set_ram_discard_manager(mgr->mr, NULL);
>> +
>> +    g_free(mgr->shared_bitmap);
>> +}
>> +
>> +static void memory_attribute_manager_init(Object *obj)
> 
> Not used.
> 
>> +{
>> +    MemoryAttributeManager *mgr = MEMORY_ATTRIBUTE_MANAGER(obj);
>> +
>> +    QLIST_INIT(&mgr->rdl_list);
>> +} > +
>> +static void memory_attribute_manager_finalize(Object *obj)
> 
> Not used either. Thanks,

I think it is OK to define it as a placeholder? Just some preference.

> 
>> +{
>> +}
>> +
>> +static void memory_attribute_manager_class_init(ObjectClass *oc, void
>> *data)
>> +{
>> +    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(oc);
>> +
>> +    rdmc->get_min_granularity =
>> memory_attribute_rdm_get_min_granularity;
>> +    rdmc->register_listener = memory_attribute_rdm_register_listener;
>> +    rdmc->unregister_listener =
>> memory_attribute_rdm_unregister_listener;
>> +    rdmc->is_populated = memory_attribute_rdm_is_populated;
>> +    rdmc->replay_populated = memory_attribute_rdm_replay_populated;
>> +    rdmc->replay_discarded = memory_attribute_rdm_replay_discarded;
>> +}
>> diff --git a/system/meson.build b/system/meson.build
>> index 4952f4b2c7..ab07ff1442 100644
>> --- a/system/meson.build
>> +++ b/system/meson.build
>> @@ -15,6 +15,7 @@ system_ss.add(files(
>>     'dirtylimit.c',
>>     'dma-helpers.c',
>>     'globals.c',
>> +  'memory-attribute-manager.c',
>>     'memory_mapping.c',
>>     'qdev-monitor.c',
>>     'qtest.c',
> 



  reply	other threads:[~2025-02-19  1:22 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-17  8:18 [PATCH v2 0/6] Enable shared device assignment Chenyi Qiang
2025-02-17  8:18 ` [PATCH v2 1/6] memory: Export a helper to get intersection of a MemoryRegionSection with a given range Chenyi Qiang
2025-02-17  8:18 ` [PATCH v2 2/6] memory: Change memory_region_set_ram_discard_manager() to return the result Chenyi Qiang
2025-02-18  9:19   ` Alexey Kardashevskiy
2025-02-18  9:41     ` Chenyi Qiang
2025-02-18 10:46     ` David Hildenbrand
2025-02-17  8:18 ` [PATCH v2 3/6] memory-attribute-manager: Introduce MemoryAttributeManager to manage RAMBLock with guest_memfd Chenyi Qiang
2025-02-18  9:19   ` Alexey Kardashevskiy
2025-02-19  1:20     ` Chenyi Qiang [this message]
2025-02-19  3:49       ` Alexey Kardashevskiy
2025-02-19  6:33         ` Chenyi Qiang
2025-02-20  3:02           ` Alexey Kardashevskiy
2025-02-17  8:18 ` [PATCH v2 4/6] memory-attribute-manager: Introduce a callback to notify the shared/private state change Chenyi Qiang
2025-02-18  9:19   ` Alexey Kardashevskiy
2025-02-19  1:50     ` Chenyi Qiang
2025-02-17  8:18 ` [PATCH v2 5/6] memory: Attach MemoryAttributeManager to guest_memfd-backed RAMBlocks Chenyi Qiang
2025-02-18  9:19   ` Alexey Kardashevskiy
2025-02-17  8:18 ` [PATCH v2 6/6] RAMBlock: Make guest_memfd require coordinate discard Chenyi Qiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c5682028-b84c-4b4c-8c4d-f3b43d412e83@intel.com \
    --to=chenyi.qiang@intel.com \
    --cc=aik@amd.com \
    --cc=chao.gao@intel.com \
    --cc=chao.p.peng@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=david@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=michael.roth@amd.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=philmd@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=xiaoyao.li@intel.com \
    --cc=yilun.xu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).