From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43228) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bhO6x-0003Bs-18 for qemu-devel@nongnu.org; Tue, 06 Sep 2016 17:40:56 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bhO6t-0000nV-Qi for qemu-devel@nongnu.org; Tue, 06 Sep 2016 17:40:55 -0400 References: <20160829171021.4902-1-pbutsykin@virtuozzo.com> <20160829171021.4902-6-pbutsykin@virtuozzo.com> <20160901152813.GE6355@noname.redhat.com> From: Pavel Butsykin Message-ID: <57CEF4DE.9030901@virtuozzo.com> Date: Tue, 6 Sep 2016 19:54:54 +0300 MIME-Version: 1.0 In-Reply-To: <20160901152813.GE6355@noname.redhat.com> Content-Type: text/plain; charset="windows-1252"; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH RFC v2 05/22] block/pcache: add aio requests into cache List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Kevin Wolf Cc: qemu-block@nongnu.org, qemu-devel@nongnu.org, mreitz@redhat.com, stefanha@redhat.com, den@openvz.org, jsnow@redhat.com, eblake@redhat.com, famz@redhat.com On 01.09.2016 18:28, Kevin Wolf wrote: > Am 29.08.2016 um 19:10 hat Pavel Butsykin geschrieben: >> For storing requests use an rbtree, here are add basic operations on the >> rbtree to work with cache nodes. >> >> Signed-off-by: Pavel Butsykin >> --- >> block/pcache.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 189 insertions(+), 1 deletion(-) >> >> diff --git a/block/pcache.c b/block/pcache.c >> index 7f221d6..f5022f9 100644 >> --- a/block/pcache.c >> +++ b/block/pcache.c >> @@ -27,6 +27,7 @@ >> #include "block/raw-aio.h" >> #include "qapi/error.h" >> #include "qapi/qmp/qstring.h" >> +#include "qemu/rbtree.h" >> >> #define PCACHE_DEBUG >> >> @@ -37,9 +38,53 @@ >> #define DPRINTF(fmt, ...) do { } while (0) >> #endif >> >> +typedef struct RbNodeKey { >> + uint64_t num; >> + uint32_t size; >> +} RbNodeKey; >> + >> +typedef struct BlockNode { >> + struct RbNode rb_node; >> + union { >> + RbNodeKey key; >> + struct { >> + uint64_t sector_num; >> + uint32_t nb_sectors; >> + }; >> + }; > > What's the deal with this union? > > It just adds an alias, and 'sector_num'/'nb_sectors' are actually longer > to type than 'key.num' and 'key.size', so the only advantage that I > could image doesn't really apply. > Yes, you're right. Initially this was done to reduce the code in those places where the name change will not confuse. But later it turned out that such places are few. > But it brings problems: We always have to be careful to keep the two > structs in sync, and grepping for the field name will only bring up half > of the users because the other half uses the other alias. > Yep, relative to the rest of the Qemu code it looks unusual, and given the small benefits it is need to remove :) >> + QTAILQ_ENTRY(BlockNode) entry; >> +} BlockNode; >> + >> +typedef struct PCNode { >> + BlockNode cm; >> + >> + uint8_t *data; >> +} PCNode; > > What do 'PC' and 'cm' mean? > PC - PrefetchCache, cm - common. But I think it would be better to fix it: PrefetchCache - PrefCacheNode, common - common. >> +typedef struct ReqStor { >> + struct { >> + struct RbRoot root; >> + CoMutex lock; >> + } tree; >> + >> + uint32_t curr_size; >> +} ReqStor; > > Same question for ReqStor. For an identifier that is used only three > times, it could be a bit more descriptive. > Storage requests > What unit has curr_size or what does it count? The nodes in the tree? This is the current size of the storage requests. > Also, cur_ seems to be more common as a prefix than curr_. > OK >> +typedef struct BDRVPCacheState { >> + BlockDriverState **bs; > > This is unused. (Good thing, it looks weird.) > Apparently I forgot to remove it. >> + ReqStor pcache; >> + >> + struct { >> + QTAILQ_HEAD(pcache_head, BlockNode) head; >> + CoMutex lock; >> + } list; >> +} BDRVPCacheState; >> + >> typedef struct PrefCacheAIOCB { >> BlockAIOCB common; >> >> + BDRVPCacheState *s; > > Not really needed, you already have acb->common.bs. > >> QEMUIOVector *qiov; >> uint64_t sector_num; >> uint32_t nb_sectors; >> @@ -64,6 +109,124 @@ static QemuOptsList runtime_opts = { >> }, >> }; >> >> +#define PCNODE(_n) ((PCNode *)(_n)) > > container_of() would be preferable for type safety. > OK >> +static int pcache_key_cmp(const RbNodeKey *key1, const RbNodeKey *key2) >> +{ >> + assert(key1 != NULL); >> + assert(key2 != NULL); >> + >> + if (key1->num >= key2->num + key2->size) { >> + return 1; >> + } >> + if (key1->num + key1->size <= key2->num) { >> + return -1; >> + } >> + >> + return 0; >> +} >> + >> +static void *node_insert(struct RbRoot *root, BlockNode *node) >> +{ >> + struct RbNode **new = &(root->rb_node), *parent = NULL; >> + >> + /* Figure out where to put new node */ >> + while (*new) { >> + BlockNode *this = container_of(*new, BlockNode, rb_node); >> + int result = pcache_key_cmp(&node->key, &this->key); >> + if (result == 0) { >> + return this; >> + } >> + parent = *new; >> + new = result < 0 ? &((*new)->rb_left) : &((*new)->rb_right); >> + } >> + /* Add new node and rebalance tree. */ >> + rb_link_node(&node->rb_node, parent, new); >> + rb_insert_color(&node->rb_node, root); >> + >> + return node; >> +} >> + >> +static inline PCNode *pcache_node_insert(struct RbRoot *root, PCNode *node) >> +{ >> + return node_insert(root, &node->cm); >> +} >> + >> +static inline void pcache_node_free(PCNode *node) >> +{ >> + g_free(node->data); >> + g_slice_free1(sizeof(*node), node); >> +} > > We moved away from g_slice_* because it turned out that it hurt more > than it helped. > Somewhere I saw it, thanks. >> +static inline void *pcache_node_alloc(RbNodeKey* key) >> +{ >> + PCNode *node = g_slice_alloc(sizeof(*node)); >> + >> + node->cm.sector_num = key->num; >> + node->cm.nb_sectors = key->size; > > In other words, node->cm.key = *key; > Yep :) >> + node->data = g_malloc(node->cm.nb_sectors << BDRV_SECTOR_BITS); >> + >> + return node; >> +} >> + >> +static bool pcache_node_find_and_create(PrefCacheAIOCB *acb, RbNodeKey *key, >> + PCNode **out_node) >> +{ >> + BDRVPCacheState *s = acb->s; >> + PCNode *new_node = pcache_node_alloc(key); >> + PCNode *found; >> + >> + qemu_co_mutex_lock(&s->pcache.tree.lock); >> + found = pcache_node_insert(&s->pcache.tree.root, new_node); >> + qemu_co_mutex_unlock(&s->pcache.tree.lock); > > pcache_node_insert() doesn't yield, so the CoMutex is unnecessary. > >> + if (found != new_node) { >> + pcache_node_free(new_node); > > Isn't it a bit wasteful to allocate a new node just in case and then > immediately free it again if it turns out that we don't need it? > Yes, I know about this problem. I wrote about this in TODO list, but not very detailed. >> + *out_node = found; >> + return false; >> + } >> + atomic_add(&s->pcache.curr_size, new_node->cm.nb_sectors); > > atomic_add() implies that you have concurrent threads. I don't see any. > Yeah, but what about iothread? Doesn't presence of iothread lead to parallel handling of requests in multiple threads? >> + qemu_co_mutex_lock(&s->list.lock); >> + QTAILQ_INSERT_HEAD(&s->list.head, &new_node->cm, entry); >> + qemu_co_mutex_unlock(&s->list.lock); > > Same here as above, QTAILQ_INSERT_HEAD doesn't yield. > >> + *out_node = new_node; >> + return true; >> +} >> + >> +static inline void prefetch_init_key(PrefCacheAIOCB *acb, RbNodeKey* key) >> +{ >> + key->num = acb->sector_num; >> + key->size = acb->nb_sectors; >> +} >> + >> +enum { >> + PREFETCH_NEW_NODE = 0, >> + PREFETCH_FULL_UP = 1, >> + PREFETCH_PART_UP = 2 >> +}; >> + >> +static int32_t pcache_prefetch(PrefCacheAIOCB *acb) >> +{ >> + RbNodeKey key; >> + PCNode *node = NULL; >> + >> + prefetch_init_key(acb, &key); >> + if (pcache_node_find_and_create(acb, &key, &node)) { >> + return PREFETCH_NEW_NODE; >> + } >> + >> + /* Node covers the whole request */ >> + if (node->cm.sector_num <= acb->sector_num && >> + node->cm.sector_num + node->cm.nb_sectors >= acb->sector_num + >> + acb->nb_sectors) >> + { >> + return PREFETCH_FULL_UP; >> + } >> + >> + return PREFETCH_PART_UP; >> +} >> + >> static void pcache_aio_cb(void *opaque, int ret) >> { >> PrefCacheAIOCB *acb = opaque; >> @@ -80,6 +243,7 @@ static PrefCacheAIOCB *pcache_aio_get(BlockDriverState *bs, int64_t sector_num, >> { >> PrefCacheAIOCB *acb = qemu_aio_get(&pcache_aiocb_info, bs, cb, opaque); >> >> + acb->s = bs->opaque; >> acb->sector_num = sector_num; >> acb->nb_sectors = nb_sectors; >> acb->qiov = qiov; >> @@ -99,6 +263,8 @@ static BlockAIOCB *pcache_aio_readv(BlockDriverState *bs, >> PrefCacheAIOCB *acb = pcache_aio_get(bs, sector_num, qiov, nb_sectors, cb, >> opaque, QEMU_AIO_READ); >> >> + pcache_prefetch(acb); >> + >> bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, >> pcache_aio_cb, acb); >> return &acb->common; >> @@ -119,6 +285,17 @@ static BlockAIOCB *pcache_aio_writev(BlockDriverState *bs, >> return &acb->common; >> } >> >> +static void pcache_state_init(QemuOpts *opts, BDRVPCacheState *s) >> +{ >> + DPRINTF("pcache configure:\n"); >> + >> + s->pcache.tree.root = RB_ROOT; >> + qemu_co_mutex_init(&s->pcache.tree.lock); >> + QTAILQ_INIT(&s->list.head); >> + qemu_co_mutex_init(&s->list.lock); > > QTAILQ_INIT() doesn't yield. > >> + s->pcache.curr_size = 0; >> +} >> + > > Kevin >