From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59704) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Upfp0-0007Wy-B5 for qemu-devel@nongnu.org; Thu, 20 Jun 2013 10:26:55 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Upfon-0000Z2-0T for qemu-devel@nongnu.org; Thu, 20 Jun 2013 10:26:46 -0400 Received: from nodalink.pck.nerim.net ([62.212.105.220]:37542 helo=paradis.irqsave.net) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Upfol-0000YJ-Hr for qemu-devel@nongnu.org; Thu, 20 Jun 2013 10:26:32 -0400 From: =?UTF-8?q?Beno=C3=AEt=20Canet?= Date: Thu, 20 Jun 2013 16:26:12 +0200 Message-Id: <1371738392-9594-5-git-send-email-benoit@irqsave.net> In-Reply-To: <1371738392-9594-1-git-send-email-benoit@irqsave.net> References: <1371738392-9594-1-git-send-email-benoit@irqsave.net> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Subject: [Qemu-devel] =?utf-8?q?=5BRFC_V8_04/24=5D_qcow2=3A_Create_the_log?= =?utf-8?q?_store=2E?= List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: kwolf@redhat.com, =?UTF-8?q?Beno=C3=AEt=20Canet?= , stefanha@redhat.com The log store is an in RAM hash table used to index QCowJournalEntries. Combining the log store with a journal allows to get the benefits of the = journal (fewer random write and minimization of SSD wear out) with the advantages= of a hash table (being good at random access). Once the journal is full and not packable or if the log store hash table = is full the journal is frozen and converted into an on disk hash table used by th= e hash store. Signed-off-by: Benoit Canet --- block/Makefile.objs | 2 +- block/qcow2-log-store.c | 1039 +++++++++++++++++++++++++++++++++++++++++= ++++++ block/qcow2.h | 26 ++ 3 files changed, 1066 insertions(+), 1 deletion(-) create mode 100644 block/qcow2-log-store.c diff --git a/block/Makefile.objs b/block/Makefile.objs index ee894b5..8c0b646 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,6 +1,6 @@ block-obj-y +=3D raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o v= pc.o vvfat.o block-obj-y +=3D qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot= .o qcow2-cache.o -block-obj-y +=3D qcow2-journal.o +block-obj-y +=3D qcow2-journal.o qcow2-log-store.o block-obj-y +=3D qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluste= r.o block-obj-y +=3D qed-check.o block-obj-y +=3D vhdx.o diff --git a/block/qcow2-log-store.c b/block/qcow2-log-store.c new file mode 100644 index 0000000..24ae310 --- /dev/null +++ b/block/qcow2-log-store.c @@ -0,0 +1,1039 @@ +/* + * QCOW2 log store + * + * Copyright (C) Nodalink, SARL. 2013 + * + * Author: + * Beno=C3=AEt Canet + * + * Permission is hereby granted, free of charge, to any person obtaining= a copy + * of this software and associated documentation files (the "Software"),= to deal + * in the Software without restriction, including without limitation the= rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or = sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be includ= ed in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRE= SS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILI= TY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHA= LL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR = OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISI= NG FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALING= S IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "block/block_int.h" +#include "block/qcow2.h" + +static uint64_t qcow2_dedup_get_h(QCowHashInfo *hash_info, int index, + uint32_t order) +{ + uint64_t *array =3D (uint64_t *) &hash_info->hash.data; + uint64_t result =3D array[index]; + return result & ((1 << order) - 1); +} + +uint64_t qcow2_dedup_h1(QCowHashInfo *hash_info, uint32_t order) +{ + return qcow2_dedup_get_h(hash_info, 0, order); +} + +uint64_t qcow2_dedup_h2(QCowHashInfo *hash_info, uint32_t order) +{ + return qcow2_dedup_get_h(hash_info, 1, order); +} + +static bool qcow2_dedup_h_equals(QCowHashInfo *hash_info, uint32_t order= ) +{ + return qcow2_dedup_h1(hash_info, order) =3D=3D + qcow2_dedup_h2(hash_info, order); +} + +static void qcow2_log_store_reset_kick_map(QCowLogStore *store) +{ + uint64_t i; + for (i =3D 0; i < qcow2_log_store_nb_entries(store->order); i++) { + store->kick_map[i] =3D false; + } +} + +static bool qcow2_log_store_is_in_kick_path(QCowLogStore *store, + uint64_t in_table_idx) +{ + return store->kick_map[in_table_idx]; +} + +static void qcow2_log_store_set_in_kick_path(QCowLogStore *store, + uint64_t in_table_idx) +{ + store->kick_map[in_table_idx] =3D true; +} + +static void qcow2_log_store_unset_from_kick_path(QCowLogStore *store, + uint64_t in_table_idx) +{ + store->kick_map[in_table_idx] =3D false; +} + +static void qcow2_log_store_set_perm_element(int *perm, int value, int r= nd) +{ + for (;perm[rnd % QCOW_LOG_STORE_BUCKET_SIZE] !=3D -1; rnd++); + perm[rnd % QCOW_LOG_STORE_BUCKET_SIZE] =3D value; +} + +/* This function create a random permutation of the set + * [0, QCOW_LOG_STORE_BUCKET_SIZE] + */ +static void qcow2_log_store_make_perm(int *perm) +{ + int i, rnd; + + /* reset */ + for (i =3D 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) { + perm[i] =3D -1; + } + + /* create permutation */ + for (i =3D 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) { + rnd =3D rand(); + qcow2_log_store_set_perm_element(perm, i, rnd); + } +} + +/* This function is used to make sure we don't create path cycle while k= icking + * + * The function uses a random permutation to compute the kick idx. + * This way two call to the function should not give the same results. + * Randomization increase hash table usage by 13% giving a 93% filling r= ate. + * + * @store: the store to work with + * @bucket_idx: the index of the bucket where we are trying to get an en= try to + * push + * @ret: [0, QCOW_LOG_STORE_BUCKET_SIZE] on succes else -2 + */ +static int qcow2_log_store_get_kick_idx(QCowLogStore *store, + uint64_t bucket_idx) +{ + QCowHashInfo *entry; + uint64_t in_table_idx; + int perm[QCOW_LOG_STORE_BUCKET_SIZE]; + int i; + + /* create a pseudo random permutation */ + qcow2_log_store_make_perm(&perm[0]); + + /* use the permutation to get entry to kick */ + for (i =3D 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) { + in_table_idx =3D bucket_idx * QCOW_LOG_STORE_BUCKET_SIZE + perm[= i]; + entry =3D &store->hash_table[in_table_idx]; + + /* we take care of not selecting an entry already moved or an en= try + * which as no alternate location + */ + if (!qcow2_log_store_is_in_kick_path(store, in_table_idx) && + !qcow2_dedup_h_equals(entry, store->order)) { + qcow2_log_store_set_in_kick_path(store, in_table_idx); + return perm[i]; + } + } + + return -2; +} + + +uint64_t qcow2_log_store_nb_entries(uint32_t order) +{ + /* entries are grouped in QCOW_LOG_STORE_BUCKET_SIZE sized buckets *= / + return (1 << order) * QCOW_LOG_STORE_BUCKET_SIZE; +} + +static uint64_t qcow2_log_store_journal_size(uint32_t order) +{ + /* size of a QCowLog hash entry */ + uint32_t qcow_log_hash_size =3D sizeof(QCowHashInfo) + + QCOW_LOG_END_SIZE; + /* leave some extra room for insert/delete cycle and other QCOW2 jou= rnal + * insertion + */ + return qcow2_journal_round_to_erase_blocks(qcow2_log_store_nb_entrie= s(order) + * qcow_log_hash_size + * QCOW_LOG_STORE_JOURNAL_= RATIO); +} + +/* this function reset a log store state + * + * @store: the log store to reset + * @order: the bit order + */ +static void qcow2_log_store_reset(QCowLogStore *store, uint32_t order) +{ + store->order =3D order; + store->nb_kicks =3D 0; + qcow2_log_store_reset_kick_map(store); + memset(store->hash_table, 0, + qcow2_log_store_nb_entries(order) * sizeof(QCowHashInfo)); + memset(store->write_buf, 0, HASH_STORE_CLUSTER_SIZE); + store->write_buf_offset =3D 0; + store->in_buf_offset =3D 0; +} + +/* This function create a new log store + * + * The function does allocate journal disk space + * + * @store: the log store to work on + * @order: the bit order + * @ret: 0 on success, -errno on error + */ +int qcow2_log_store_create(BlockDriverState *bs, + QCowLogStore *store, + uint32_t order) +{ + qcow2_log_store_reset(store, order); + return qcow2_journal_disk_allocate(bs, + &store->journal, + qcow2_log_store_journal_size(store->order)); +} + + +int qcow2_log_store_recycle(BlockDriverState *bs, + QCowLogStore *store) +{ + qcow2_log_store_reset(store, store->order); + return qcow2_journal_recycle(bs, &store->journal); +} + +/* This function initialize a log store + * + * @store: the log store to initialize + */ +void qcow2_log_store_init(BlockDriverState *bs, + QCowLogStore *store, + uint32_t order) +{ + store->order =3D order; + qcow2_journal_init(bs, &store->journal); + store->hash_table =3D g_new0(QCowHashInfo, + qcow2_log_store_nb_entries(order)); + store->kick_map =3D g_new0(bool, qcow2_log_store_nb_entries(order)); + store->write_buf =3D qemu_blockalign(bs, HASH_STORE_CLUSTER_SIZE); +} + +void qcow2_log_store_cleanup(QCowLogStore *store) +{ + qemu_vfree(store->write_buf); + g_free(store->hash_table); + g_free(store->kick_map); + qcow2_journal_cleanup(&store->journal); +} + +static bool qcow2_log_store_is_used_by_table_idx(QCowHashInfo *table, + uint64_t in_table_idx) +{ + QCowHashInfo *entry; + entry =3D &table[in_table_idx]; + return entry->first_logical_sect & QCOW_LOG_STORE_ENTRY_USED; +} + +static void qcow2_log_store_copy_to_hash_table(QCowHashInfo *table, + uint64_t in_table_idx, + QCowHashInfo *hash_info) +{ + QCowHashInfo *entry; + + entry =3D &table[in_table_idx]; + memcpy(entry, hash_info, sizeof(QCowHashInfo)); + entry->first_logical_sect |=3D QCOW_LOG_STORE_ENTRY_USED; +} + +static void qcow2_log_store_copy_from_hash_table(QCowHashInfo *hash_info= , + QCowHashInfo *table, + uint64_t in_table_idx) +{ + QCowHashInfo *entry; + + entry =3D &table[in_table_idx]; + memcpy(hash_info, entry, sizeof(QCowHashInfo)); + hash_info->first_logical_sect &=3D ~QCOW_LOG_STORE_ENTRY_USED; +} + +static uint64_t qcow2_log_store_tag_by_bucket_idx(QCowLogStore *store, + QCowHashInfo *hash_inf= o, + uint64_t bucket_idx) +{ + uint64_t h[2]; + + h[0] =3D qcow2_dedup_h1(hash_info, store->order); + h[1] =3D qcow2_dedup_h2(hash_info, store->order); + + if (bucket_idx =3D=3D h[0]) { + return h[1]; + } + + if (bucket_idx =3D=3D h[1]) { + return h[0]; + } + + /* should not pass here */ + assert(false); + return 0; +} + +/* This function calculate the order (number of bit) required + * + * @min_nb_entries_required: minimal number of entry the log store must = handle + * @ret: number of bit (order) of the sub hashes fun= ctions + */ +int qcow2_log_store_compute_order(uint64_t min_nb_entries_required) +{ + uint32_t result =3D 1; + uint64_t min =3D min_nb_entries_required / QCOW_LOG_STORE_BUCKET_SIZ= E; + while (min) { + min >>=3D 1; + result++; + } + return result; +} + +/* This function get a hash info from the table given it's table index + * + * It get the result only if the given hash info as the same hash as the= one + * stored in the table + * + * @store: the store to work on + * @hash_info: the QCowHashInfo we are working with (will be overwrit= ten) + * @in_table_idx: the index of the entry in the cuckoo hash table + * @ret: true if hash match else false + */ +static int qcow2_log_store_try_get_hash_info(QCowLogStore *store, + QCowHashInfo *hash_info, + uint64_t in_table_idx) +{ + bool match; + QCowHashInfo *entry; + + entry =3D &store->hash_table[in_table_idx]; + + match =3D !memcmp(&entry->hash.data, + &hash_info->hash.data, + HASH_LENGTH); + + if (match) { + memcpy(hash_info, entry, sizeof(QCowHashInfo)); + hash_info->first_logical_sect &=3D ~QCOW_LOG_STORE_ENTRY_USED; + } + + return match; +} + +/* This functions try to get a QCowHashInfo the store hash_table + * + * @store: the store to work on + * @hash_info: the QCowHashInfo we are indexing + * @bucket_idx: the bucket index + * @index_only: if true only return the in bucket index and do not over= writte + * the QCowHashInfo parameter. + * This is used to check if we can overwrite a given ent= ry in + * the bucket. + * @ret: integer in range [0, (QCOW_LOG_STORE_BUCKET_SIZE - 1)] = or + * -1 if not found + */ +static int qcow2_log_store_try_get(QCowLogStore *store, + QCowHashInfo *hash_info, + uint64_t bucket_idx, + bool index_only) +{ + int i; + bool got =3D false; + uint64_t in_table_idx; + QCowHashInfo dummy; + + /* if index_only is true we want to work on a dummy QCowHashInfo cop= y */ + memcpy(&dummy, hash_info, sizeof(QCowHashInfo)); + + /* for each bucket entry */ + for (i =3D 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) { + in_table_idx =3D bucket_idx * QCOW_LOG_STORE_BUCKET_SIZE + i; + + got =3D qcow2_log_store_try_get_hash_info(store, + index_only ? &dummy :has= h_info, + in_table_idx); + + /* the hash info is indexed by this entry */ + if (got) { + return i; + } + } + + /* no entry to overwrite -> not found */ + return -1; +} + +/* This function look for a given hash info in a log store + * + * @store: The QCowLogStore to lookup into + * @hash_info: The QCowHashInfo to complete : at last the hash must be f= illed. + * @ret: true on successfull lookup, false if not found + */ +bool qcow2_log_store_get(QCowLogStore *store, QCowHashInfo *hash_info) +{ + uint64_t h[2], bucket_idx; + int ret =3D 0; + int i; + + h[0] =3D qcow2_dedup_h1(hash_info, store->order); + h[1] =3D qcow2_dedup_h2(hash_info, store->order); + + /* try to get the hash info at the two alternate location */ + for (i =3D 0; i < 2; i++) { + bucket_idx =3D h[i]; + + ret =3D qcow2_log_store_try_get(store, hash_info, bucket_idx, fa= lse); + + /* on success (in range [0, (QCOW_LOG_STORE_BUCKET_SIZE - 1)]) *= / + if (ret >=3D 0) { + return true; + } + } + + /* not found */ + return false; +} + +/* This function try to append an entry in a new journal given it's tabl= e index + * + * The nop operation (no append to do) is considered as a success + * + * @table: the hash table to work on + * @dest: the new journal to insert the entry into + * @in_table_idx: the index in the hash table of the entry to append + * @ret: 0 on success, -1 if the new journal is full, + * -EFBIG on bogus entry offset, -errno on error + */ +static int qcow2_log_store_migrate_journal_entry_by_index(BlockDriverSta= te *bs, + QCowJournal *dest= , + QCowHashInfo *tab= le, + uint64_t in_table= _idx) +{ + QCowJournalEntry journal_entry; + QCowHashInfo hash_info; + int64_t offset; + + /* return success on nop (the entry is not used) */ + if (!qcow2_log_store_is_used_by_table_idx(table, in_table_idx)) { + return 0; + } + + qcow2_log_store_copy_from_hash_table(&hash_info, + table, + in_table_idx); + + qcow2_journal_entry_from_hash_info(&journal_entry, &hash_info); + + /* get the new offset by writing the journal entry in the new journa= l */ + offset =3D qcow2_journal_append(bs, dest, &journal_entry); + + /* error or full */ + if (offset < 0) { + return offset; + } + + return 0; +} + +static void qcow2_log_store_copy_hash_table(QCowLogStore *store) +{ + store->hash_table_copy =3D g_new0(QCowHashInfo, + qcow2_log_store_nb_entries(store->or= der)); + memcpy(store->hash_table_copy, store->hash_table, + qcow2_log_store_nb_entries(store->order) * sizeof(QCowHashInf= o)); +} + +/* this function do the real work of packing the store journal into a ne= w one + * + * The function take care of flushing the new journal + * + * @store: the store to work on + * @new_journal: the destination journal + * @ret: 0 on success, -1 if the new journal is full, + * -EFBIG on bogus entry offset, -errno on error + */ +static int qcow2_log_store_do_pack_journal(BlockDriverState *bs, + QCowLogStore *store, + QCowJournal *dest) +{ + uint64_t i; + int ret =3D 0; + + /* we will insert each used QCowLogStore entry into the new journal = */ + for (i =3D 0; i < qcow2_log_store_nb_entries(store->order); i++) { + ret =3D qcow2_log_store_migrate_journal_entry_by_index(bs, + dest, + store->hash_tab= le_copy, + i); + if (ret < 0) { + return ret; + } + } + + return qcow2_journal_flush(bs, dest); +} + +static void qcow2_log_store_commit_new_journal(BlockDriverState *bs, + QCowLogStore *store, + QCowJournal *new_journal) +{ + /* cleanup old data */ + qcow2_journal_disk_deallocate(bs, &store->journal); + g_free(store->hash_table); + /* commit new journal and hash table */ + memcpy(&store->journal, new_journal, sizeof(new_journal));; + store->hash_table =3D store->hash_table_copy; + store->hash_table_copy =3D NULL; +} + +/* This function pack a log store journal into a new one + * + * This is used to manage pathological cyclic insertion/deletion cases o= r the + * case where a log store journal is full due to too much entries insert= ed by + * other QCOW2 users. + * As this event should be rare we don't bother recycling the journal + * disk space. As a consequence the QCOW2 file may grow. + * + * @store: the log store from which the journal must be packed + * @need_save: will be set to true if new journal config must be saved l= ater + by the upper layers + * @ret: 0 on success, -1 if the new journal is full, + * -EFBIG on bogus entry offset, -errno on error + */ +static int qcow2_log_store_pack_journal(BlockDriverState *bs, + QCowLogStore *store, + bool *need_save) +{ + QCowJournal new_journal; + int ret =3D 0; + /* we won't need to save journal config on failure */ + *need_save =3D false; + + /* TODO: insert journal replay here if other part of QCOW make use o= f it */ + + /* we flush the journal first */ + ret =3D qcow2_journal_flush(bs, &store->journal); + + if (ret < 0) { + return ret; + } + + /* we allocate a new journal, we will copy used entry into */ + qcow2_journal_init(bs, &new_journal); + ret =3D qcow2_journal_disk_allocate(bs, + &new_journal, + qcow2_log_store_journal_size(store->order)); + + if (ret < 0) { + return ret; + } + + /* copy the hash table to be able to abort packing at any time */ + qcow2_log_store_copy_hash_table(store); + + ret =3D qcow2_log_store_do_pack_journal(bs, + store, + &new_journal); + + if (ret < 0) { + goto free_dealloc_error_exit; + } + + qcow2_log_store_commit_new_journal(bs, store, &new_journal); + + /* tell the upper layer to save the new journal config to disk */ + *need_save =3D true; + + return 0; + +free_dealloc_error_exit: + g_free(store->hash_table_copy); + qcow2_journal_disk_deallocate(bs, &new_journal); + qcow2_journal_cleanup(&new_journal); + return ret; +} + +/* This function append a QCowHashInfo to the journal + * + * The function will try to pack the journal and redo the insert if the = journal + * is full + * + * store: the QCowLogStore to work with + * hash_info: the QCowHashInfo to insert into the store + * @need_save: will be set to true if new journal info must be saved lat= er + * ret: the offset of the QCowJournalEntry on success, -1 if a jo= urnal + * packing failed or journal is full and unpackable, + -EFBIG on bogus entry offset while packing, + * -errno on error + */ +static int64_t qcow2_log_store_append_to_journal(BlockDriverState *bs, + QCowLogStore *store, + QCowHashInfo *hash_info= , + bool *need_save) +{ + QCowJournalEntry journal_entry; + int ret =3D 0; + int64_t offset; + *need_save =3D false; + + qcow2_journal_entry_from_hash_info(&journal_entry, hash_info); + + offset =3D qcow2_journal_append(bs, &store->journal, &journal_entry)= ; + + /* the journal is full : maybe a pathological cyclic insertion/delet= ion + * pattern made it full. + * We will create a new one and repack the current journal into it. + */ + if (offset =3D=3D -1) { + ret =3D qcow2_log_store_pack_journal(bs, store, need_save); + + if (ret < 0) { + return ret; + } + + offset =3D qcow2_journal_append(bs, &store->journal, &journal_en= try); + } + + return offset; +} + +/* This function find the first free entry index + * + * @store: the store to work on + * @bucket_idx: the bucket to scan index + * @ret: integer in range [0, (QCOW_LOG_STORE_BUCKET_SIZE - 1)] + * or -1 if no room found + */ +static int qcow2_log_store_first_free_entry_idx(QCowLogStore *store, + uint64_t bucket_idx) +{ + uint64_t in_table_idx; + bool used; + int i; + + /* for each entry of the bucket */ + for (i =3D 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) { + in_table_idx =3D bucket_idx * QCOW_LOG_STORE_BUCKET_SIZE + i; + + /* return index of the first free entry */ + used =3D qcow2_log_store_is_used_by_table_idx(store->hash_table, + in_table_idx); + + if (!used) { + return i; + } + } + + /* no free entry found -> place not found */ + return -1; +} + +/* This function recursively kick an entry to it's alternate location + * + * @store: the QCowLogStore to work on + * @entry: the entry to kick + * @entry_bucket_idx: the index of the entry to kick (in bucket) + * @ret: 0 on success, -1 if the hash table is full, -2 on = kick + * path loop + */ +static int qcow2_log_store_kick_entry(QCowLogStore *store, + QCowHashInfo *entry, + uint64_t entry_bucket_idx) +{ + QCowHashInfo *dest_entry; + int ret =3D 0, in_bucket_idx; + uint64_t in_table_idx, tag; + + /* is hash table full */ + if (store->nb_kicks =3D=3D (QCOW_LOG_STORE_MAX_KICKS - 1)) { + return -1; + } + + store->nb_kicks++; + + tag =3D qcow2_log_store_tag_by_bucket_idx(store, entry, entry_bucket= _idx); + + /* look if we can insert the kicked entry in the alternate location = */ + in_bucket_idx =3D qcow2_log_store_first_free_entry_idx(store, tag); + + /* no room found in the alternate location we must kick again an ent= ry */ + if (in_bucket_idx =3D=3D -1) { + in_bucket_idx =3D qcow2_log_store_get_kick_idx(store, tag); + + /* the kick path is doing a loop */ + if (in_bucket_idx =3D=3D -2) { + return -2; + } + + in_table_idx =3D tag * QCOW_LOG_STORE_BUCKET_SIZE + in_bucket_id= x; + dest_entry =3D &store->hash_table[in_table_idx]; + + /* kick the random entry */ + ret =3D qcow2_log_store_kick_entry(store, + dest_entry, + tag); + + /* if table is full or kick path make a loop: propagate to the c= aller */ + if (ret < 0) { + qcow2_log_store_unset_from_kick_path(store, in_table_idx); + return ret; + } + } + + /* free room found, compute the index in the hash table */ + in_table_idx =3D tag * QCOW_LOG_STORE_BUCKET_SIZE + in_bucket_idx; + /* the old bucket index become the alternate location (tag) */ + qcow2_log_store_copy_to_hash_table(store->hash_table, + in_table_idx, + entry); + memset(entry, 0, sizeof(QCowHashInfo)); + qcow2_log_store_unset_from_kick_path(store, in_table_idx); + return 0; +} + +static int64_t qcow2_log_store_find_overwrite_idx(QCowLogStore *store, + QCowHashInfo *hash_inf= o) +{ + uint64_t h[2]; + int i, idx; + + h[0] =3D qcow2_dedup_h1(hash_info, store->order); + h[1] =3D qcow2_dedup_h2(hash_info, store->order); + + for (i =3D 0; i < 2; i++) { + idx =3D qcow2_log_store_try_get(store, + hash_info, + h[i], /* index */ + true); + + /* no suitable place found -> continue */ + if (idx =3D=3D -1) { + continue; + } + + return h[i] * QCOW_LOG_STORE_BUCKET_SIZE + idx; + } + + return -1; +} + +static int64_t qcow2_log_store_find_any_free_idx(QCowLogStore *store, + QCowHashInfo *hash_info= ) +{ + uint64_t h[2]; + int i, idx; + + h[0] =3D qcow2_dedup_h1(hash_info, store->order); + h[1] =3D qcow2_dedup_h2(hash_info, store->order); + + /* try to find a free entry in either alternate location */ + for (i =3D 0; i < 2; i++) { + idx =3D qcow2_log_store_first_free_entry_idx(store, h[i]); + + /* no suitable place found -> continue */ + if (idx =3D=3D -1) { + continue; + } + + return h[i] * QCOW_LOG_STORE_BUCKET_SIZE + idx; + } + + /* no suitable place found */ + return -1; +} + +/* this function scan for the two alternative buckets of a given QCowHas= hInfo + * and return entry index if a suitable place is found + * + * @store: the QCowLogStore to work on + * @hash_info: the QCowHashInfo we are trying to index + * @ret: integer in range [0, (QCOW_LOG_STORE_BUCKET_SIZE - 1)] o= r + * -1 if room not found + */ +static int64_t qcow2_log_store_scan_canditates_buckets(QCowLogStore *sto= re, + QCowHashInfo *has= h_info) +{ + int ret =3D 0; + + /* we try to overwrite in the two alternate location first because + * a kick may have swapped entries and we don't want duplicates. + */ + ret =3D qcow2_log_store_find_overwrite_idx(store, hash_info); + + /* place found return it */ + if (ret !=3D -1) { + return ret; + } + + /* else try to find any free entry in the two alternate location */ + return qcow2_log_store_find_any_free_idx(store, hash_info); +} + +/* This function will return the index of the hash table entry to copy i= nto + * + * It's a two step process : first the two alternate buckets are checked= . + * If they are not suitable the first entry of the first bucket is k= icked to + * it's alternate location. This process is recursive up to + * QCOW_LOG_STORE_MAX_KICKS times. + * + * @store: the QCowLogStore to work on + * @hash_info: the QCowHashInfo we are trying to index + * @ret: the index (in the hash table) or -1 if the hash table is = full + */ +static int64_t qcow2_log_store_get_in_table_idx(QCowLogStore *store, + QCowHashInfo *hash_info) +{ + QCowHashInfo *dest_entry; + int ret =3D 0, kick_retry; + uint64_t h[2]; + int64_t in_table_idx; + int in_bucket_idx; + + /* first check if any of the two candidate buckets has some room to = do an + * insertion. If so return the index + */ + in_table_idx =3D qcow2_log_store_scan_canditates_buckets(store, hash= _info); + + /* room available */ + if (in_table_idx >=3D 0) { + return in_table_idx; + } + + h[0] =3D qcow2_dedup_h1(hash_info, store->order); + h[1] =3D qcow2_dedup_h2(hash_info, store->order); + + /* we try 10 time to kick in order to try differents kick path and + * avoid kick path loops + */ + kick_retry =3D 10; + do { + store->nb_kicks =3D 0; + /* clear previous kick path */ + qcow2_log_store_unset_from_kick_path(store, in_table_idx); + + /* take the first not yet pushed entry of the h[0] bucket */ + in_bucket_idx =3D qcow2_log_store_get_kick_idx(store, h[0]); + + /* kick path cycle found */ + if (in_bucket_idx =3D=3D -2) { + in_table_idx =3D 0; + continue; + } + + in_table_idx =3D h[0] * QCOW_LOG_STORE_BUCKET_SIZE + in_bucket_i= dx; + + dest_entry =3D &store->hash_table[in_table_idx]; + + ret =3D qcow2_log_store_kick_entry(store, dest_entry, h[0]); + } while (ret =3D=3D -2 && kick_retry); + + /* hash table is full, or kick path was cycling multiple time */ + if (ret < 0) { + return -1; + } + + qcow2_log_store_unset_from_kick_path(store, in_table_idx); + return in_table_idx; +} + +/* This function insert a given QCowHashInfo in the log store + * + * This function will overwrite an already inserted entry + * + * @store: the QCowLogStore to insert into + * @hash_info: the QCowHashInfo to insert : at last the hash must be fil= led + * @need_save: will be set to true if new journal info must be saved lat= er + * @ret: 0 on success, 1 if the log store is full, -errno on error + */ +int qcow2_log_store_insert(BlockDriverState *bs, QCowLogStore *store, + QCowHashInfo *hash_info, bool *need_save) +{ + int64_t in_table_idx, offset; + + *need_save =3D false; + + /* find the place where to insert the entry in the cuckoo hash table= */ + in_table_idx =3D qcow2_log_store_get_in_table_idx(store, hash_info); + + /* hash table is full so log store is full -> need to freeze log sto= re */ + if (in_table_idx =3D=3D -1) { + return 1; + } + + offset =3D qcow2_log_store_append_to_journal(bs, store, hash_info, n= eed_save); + + /* the journal is full and packing failed so we should treat the log + * store as full + */ + if (offset =3D=3D -1) { + return 1; + } + + /* journal appending failed */ + if (offset < 0) { + return offset; + } + + qcow2_log_store_copy_to_hash_table(store->hash_table, + in_table_idx, + hash_info); + + return 0; +} + +/* This function delegate a flush to the QCowJournal of the log store + * + *=C2=A0@store: the QCowLogStore to work with + * @ret: 0 on success, -errno on error + */ +int qcow2_log_store_flush(BlockDriverState *bs, QCowLogStore *store) +{ + return qcow2_journal_flush(bs, &store->journal); +} + +/* This function flush and stop the log store journal + * + * @store: the QCowLogStore to work with + * @ret: 0 on success, -errno on error + */ +int qcow2_log_store_stop(BlockDriverState *bs, QCowLogStore *store) +{ + return qcow2_journal_stop(bs, &store->journal); +} + +/* This function index a QCowJournalEntry into the cuckoo hash table=20 + * + * @store: the QCowLogStore to work on + * @entry: the journal entry to convert validate and insert + * @offset: the offset of the entry in the journal + * @ret: 0 on success, -EINVAL on bogus journal entry + */ +static int qcow2_log_store_index_journal_entry(BlockDriverState *bs, + QCowLogStore *store, + QCowJournalEntry *entry, + uint64_t offset) +{ + QCowHashInfo hash_info; + int64_t in_table_idx; + int ret =3D 0; + + ret =3D qcow2_hash_info_from_journal_entry(&hash_info, entry); + + /* bogus entry size or invalid type */ + if (ret < 0) { + return ret; + } + + /* find the place where to insert the entry in the the cuckoo hash t= able */ + in_table_idx =3D qcow2_log_store_get_in_table_idx(store, &hash_info)= ; + + /* the cuckoo hash table should not be full when rebuilding */ + assert(in_table_idx >=3D 0); + + /* we store the offset and the tag in the in ram hash table */ + qcow2_log_store_copy_to_hash_table(store->hash_table, + in_table_idx, + &hash_info); + + return 0; +} + +/* This function rebuild the log store from its journal + * + * @store: the QCowLogStore to rebuild + * @ret: 0 on succes, -errno on error, -EINVAL on bogus journal entry + */ +int qcow2_log_store_rebuild_from_journal(BlockDriverState *bs, + QCowLogStore *store) +{ + QCowJournalEntry entry; + uint64_t offset =3D 0; + int ret =3D 0, size =3D 0; + + /* we walk through the journal (qcow2_journal_read is an iterator) *= / + size =3D qcow2_journal_read(bs, &store->journal, offset, &entry); + /* we stop on obviously bogus sized entry and end of journal */ + while (size >=3D QCOW_LOG_END_SIZE && entry.type !=3D QCOW_LOG_NONE)= { + /* it's a hash insert it */ + if (entry.type =3D=3D QCOW_LOG_HASH) { + ret =3D qcow2_log_store_index_journal_entry(bs, + store, + &entry, + offset); + + } + /* bogus entry -> return error */ + if (ret < 0) { + return ret; + } + /* read next journal entry */ + offset +=3D size; + size =3D qcow2_journal_read(bs, &store->journal, offset, &entry)= ; + } + + /* bogus entry size */ + if (size >=3D 0 && + size < QCOW_LOG_END_SIZE && + entry.type !=3D QCOW_LOG_NONE) { + return -EINVAL; + } + + /* on error */ + if (size < 0) { + return size; + } + + /* now inform the journal that it must append to existing data */ + return qcow2_journal_resume(bs, &store->journal, offset); +} + +/* the on disk size of a log store dump */ +size_t qcow2_log_store_dump_size(void) +{ + /* sizeof order + sizeof journal */ + return sizeof(uint32_t) + qcow2_journal_dump_size(); +} + +/* This function dump a log store infos into buffer + * + * @buf: the buffer to dump into + * @store: the log store to dump + * @ret: the size of the dump + */ +size_t qcow2_log_store_dump(uint8_t *buf, QCowLogStore *store) +{ + uint32_t *buf32 =3D (uint32_t *) buf; + + buf32[0] =3D cpu_to_be32(store->order); + + return sizeof(store->order) + + qcow2_journal_dump(buf + sizeof(store->order), + &store->journal); +} + +/* this function parse a log store from disk + * + * @store: the store to parse into + * @buf: the buffer to parse from + * @ret: the size of the parsed data + */ +size_t qcow2_log_store_parse(QCowLogStore *store, uint8_t *buf) +{ + uint32_t *buf32 =3D (uint32_t *) buf; + uint32_t order; + + order =3D be32_to_cpu(buf32[0]); + qcow2_log_store_reset(store, order); + qcow2_journal_parse(&store->journal, buf + sizeof(order)); + return qcow2_log_store_dump_size(); +} diff --git a/block/qcow2.h b/block/qcow2.h index adde631..d347471 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -635,4 +635,30 @@ size_t qcow2_journal_dump_size(void); size_t qcow2_journal_dump(uint8_t *buf, QCowJournal *journal); void qcow2_journal_parse(QCowJournal *journal, uint8_t *buf); =20 +/* qcow2-log-store.c functions */ + +uint64_t qcow2_dedup_h1(QCowHashInfo *hash_info, uint32_t order); +uint64_t qcow2_dedup_h2(QCowHashInfo *hash_info, uint32_t order); +uint64_t qcow2_log_store_nb_entries(uint32_t order); +int qcow2_log_store_create(BlockDriverState *bs, + QCowLogStore *store, + uint32_t order); +int qcow2_log_store_recycle(BlockDriverState *bs, + QCowLogStore *store); +void qcow2_log_store_init(BlockDriverState *bs, + QCowLogStore *store, + uint32_t order); +void qcow2_log_store_cleanup(QCowLogStore *store); +int qcow2_log_store_compute_order(uint64_t min_nb_entries_required); +bool qcow2_log_store_get(QCowLogStore *store, QCowHashInfo *hash_info); +int qcow2_log_store_insert(BlockDriverState *bs, QCowLogStore *store, + QCowHashInfo *hash_info, bool *need_save); +int qcow2_log_store_flush(BlockDriverState *bs, QCowLogStore *store); +int qcow2_log_store_stop(BlockDriverState *bs, QCowLogStore *store); +int qcow2_log_store_rebuild_from_journal(BlockDriverState *bs, + QCowLogStore *store); +size_t qcow2_log_store_dump_size(void); +size_t qcow2_log_store_dump(uint8_t *buf, QCowLogStore *store); +size_t qcow2_log_store_parse(QCowLogStore *store, uint8_t *buf); + #endif --=20 1.7.10.4