All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Daniel P. Berrangé" <berrange@redhat.com>
To: Christian Schoenebeck <qemu_oss@crudebyte.com>
Cc: qemu-devel@nongnu.org,
	Antonios Motakis <antonios.motakis@huawei.com>,
	Greg Kurz <groug@kaod.org>
Subject: Re: [Qemu-devel] [PATCH v3 1/5] 9p: mitigates most QID path collisions
Date: Tue, 7 May 2019 13:42:47 +0100	[thread overview]
Message-ID: <20190507124247.GN27205@redhat.com> (raw)
In-Reply-To: <5b5c005fbf4e31c07273468cb022d25a8907bc87.1557093245.git.qemu_oss@crudebyte.com>

On Tue, Apr 23, 2019 at 01:30:03PM +0200, Christian Schoenebeck via Qemu-devel wrote:
> This first patch here is an updated version of Antonios Motakis'
> original 4-patch set (using fixed length 16 bit prefixes), merged to one
> patch:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2018-02/msg02283.html
> 
> * Updated to latest git master, specifically to new qht interface.
> 
> * Merged the original 4 patches to this single patch.

Why did you merge them all into one ?  The split patches were "best
practice" IMHO. The original patch authorship & S-o-B lines could
be preserved if you kept them split as before too.

> Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
> ---
>  fsdev/9p-marshal.h |   4 +-
>  hw/9pfs/9p.c       | 200 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  hw/9pfs/9p.h       |  21 ++++++
>  3 files changed, 204 insertions(+), 21 deletions(-)
> 
> diff --git a/fsdev/9p-marshal.h b/fsdev/9p-marshal.h
> index c8823d878f..d1ad3645c4 100644
> --- a/fsdev/9p-marshal.h
> +++ b/fsdev/9p-marshal.h
> @@ -10,8 +10,8 @@ typedef struct V9fsString
>  typedef struct V9fsQID
>  {
>      int8_t type;
> -    int32_t version;
> -    int64_t path;
> +    uint32_t version;
> +    uint64_t path;
>  } V9fsQID;
>  
>  typedef struct V9fsStat
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index 55821343e5..b9bbdcbaee 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -25,6 +25,7 @@
>  #include "trace.h"
>  #include "migration/blocker.h"
>  #include "sysemu/qtest.h"
> +#include "qemu/xxhash.h"
>  
>  int open_fd_hw;
>  int total_open_fd;
> @@ -571,14 +572,135 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
>                                  P9_STAT_MODE_NAMED_PIPE |   \
>                                  P9_STAT_MODE_SOCKET)
>  
> -/* This is the algorithm from ufs in spfs */
> -static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp)
> +
> +/* creative abuse of qemu_xxhash7, which is based on xxhash */
> +static uint32_t qpp_hash(QppEntry e)
>  {
> -    size_t size;
> +    return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
> +}
> +
> +static uint32_t qpf_hash(QpfEntry e)
> +{
> +    return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
> +}
> +
> +static bool qpp_cmp_func(const void *obj, const void *userp)
> +{
> +    const QppEntry *e1 = obj, *e2 = userp;
> +    return (e1->dev == e2->dev) && (e1->ino_prefix == e2->ino_prefix);
> +}
> +
> +static bool qpf_cmp_func(const void *obj, const void *userp)
> +{
> +    const QpfEntry *e1 = obj, *e2 = userp;
> +    return (e1->dev == e2->dev) && (e1->ino == e2->ino);
> +}
> +
> +static void qp_table_remove(void *p, uint32_t h, void *up)
> +{
> +    g_free(p);
> +}
> +
> +static void qp_table_destroy(struct qht *ht)
> +{
> +    qht_iter(ht, qp_table_remove, NULL);
> +    qht_destroy(ht);
> +}
> +
> +static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
> +                            uint64_t *path)
> +{
> +    QpfEntry lookup = {
> +        .dev = stbuf->st_dev,
> +        .ino = stbuf->st_ino
> +    }, *val;
> +    uint32_t hash = qpf_hash(lookup);
> +
> +    /* most users won't need the fullmap, so init the table lazily */
> +    if (!pdu->s->qpf_table.map) {
> +        qht_init(&pdu->s->qpf_table, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
> +    }
> +
> +    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
> +
> +    if (!val) {
> +        if (pdu->s->qp_fullpath_next == 0) {
> +            /* no more files can be mapped :'( */
> +            return -ENFILE;
> +        }
> +
> +        val = g_malloc0(sizeof(QppEntry));
> +        if (!val) {
> +            return -ENOMEM;
> +        }
> +        *val = lookup;
> +
> +        /* new unique inode and device combo */
> +        val->path = pdu->s->qp_fullpath_next++;
> +        pdu->s->qp_fullpath_next &= QPATH_INO_MASK;
> +        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
> +    }
> +
> +    *path = val->path;
> +    return 0;
> +}
> +
> +/* stat_to_qid needs to map inode number (64 bits) and device id (32 bits)
> + * to a unique QID path (64 bits). To avoid having to map and keep track
> + * of up to 2^64 objects, we map only the 16 highest bits of the inode plus
> + * the device id to the 16 highest bits of the QID path. The 48 lowest bits
> + * of the QID path equal to the lowest bits of the inode number.
> + *
> + * This takes advantage of the fact that inode number are usually not
> + * random but allocated sequentially, so we have fewer items to keep
> + * track of.
> + */
> +static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf,
> +                                uint64_t *path)
> +{
> +    QppEntry lookup = {
> +        .dev = stbuf->st_dev,
> +        .ino_prefix = (uint16_t) (stbuf->st_ino >> 48)
> +    }, *val;
> +    uint32_t hash = qpp_hash(lookup);
> +
> +    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
> +
> +    if (!val) {
> +        if (pdu->s->qp_prefix_next == 0) {
> +            /* we ran out of prefixes */
> +            return -ENFILE;
> +        }
> +
> +        val = g_malloc0(sizeof(QppEntry));
> +        if (!val) {
> +            return -ENOMEM;
> +        }
> +        *val = lookup;
> +
> +        /* new unique inode prefix and device combo */
> +        val->qp_prefix = pdu->s->qp_prefix_next++;
> +        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
> +    }
> +
> +    *path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK);
> +    return 0;
> +}
> +
> +static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
> +{
> +    int err;
> +
> +    /* map inode+device to qid path (fast path) */
> +    err = qid_path_prefixmap(pdu, stbuf, &qidp->path);
> +    if (err == -ENFILE) {
> +        /* fast path didn't work, fal back to full map */
> +        err = qid_path_fullmap(pdu, stbuf, &qidp->path);
> +    }
> +    if (err) {
> +        return err;
> +    }
>  
> -    memset(&qidp->path, 0, sizeof(qidp->path));
> -    size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
> -    memcpy(&qidp->path, &stbuf->st_ino, size);
>      qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
>      qidp->type = 0;
>      if (S_ISDIR(stbuf->st_mode)) {
> @@ -587,6 +709,8 @@ static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp)
>      if (S_ISLNK(stbuf->st_mode)) {
>          qidp->type |= P9_QID_TYPE_SYMLINK;
>      }
> +
> +    return 0;
>  }
>  
>  static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
> @@ -599,7 +723,10 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
>      if (err < 0) {
>          return err;
>      }
> -    stat_to_qid(&stbuf, qidp);
> +    err = stat_to_qid(pdu, &stbuf, qidp);
> +    if (err < 0) {
> +        return err;
> +    }
>      return 0;
>  }
>  
> @@ -830,7 +957,10 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
>  
>      memset(v9stat, 0, sizeof(*v9stat));
>  
> -    stat_to_qid(stbuf, &v9stat->qid);
> +    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
> +    if (err < 0) {
> +        return err;
> +    }
>      v9stat->mode = stat_to_v9mode(stbuf);
>      v9stat->atime = stbuf->st_atime;
>      v9stat->mtime = stbuf->st_mtime;
> @@ -891,7 +1021,7 @@ static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
>  #define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
>  
>  
> -static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
> +static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
>                                  V9fsStatDotl *v9lstat)
>  {
>      memset(v9lstat, 0, sizeof(*v9lstat));
> @@ -913,7 +1043,7 @@ static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
>      /* Currently we only support BASIC fields in stat */
>      v9lstat->st_result_mask = P9_STATS_BASIC;
>  
> -    stat_to_qid(stbuf, &v9lstat->qid);
> +    return stat_to_qid(pdu, stbuf, &v9lstat->qid);
>  }
>  
>  static void print_sg(struct iovec *sg, int cnt)
> @@ -1115,7 +1245,6 @@ static void coroutine_fn v9fs_getattr(void *opaque)
>      uint64_t request_mask;
>      V9fsStatDotl v9stat_dotl;
>      V9fsPDU *pdu = opaque;
> -    V9fsState *s = pdu->s;
>  
>      retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
>      if (retval < 0) {
> @@ -1136,7 +1265,10 @@ static void coroutine_fn v9fs_getattr(void *opaque)
>      if (retval < 0) {
>          goto out;
>      }
> -    stat_to_v9stat_dotl(s, &stbuf, &v9stat_dotl);
> +    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
> +    if (retval < 0) {
> +        goto out;
> +    }
>  
>      /*  fill st_gen if requested and supported by underlying fs */
>      if (request_mask & P9_STATS_GEN) {
> @@ -1381,7 +1513,10 @@ static void coroutine_fn v9fs_walk(void *opaque)
>              if (err < 0) {
>                  goto out;
>              }
> -            stat_to_qid(&stbuf, &qid);
> +            err = stat_to_qid(pdu, &stbuf, &qid);
> +            if (err < 0) {
> +                goto out;
> +            }
>              v9fs_path_copy(&dpath, &path);
>          }
>          memcpy(&qids[name_idx], &qid, sizeof(qid));
> @@ -1483,7 +1618,10 @@ static void coroutine_fn v9fs_open(void *opaque)
>      if (err < 0) {
>          goto out;
>      }
> -    stat_to_qid(&stbuf, &qid);
> +    err = stat_to_qid(pdu, &stbuf, &qid);
> +    if (err < 0) {
> +        goto out;
> +    }
>      if (S_ISDIR(stbuf.st_mode)) {
>          err = v9fs_co_opendir(pdu, fidp);
>          if (err < 0) {
> @@ -1593,7 +1731,10 @@ static void coroutine_fn v9fs_lcreate(void *opaque)
>          fidp->flags |= FID_NON_RECLAIMABLE;
>      }
>      iounit =  get_iounit(pdu, &fidp->path);
> -    stat_to_qid(&stbuf, &qid);
> +    err = stat_to_qid(pdu, &stbuf, &qid);
> +    if (err < 0) {
> +        goto out;
> +    }
>      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
>      if (err < 0) {
>          goto out;
> @@ -2327,7 +2468,10 @@ static void coroutine_fn v9fs_create(void *opaque)
>          }
>      }
>      iounit = get_iounit(pdu, &fidp->path);
> -    stat_to_qid(&stbuf, &qid);
> +    err = stat_to_qid(pdu, &stbuf, &qid);
> +    if (err < 0) {
> +        goto out;
> +    }
>      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
>      if (err < 0) {
>          goto out;
> @@ -2384,7 +2528,10 @@ static void coroutine_fn v9fs_symlink(void *opaque)
>      if (err < 0) {
>          goto out;
>      }
> -    stat_to_qid(&stbuf, &qid);
> +    err = stat_to_qid(pdu, &stbuf, &qid);
> +    if (err < 0) {
> +        goto out;
> +    }
>      err =  pdu_marshal(pdu, offset, "Q", &qid);
>      if (err < 0) {
>          goto out;
> @@ -3064,7 +3211,10 @@ static void coroutine_fn v9fs_mknod(void *opaque)
>      if (err < 0) {
>          goto out;
>      }
> -    stat_to_qid(&stbuf, &qid);
> +    err = stat_to_qid(pdu, &stbuf, &qid);
> +    if (err < 0) {
> +        goto out;
> +    }
>      err = pdu_marshal(pdu, offset, "Q", &qid);
>      if (err < 0) {
>          goto out;
> @@ -3222,7 +3372,10 @@ static void coroutine_fn v9fs_mkdir(void *opaque)
>      if (err < 0) {
>          goto out;
>      }
> -    stat_to_qid(&stbuf, &qid);
> +    err = stat_to_qid(pdu, &stbuf, &qid);
> +    if (err < 0) {
> +        goto out;
> +    }
>      err = pdu_marshal(pdu, offset, "Q", &qid);
>      if (err < 0) {
>          goto out;
> @@ -3633,6 +3786,11 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
>          goto out;
>      }
>  
> +    /* QID path hash table. 1 entry ought to be enough for anybody ;) */
> +    qht_init(&s->qpp_table, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
> +    s->qp_prefix_next = 1; /* reserve 0 to detect overflow */
> +    s->qp_fullpath_next = 1;
> +
>      s->ctx.fst = &fse->fst;
>      fsdev_throttle_init(s->ctx.fst);
>  
> @@ -3646,6 +3804,8 @@ out:
>          }
>          g_free(s->tag);
>          g_free(s->ctx.fs_root);
> +        qp_table_destroy(&s->qpp_table);
> +        qp_table_destroy(&s->qpf_table);
>          v9fs_path_free(&path);
>      }
>      return rc;
> @@ -3658,6 +3818,8 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
>      }
>      fsdev_throttle_cleanup(s->ctx.fst);
>      g_free(s->tag);
> +    qp_table_destroy(&s->qpp_table);
> +    qp_table_destroy(&s->qpf_table);
>      g_free(s->ctx.fs_root);
>  }
>  
> diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
> index 8883761b2c..44112ea97f 100644
> --- a/hw/9pfs/9p.h
> +++ b/hw/9pfs/9p.h
> @@ -8,6 +8,7 @@
>  #include "fsdev/9p-iov-marshal.h"
>  #include "qemu/thread.h"
>  #include "qemu/coroutine.h"
> +#include "qemu/qht.h"
>  
>  enum {
>      P9_TLERROR = 6,
> @@ -235,6 +236,22 @@ struct V9fsFidState
>      V9fsFidState *rclm_lst;
>  };
>  
> +#define QPATH_INO_MASK        (((unsigned long)1 << 48) - 1)
> +
> +/* QID path prefix entry, see stat_to_qid */
> +typedef struct {
> +    dev_t dev;
> +    uint16_t ino_prefix;
> +    uint16_t qp_prefix;
> +} QppEntry;
> +
> +/* QID path full entry, as above */
> +typedef struct {
> +    dev_t dev;
> +    ino_t ino;
> +    uint64_t path;
> +} QpfEntry;
> +
>  struct V9fsState
>  {
>      QLIST_HEAD(, V9fsPDU) free_list;
> @@ -256,6 +273,10 @@ struct V9fsState
>      Error *migration_blocker;
>      V9fsConf fsconf;
>      V9fsQID root_qid;
> +    struct qht qpp_table;
> +    struct qht qpf_table;
> +    uint16_t qp_prefix_next;
> +    uint64_t qp_fullpath_next;
>  };
>  
>  /* 9p2000.L open flags */
> -- 
> 2.11.0
> 
> 

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


  reply	other threads:[~2019-05-07 12:44 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-06 14:37 [Qemu-devel] [PATCH v3 0/5] 9p: Fix file ID collisions Christian Schoenebeck via Qemu-devel
2019-04-23 11:30 ` [Qemu-devel] [PATCH v3 1/5] 9p: mitigates most QID path collisions Christian Schoenebeck via Qemu-devel
2019-05-07 12:42   ` Daniel P. Berrangé [this message]
2019-05-07 13:11     ` Christian Schoenebeck via Qemu-devel
2019-05-07 13:13       ` Daniel P. Berrangé
2019-04-23 11:35 ` [Qemu-devel] [PATCH v3 2/5] 9P: trivial cleanup of QID path collision mitigation Christian Schoenebeck via Qemu-devel
2019-05-07 12:43   ` Daniel P. Berrangé
2019-04-23 11:41 ` [Qemu-devel] [PATCH v3 3/5] 9p: persistency of QID path beyond reboots / suspensions Christian Schoenebeck via Qemu-devel
2019-05-03 14:51 ` [Qemu-devel] [PATCH v3 4/5] 9p: use variable length suffixes for inode mapping Christian Schoenebeck via Qemu-devel
2019-05-05 21:41 ` [Qemu-devel] [PATCH v3 5/5] 9p: adds virtfs 'vii' device parameter Christian Schoenebeck via Qemu-devel
2019-05-06 17:58   ` [Qemu-devel] [libvirt patch] qemu: adds support for virtfs 9p argument 'vii' Christian Schoenebeck via Qemu-devel
2019-05-07  9:55     ` Greg Kurz
2019-05-07 12:23       ` Christian Schoenebeck via Qemu-devel
2019-05-07 15:42         ` Greg Kurz
2019-05-07 16:16           ` Christian Schoenebeck via Qemu-devel
2019-05-17  8:40             ` Christian Schoenebeck via Qemu-devel
2019-05-17 12:30               ` Greg Kurz
2019-05-17 13:23                 ` Christian Schoenebeck via Qemu-devel
2019-05-17 14:47                   ` Greg Kurz
2019-05-17 20:53                     ` Christian Schoenebeck via Qemu-devel
2019-05-20 14:05                       ` Greg Kurz
2019-05-22 16:03                         ` Christian Schoenebeck via Qemu-devel
2019-06-03  6:57                           ` Greg Kurz
2019-06-03  9:17                             ` Christian Schoenebeck via Qemu-devel
2019-05-07 12:57     ` Daniel P. Berrangé
2019-05-07 13:48       ` Christian Schoenebeck via Qemu-devel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190507124247.GN27205@redhat.com \
    --to=berrange@redhat.com \
    --cc=antonios.motakis@huawei.com \
    --cc=groug@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu_oss@crudebyte.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.