[Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
@ 2011-03-05 17:52 Aneesh Kumar K.V
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support Aneesh Kumar K.V
                   ` (7 more replies)
  0 siblings, 8 replies; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |  327 ++++++++++++++++++++++++++-------------------------
 hw/9pfs/virtio-9p.h |   22 +++-
 2 files changed, 184 insertions(+), 165 deletions(-)

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 27e7750..a9f52c6 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -453,7 +453,7 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
     f = qemu_mallocz(sizeof(V9fsFidState));
 
     f->fid = fid;
-    f->fid_type = P9_FID_NONE;
+    f->fsmap.fid_type = P9_FID_NONE;
 
     f->next = s->fid_list;
     s->fid_list = f;
@@ -465,7 +465,7 @@ static int v9fs_xattr_fid_clunk(V9fsState *s, V9fsFidState *fidp)
 {
     int retval = 0;
 
-    if (fidp->fs.xattr.copied_len == -1) {
+    if (fidp->fsmap.fs.xattr.copied_len == -1) {
         /* getxattr/listxattr fid */
         goto free_value;
     }
@@ -473,24 +473,26 @@ static int v9fs_xattr_fid_clunk(V9fsState *s, V9fsFidState *fidp)
      * if this is fid for setxattr. clunk should
      * result in setxattr localcall
      */
-    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
+    if (fidp->fsmap.fs.xattr.len != fidp->fsmap.fs.xattr.copied_len) {
         /* clunk after partial write */
         retval = -EINVAL;
         goto free_out;
     }
-    if (fidp->fs.xattr.len) {
-        retval = v9fs_do_lsetxattr(s, &fidp->path, &fidp->fs.xattr.name,
-                                   fidp->fs.xattr.value,
-                                   fidp->fs.xattr.len,
-                                   fidp->fs.xattr.flags);
+    if (fidp->fsmap.fs.xattr.len) {
+        retval = v9fs_do_lsetxattr(s, &fidp->fsmap.path,
+                                   &fidp->fsmap.fs.xattr.name,
+                                   fidp->fsmap.fs.xattr.value,
+                                   fidp->fsmap.fs.xattr.len,
+                                   fidp->fsmap.fs.xattr.flags);
     } else {
-        retval = v9fs_do_lremovexattr(s, &fidp->path, &fidp->fs.xattr.name);
+        retval = v9fs_do_lremovexattr(s, &fidp->fsmap.path,
+                                      &fidp->fsmap.fs.xattr.name);
     }
 free_out:
-    v9fs_string_free(&fidp->fs.xattr.name);
+    v9fs_string_free(&fidp->fsmap.fs.xattr.name);
 free_value:
-    if (fidp->fs.xattr.value) {
-        qemu_free(fidp->fs.xattr.value);
+    if (fidp->fsmap.fs.xattr.value) {
+        qemu_free(fidp->fsmap.fs.xattr.value);
     }
     return retval;
 }
@@ -513,14 +515,14 @@ static int free_fid(V9fsState *s, int32_t fid)
     fidp = *fidpp;
     *fidpp = fidp->next;
 
-    if (fidp->fid_type == P9_FID_FILE) {
-        v9fs_do_close(s, fidp->fs.fd);
-    } else if (fidp->fid_type == P9_FID_DIR) {
-        v9fs_do_closedir(s, fidp->fs.dir);
-    } else if (fidp->fid_type == P9_FID_XATTR) {
+    if (fidp->fsmap.fid_type == P9_FID_FILE) {
+        v9fs_do_close(s, fidp->fsmap.fs.fd);
+    } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
+        v9fs_do_closedir(s, fidp->fsmap.fs.dir);
+    } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
         retval = v9fs_xattr_fid_clunk(s, fidp);
     }
-    v9fs_string_free(&fidp->path);
+    v9fs_string_free(&fidp->fsmap.path);
     qemu_free(fidp);
 
     return retval;
@@ -573,7 +575,7 @@ static int fid_to_qid(V9fsState *s, V9fsFidState *fidp, V9fsQID *qidp)
     struct stat stbuf;
     int err;
 
-    err = v9fs_do_lstat(s, &fidp->path, &stbuf);
+    err = v9fs_do_lstat(s, &fidp->fsmap.path, &stbuf);
     if (err) {
         return err;
     }
@@ -1218,7 +1220,7 @@ static void v9fs_attach(V9fsState *s, V9fsPDU *pdu)
 
     fidp->uid = n_uname;
 
-    v9fs_string_sprintf(&fidp->path, "%s", "/");
+    v9fs_string_sprintf(&fidp->fsmap.path, "%s", "/");
     err = fid_to_qid(s, fidp, &qid);
     if (err) {
         err = -EINVAL;
@@ -1242,7 +1244,7 @@ static void v9fs_stat_post_lstat(V9fsState *s, V9fsStatState *vs, int err)
         goto out;
     }
 
-    err = stat_to_v9stat(s, &vs->fidp->path, &vs->stbuf, &vs->v9stat);
+    err = stat_to_v9stat(s, &vs->fidp->fsmap.path, &vs->stbuf, &vs->v9stat);
     if (err) {
         goto out;
     }
@@ -1275,7 +1277,7 @@ static void v9fs_stat(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    err = v9fs_do_lstat(s, &vs->fidp->path, &vs->stbuf);
+    err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
     v9fs_stat_post_lstat(s, vs, err);
     return;
 
@@ -1327,7 +1329,7 @@ static void v9fs_getattr(V9fsState *s, V9fsPDU *pdu)
     /* Currently we only support BASIC fields in stat, so there is no
      * need to look at request_mask.
      */
-    err = v9fs_do_lstat(s, &fidp->path, &vs->stbuf);
+    err = v9fs_do_lstat(s, &fidp->fsmap.path, &vs->stbuf);
     v9fs_getattr_post_lstat(s, vs, err);
     return;
 
@@ -1370,7 +1372,7 @@ static void v9fs_setattr_post_chown(V9fsState *s, V9fsSetattrState *vs, int err)
     }
 
     if (vs->v9iattr.valid & (ATTR_SIZE)) {
-        err = v9fs_do_truncate(s, &vs->fidp->path, vs->v9iattr.size);
+        err = v9fs_do_truncate(s, &vs->fidp->fsmap.path, vs->v9iattr.size);
     }
     v9fs_setattr_post_truncate(s, vs, err);
     return;
@@ -1400,8 +1402,9 @@ static void v9fs_setattr_post_utimensat(V9fsState *s, V9fsSetattrState *vs,
         if (!(vs->v9iattr.valid & ATTR_GID)) {
             vs->v9iattr.gid = -1;
         }
-        err = v9fs_do_chown(s, &vs->fidp->path, vs->v9iattr.uid,
-                                                vs->v9iattr.gid);
+        err = v9fs_do_chown(s, &vs->fidp->fsmap.path,
+                            vs->v9iattr.uid,
+                            vs->v9iattr.gid);
     }
     v9fs_setattr_post_chown(s, vs, err);
     return;
@@ -1441,7 +1444,7 @@ static void v9fs_setattr_post_chmod(V9fsState *s, V9fsSetattrState *vs, int err)
         } else {
             times[1].tv_nsec = UTIME_OMIT;
         }
-        err = v9fs_do_utimensat(s, &vs->fidp->path, times);
+        err = v9fs_do_utimensat(s, &vs->fidp->fsmap.path, times);
     }
     v9fs_setattr_post_utimensat(s, vs, err);
     return;
@@ -1470,7 +1473,7 @@ static void v9fs_setattr(V9fsState *s, V9fsPDU *pdu)
     }
 
     if (vs->v9iattr.valid & ATTR_MODE) {
-        err = v9fs_do_chmod(s, &vs->fidp->path, vs->v9iattr.mode);
+        err = v9fs_do_chmod(s, &vs->fidp->fsmap.path, vs->v9iattr.mode);
     }
 
     v9fs_setattr_post_chmod(s, vs, err);
@@ -1520,11 +1523,11 @@ static void v9fs_walk_post_newfid_lstat(V9fsState *s, V9fsWalkState *vs,
 
     vs->name_idx++;
     if (vs->name_idx < vs->nwnames) {
-        v9fs_string_sprintf(&vs->path, "%s/%s", vs->newfidp->path.data,
+        v9fs_string_sprintf(&vs->path, "%s/%s", vs->newfidp->fsmap.path.data,
                                             vs->wnames[vs->name_idx].data);
-        v9fs_string_copy(&vs->newfidp->path, &vs->path);
+        v9fs_string_copy(&vs->newfidp->fsmap.path, &vs->path);
 
-        err = v9fs_do_lstat(s, &vs->newfidp->path, &vs->stbuf);
+        err = v9fs_do_lstat(s, &vs->newfidp->fsmap.path, &vs->stbuf);
         v9fs_walk_post_newfid_lstat(s, vs, err);
         return;
     }
@@ -1550,10 +1553,11 @@ static void v9fs_walk_post_oldfid_lstat(V9fsState *s, V9fsWalkState *vs,
     if (vs->name_idx < vs->nwnames) {
 
         v9fs_string_sprintf(&vs->path, "%s/%s",
-                vs->fidp->path.data, vs->wnames[vs->name_idx].data);
-        v9fs_string_copy(&vs->fidp->path, &vs->path);
+                            vs->fidp->fsmap.path.data,
+                            vs->wnames[vs->name_idx].data);
+        v9fs_string_copy(&vs->fidp->fsmap.path, &vs->path);
 
-        err = v9fs_do_lstat(s, &vs->fidp->path, &vs->stbuf);
+        err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
         v9fs_walk_post_oldfid_lstat(s, vs, err);
         return;
     }
@@ -1601,16 +1605,17 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
     /* FIXME: is this really valid? */
     if (fid == newfid) {
 
-        BUG_ON(vs->fidp->fid_type != P9_FID_NONE);
+        BUG_ON(vs->fidp->fsmap.fid_type != P9_FID_NONE);
         v9fs_string_init(&vs->path);
         vs->name_idx = 0;
 
         if (vs->name_idx < vs->nwnames) {
             v9fs_string_sprintf(&vs->path, "%s/%s",
-                vs->fidp->path.data, vs->wnames[vs->name_idx].data);
-            v9fs_string_copy(&vs->fidp->path, &vs->path);
+                                vs->fidp->fsmap.path.data,
+                                vs->wnames[vs->name_idx].data);
+            v9fs_string_copy(&vs->fidp->fsmap.path, &vs->path);
 
-            err = v9fs_do_lstat(s, &vs->fidp->path, &vs->stbuf);
+            err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
             v9fs_walk_post_oldfid_lstat(s, vs, err);
             return;
         }
@@ -1624,14 +1629,15 @@ static void v9fs_walk(V9fsState *s, V9fsPDU *pdu)
         vs->newfidp->uid = vs->fidp->uid;
         v9fs_string_init(&vs->path);
         vs->name_idx = 0;
-        v9fs_string_copy(&vs->newfidp->path, &vs->fidp->path);
+        v9fs_string_copy(&vs->newfidp->fsmap.path, &vs->fidp->fsmap.path);
 
         if (vs->name_idx < vs->nwnames) {
-            v9fs_string_sprintf(&vs->path, "%s/%s", vs->newfidp->path.data,
+            v9fs_string_sprintf(&vs->path, "%s/%s",
+                                vs->newfidp->fsmap.path.data,
                                 vs->wnames[vs->name_idx].data);
-            v9fs_string_copy(&vs->newfidp->path, &vs->path);
+            v9fs_string_copy(&vs->newfidp->fsmap.path, &vs->path);
 
-            err = v9fs_do_lstat(s, &vs->newfidp->path, &vs->stbuf);
+            err = v9fs_do_lstat(s, &vs->newfidp->fsmap.path, &vs->stbuf);
             v9fs_walk_post_newfid_lstat(s, vs, err);
             return;
         }
@@ -1665,11 +1671,11 @@ static int32_t get_iounit(V9fsState *s, V9fsString *name)
 
 static void v9fs_open_post_opendir(V9fsState *s, V9fsOpenState *vs, int err)
 {
-    if (vs->fidp->fs.dir == NULL) {
+    if (vs->fidp->fsmap.fs.dir == NULL) {
         err = -errno;
         goto out;
     }
-    vs->fidp->fid_type = P9_FID_DIR;
+    vs->fidp->fsmap.fid_type = P9_FID_DIR;
     vs->offset += pdu_marshal(vs->pdu, vs->offset, "Qd", &vs->qid, 0);
     err = vs->offset;
 out:
@@ -1689,12 +1695,12 @@ static void v9fs_open_post_getiounit(V9fsState *s, V9fsOpenState *vs)
 
 static void v9fs_open_post_open(V9fsState *s, V9fsOpenState *vs, int err)
 {
-    if (vs->fidp->fs.fd == -1) {
+    if (vs->fidp->fsmap.fs.fd == -1) {
         err = -errno;
         goto out;
     }
-    vs->fidp->fid_type = P9_FID_FILE;
-    vs->iounit = get_iounit(s, &vs->fidp->path);
+    vs->fidp->fsmap.fid_type = P9_FID_FILE;
+    vs->iounit = get_iounit(s, &vs->fidp->fsmap.path);
     v9fs_open_post_getiounit(s, vs);
     return;
 out:
@@ -1714,7 +1720,7 @@ static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err)
     stat_to_qid(&vs->stbuf, &vs->qid);
 
     if (S_ISDIR(vs->stbuf.st_mode)) {
-        vs->fidp->fs.dir = v9fs_do_opendir(s, &vs->fidp->path);
+        vs->fidp->fsmap.fs.dir = v9fs_do_opendir(s, &vs->fidp->fsmap.path);
         v9fs_open_post_opendir(s, vs, err);
     } else {
         if (s->proto_version == V9FS_PROTO_2000L) {
@@ -1725,7 +1731,7 @@ static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err)
         } else {
             flags = omode_to_uflags(vs->mode);
         }
-        vs->fidp->fs.fd = v9fs_do_open(s, &vs->fidp->path, flags);
+        vs->fidp->fsmap.fs.fd = v9fs_do_open(s, &vs->fidp->fsmap.path, flags);
         v9fs_open_post_open(s, vs, err);
     }
     return;
@@ -1757,9 +1763,9 @@ static void v9fs_open(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    BUG_ON(vs->fidp->fid_type != P9_FID_NONE);
+    BUG_ON(vs->fidp->fsmap.fid_type != P9_FID_NONE);
 
-    err = v9fs_do_lstat(s, &vs->fidp->path, &vs->stbuf);
+    err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
 
     v9fs_open_post_lstat(s, vs, err);
     return;
@@ -1771,16 +1777,16 @@ out:
 static void v9fs_post_lcreate(V9fsState *s, V9fsLcreateState *vs, int err)
 {
     if (err == 0) {
-        v9fs_string_copy(&vs->fidp->path, &vs->fullname);
+        v9fs_string_copy(&vs->fidp->fsmap.path, &vs->fullname);
         stat_to_qid(&vs->stbuf, &vs->qid);
         vs->offset += pdu_marshal(vs->pdu, vs->offset, "Qd", &vs->qid,
                 vs->iounit);
         err = vs->offset;
     } else {
-        vs->fidp->fid_type = P9_FID_NONE;
+        vs->fidp->fsmap.fid_type = P9_FID_NONE;
         err = -errno;
-        if (vs->fidp->fs.fd > 0) {
-            close(vs->fidp->fs.fd);
+        if (vs->fidp->fsmap.fs.fd > 0) {
+            close(vs->fidp->fsmap.fs.fd);
         }
     }
 
@@ -1806,11 +1812,11 @@ out:
 static void v9fs_lcreate_post_do_open2(V9fsState *s, V9fsLcreateState *vs,
         int err)
 {
-    if (vs->fidp->fs.fd == -1) {
+    if (vs->fidp->fsmap.fs.fd == -1) {
         err = -errno;
         goto out;
     }
-    vs->fidp->fid_type = P9_FID_FILE;
+    vs->fidp->fsmap.fid_type = P9_FID_FILE;
     vs->iounit =  get_iounit(s, &vs->fullname);
     v9fs_lcreate_post_get_iounit(s, vs, err);
     return;
@@ -1841,13 +1847,13 @@ static void v9fs_lcreate(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->fidp->path.data,
+    v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->fidp->fsmap.path.data,
              vs->name.data);
 
     /* Ignore direct disk access hint until the server supports it. */
     flags &= ~O_DIRECT;
 
-    vs->fidp->fs.fd = v9fs_do_open2(s, vs->fullname.data, vs->fidp->uid,
+    vs->fidp->fsmap.fs.fd = v9fs_do_open2(s, vs->fullname.data, vs->fidp->uid,
             gid, flags, mode);
     v9fs_lcreate_post_do_open2(s, vs, err);
     return;
@@ -1881,7 +1887,7 @@ static void v9fs_fsync(V9fsState *s, V9fsPDU *pdu)
         v9fs_post_do_fsync(s, pdu, err);
         return;
     }
-    err = v9fs_do_fsync(s, fidp->fs.fd, datasync);
+    err = v9fs_do_fsync(s, fidp->fsmap.fs.fd, datasync);
     v9fs_post_do_fsync(s, pdu, err);
 }
 
@@ -1938,7 +1944,7 @@ static void v9fs_read_post_dir_lstat(V9fsState *s, V9fsReadState *vs,
                             &vs->v9stat);
     if ((vs->len != (vs->v9stat.size + 2)) ||
             ((vs->count + vs->len) > vs->max_count)) {
-        v9fs_do_seekdir(s, vs->fidp->fs.dir, vs->dir_pos);
+        v9fs_do_seekdir(s, vs->fidp->fsmap.fs.dir, vs->dir_pos);
         v9fs_read_post_seekdir(s, vs, err);
         return;
     }
@@ -1946,11 +1952,11 @@ static void v9fs_read_post_dir_lstat(V9fsState *s, V9fsReadState *vs,
     v9fs_stat_free(&vs->v9stat);
     v9fs_string_free(&vs->name);
     vs->dir_pos = vs->dent->d_off;
-    vs->dent = v9fs_do_readdir(s, vs->fidp->fs.dir);
+    vs->dent = v9fs_do_readdir(s, vs->fidp->fsmap.fs.dir);
     v9fs_read_post_readdir(s, vs, err);
     return;
 out:
-    v9fs_do_seekdir(s, vs->fidp->fs.dir, vs->dir_pos);
+    v9fs_do_seekdir(s, vs->fidp->fsmap.fs.dir, vs->dir_pos);
     v9fs_read_post_seekdir(s, vs, err);
     return;
 
@@ -1961,7 +1967,7 @@ static void v9fs_read_post_readdir(V9fsState *s, V9fsReadState *vs, ssize_t err)
     if (vs->dent) {
         memset(&vs->v9stat, 0, sizeof(vs->v9stat));
         v9fs_string_init(&vs->name);
-        v9fs_string_sprintf(&vs->name, "%s/%s", vs->fidp->path.data,
+        v9fs_string_sprintf(&vs->name, "%s/%s", vs->fidp->fsmap.path.data,
                             vs->dent->d_name);
         err = v9fs_do_lstat(s, &vs->name, &vs->stbuf);
         v9fs_read_post_dir_lstat(s, vs, err);
@@ -1978,7 +1984,7 @@ static void v9fs_read_post_readdir(V9fsState *s, V9fsReadState *vs, ssize_t err)
 
 static void v9fs_read_post_telldir(V9fsState *s, V9fsReadState *vs, ssize_t err)
 {
-    vs->dent = v9fs_do_readdir(s, vs->fidp->fs.dir);
+    vs->dent = v9fs_do_readdir(s, vs->fidp->fsmap.fs.dir);
     v9fs_read_post_readdir(s, vs, err);
     return;
 }
@@ -1986,7 +1992,7 @@ static void v9fs_read_post_telldir(V9fsState *s, V9fsReadState *vs, ssize_t err)
 static void v9fs_read_post_rewinddir(V9fsState *s, V9fsReadState *vs,
                                        ssize_t err)
 {
-    vs->dir_pos = v9fs_do_telldir(s, vs->fidp->fs.dir);
+    vs->dir_pos = v9fs_do_telldir(s, vs->fidp->fsmap.fs.dir);
     v9fs_read_post_telldir(s, vs, err);
     return;
 }
@@ -2005,7 +2011,7 @@ static void v9fs_read_post_preadv(V9fsState *s, V9fsReadState *vs, ssize_t err)
             if (0) {
                 print_sg(vs->sg, vs->cnt);
             }
-            vs->len = v9fs_do_preadv(s, vs->fidp->fs.fd, vs->sg, vs->cnt,
+            vs->len = v9fs_do_preadv(s, vs->fidp->fsmap.fs.fd, vs->sg, vs->cnt,
                       vs->off);
             if (vs->len > 0) {
                 vs->off += vs->len;
@@ -2032,7 +2038,7 @@ static void v9fs_xattr_read(V9fsState *s, V9fsReadState *vs)
     int read_count;
     int64_t xattr_len;
 
-    xattr_len = vs->fidp->fs.xattr.len;
+    xattr_len = vs->fidp->fsmap.fs.xattr.len;
     read_count = xattr_len - vs->off;
     if (read_count > vs->count) {
         read_count = vs->count;
@@ -2044,7 +2050,7 @@ static void v9fs_xattr_read(V9fsState *s, V9fsReadState *vs)
     }
     vs->offset += pdu_marshal(vs->pdu, vs->offset, "d", read_count);
     vs->offset += pdu_pack(vs->pdu, vs->offset,
-                           ((char *)vs->fidp->fs.xattr.value) + vs->off,
+                           ((char *)vs->fidp->fsmap.fs.xattr.value) + vs->off,
                            read_count);
     err = vs->offset;
     complete_pdu(s, vs->pdu, err);
@@ -2072,20 +2078,20 @@ static void v9fs_read(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    if (vs->fidp->fid_type == P9_FID_DIR) {
+    if (vs->fidp->fsmap.fid_type == P9_FID_DIR) {
         vs->max_count = vs->count;
         vs->count = 0;
         if (vs->off == 0) {
-            v9fs_do_rewinddir(s, vs->fidp->fs.dir);
+            v9fs_do_rewinddir(s, vs->fidp->fsmap.fs.dir);
         }
         v9fs_read_post_rewinddir(s, vs, err);
         return;
-    } else if (vs->fidp->fid_type == P9_FID_FILE) {
+    } else if (vs->fidp->fsmap.fid_type == P9_FID_FILE) {
         vs->sg = vs->iov;
         pdu_marshal(vs->pdu, vs->offset + 4, "v", vs->sg, &vs->cnt);
         vs->sg = cap_sg(vs->sg, vs->count, &vs->cnt);
         if (vs->total <= vs->count) {
-            vs->len = v9fs_do_preadv(s, vs->fidp->fs.fd, vs->sg, vs->cnt,
+            vs->len = v9fs_do_preadv(s, vs->fidp->fsmap.fs.fd, vs->sg, vs->cnt,
                                     vs->off);
             if (vs->len > 0) {
                 vs->off += vs->len;
@@ -2094,7 +2100,7 @@ static void v9fs_read(V9fsState *s, V9fsPDU *pdu)
             v9fs_read_post_preadv(s, vs, err);
         }
         return;
-    } else if (vs->fidp->fid_type == P9_FID_XATTR) {
+    } else if (vs->fidp->fsmap.fid_type == P9_FID_XATTR) {
         v9fs_xattr_read(s, vs);
         return;
     } else {
@@ -2143,7 +2149,7 @@ static void v9fs_readdir_post_readdir(V9fsState *s, V9fsReadDirState *vs)
 
         if ((vs->count + V9_READDIR_DATA_SZ) > vs->max_count) {
             /* Ran out of buffer. Set dir back to old position and return */
-            v9fs_do_seekdir(s, vs->fidp->fs.dir, vs->saved_dir_pos);
+            v9fs_do_seekdir(s, vs->fidp->fsmap.fs.dir, vs->saved_dir_pos);
             v9fs_readdir_post_seekdir(s, vs);
             return;
         }
@@ -2164,7 +2170,7 @@ static void v9fs_readdir_post_readdir(V9fsState *s, V9fsReadDirState *vs)
         vs->count += len;
         v9fs_string_free(&vs->name);
         vs->saved_dir_pos = vs->dent->d_off;
-        vs->dent = v9fs_do_readdir(s, vs->fidp->fs.dir);
+        vs->dent = v9fs_do_readdir(s, vs->fidp->fsmap.fs.dir);
         v9fs_readdir_post_readdir(s, vs);
         return;
     }
@@ -2178,14 +2184,14 @@ static void v9fs_readdir_post_readdir(V9fsState *s, V9fsReadDirState *vs)
 
 static void v9fs_readdir_post_telldir(V9fsState *s, V9fsReadDirState *vs)
 {
-    vs->dent = v9fs_do_readdir(s, vs->fidp->fs.dir);
+    vs->dent = v9fs_do_readdir(s, vs->fidp->fsmap.fs.dir);
     v9fs_readdir_post_readdir(s, vs);
     return;
 }
 
 static void v9fs_readdir_post_setdir(V9fsState *s, V9fsReadDirState *vs)
 {
-    vs->saved_dir_pos = v9fs_do_telldir(s, vs->fidp->fs.dir);
+    vs->saved_dir_pos = v9fs_do_telldir(s, vs->fidp->fsmap.fs.dir);
     v9fs_readdir_post_telldir(s, vs);
     return;
 }
@@ -2206,15 +2212,15 @@ static void v9fs_readdir(V9fsState *s, V9fsPDU *pdu)
                                                         &vs->max_count);
 
     vs->fidp = lookup_fid(s, fid);
-    if (vs->fidp == NULL || !(vs->fidp->fs.dir)) {
+    if (vs->fidp == NULL || !(vs->fidp->fsmap.fs.dir)) {
         err = -EINVAL;
         goto out;
     }
 
     if (vs->initial_offset == 0) {
-        v9fs_do_rewinddir(s, vs->fidp->fs.dir);
+        v9fs_do_rewinddir(s, vs->fidp->fsmap.fs.dir);
     } else {
-        v9fs_do_seekdir(s, vs->fidp->fs.dir, vs->initial_offset);
+        v9fs_do_seekdir(s, vs->fidp->fsmap.fs.dir, vs->initial_offset);
     }
 
     v9fs_readdir_post_setdir(s, vs);
@@ -2241,7 +2247,7 @@ static void v9fs_write_post_pwritev(V9fsState *s, V9fsWriteState *vs,
             if (0) {
                 print_sg(vs->sg, vs->cnt);
             }
-            vs->len = v9fs_do_pwritev(s, vs->fidp->fs.fd, vs->sg, vs->cnt,
+            vs->len = v9fs_do_pwritev(s, vs->fidp->fsmap.fs.fd, vs->sg, vs->cnt,
                       vs->off);
             if (vs->len > 0) {
                 vs->off += vs->len;
@@ -2267,7 +2273,7 @@ static void v9fs_xattr_write(V9fsState *s, V9fsWriteState *vs)
     int write_count;
     int64_t xattr_len;
 
-    xattr_len = vs->fidp->fs.xattr.len;
+    xattr_len = vs->fidp->fsmap.fs.xattr.len;
     write_count = xattr_len - vs->off;
     if (write_count > vs->count) {
         write_count = vs->count;
@@ -2281,7 +2287,7 @@ static void v9fs_xattr_write(V9fsState *s, V9fsWriteState *vs)
     }
     vs->offset += pdu_marshal(vs->pdu, vs->offset, "d", write_count);
     err = vs->offset;
-    vs->fidp->fs.xattr.copied_len += write_count;
+    vs->fidp->fsmap.fs.xattr.copied_len += write_count;
     /*
      * Now copy the content from sg list
      */
@@ -2291,7 +2297,7 @@ static void v9fs_xattr_write(V9fsState *s, V9fsWriteState *vs)
         } else {
             to_copy = write_count;
         }
-        memcpy((char *)vs->fidp->fs.xattr.value + vs->off,
+        memcpy((char *)vs->fidp->fsmap.fs.xattr.value + vs->off,
                vs->sg[i].iov_base, to_copy);
         /* updating vs->off since we are not using below */
         vs->off += to_copy;
@@ -2325,12 +2331,12 @@ static void v9fs_write(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    if (vs->fidp->fid_type == P9_FID_FILE) {
-        if (vs->fidp->fs.fd == -1) {
+    if (vs->fidp->fsmap.fid_type == P9_FID_FILE) {
+        if (vs->fidp->fsmap.fs.fd == -1) {
             err = -EINVAL;
             goto out;
         }
-    } else if (vs->fidp->fid_type == P9_FID_XATTR) {
+    } else if (vs->fidp->fsmap.fid_type == P9_FID_XATTR) {
         /*
          * setxattr operation
          */
@@ -2342,7 +2348,8 @@ static void v9fs_write(V9fsState *s, V9fsPDU *pdu)
     }
     vs->sg = cap_sg(vs->sg, vs->count, &vs->cnt);
     if (vs->total <= vs->count) {
-        vs->len = v9fs_do_pwritev(s, vs->fidp->fs.fd, vs->sg, vs->cnt, vs->off);
+        vs->len = v9fs_do_pwritev(s, vs->fidp->fsmap.fs.fd,
+                                  vs->sg, vs->cnt, vs->off);
         if (vs->len > 0) {
             vs->off += vs->len;
         }
@@ -2358,7 +2365,7 @@ out:
 static void v9fs_create_post_getiounit(V9fsState *s, V9fsCreateState *vs)
 {
     int err;
-    v9fs_string_copy(&vs->fidp->path, &vs->fullname);
+    v9fs_string_copy(&vs->fidp->fsmap.path, &vs->fullname);
     stat_to_qid(&vs->stbuf, &vs->qid);
 
     vs->offset += pdu_marshal(vs->pdu, vs->offset, "Qd", &vs->qid, vs->iounit);
@@ -2374,7 +2381,7 @@ static void v9fs_create_post_getiounit(V9fsState *s, V9fsCreateState *vs)
 static void v9fs_post_create(V9fsState *s, V9fsCreateState *vs, int err)
 {
     if (err == 0) {
-        vs->iounit = get_iounit(s, &vs->fidp->path);
+        vs->iounit = get_iounit(s, &vs->fidp->fsmap.path);
         v9fs_create_post_getiounit(s, vs);
         return;
     }
@@ -2397,10 +2404,10 @@ static void v9fs_create_post_perms(V9fsState *s, V9fsCreateState *vs, int err)
 static void v9fs_create_post_opendir(V9fsState *s, V9fsCreateState *vs,
                                                                     int err)
 {
-    if (!vs->fidp->fs.dir) {
+    if (!vs->fidp->fsmap.fs.dir) {
         err = -errno;
     }
-    vs->fidp->fid_type = P9_FID_DIR;
+    vs->fidp->fsmap.fid_type = P9_FID_DIR;
     v9fs_post_create(s, vs, err);
 }
 
@@ -2412,7 +2419,7 @@ static void v9fs_create_post_dir_lstat(V9fsState *s, V9fsCreateState *vs,
         goto out;
     }
 
-    vs->fidp->fs.dir = v9fs_do_opendir(s, &vs->fullname);
+    vs->fidp->fsmap.fs.dir = v9fs_do_opendir(s, &vs->fullname);
     v9fs_create_post_opendir(s, vs, err);
     return;
 
@@ -2438,8 +2445,8 @@ out:
 static void v9fs_create_post_fstat(V9fsState *s, V9fsCreateState *vs, int err)
 {
     if (err) {
-        vs->fidp->fid_type = P9_FID_NONE;
-        close(vs->fidp->fs.fd);
+        vs->fidp->fsmap.fid_type = P9_FID_NONE;
+        close(vs->fidp->fsmap.fs.fd);
         err = -errno;
     }
     v9fs_post_create(s, vs, err);
@@ -2448,12 +2455,12 @@ static void v9fs_create_post_fstat(V9fsState *s, V9fsCreateState *vs, int err)
 
 static void v9fs_create_post_open2(V9fsState *s, V9fsCreateState *vs, int err)
 {
-    if (vs->fidp->fs.fd == -1) {
+    if (vs->fidp->fsmap.fs.fd == -1) {
         err = -errno;
         goto out;
     }
-    vs->fidp->fid_type = P9_FID_FILE;
-    err = v9fs_do_fstat(s, vs->fidp->fs.fd, &vs->stbuf);
+    vs->fidp->fsmap.fid_type = P9_FID_FILE;
+    err = v9fs_do_fstat(s, vs->fidp->fsmap.fs.fd, &vs->stbuf);
     v9fs_create_post_fstat(s, vs, err);
 
     return;
@@ -2486,7 +2493,7 @@ static void v9fs_create_post_lstat(V9fsState *s, V9fsCreateState *vs, int err)
             err = -errno;
             v9fs_post_create(s, vs, err);
         }
-        err = v9fs_do_link(s, &nfidp->path, &vs->fullname);
+        err = v9fs_do_link(s, &nfidp->fsmap.path, &vs->fullname);
         v9fs_create_post_perms(s, vs, err);
     } else if (vs->perm & P9_STAT_MODE_DEVICE) {
         char ctype;
@@ -2524,9 +2531,11 @@ static void v9fs_create_post_lstat(V9fsState *s, V9fsCreateState *vs, int err)
                 0, vs->fidp->uid, -1);
         v9fs_post_create(s, vs, err);
     } else {
-        vs->fidp->fs.fd = v9fs_do_open2(s, vs->fullname.data, vs->fidp->uid,
-                -1, omode_to_uflags(vs->mode)|O_CREAT, vs->perm);
-
+        vs->fidp->fsmap.fs.fd = v9fs_do_open2(s, vs->fullname.data,
+                                              vs->fidp->uid,
+                                              -1,
+                                              omode_to_uflags(vs->mode)|O_CREAT,
+                                              vs->perm);
         v9fs_create_post_open2(s, vs, err);
     }
 
@@ -2557,7 +2566,7 @@ static void v9fs_create(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->fidp->path.data,
+    v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->fidp->fsmap.path.data,
                                                         vs->name.data);
 
     err = v9fs_do_lstat(s, &vs->fullname, &vs->stbuf);
@@ -2620,7 +2629,7 @@ static void v9fs_symlink(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->dfidp->path.data,
+    v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->dfidp->fsmap.path.data,
             vs->name.data);
     err = v9fs_do_symlink(s, vs->dfidp, vs->symname.data,
             vs->fullname.data, gid);
@@ -2664,9 +2673,9 @@ static void v9fs_link(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    v9fs_string_sprintf(&fullname, "%s/%s", dfidp->path.data, name.data);
+    v9fs_string_sprintf(&fullname, "%s/%s", dfidp->fsmap.path.data, name.data);
     err = offset;
-    err = v9fs_do_link(s, &oldfidp->path, &fullname);
+    err = v9fs_do_link(s, &oldfidp->fsmap.path, &fullname);
     if (err) {
         err = -errno;
     }
@@ -2711,7 +2720,7 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    err = v9fs_do_remove(s, &vs->fidp->path);
+    err = v9fs_do_remove(s, &vs->fidp->fsmap.path);
     v9fs_remove_post_remove(s, vs, err);
     return;
 
@@ -2740,7 +2749,7 @@ static void v9fs_wstat_post_rename(V9fsState *s, V9fsWstatState *vs, int err)
         goto out;
     }
     if (vs->v9stat.length != -1) {
-        if (v9fs_do_truncate(s, &vs->fidp->path, vs->v9stat.length) < 0) {
+        if (v9fs_do_truncate(s, &vs->fidp->fsmap.path, vs->v9stat.length) < 0) {
             err = -errno;
         }
     }
@@ -2768,15 +2777,15 @@ static int v9fs_complete_rename(V9fsState *s, V9fsRenameState *vs)
             goto out;
         }
 
-        BUG_ON(dirfidp->fid_type != P9_FID_NONE);
+        BUG_ON(dirfidp->fsmap.fid_type != P9_FID_NONE);
 
-        new_name = qemu_mallocz(dirfidp->path.size + vs->name.size + 2);
+        new_name = qemu_mallocz(dirfidp->fsmap.path.size + vs->name.size + 2);
 
-        strcpy(new_name, dirfidp->path.data);
+        strcpy(new_name, dirfidp->fsmap.path.data);
         strcat(new_name, "/");
-        strcat(new_name + dirfidp->path.size, vs->name.data);
+        strcat(new_name + dirfidp->fsmap.path.size, vs->name.data);
     } else {
-        old_name = vs->fidp->path.data;
+        old_name = vs->fidp->fsmap.path.data;
         end = strrchr(old_name, '/');
         if (end) {
             end++;
@@ -2793,8 +2802,8 @@ static int v9fs_complete_rename(V9fsState *s, V9fsRenameState *vs)
     vs->name.data = qemu_strdup(new_name);
     vs->name.size = strlen(new_name);
 
-    if (strcmp(new_name, vs->fidp->path.data) != 0) {
-        if (v9fs_do_rename(s, &vs->fidp->path, &vs->name)) {
+    if (strcmp(new_name, vs->fidp->fsmap.path.data) != 0) {
+        if (v9fs_do_rename(s, &vs->fidp->fsmap.path, &vs->name)) {
             err = -errno;
         } else {
             V9fsFidState *fidp;
@@ -2810,14 +2819,14 @@ static int v9fs_complete_rename(V9fsState *s, V9fsRenameState *vs)
                     */
                     continue;
                 }
-                if (!strncmp(vs->fidp->path.data, fidp->path.data,
-                    strlen(vs->fidp->path.data))) {
+                if (!strncmp(vs->fidp->fsmap.path.data, fidp->fsmap.path.data,
+                    strlen(vs->fidp->fsmap.path.data))) {
                     /* replace the name */
-                    v9fs_fix_path(&fidp->path, &vs->name,
-                                  strlen(vs->fidp->path.data));
+                    v9fs_fix_path(&fidp->fsmap.path, &vs->name,
+                                  strlen(vs->fidp->fsmap.path.data));
                 }
             }
-            v9fs_string_copy(&vs->fidp->path, &vs->name);
+            v9fs_string_copy(&vs->fidp->fsmap.path, &vs->name);
         }
     }
 out:
@@ -2878,7 +2887,7 @@ static void v9fs_rename(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    BUG_ON(vs->fidp->fid_type != P9_FID_NONE);
+    BUG_ON(vs->fidp->fsmap.fid_type != P9_FID_NONE);
 
     err = v9fs_complete_rename(s, vs);
     v9fs_rename_post_rename(s, vs, err);
@@ -2895,7 +2904,7 @@ static void v9fs_wstat_post_utime(V9fsState *s, V9fsWstatState *vs, int err)
     }
 
     if (vs->v9stat.n_gid != -1 || vs->v9stat.n_uid != -1) {
-        if (v9fs_do_chown(s, &vs->fidp->path, vs->v9stat.n_uid,
+        if (v9fs_do_chown(s, &vs->fidp->fsmap.path, vs->v9stat.n_uid,
                     vs->v9stat.n_gid)) {
             err = -errno;
         }
@@ -2930,7 +2939,7 @@ static void v9fs_wstat_post_chmod(V9fsState *s, V9fsWstatState *vs, int err)
             times[1].tv_nsec = UTIME_OMIT;
         }
 
-        if (v9fs_do_utimensat(s, &vs->fidp->path, times)) {
+        if (v9fs_do_utimensat(s, &vs->fidp->fsmap.path, times)) {
             err = -errno;
         }
     }
@@ -2972,7 +2981,7 @@ static void v9fs_wstat_post_lstat(V9fsState *s, V9fsWstatState *vs, int err)
             goto out;
     }
 
-    if (v9fs_do_chmod(s, &vs->fidp->path, v9mode_to_mode(vs->v9stat.mode,
+    if (v9fs_do_chmod(s, &vs->fidp->fsmap.path, v9mode_to_mode(vs->v9stat.mode,
                     &vs->v9stat.extension))) {
             err = -errno;
      }
@@ -3005,13 +3014,13 @@ static void v9fs_wstat(V9fsState *s, V9fsPDU *pdu)
 
     /* do we need to sync the file? */
     if (donttouch_stat(&vs->v9stat)) {
-        err = v9fs_do_fsync(s, vs->fidp->fs.fd, 0);
+        err = v9fs_do_fsync(s, vs->fidp->fsmap.fs.fd, 0);
         v9fs_wstat_post_fsync(s, vs, err);
         return;
     }
 
     if (vs->v9stat.mode != -1) {
-        err = v9fs_do_lstat(s, &vs->fidp->path, &vs->stbuf);
+        err = v9fs_do_lstat(s, &vs->fidp->fsmap.path, &vs->stbuf);
         v9fs_wstat_post_lstat(s, vs, err);
         return;
     }
@@ -3089,7 +3098,7 @@ static void v9fs_statfs(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    err = v9fs_do_statfs(s, &vs->fidp->path, &vs->stbuf);
+    err = v9fs_do_statfs(s, &vs->fidp->fsmap.path, &vs->stbuf);
     v9fs_statfs_post_statfs(s, vs, err);
     return;
 
@@ -3156,7 +3165,8 @@ static void v9fs_mknod(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    v9fs_string_sprintf(&vs->fullname, "%s/%s", fidp->path.data, vs->name.data);
+    v9fs_string_sprintf(&vs->fullname, "%s/%s", fidp->fsmap.path.data,
+                        vs->name.data);
     err = v9fs_do_mknod(s, vs->fullname.data, mode, makedev(major, minor),
         fidp->uid, gid);
     v9fs_mknod_post_mknod(s, vs, err);
@@ -3205,7 +3215,7 @@ static void v9fs_lock(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    err = v9fs_do_fstat(s, vs->fidp->fs.fd, &vs->stbuf);
+    err = v9fs_do_fstat(s, vs->fidp->fsmap.fs.fd, &vs->stbuf);
     if (err < 0) {
         err = -errno;
         goto out;
@@ -3243,7 +3253,7 @@ static void v9fs_getlock(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    err = v9fs_do_fstat(s, vs->fidp->fs.fd, &vs->stbuf);
+    err = v9fs_do_fstat(s, vs->fidp->fsmap.fs.fd, &vs->stbuf);
     if (err < 0) {
         err = -errno;
         goto out;
@@ -3315,7 +3325,8 @@ static void v9fs_mkdir(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    v9fs_string_sprintf(&vs->fullname, "%s/%s", fidp->path.data, vs->name.data);
+    v9fs_string_sprintf(&vs->fullname, "%s/%s", fidp->fsmap.path.data,
+                        vs->name.data);
     err = v9fs_do_mkdir(s, vs->fullname.data, mode, fidp->uid, gid);
     v9fs_mkdir_post_mkdir(s, vs, err);
     return;
@@ -3354,14 +3365,14 @@ static void v9fs_post_xattr_check(V9fsState *s, V9fsXattrState *vs, ssize_t err)
     /*
      * Read the xattr value
      */
-    vs->xattr_fidp->fs.xattr.len = vs->size;
-    vs->xattr_fidp->fid_type = P9_FID_XATTR;
-    vs->xattr_fidp->fs.xattr.copied_len = -1;
+    vs->xattr_fidp->fsmap.fs.xattr.len = vs->size;
+    vs->xattr_fidp->fsmap.fid_type = P9_FID_XATTR;
+    vs->xattr_fidp->fsmap.fs.xattr.copied_len = -1;
     if (vs->size) {
-        vs->xattr_fidp->fs.xattr.value = qemu_malloc(vs->size);
-        err = v9fs_do_lgetxattr(s, &vs->xattr_fidp->path,
-                                &vs->name, vs->xattr_fidp->fs.xattr.value,
-                                vs->xattr_fidp->fs.xattr.len);
+        vs->xattr_fidp->fsmap.fs.xattr.value = qemu_malloc(vs->size);
+        err = v9fs_do_lgetxattr(s, &vs->xattr_fidp->fsmap.path,
+                                &vs->name, vs->xattr_fidp->fsmap.fs.xattr.value,
+                                vs->xattr_fidp->fsmap.fs.xattr.len);
     }
     v9fs_post_xattr_getvalue(s, vs, err);
     return;
@@ -3399,14 +3410,14 @@ static void v9fs_post_lxattr_check(V9fsState *s,
     /*
      * Read the xattr value
      */
-    vs->xattr_fidp->fs.xattr.len = vs->size;
-    vs->xattr_fidp->fid_type = P9_FID_XATTR;
-    vs->xattr_fidp->fs.xattr.copied_len = -1;
+    vs->xattr_fidp->fsmap.fs.xattr.len = vs->size;
+    vs->xattr_fidp->fsmap.fid_type = P9_FID_XATTR;
+    vs->xattr_fidp->fsmap.fs.xattr.copied_len = -1;
     if (vs->size) {
-        vs->xattr_fidp->fs.xattr.value = qemu_malloc(vs->size);
-        err = v9fs_do_llistxattr(s, &vs->xattr_fidp->path,
-                                 vs->xattr_fidp->fs.xattr.value,
-                                 vs->xattr_fidp->fs.xattr.len);
+        vs->xattr_fidp->fsmap.fs.xattr.value = qemu_malloc(vs->size);
+        err = v9fs_do_llistxattr(s, &vs->xattr_fidp->fsmap.path,
+                                 vs->xattr_fidp->fsmap.fs.xattr.value,
+                                 vs->xattr_fidp->fsmap.fs.xattr.len);
     }
     v9fs_post_lxattr_getvalue(s, vs, err);
     return;
@@ -3439,12 +3450,12 @@ static void v9fs_xattrwalk(V9fsState *s, V9fsPDU *pdu)
         goto out;
     }
 
-    v9fs_string_copy(&vs->xattr_fidp->path, &vs->file_fidp->path);
+    v9fs_string_copy(&vs->xattr_fidp->fsmap.path, &vs->file_fidp->fsmap.path);
     if (vs->name.data[0] == 0) {
         /*
          * listxattr request. Get the size first
          */
-        vs->size = v9fs_do_llistxattr(s, &vs->xattr_fidp->path,
+        vs->size = v9fs_do_llistxattr(s, &vs->xattr_fidp->fsmap.path,
                                       NULL, 0);
         if (vs->size < 0) {
             err = vs->size;
@@ -3456,7 +3467,7 @@ static void v9fs_xattrwalk(V9fsState *s, V9fsPDU *pdu)
          * specific xattr fid. We check for xattr
          * presence also collect the xattr size
          */
-        vs->size = v9fs_do_lgetxattr(s, &vs->xattr_fidp->path,
+        vs->size = v9fs_do_lgetxattr(s, &vs->xattr_fidp->fsmap.path,
                                      &vs->name, NULL, 0);
         if (vs->size < 0) {
             err = vs->size;
@@ -3492,16 +3503,16 @@ static void v9fs_xattrcreate(V9fsState *s, V9fsPDU *pdu)
 
     /* Make the file fid point to xattr */
     vs->xattr_fidp = vs->file_fidp;
-    vs->xattr_fidp->fid_type = P9_FID_XATTR;
-    vs->xattr_fidp->fs.xattr.copied_len = 0;
-    vs->xattr_fidp->fs.xattr.len = vs->size;
-    vs->xattr_fidp->fs.xattr.flags = flags;
-    v9fs_string_init(&vs->xattr_fidp->fs.xattr.name);
-    v9fs_string_copy(&vs->xattr_fidp->fs.xattr.name, &vs->name);
+    vs->xattr_fidp->fsmap.fid_type = P9_FID_XATTR;
+    vs->xattr_fidp->fsmap.fs.xattr.copied_len = 0;
+    vs->xattr_fidp->fsmap.fs.xattr.len = vs->size;
+    vs->xattr_fidp->fsmap.fs.xattr.flags = flags;
+    v9fs_string_init(&vs->xattr_fidp->fsmap.fs.xattr.name);
+    v9fs_string_copy(&vs->xattr_fidp->fsmap.fs.xattr.name, &vs->name);
     if (vs->size)
-        vs->xattr_fidp->fs.xattr.value = qemu_malloc(vs->size);
+        vs->xattr_fidp->fsmap.fs.xattr.value = qemu_malloc(vs->size);
     else
-        vs->xattr_fidp->fs.xattr.value = NULL;
+        vs->xattr_fidp->fsmap.fs.xattr.value = NULL;
 
 out:
     complete_pdu(s, vs->pdu, err);
@@ -3544,7 +3555,7 @@ static void v9fs_readlink(V9fsState *s, V9fsPDU *pdu)
     }
 
     v9fs_string_init(&vs->target);
-    err = v9fs_do_readlink(s, &fidp->path, &vs->target);
+    err = v9fs_do_readlink(s, &fidp->fsmap.path, &vs->target);
     v9fs_readlink_post_readlink(s, vs, err);
     return;
 out:
diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
index 2ae4ce7..68d5906 100644
--- a/hw/9pfs/virtio-9p.h
+++ b/hw/9pfs/virtio-9p.h
@@ -101,7 +101,10 @@ enum p9_proto_version {
 #define P9_NOTAG    (u16)(~0)
 #define P9_NOFID    (u32)(~0)
 #define P9_MAXWELEM 16
+#define P9_FD_RECLAIM_THRES 1000
 
+#define FID_REFERENCED          0x1
+#define FID_NON_RECLAIMABLE     0x2
 /*
  * ample room for Twrite/Rread header
  * size[4] Tread/Twrite tag[2] fid[4] offset[8] count[4]
@@ -185,17 +188,22 @@ typedef struct V9fsXattr
     int flags;
 } V9fsXattr;
 
+typedef struct V9fsfidmap {
+    union {
+        int fd;
+        DIR *dir;
+        V9fsXattr xattr;
+    } fs;
+    int fid_type;
+    V9fsString path;
+    int flags;
+} V9fsFidMap;
+
 struct V9fsFidState
 {
-    int fid_type;
     int32_t fid;
-    V9fsString path;
-    union {
-	int fd;
-	DIR *dir;
-	V9fsXattr xattr;
-    } fs;
     uid_t uid;
+    V9fsFidMap fsmap;
     V9fsFidState *next;
 };
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
@ 2011-03-05 17:52 ` Aneesh Kumar K.V
  2011-03-13 16:08   ` Stefan Hajnoczi
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 3/8] hw/9pfs: Use v9fs_do_close instead of close Aneesh Kumar K.V
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |   99 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index a9f52c6..811ac38 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -20,6 +20,7 @@
 #include "virtio-9p-xattr.h"
 
 int debug_9p_pdu;
+static void v9fs_reclaim_fd(V9fsState *s);
 
 enum {
     Oread   = 0x00,
@@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
 
 static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
 {
-    return s->ops->open(&s->ctx, path->data, flags);
+    int fd;
+    fd = s->ops->open(&s->ctx, path->data, flags);
+    if (fd > P9_FD_RECLAIM_THRES) {
+        v9fs_reclaim_fd(s);
+    }
+    return fd;
 }
 
 static DIR *v9fs_do_opendir(V9fsState *s, V9fsString *path)
@@ -188,6 +194,7 @@ static int v9fs_do_fstat(V9fsState *s, int fd, struct stat *stbuf)
 static int v9fs_do_open2(V9fsState *s, char *fullname, uid_t uid, gid_t gid,
         int flags, int mode)
 {
+    int fd;
     FsCred cred;
 
     cred_init(&cred);
@@ -196,7 +203,11 @@ static int v9fs_do_open2(V9fsState *s, char *fullname, uid_t uid, gid_t gid,
     cred.fc_mode = mode & 07777;
     flags = flags;
 
-    return s->ops->open2(&s->ctx, fullname, flags, &cred);
+    fd = s->ops->open2(&s->ctx, fullname, flags, &cred);
+    if (fd > P9_FD_RECLAIM_THRES) {
+        v9fs_reclaim_fd(s);
+    }
+    return fd;
 }
 
 static int v9fs_do_symlink(V9fsState *s, V9fsFidState *fidp,
@@ -434,6 +445,23 @@ static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid)
 
     for (f = s->fid_list; f; f = f->next) {
         if (f->fid == fid) {
+            /*
+             * check whether we need to reopen the
+             * file. We might have closed the fd
+             * while trying to free up some file
+             * descriptors.
+             */
+            if (f->fsmap.fid_type == P9_FID_FILE) {
+                /* FIXME!! should we remember the open flags ?*/
+                if (f->fsmap.fs.fd == -1) {
+                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path, O_RDWR);
+                }
+            }
+            /*
+             * Mark the fid as referenced so that the LRU
+             * reclaim won't close the file descriptor
+             */
+            f->fsmap.flags |= FID_REFERENCED;
             return f;
         }
     }
@@ -461,6 +489,62 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
     return f;
 }
 
+static void v9fs_reclaim_fd(V9fsState *s)
+{
+    int reclaim_count = 0;
+    V9fsFidState *f;
+
+    for (f = s->fid_list; f; f = f->next) {
+        /*
+         * Unlink fids cannot be reclaimed. Check
+         * for them and skip them
+         */
+        if (f->fsmap.flags & FID_NON_RECLAIMABLE) {
+            continue;
+        }
+        /*
+         * if it is a recently referenced fid
+         * we leave the fid untouched and clear the
+         * reference bit. We come back to it later
+         * in the next iteration. (a simple LRU without
+         * moving list elements around)
+         */
+        if (f->fsmap.flags & FID_REFERENCED) {
+            f->fsmap.flags  &= ~FID_REFERENCED;
+            continue;
+        }
+        /*
+         * reclaim fd, by closing the file descriptors
+         */
+        if (f->fsmap.fid_type == P9_FID_FILE) {
+            if (f->fsmap.fs.fd != -1) {
+                v9fs_do_close(s, f->fsmap.fs.fd);
+                f->fsmap.fs.fd = -1;
+                reclaim_count++;
+            }
+        }
+        if (reclaim_count >= P9_FD_RECLAIM_THRES/2) {
+            break;
+        }
+    }
+}
+
+static void v9fs_mark_fids_unreclaim(V9fsState *s, V9fsString *str)
+{
+    V9fsFidState *fidp;
+    for (fidp = s->fid_list; fidp; fidp = fidp->next) {
+        if (!strcmp(fidp->fsmap.path.data, str->data)) {
+            /* Mark the fid non reclaimable. */
+            fidp->fsmap.flags  |= FID_NON_RECLAIMABLE;
+            /* reopen the file if already closed */
+            if (fidp->fsmap.fs.fd == -1) {
+                fidp->fsmap.fs.fd = v9fs_do_open(s, &fidp->fsmap.path, O_RDWR);
+            }
+        }
+    }
+}
+
+
 static int v9fs_xattr_fid_clunk(V9fsState *s, V9fsFidState *fidp)
 {
     int retval = 0;
@@ -516,7 +600,10 @@ static int free_fid(V9fsState *s, int32_t fid)
     *fidpp = fidp->next;
 
     if (fidp->fsmap.fid_type == P9_FID_FILE) {
-        v9fs_do_close(s, fidp->fsmap.fs.fd);
+        /* I we reclaimed the fd no need to close */
+        if (fidp->fsmap.fs.fd != -1) {
+            v9fs_do_close(s, fidp->fsmap.fs.fd);
+        }
     } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
         v9fs_do_closedir(s, fidp->fsmap.fs.dir);
     } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
@@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
         err = -EINVAL;
         goto out;
     }
-
+    /*
+     * IF the file is unlinked, we cannot reopen
+     * the file later. So don't reclaim fd
+     */
+    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
     err = v9fs_do_remove(s, &vs->fidp->fsmap.path);
     v9fs_remove_post_remove(s, vs, err);
     return;
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Qemu-devel] [PATCH -V3 3/8] hw/9pfs: Use v9fs_do_close instead of close
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support Aneesh Kumar K.V
@ 2011-03-05 17:52 ` Aneesh Kumar K.V
  2011-03-13 16:10   ` Stefan Hajnoczi
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 4/8] hw/9pfs: Implement syncfs Aneesh Kumar K.V
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

we should use the local abstraction instead of
directly calling close.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 811ac38..c4b0198 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -1873,7 +1873,7 @@ static void v9fs_post_lcreate(V9fsState *s, V9fsLcreateState *vs, int err)
         vs->fidp->fsmap.fid_type = P9_FID_NONE;
         err = -errno;
         if (vs->fidp->fsmap.fs.fd > 0) {
-            close(vs->fidp->fsmap.fs.fd);
+            v9fs_do_close(s, vs->fidp->fsmap.fs.fd);
         }
     }
 
@@ -2533,7 +2533,7 @@ static void v9fs_create_post_fstat(V9fsState *s, V9fsCreateState *vs, int err)
 {
     if (err) {
         vs->fidp->fsmap.fid_type = P9_FID_NONE;
-        close(vs->fidp->fsmap.fs.fd);
+        v9fs_do_close(s, vs->fidp->fsmap.fs.fd);
         err = -errno;
     }
     v9fs_post_create(s, vs, err);
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Qemu-devel] [PATCH -V3 4/8] hw/9pfs: Implement syncfs
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support Aneesh Kumar K.V
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 3/8] hw/9pfs: Use v9fs_do_close instead of close Aneesh Kumar K.V
@ 2011-03-05 17:52 ` Aneesh Kumar K.V
  2011-03-13 16:24   ` Stefan Hajnoczi
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 5/8] hw/9pfs: Add open flag to fid Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

SYNOPSIS
    size[4] Tsyncfs tag[2] fid[4]

    size[4] Rsyncfs tag[2]

DESCRIPTION
    The Tsyncfs transaction transfers ("flushes") all modified data of
    file system identified by fid to the disk device. The operation is
    equivalent to calling sync() on the file system.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p-local.c |    9 +++++++++
 hw/9pfs/virtio-9p.c       |   31 +++++++++++++++++++++++++++++++
 hw/9pfs/virtio-9p.h       |    2 ++
 hw/file-op-9p.h           |    1 +
 4 files changed, 43 insertions(+), 0 deletions(-)

diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 0a015de..43ff37c 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -528,6 +528,14 @@ static int local_lremovexattr(FsContext *ctx,
     return v9fs_remove_xattr(ctx, path, name);
 }
 
+static int local_syncfs(FsContext *ctx, int fd)
+{
+    /*
+     * We should be doing per file system sync here.
+     */
+    sync();
+    return 0;
+}
 
 FileOperations local_ops = {
     .lstat = local_lstat,
@@ -560,4 +568,5 @@ FileOperations local_ops = {
     .llistxattr = local_llistxattr,
     .lsetxattr = local_lsetxattr,
     .lremovexattr = local_lremovexattr,
+    .syncfs     = local_syncfs,
 };
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index c4b0198..ce09e55 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -299,6 +299,10 @@ static int v9fs_do_lremovexattr(V9fsState *s, V9fsString *path,
                                 xattr_name->data);
 }
 
+static int v9fs_do_syncfs(V9fsState *s, int fd)
+{
+    return s->ops->syncfs(&s->ctx, fd);
+}
 
 static void v9fs_string_init(V9fsString *str)
 {
@@ -1978,6 +1982,32 @@ static void v9fs_fsync(V9fsState *s, V9fsPDU *pdu)
     v9fs_post_do_fsync(s, pdu, err);
 }
 
+static void v9fs_post_do_syncfs(V9fsState *s, V9fsPDU *pdu, int err)
+{
+    if (err == -1) {
+        err = -errno;
+    }
+    complete_pdu(s, pdu, err);
+}
+
+static void v9fs_syncfs(V9fsState *s, V9fsPDU *pdu)
+{
+    int err;
+    int32_t fid;
+    size_t offset = 7;
+    V9fsFidState *fidp;
+
+    pdu_unmarshal(pdu, offset, "d", &fid);
+    fidp = lookup_fid(s, fid);
+    if (fidp == NULL) {
+        err = -ENOENT;
+        v9fs_post_do_syncfs(s, pdu, err);
+        return;
+    }
+    err = v9fs_do_syncfs(s, fidp->fsmap.fs.fd);
+    v9fs_post_do_syncfs(s, pdu, err);
+}
+
 static void v9fs_clunk(V9fsState *s, V9fsPDU *pdu)
 {
     int32_t fid;
@@ -3676,6 +3706,7 @@ static pdu_handler_t *pdu_handlers[] = {
     [P9_TWALK] = v9fs_walk,
     [P9_TCLUNK] = v9fs_clunk,
     [P9_TFSYNC] = v9fs_fsync,
+    [P9_TSYNCFS] = v9fs_syncfs,
     [P9_TOPEN] = v9fs_open,
     [P9_TREAD] = v9fs_read,
 #if 0
diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
index 68d5906..b0f8210 100644
--- a/hw/9pfs/virtio-9p.h
+++ b/hw/9pfs/virtio-9p.h
@@ -13,6 +13,8 @@
 #define VIRTIO_9P_MOUNT_TAG 0
 
 enum {
+    P9_TSYNCFS = 0,
+    P9_RSYNCFS,
     P9_TLERROR = 6,
     P9_RLERROR,
     P9_TSTATFS = 8,
diff --git a/hw/file-op-9p.h b/hw/file-op-9p.h
index 126e60e..e306305 100644
--- a/hw/file-op-9p.h
+++ b/hw/file-op-9p.h
@@ -94,6 +94,7 @@ typedef struct FileOperations
     int (*lsetxattr)(FsContext *, const char *,
                      const char *, void *, size_t, int);
     int (*lremovexattr)(FsContext *, const char *, const char *);
+    int (*syncfs)(FsContext *, int);
     void *opaque;
 } FileOperations;
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Qemu-devel] [PATCH -V3 5/8] hw/9pfs: Add open flag to fid
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 4/8] hw/9pfs: Implement syncfs Aneesh Kumar K.V
@ 2011-03-05 17:52 ` Aneesh Kumar K.V
  2011-03-13 16:38   ` Stefan Hajnoczi
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 6/8] hw/9pfs: Add directory reclaim support Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

We use this flag when we reopen the file. We need
to track open flag because if the open request have
flags like O_SYNC, we want to open the file with same flag
in host too

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |   56 +++++++++++++++++++++++++++++++++++++++++---------
 hw/9pfs/virtio-9p.h |    1 +
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index ce09e55..1fa7256 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -22,6 +22,8 @@
 int debug_9p_pdu;
 static void v9fs_reclaim_fd(V9fsState *s);
 
+#define PASS_OPEN_FLAG (O_SYNC | O_DSYNC | O_RSYNC |  \
+                        O_EXCL)
 enum {
     Oread   = 0x00,
     Owrite  = 0x01,
@@ -68,6 +70,28 @@ static int omode_to_uflags(int8_t mode)
     return ret;
 }
 
+static int get_dotl_openflags(int oflags)
+{
+    int flags;
+    /*
+     * Since we can share the fd between multiple fids,
+     * open the file in read write mode
+     */
+    if ((oflags & O_ACCMODE) != O_RDONLY) {
+        flags = O_RDWR;
+    } else {
+        flags = O_RDONLY;
+    }
+    /*
+     * If the client asked for any of the below flags we
+     * should open the file with same open flag
+     */
+    if (oflags & PASS_OPEN_FLAG) {
+        flags |=  oflags & PASS_OPEN_FLAG;
+    }
+    return flags;
+}
+
 void cred_init(FsCred *credp)
 {
     credp->fc_uid = -1;
@@ -456,9 +480,9 @@ static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid)
              * descriptors.
              */
             if (f->fsmap.fid_type == P9_FID_FILE) {
-                /* FIXME!! should we remember the open flags ?*/
                 if (f->fsmap.fs.fd == -1) {
-                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path, O_RDWR);
+                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path,
+                                                  f->fsmap.open_flags);
                 }
             }
             /*
@@ -1815,14 +1839,19 @@ static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err)
         v9fs_open_post_opendir(s, vs, err);
     } else {
         if (s->proto_version == V9FS_PROTO_2000L) {
-            flags = vs->mode;
-            flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
-            /* Ignore direct disk access hint until the server supports it. */
-            flags &= ~O_DIRECT;
+            flags = get_dotl_openflags(vs->mode);
         } else {
             flags = omode_to_uflags(vs->mode);
         }
         vs->fidp->fsmap.fs.fd = v9fs_do_open(s, &vs->fidp->fsmap.path, flags);
+        vs->fidp->fsmap.open_flags = flags;
+        if (flags & O_EXCL) {
+            /*
+             * We let the host file system do O_EXCL check
+             * We should not reclaim such fd
+             */
+            vs->fidp->fsmap.flags  |= FID_NON_RECLAIMABLE;
+        }
         v9fs_open_post_open(s, vs, err);
     }
     return;
@@ -1941,11 +1970,17 @@ static void v9fs_lcreate(V9fsState *s, V9fsPDU *pdu)
     v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->fidp->fsmap.path.data,
              vs->name.data);
 
-    /* Ignore direct disk access hint until the server supports it. */
-    flags &= ~O_DIRECT;
-
+    flags = get_dotl_openflags(flags);
     vs->fidp->fsmap.fs.fd = v9fs_do_open2(s, vs->fullname.data, vs->fidp->uid,
-            gid, flags, mode);
+                                          gid, flags | O_CREAT, mode);
+    vs->fidp->fsmap.open_flags = flags;
+    if (flags & O_EXCL) {
+        /*
+         * We let the host file system do O_EXCL check
+         * We should not reclaim such fd
+         */
+        vs->fidp->fsmap.flags  |= FID_NON_RECLAIMABLE;
+    }
     v9fs_lcreate_post_do_open2(s, vs, err);
     return;
 
@@ -2653,6 +2688,7 @@ static void v9fs_create_post_lstat(V9fsState *s, V9fsCreateState *vs, int err)
                                               -1,
                                               omode_to_uflags(vs->mode)|O_CREAT,
                                               vs->perm);
+        vs->fidp->fsmap.open_flags = omode_to_uflags(vs->mode);
         v9fs_create_post_open2(s, vs, err);
     }
 
diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
index b0f8210..10809ba 100644
--- a/hw/9pfs/virtio-9p.h
+++ b/hw/9pfs/virtio-9p.h
@@ -196,6 +196,7 @@ typedef struct V9fsfidmap {
         DIR *dir;
         V9fsXattr xattr;
     } fs;
+    int open_flags;
     int fid_type;
     V9fsString path;
     int flags;
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Qemu-devel] [PATCH -V3 6/8] hw/9pfs: Add directory reclaim support
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 5/8] hw/9pfs: Add open flag to fid Aneesh Kumar K.V
@ 2011-03-05 17:52 ` Aneesh Kumar K.V
  2011-03-13 16:42   ` Stefan Hajnoczi
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |   21 +++++++++++++++++++--
 1 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 1fa7256..293a562 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -142,7 +142,12 @@ static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
 
 static DIR *v9fs_do_opendir(V9fsState *s, V9fsString *path)
 {
-    return s->ops->opendir(&s->ctx, path->data);
+    DIR *dir;
+    dir = s->ops->opendir(&s->ctx, path->data);
+    if (dirfd(dir) > P9_FD_RECLAIM_THRES) {
+        v9fs_reclaim_fd(s);
+    }
+    return dir;
 }
 
 static void v9fs_do_rewinddir(V9fsState *s, DIR *dir)
@@ -484,6 +489,10 @@ static V9fsFidState *lookup_fid(V9fsState *s, int32_t fid)
                     f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path,
                                                   f->fsmap.open_flags);
                 }
+            } else if (f->fsmap.fid_type == P9_FID_DIR) {
+                if (f->fsmap.fs.dir == NULL) {
+                    f->fsmap.fs.dir = v9fs_do_opendir(s, &f->fsmap.path);
+                }
             }
             /*
              * Mark the fid as referenced so that the LRU
@@ -550,6 +559,12 @@ static void v9fs_reclaim_fd(V9fsState *s)
                 f->fsmap.fs.fd = -1;
                 reclaim_count++;
             }
+        } else if (f->fsmap.fid_type == P9_FID_DIR) {
+            if (f->fsmap.fs.dir != NULL) {
+                v9fs_do_closedir(s, f->fsmap.fs.dir);
+                f->fsmap.fs.dir = NULL;
+                reclaim_count++;
+            }
         }
         if (reclaim_count >= P9_FD_RECLAIM_THRES/2) {
             break;
@@ -633,7 +648,9 @@ static int free_fid(V9fsState *s, int32_t fid)
             v9fs_do_close(s, fidp->fsmap.fs.fd);
         }
     } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
-        v9fs_do_closedir(s, fidp->fsmap.fs.dir);
+        if (fidp->fsmap.fs.dir != NULL) {
+            v9fs_do_closedir(s, fidp->fsmap.fs.dir);
+        }
     } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
         retval = v9fs_xattr_fid_clunk(s, fidp);
     }
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 6/8] hw/9pfs: Add directory reclaim support Aneesh Kumar K.V
@ 2011-03-05 17:52 ` Aneesh Kumar K.V
  2011-03-13 17:23   ` Stefan Hajnoczi
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 8/8] hw/9pfs: Skip file system sync if we have specified cache=none option Aneesh Kumar K.V
  2011-03-13 15:46 ` [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Stefan Hajnoczi
  7 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

cache=none implies the file are opened in the host with O_SYNC open flag

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fsdev/qemu-fsdev.c  |    8 +++++++-
 fsdev/qemu-fsdev.h  |    1 +
 hw/9pfs/virtio-9p.c |   17 ++++++++++++-----
 hw/file-op-9p.h     |    1 +
 qemu-config.c       |    6 ++++++
 qemu-options.hx     |   17 ++++++++++++-----
 vl.c                |   23 +++++++++++++++++++----
 7 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c
index 0b33290..d803d47 100644
--- a/fsdev/qemu-fsdev.c
+++ b/fsdev/qemu-fsdev.c
@@ -33,6 +33,8 @@ int qemu_fsdev_add(QemuOpts *opts)
     const char *fstype = qemu_opt_get(opts, "fstype");
     const char *path = qemu_opt_get(opts, "path");
     const char *sec_model = qemu_opt_get(opts, "security_model");
+    const char *cache = qemu_opt_get(opts, "cache");
+
 
     if (!fsdev_id) {
         fprintf(stderr, "fsdev: No id specified\n");
@@ -71,10 +73,14 @@ int qemu_fsdev_add(QemuOpts *opts)
     fsle->fse.path = qemu_strdup(path);
     fsle->fse.security_model = qemu_strdup(sec_model);
     fsle->fse.ops = FsTypes[i].ops;
+    if (cache) {
+        fsle->fse.cache = qemu_strdup(cache);
+    } else {
+        fsle->fse.cache = NULL;
+    }
 
     QTAILQ_INSERT_TAIL(&fstype_entries, fsle, next);
     return 0;
-
 }
 
 FsTypeEntry *get_fsdev_fsentry(char *id)
diff --git a/fsdev/qemu-fsdev.h b/fsdev/qemu-fsdev.h
index a704043..d3a0fac 100644
--- a/fsdev/qemu-fsdev.h
+++ b/fsdev/qemu-fsdev.h
@@ -41,6 +41,7 @@ typedef struct FsTypeEntry {
     char *fsdev_id;
     char *path;
     char *security_model;
+    char *cache;
     FileOperations *ops;
 } FsTypeEntry;
 
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 293a562..75df1a1 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -70,17 +70,18 @@ static int omode_to_uflags(int8_t mode)
     return ret;
 }
 
-static int get_dotl_openflags(int oflags)
+static int get_dotl_openflags(V9fsState *s, int oflags)
 {
     int flags;
     /*
      * Since we can share the fd between multiple fids,
      * open the file in read write mode
      */
+    flags = s->ctx.open_flags;
     if ((oflags & O_ACCMODE) != O_RDONLY) {
-        flags = O_RDWR;
+        flags |= O_RDWR;
     } else {
-        flags = O_RDONLY;
+        flags |= O_RDONLY;
     }
     /*
      * If the client asked for any of the below flags we
@@ -1856,7 +1857,7 @@ static void v9fs_open_post_lstat(V9fsState *s, V9fsOpenState *vs, int err)
         v9fs_open_post_opendir(s, vs, err);
     } else {
         if (s->proto_version == V9FS_PROTO_2000L) {
-            flags = get_dotl_openflags(vs->mode);
+            flags = get_dotl_openflags(s, vs->mode);
         } else {
             flags = omode_to_uflags(vs->mode);
         }
@@ -1987,7 +1988,7 @@ static void v9fs_lcreate(V9fsState *s, V9fsPDU *pdu)
     v9fs_string_sprintf(&vs->fullname, "%s/%s", vs->fidp->fsmap.path.data,
              vs->name.data);
 
-    flags = get_dotl_openflags(flags);
+    flags = get_dotl_openflags(s, flags);
     vs->fidp->fsmap.fs.fd = v9fs_do_open2(s, vs->fullname.data, vs->fidp->uid,
                                           gid, flags | O_CREAT, mode);
     vs->fidp->fsmap.open_flags = flags;
@@ -3912,6 +3913,12 @@ VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf)
         exit(1);
     }
 
+    if (fse->cache && !strcmp(fse->cache, "none")) {
+        s->ctx.open_flags = O_SYNC;
+    } else {
+        s->ctx.open_flags = 0;
+    }
+
     s->ctx.fs_root = qemu_strdup(fse->path);
     len = strlen(conf->tag);
     if (len > MAX_TAG_LEN) {
diff --git a/hw/file-op-9p.h b/hw/file-op-9p.h
index e306305..8577020 100644
--- a/hw/file-op-9p.h
+++ b/hw/file-op-9p.h
@@ -54,6 +54,7 @@ typedef struct FsContext
     char *fs_root;
     SecModel fs_sm;
     uid_t uid;
+    int open_flags;
     struct xattr_operations **xops;
 } FsContext;
 
diff --git a/qemu-config.c b/qemu-config.c
index 965fa46..4d87a39 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -165,6 +165,9 @@ QemuOptsList qemu_fsdev_opts = {
         }, {
             .name = "security_model",
             .type = QEMU_OPT_STRING,
+        }, {
+            .name = "cache",
+            .type = QEMU_OPT_STRING,
         },
         { /*End of list */ }
     },
@@ -187,6 +190,9 @@ QemuOptsList qemu_virtfs_opts = {
         }, {
             .name = "security_model",
             .type = QEMU_OPT_STRING,
+        }, {
+            .name = "cache",
+            .type = QEMU_OPT_STRING,
         },
 
         { /*End of list */ }
diff --git a/qemu-options.hx b/qemu-options.hx
index 898561d..ad27bf0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -486,7 +486,8 @@ ETEXI
 DEFHEADING(File system options:)
 
 DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
-    "-fsdev local,id=id,path=path,security_model=[mapped|passthrough|none]\n",
+    "-fsdev local,id=id,path=path,security_model=[mapped|passthrough|none]\n"
+    "       [,cache=none]\n",
     QEMU_ARCH_ALL)
 
 STEXI
@@ -502,7 +503,7 @@ The specific Fstype will determine the applicable options.
 
 Options to each backend are described below.
 
-@item -fsdev local ,id=@var{id} ,path=@var{path} ,security_model=@var{security_model}
+@item -fsdev local ,id=@var{id} ,path=@var{path} ,security_model=@var{security_model}[,cache=@var{cache}]
 
 Create a file-system-"device" for local-filesystem.
 
@@ -513,13 +514,17 @@ Create a file-system-"device" for local-filesystem.
 @option{security_model} specifies the security model to be followed.
 @option{security_model} is required.
 
+@option{cache} specifies whether to skip the host page cache.
+@option{cache} is an optional argument.
+
 @end table
 ETEXI
 
 DEFHEADING(Virtual File system pass-through options:)
 
 DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
-    "-virtfs local,path=path,mount_tag=tag,security_model=[mapped|passthrough|none]\n",
+    "-virtfs local,path=path,mount_tag=tag,security_model=[mapped|passthrough|none]\n"
+    "        [,cache=none]\n",
     QEMU_ARCH_ALL)
 
 STEXI
@@ -535,7 +540,7 @@ The specific Fstype will determine the applicable options.
 
 Options to each backend are described below.
 
-@item -virtfs local ,path=@var{path} ,mount_tag=@var{mount_tag} ,security_model=@var{security_model}
+@item -virtfs local ,path=@var{path} ,mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,cache=@var{cache}]
 
 Create a Virtual file-system-pass through for local-filesystem.
 
@@ -546,10 +551,12 @@ Create a Virtual file-system-pass through for local-filesystem.
 @option{security_model} specifies the security model to be followed.
 @option{security_model} is required.
 
-
 @option{mount_tag} specifies the tag with which the exported file is mounted.
 @option{mount_tag} is required.
 
+@option{cache} specifies whether to skip the host page cache.
+@option{cache} is an optional argument.
+
 @end table
 ETEXI
 
diff --git a/vl.c b/vl.c
index 14255c4..5c76ece 100644
--- a/vl.c
+++ b/vl.c
@@ -2379,6 +2379,8 @@ int main(int argc, char **argv, char **envp)
                 }
                 break;
             case QEMU_OPTION_virtfs: {
+                const char *cache;
+                char *arg_cache = NULL;
                 char *arg_fsdev = NULL;
                 char *arg_9p = NULL;
                 int len = 0;
@@ -2404,7 +2406,18 @@ int main(int argc, char **argv, char **envp)
                     exit(1);
                 }
 
+                cache = qemu_opt_get(opts, "cache");
+                if (cache) {
+                    len = strlen(",cache=");
+                    len += strlen(cache);
+                    arg_cache = qemu_malloc(len+1);
+                    snprintf(arg_cache, (len+1), ",cache=%s", cache);
+                } else {
+                    arg_cache = (char *)"";
+                }
+
                 len = strlen(",id=,path=,security_model=");
+                len += strlen(arg_cache);
                 len += strlen(qemu_opt_get(opts, "fstype"));
                 len += strlen(qemu_opt_get(opts, "mount_tag"));
                 len += strlen(qemu_opt_get(opts, "path"));
@@ -2412,20 +2425,22 @@ int main(int argc, char **argv, char **envp)
                 arg_fsdev = qemu_malloc((len + 1) * sizeof(*arg_fsdev));
 
                 snprintf(arg_fsdev, (len + 1) * sizeof(*arg_fsdev),
-                         "%s,id=%s,path=%s,security_model=%s",
+                         "%s,id=%s,path=%s,security_model=%s%s",
                          qemu_opt_get(opts, "fstype"),
                          qemu_opt_get(opts, "mount_tag"),
                          qemu_opt_get(opts, "path"),
-                         qemu_opt_get(opts, "security_model"));
+                         qemu_opt_get(opts, "security_model"),
+                         arg_cache);
 
                 len = strlen("virtio-9p-pci,fsdev=,mount_tag=");
                 len += 2*strlen(qemu_opt_get(opts, "mount_tag"));
                 arg_9p = qemu_malloc((len + 1) * sizeof(*arg_9p));
 
                 snprintf(arg_9p, (len + 1) * sizeof(*arg_9p),
-                         "virtio-9p-pci,fsdev=%s,mount_tag=%s",
+                         "virtio-9p-pci,fsdev=%s,mount_tag=%s%s",
+                         qemu_opt_get(opts, "mount_tag"),
                          qemu_opt_get(opts, "mount_tag"),
-                         qemu_opt_get(opts, "mount_tag"));
+                         arg_cache);
 
                 if (!qemu_opts_parse(qemu_find_opts("fsdev"), arg_fsdev, 1)) {
                     fprintf(stderr, "parse error [fsdev]: %s\n", optarg);
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [Qemu-devel] [PATCH -V3 8/8] hw/9pfs: Skip file system sync if we have specified cache=none option
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
                   ` (5 preceding siblings ...)
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache Aneesh Kumar K.V
@ 2011-03-05 17:52 ` Aneesh Kumar K.V
  2011-03-13 15:46 ` [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Stefan Hajnoczi
  7 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K.V @ 2011-03-05 17:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: aliguori, Aneesh Kumar K.V

cache=none results in skipping the host page cache. So we can ignore
the tsyncfs request.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 hw/9pfs/virtio-9p.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 75df1a1..dceefd5 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -331,7 +331,10 @@ static int v9fs_do_lremovexattr(V9fsState *s, V9fsString *path,
 
 static int v9fs_do_syncfs(V9fsState *s, int fd)
 {
-    return s->ops->syncfs(&s->ctx, fd);
+    if (!(s->ctx.open_flags & O_SYNC)) {
+        return s->ops->syncfs(&s->ctx, fd);
+    }
+    return 0;
 }
 
 static void v9fs_string_init(V9fsString *str)
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
  2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
                   ` (6 preceding siblings ...)
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 8/8] hw/9pfs: Skip file system sync if we have specified cache=none option Aneesh Kumar K.V
@ 2011-03-13 15:46 ` Stefan Hajnoczi
  2011-03-13 19:06   ` Aneesh Kumar K. V
  7 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 15:46 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: aliguori, qemu-devel

On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  hw/9pfs/virtio-9p.c |  327 ++++++++++++++++++++++++++-------------------------
>  hw/9pfs/virtio-9p.h |   22 +++-
>  2 files changed, 184 insertions(+), 165 deletions(-)

I don't understand why this patch is necessary.  It introduces an
intermediate structure that doesn't seem to be useful by itself.
Don't we always need the V9fsFidState?

> @@ -185,17 +188,22 @@ typedef struct V9fsXattr
>     int flags;
>  } V9fsXattr;
>
> +typedef struct V9fsfidmap {

V9fsFidMap (naming convention)

> +    union {
> +        int fd;
> +        DIR *dir;
> +        V9fsXattr xattr;
> +    } fs;

The name "fs" is not meaningful.

> +    int fid_type;
> +    V9fsString path;
> +    int flags;
> +} V9fsFidMap;
> +
>  struct V9fsFidState
>  {
> -    int fid_type;
>     int32_t fid;
> -    V9fsString path;
> -    union {
> -       int fd;
> -       DIR *dir;
> -       V9fsXattr xattr;
> -    } fs;
>     uid_t uid;
> +    V9fsFidMap fsmap;

This name is confusing.  A "map" is usually a container that stores
key/value pairs.  V9fsFidMapEntry would be clearer.  But then I
thought that is what V9fsFidState is?

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support Aneesh Kumar K.V
@ 2011-03-13 16:08   ` Stefan Hajnoczi
  2011-03-13 18:57     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 16:08 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: aliguori, qemu-devel

On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
>
>  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
>  {
> -    return s->ops->open(&s->ctx, path->data, flags);
> +    int fd;
> +    fd = s->ops->open(&s->ctx, path->data, flags);
> +    if (fd > P9_FD_RECLAIM_THRES) {
> +        v9fs_reclaim_fd(s);
> +    }

I think the threshold should depend on the file descriptor ulimit.
The hardcoded constant doesn't work if the ulimit is set to 1000 or
less (it would cause other users in QEMU to hit EMFILE errors).

> +            if (f->fsmap.fid_type == P9_FID_FILE) {
> +                /* FIXME!! should we remember the open flags ?*/
> +                if (f->fsmap.fs.fd == -1) {
> +                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path, O_RDWR);
> +                }

Please address the FIXME.  I think the case where O_RDWR breaks is if
QEMU has permissions to open the file for read only.  The the client
is able to open the file for read but when the file descriptor is
resurrected we'll get EPERM here.

> @@ -516,7 +600,10 @@ static int free_fid(V9fsState *s, int32_t fid)
>     *fidpp = fidp->next;
>
>     if (fidp->fsmap.fid_type == P9_FID_FILE) {
> -        v9fs_do_close(s, fidp->fsmap.fs.fd);
> +        /* I we reclaimed the fd no need to close */

s/I //

> +        if (fidp->fsmap.fs.fd != -1) {
> +            v9fs_do_close(s, fidp->fsmap.fs.fd);
> +        }
>     } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
>         v9fs_do_closedir(s, fidp->fsmap.fs.dir);
>     } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
> @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
>         err = -EINVAL;
>         goto out;
>     }
> -
> +    /*
> +     * IF the file is unlinked, we cannot reopen
> +     * the file later. So don't reclaim fd
> +     */
> +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);

This poses a problem for the case where guest and host are both
accessing the file system.  If the fd is reclaimed and the host
deletes the file, then the guest cannot access its open file anymore.

The same issue also affects rename and has not been covered by this patch.

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 3/8] hw/9pfs: Use v9fs_do_close instead of close
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 3/8] hw/9pfs: Use v9fs_do_close instead of close Aneesh Kumar K.V
@ 2011-03-13 16:10   ` Stefan Hajnoczi
  2011-03-13 18:58     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 16:10 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: aliguori, qemu-devel

On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> @@ -1873,7 +1873,7 @@ static void v9fs_post_lcreate(V9fsState *s, V9fsLcreateState *vs, int err)
>         vs->fidp->fsmap.fid_type = P9_FID_NONE;
>         err = -errno;
>         if (vs->fidp->fsmap.fs.fd > 0) {

For completeness: vs->fdip->fsmap.fs.fd >= 0

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 4/8] hw/9pfs: Implement syncfs
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 4/8] hw/9pfs: Implement syncfs Aneesh Kumar K.V
@ 2011-03-13 16:24   ` Stefan Hajnoczi
  2011-03-13 18:59     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 16:24 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: aliguori, Venkateswararao Jujjuri (JV), qemu-devel

On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> +static void v9fs_post_do_syncfs(V9fsState *s, V9fsPDU *pdu, int err)
> +{
> +    if (err == -1) {
> +        err = -errno;
> +    }

errno may have been clobbered by any standard library function/syscall
invoked by this thread before v9fs_post_do_syncfs() was called.  Why
not pass the -errno in the function argument?

static void v9fs_post_do_syncfs(V9fsState *s, V9fsPDU *pdu, int err)
{
    complete_pdu(s, pdu, err);
}

I strongly suggest doing a separate patch series to clean up of all of
virtio-9p to stop using errno.  It's bad practice and I've caught
several errno mistakes in the virtio-9p patches I've reviewed - I bet
there are other instances lurking around.

> +static void v9fs_syncfs(V9fsState *s, V9fsPDU *pdu)
> +{
> +    int err;
> +    int32_t fid;
> +    size_t offset = 7;
> +    V9fsFidState *fidp;
> +
> +    pdu_unmarshal(pdu, offset, "d", &fid);
> +    fidp = lookup_fid(s, fid);
> +    if (fidp == NULL) {
> +        err = -ENOENT;
> +        v9fs_post_do_syncfs(s, pdu, err);
> +        return;
> +    }

This wasn't setting errno but passed the error in err.  It can stay
like this if you change v9fs_post_do_syncfs().

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 5/8] hw/9pfs: Add open flag to fid
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 5/8] hw/9pfs: Add open flag to fid Aneesh Kumar K.V
@ 2011-03-13 16:38   ` Stefan Hajnoczi
  2011-03-13 19:01     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 16:38 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: aliguori, qemu-devel

On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> +static int get_dotl_openflags(int oflags)
> +{
> +    int flags;
> +    /*
> +     * Since we can share the fd between multiple fids,
> +     * open the file in read write mode
> +     */

I didn't know that fds are shared between fids.  Also this code does
not always open O_RDWR.  This comment is incorrect (perhaps a later
patch changes assumptions, I haven't looked yet, but introducing
temporary inconsistencies makes it difficult to review and potentially
confusing for git-bisect users).

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 6/8] hw/9pfs: Add directory reclaim support
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 6/8] hw/9pfs: Add directory reclaim support Aneesh Kumar K.V
@ 2011-03-13 16:42   ` Stefan Hajnoczi
  2011-03-13 19:02     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 16:42 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: aliguori, qemu-devel

On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
>  hw/9pfs/virtio-9p.c |   21 +++++++++++++++++++--
>  1 files changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
> index 1fa7256..293a562 100644
> --- a/hw/9pfs/virtio-9p.c
> +++ b/hw/9pfs/virtio-9p.c
> @@ -142,7 +142,12 @@ static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
>
>  static DIR *v9fs_do_opendir(V9fsState *s, V9fsString *path)
>  {
> -    return s->ops->opendir(&s->ctx, path->data);
> +    DIR *dir;
> +    dir = s->ops->opendir(&s->ctx, path->data);
> +    if (dirfd(dir) > P9_FD_RECLAIM_THRES) {
> +        v9fs_reclaim_fd(s);
> +    }

dirfd(NULL) will crash so we need to check !dir first:

$ cat dirfd.c
#include <stdio.h>

int main(int argc, char **argv)
{
        printf("%d\n", dirfd(NULL));
        return 0;
}

$ gcc -o dirfd dirfd.c
$ ./dirfd
Segmentation fault

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache Aneesh Kumar K.V
@ 2011-03-13 17:23   ` Stefan Hajnoczi
  2011-03-13 19:04     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 17:23 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: aliguori, qemu-devel

On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> cache=none implies the file are opened in the host with O_SYNC open flag

O_SYNC does not bypass the host page cache.  It ensures that writes
only complete once data has been written to the disk.

O_DIRECT is a hint to bypass the host page cache when possible.

A boolean on|off option would be nicer than an option that takes the
special string "none".  For example, direct=on|off.  It also makes the
code nicer by using bools instead of strdup strings that get leaked.

> @@ -2379,6 +2379,8 @@ int main(int argc, char **argv, char **envp)
>                 }
>                 break;
>             case QEMU_OPTION_virtfs: {
> +                const char *cache;
> +                char *arg_cache = NULL;
>                 char *arg_fsdev = NULL;
>                 char *arg_9p = NULL;
>                 int len = 0;
> @@ -2404,7 +2406,18 @@ int main(int argc, char **argv, char **envp)
>                     exit(1);
>                 }
>
> +                cache = qemu_opt_get(opts, "cache");
> +                if (cache) {
> +                    len = strlen(",cache=");
> +                    len += strlen(cache);
> +                    arg_cache = qemu_malloc(len+1);
> +                    snprintf(arg_cache, (len+1), ",cache=%s", cache);
> +                } else {
> +                    arg_cache = (char *)"";
> +                }
> +
>                 len = strlen(",id=,path=,security_model=");
> +                len += strlen(arg_cache);
>                 len += strlen(qemu_opt_get(opts, "fstype"));
>                 len += strlen(qemu_opt_get(opts, "mount_tag"));
>                 len += strlen(qemu_opt_get(opts, "path"));
> @@ -2412,20 +2425,22 @@ int main(int argc, char **argv, char **envp)
>                 arg_fsdev = qemu_malloc((len + 1) * sizeof(*arg_fsdev));
>
>                 snprintf(arg_fsdev, (len + 1) * sizeof(*arg_fsdev),
> -                         "%s,id=%s,path=%s,security_model=%s",
> +                         "%s,id=%s,path=%s,security_model=%s%s",
>                          qemu_opt_get(opts, "fstype"),
>                          qemu_opt_get(opts, "mount_tag"),
>                          qemu_opt_get(opts, "path"),
> -                         qemu_opt_get(opts, "security_model"));
> +                         qemu_opt_get(opts, "security_model"),
> +                         arg_cache);
>
>                 len = strlen("virtio-9p-pci,fsdev=,mount_tag=");
>                 len += 2*strlen(qemu_opt_get(opts, "mount_tag"));
>                 arg_9p = qemu_malloc((len + 1) * sizeof(*arg_9p));
>
>                 snprintf(arg_9p, (len + 1) * sizeof(*arg_9p),
> -                         "virtio-9p-pci,fsdev=%s,mount_tag=%s",
> +                         "virtio-9p-pci,fsdev=%s,mount_tag=%s%s",
> +                         qemu_opt_get(opts, "mount_tag"),
>                          qemu_opt_get(opts, "mount_tag"),
> -                         qemu_opt_get(opts, "mount_tag"));
> +                         arg_cache);
>
>                 if (!qemu_opts_parse(qemu_find_opts("fsdev"), arg_fsdev, 1)) {
>                     fprintf(stderr, "parse error [fsdev]: %s\n", optarg);

arg_cache is leaked.

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support
  2011-03-13 16:08   ` Stefan Hajnoczi
@ 2011-03-13 18:57     ` Aneesh Kumar K. V
  2011-03-14 10:13       ` Stefan Hajnoczi
  0 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-13 18:57 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 16:08:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
> >
> >  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
> >  {
> > -    return s->ops->open(&s->ctx, path->data, flags);
> > +    int fd;
> > +    fd = s->ops->open(&s->ctx, path->data, flags);
> > +    if (fd > P9_FD_RECLAIM_THRES) {
> > +        v9fs_reclaim_fd(s);
> > +    }
> 
> I think the threshold should depend on the file descriptor ulimit.
> The hardcoded constant doesn't work if the ulimit is set to 1000 or
> less (it would cause other users in QEMU to hit EMFILE errors).

Yes. That is suppose to be a follow up patch. I had that set to 100 for
all the early testing.

> 
> > +            if (f->fsmap.fid_type == P9_FID_FILE) {
> > +                /* FIXME!! should we remember the open flags ?*/
> > +                if (f->fsmap.fs.fd == -1) {
> > +                    f->fsmap.fs.fd = v9fs_do_open(s, &f->fsmap.path, O_RDWR);
> > +                }
> 
> Please address the FIXME.  I think the case where O_RDWR breaks is if
> QEMU has permissions to open the file for read only.  The the client
> is able to open the file for read but when the file descriptor is
> resurrected we'll get EPERM here.

The FIXME is fixed in the follow up patch  (patch 5)

> 
> > @@ -516,7 +600,10 @@ static int free_fid(V9fsState *s, int32_t fid)
> >     *fidpp = fidp->next;
> >
> >     if (fidp->fsmap.fid_type == P9_FID_FILE) {
> > -        v9fs_do_close(s, fidp->fsmap.fs.fd);
> > +        /* I we reclaimed the fd no need to close */
> 
> s/I //
> 
> > +        if (fidp->fsmap.fs.fd != -1) {
> > +            v9fs_do_close(s, fidp->fsmap.fs.fd);
> > +        }
> >     } else if (fidp->fsmap.fid_type == P9_FID_DIR) {
> >         v9fs_do_closedir(s, fidp->fsmap.fs.dir);
> >     } else if (fidp->fsmap.fid_type == P9_FID_XATTR) {
> > @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
> >         err = -EINVAL;
> >         goto out;
> >     }
> > -
> > +    /*
> > +     * IF the file is unlinked, we cannot reopen
> > +     * the file later. So don't reclaim fd
> > +     */
> > +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
> 
> This poses a problem for the case where guest and host are both
> accessing the file system.  If the fd is reclaimed and the host
> deletes the file, then the guest cannot access its open file anymore.
> 
> The same issue also affects rename and has not been covered by this patch.
> 

Currently virtFS don't handle the host rename/unlink. That we walk
a name and get the fid and then use the fid to open the file. In between
if the file get removed/renamed we will get an EINVAL.

All that will go away once we switch to handle based open.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 3/8] hw/9pfs: Use v9fs_do_close instead of close
  2011-03-13 16:10   ` Stefan Hajnoczi
@ 2011-03-13 18:58     ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-13 18:58 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 16:10:20 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > @@ -1873,7 +1873,7 @@ static void v9fs_post_lcreate(V9fsState *s, V9fsLcreateState *vs, int err)
> >         vs->fidp->fsmap.fid_type = P9_FID_NONE;
> >         err = -errno;
> >         if (vs->fidp->fsmap.fs.fd > 0) {
> 
> For completeness: vs->fdip->fsmap.fs.fd >= 0
> 

Will update

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 4/8] hw/9pfs: Implement syncfs
  2011-03-13 16:24   ` Stefan Hajnoczi
@ 2011-03-13 18:59     ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-13 18:59 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, Venkateswararao Jujjuri (JV), qemu-devel

On Sun, 13 Mar 2011 16:24:13 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > +static void v9fs_post_do_syncfs(V9fsState *s, V9fsPDU *pdu, int err)
> > +{
> > +    if (err == -1) {
> > +        err = -errno;
> > +    }
> 
> errno may have been clobbered by any standard library function/syscall
> invoked by this thread before v9fs_post_do_syncfs() was called.  Why
> not pass the -errno in the function argument?
> 
> static void v9fs_post_do_syncfs(V9fsState *s, V9fsPDU *pdu, int err)
> {
>     complete_pdu(s, pdu, err);
> }
> 
> I strongly suggest doing a separate patch series to clean up of all of
> virtio-9p to stop using errno.  It's bad practice and I've caught
> several errno mistakes in the virtio-9p patches I've reviewed - I bet
> there are other instances lurking around.

Agreed.  I will look at the error handling more closely

> 
> > +static void v9fs_syncfs(V9fsState *s, V9fsPDU *pdu)
> > +{
> > +    int err;
> > +    int32_t fid;
> > +    size_t offset = 7;
> > +    V9fsFidState *fidp;
> > +
> > +    pdu_unmarshal(pdu, offset, "d", &fid);
> > +    fidp = lookup_fid(s, fid);
> > +    if (fidp == NULL) {
> > +        err = -ENOENT;
> > +        v9fs_post_do_syncfs(s, pdu, err);
> > +        return;
> > +    }
> 
> This wasn't setting errno but passed the error in err.  It can stay
> like this if you change v9fs_post_do_syncfs().
> 

nice catch. Will update.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 5/8] hw/9pfs: Add open flag to fid
  2011-03-13 16:38   ` Stefan Hajnoczi
@ 2011-03-13 19:01     ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-13 19:01 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 16:38:39 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > +static int get_dotl_openflags(int oflags)
> > +{
> > +    int flags;
> > +    /*
> > +     * Since we can share the fd between multiple fids,
> > +     * open the file in read write mode
> > +     */
> 
> I didn't know that fds are shared between fids.  Also this code does
> not always open O_RDWR.  This comment is incorrect (perhaps a later
> patch changes assumptions, I haven't looked yet, but introducing
> temporary inconsistencies makes it difficult to review and potentially
> confusing for git-bisect users).
> 

What i wanted to say there was, we can possibly look at sharing fd
between multiple fids. Will update the comment.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 6/8] hw/9pfs: Add directory reclaim support
  2011-03-13 16:42   ` Stefan Hajnoczi
@ 2011-03-13 19:02     ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-13 19:02 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 16:42:56 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> > ---
> >  hw/9pfs/virtio-9p.c |   21 +++++++++++++++++++--
> >  1 files changed, 19 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
> > index 1fa7256..293a562 100644
> > --- a/hw/9pfs/virtio-9p.c
> > +++ b/hw/9pfs/virtio-9p.c
> > @@ -142,7 +142,12 @@ static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
> >
> >  static DIR *v9fs_do_opendir(V9fsState *s, V9fsString *path)
> >  {
> > -    return s->ops->opendir(&s->ctx, path->data);
> > +    DIR *dir;
> > +    dir = s->ops->opendir(&s->ctx, path->data);
> > +    if (dirfd(dir) > P9_FD_RECLAIM_THRES) {
> > +        v9fs_reclaim_fd(s);
> > +    }
> 
> dirfd(NULL) will crash so we need to check !dir first:
> 

Will add the check

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-13 17:23   ` Stefan Hajnoczi
@ 2011-03-13 19:04     ` Aneesh Kumar K. V
  2011-03-13 20:57       ` Stefan Hajnoczi
  2011-03-14 10:20       ` Stefan Hajnoczi
  0 siblings, 2 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-13 19:04 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > cache=none implies the file are opened in the host with O_SYNC open flag
> 
> O_SYNC does not bypass the host page cache.  It ensures that writes
> only complete once data has been written to the disk.
> 
> O_DIRECT is a hint to bypass the host page cache when possible.
> 
> A boolean on|off option would be nicer than an option that takes the
> special string "none".  For example, direct=on|off.  It also makes the
> code nicer by using bools instead of strdup strings that get leaked.
> 

What i wanted is the O_SYNC behavior. Well the comment should be updated. I
want to make sure that we don't have dirty data in host page cache after
a write. It is always good to make read hit the page cache

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
  2011-03-13 15:46 ` [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Stefan Hajnoczi
@ 2011-03-13 19:06   ` Aneesh Kumar K. V
  2011-03-13 20:53     ` Stefan Hajnoczi
  0 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-13 19:06 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 15:46:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> > ---
> >  hw/9pfs/virtio-9p.c |  327 ++++++++++++++++++++++++++-------------------------
> >  hw/9pfs/virtio-9p.h |   22 +++-
> >  2 files changed, 184 insertions(+), 165 deletions(-)
> 
> I don't understand why this patch is necessary.  It introduces an
> intermediate structure that doesn't seem to be useful by itself.
> Don't we always need the V9fsFidState?
> 
> > @@ -185,17 +188,22 @@ typedef struct V9fsXattr
> >     int flags;
> >  } V9fsXattr;
> >
> > +typedef struct V9fsfidmap {
> 
> V9fsFidMap (naming convention)
> 
> > +    union {
> > +        int fd;
> > +        DIR *dir;
> > +        V9fsXattr xattr;
> > +    } fs;
> 
> The name "fs" is not meaningful.
> 
> > +    int fid_type;
> > +    V9fsString path;
> > +    int flags;
> > +} V9fsFidMap;
> > +
> >  struct V9fsFidState
> >  {
> > -    int fid_type;
> >     int32_t fid;
> > -    V9fsString path;
> > -    union {
> > -       int fd;
> > -       DIR *dir;
> > -       V9fsXattr xattr;
> > -    } fs;
> >     uid_t uid;
> > +    V9fsFidMap fsmap;
> 
> This name is confusing.  A "map" is usually a container that stores
> key/value pairs.  V9fsFidMapEntry would be clearer.  But then I
> thought that is what V9fsFidState is?

I am bad at naming. I wanted to indicate something that can be shared
across multiple fids and also indicate the local file system
"mapping"/data. I will take any suggestion.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
  2011-03-13 19:06   ` Aneesh Kumar K. V
@ 2011-03-13 20:53     ` Stefan Hajnoczi
  2011-03-14 10:23       ` Stefan Hajnoczi
  2011-03-15  9:20       ` Aneesh Kumar K. V
  0 siblings, 2 replies; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 20:53 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Sun, Mar 13, 2011 at 7:06 PM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Sun, 13 Mar 2011 15:46:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > @@ -185,17 +188,22 @@ typedef struct V9fsXattr
>> >     int flags;
>> >  } V9fsXattr;
>> >
>> > +typedef struct V9fsfidmap {
>>
>> V9fsFidMap (naming convention)
>>
>> > +    union {
>> > +        int fd;
>> > +        DIR *dir;
>> > +        V9fsXattr xattr;
>> > +    } fs;
>>
>> The name "fs" is not meaningful.
>>
>> > +    int fid_type;
>> > +    V9fsString path;
>> > +    int flags;
>> > +} V9fsFidMap;
>> > +
>> >  struct V9fsFidState
>> >  {
>> > -    int fid_type;
>> >     int32_t fid;
>> > -    V9fsString path;
>> > -    union {
>> > -       int fd;
>> > -       DIR *dir;
>> > -       V9fsXattr xattr;
>> > -    } fs;
>> >     uid_t uid;
>> > +    V9fsFidMap fsmap;
>>
>> This name is confusing.  A "map" is usually a container that stores
>> key/value pairs.  V9fsFidMapEntry would be clearer.  But then I
>> thought that is what V9fsFidState is?
>
> I am bad at naming. I wanted to indicate something that can be shared
> across multiple fids and also indicate the local file system
> "mapping"/data. I will take any suggestion.

Where does sharing happen, I didn't notice any code that shares fds
between fids?

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-13 19:04     ` Aneesh Kumar K. V
@ 2011-03-13 20:57       ` Stefan Hajnoczi
  2011-03-15  8:36         ` Aneesh Kumar K. V
  2011-03-14 10:20       ` Stefan Hajnoczi
  1 sibling, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-13 20:57 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Sun, Mar 13, 2011 at 7:04 PM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > cache=none implies the file are opened in the host with O_SYNC open flag
>>
>> O_SYNC does not bypass the host page cache.  It ensures that writes
>> only complete once data has been written to the disk.
>>
>> O_DIRECT is a hint to bypass the host page cache when possible.
>>
>> A boolean on|off option would be nicer than an option that takes the
>> special string "none".  For example, direct=on|off.  It also makes the
>> code nicer by using bools instead of strdup strings that get leaked.
>>
>
> What i wanted is the O_SYNC behavior. Well the comment should be updated. I
> want to make sure that we don't have dirty data in host page cache after
> a write. It is always good to make read hit the page cache

I have sent a patch to clean up the -virtfs option parsing, you are
CCed.  I think it will make it easier to add a new sync=on|off option.

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support
  2011-03-13 18:57     ` Aneesh Kumar K. V
@ 2011-03-14 10:13       ` Stefan Hajnoczi
  2011-03-15  8:35         ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-14 10:13 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Sun, Mar 13, 2011 at 6:57 PM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Sun, 13 Mar 2011 16:08:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
>> >
>> >  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
>> >  {
>> > -    return s->ops->open(&s->ctx, path->data, flags);
>> > +    int fd;
>> > +    fd = s->ops->open(&s->ctx, path->data, flags);
>> > +    if (fd > P9_FD_RECLAIM_THRES) {
>> > +        v9fs_reclaim_fd(s);
>> > +    }
>>
>> I think the threshold should depend on the file descriptor ulimit.
>> The hardcoded constant doesn't work if the ulimit is set to 1000 or
>> less (it would cause other users in QEMU to hit EMFILE errors).
>
> Yes. That is suppose to be a follow up patch. I had that set to 100 for
> all the early testing.

Using getrlimit(2) to choose a good threshold at runtime shouldn't be
a lot of code.  Please add it to this patch so the threshold isn't
arbitrary and possibly ineffective due to ulimit.

>> > @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
>> >         err = -EINVAL;
>> >         goto out;
>> >     }
>> > -
>> > +    /*
>> > +     * IF the file is unlinked, we cannot reopen
>> > +     * the file later. So don't reclaim fd
>> > +     */
>> > +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
>>
>> This poses a problem for the case where guest and host are both
>> accessing the file system.  If the fd is reclaimed and the host
>> deletes the file, then the guest cannot access its open file anymore.
>>
>> The same issue also affects rename and has not been covered by this patch.
>>
>
> Currently virtFS don't handle the host rename/unlink. That we walk
> a name and get the fid and then use the fid to open the file. In between
> if the file get removed/renamed we will get an EINVAL.
>
> All that will go away once we switch to handle based open.

Can you explain this more?  Will multiple entities be able to safely
use the file system (e.g. host and guest)?

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-13 19:04     ` Aneesh Kumar K. V
  2011-03-13 20:57       ` Stefan Hajnoczi
@ 2011-03-14 10:20       ` Stefan Hajnoczi
  2011-03-15  9:19         ` Aneesh Kumar K. V
  1 sibling, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-14 10:20 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Sun, Mar 13, 2011 at 7:04 PM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > cache=none implies the file are opened in the host with O_SYNC open flag
>>
>> O_SYNC does not bypass the host page cache.  It ensures that writes
>> only complete once data has been written to the disk.
>>
>> O_DIRECT is a hint to bypass the host page cache when possible.
>>
>> A boolean on|off option would be nicer than an option that takes the
>> special string "none".  For example, direct=on|off.  It also makes the
>> code nicer by using bools instead of strdup strings that get leaked.
>>
>
> What i wanted is the O_SYNC behavior. Well the comment should be updated. I
> want to make sure that we don't have dirty data in host page cache after
> a write. It is always good to make read hit the page cache

Why silently enforce O_SYNC on the server side?  The client does not
know whether or not O_SYNC is in effect, cannot take advantage of that
knowledge, and cannot control it.

I think a more useful solution is a 9p client mount option called
"sync" that caused the client to always add O_SYNC and skip syncfs.
The whole stack becomes aware of O_SYNC and clients are in control
over whether or not they need O_SYNC semantics.

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
  2011-03-13 20:53     ` Stefan Hajnoczi
@ 2011-03-14 10:23       ` Stefan Hajnoczi
  2011-03-15  9:20       ` Aneesh Kumar K. V
  1 sibling, 0 replies; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-14 10:23 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Sun, Mar 13, 2011 at 8:53 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sun, Mar 13, 2011 at 7:06 PM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> On Sun, 13 Mar 2011 15:46:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>>> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>>> > @@ -185,17 +188,22 @@ typedef struct V9fsXattr
>>> >     int flags;
>>> >  } V9fsXattr;
>>> >
>>> > +typedef struct V9fsfidmap {
>>>
>>> V9fsFidMap (naming convention)
>>>
>>> > +    union {
>>> > +        int fd;
>>> > +        DIR *dir;
>>> > +        V9fsXattr xattr;
>>> > +    } fs;
>>>
>>> The name "fs" is not meaningful.
>>>
>>> > +    int fid_type;
>>> > +    V9fsString path;
>>> > +    int flags;
>>> > +} V9fsFidMap;
>>> > +
>>> >  struct V9fsFidState
>>> >  {
>>> > -    int fid_type;
>>> >     int32_t fid;
>>> > -    V9fsString path;
>>> > -    union {
>>> > -       int fd;
>>> > -       DIR *dir;
>>> > -       V9fsXattr xattr;
>>> > -    } fs;
>>> >     uid_t uid;
>>> > +    V9fsFidMap fsmap;
>>>
>>> This name is confusing.  A "map" is usually a container that stores
>>> key/value pairs.  V9fsFidMapEntry would be clearer.  But then I
>>> thought that is what V9fsFidState is?
>>
>> I am bad at naming. I wanted to indicate something that can be shared
>> across multiple fids and also indicate the local file system
>> "mapping"/data. I will take any suggestion.
>
> Where does sharing happen, I didn't notice any code that shares fds
> between fids?

I saw your response to a later patch in this series that fd sharing is
not implemented.  In that case this patch doesn't add value, it just
makes the code harder to understand by introducing an unused level of
indirection for which we don't have a good name yet.

Perhaps drop this patch from the series and send it later if you
implement fd sharing.

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support
  2011-03-14 10:13       ` Stefan Hajnoczi
@ 2011-03-15  8:35         ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-15  8:35 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Mon, 14 Mar 2011 10:13:59 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sun, Mar 13, 2011 at 6:57 PM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > On Sun, 13 Mar 2011 16:08:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> > @@ -107,7 +108,12 @@ static int v9fs_do_closedir(V9fsState *s, DIR *dir)
> >> >
> >> >  static int v9fs_do_open(V9fsState *s, V9fsString *path, int flags)
> >> >  {
> >> > -    return s->ops->open(&s->ctx, path->data, flags);
> >> > +    int fd;
> >> > +    fd = s->ops->open(&s->ctx, path->data, flags);
> >> > +    if (fd > P9_FD_RECLAIM_THRES) {
> >> > +        v9fs_reclaim_fd(s);
> >> > +    }
> >>
> >> I think the threshold should depend on the file descriptor ulimit.
> >> The hardcoded constant doesn't work if the ulimit is set to 1000 or
> >> less (it would cause other users in QEMU to hit EMFILE errors).
> >
> > Yes. That is suppose to be a follow up patch. I had that set to 100 for
> > all the early testing.
> 
> Using getrlimit(2) to choose a good threshold at runtime shouldn't be
> a lot of code.  Please add it to this patch so the threshold isn't
> arbitrary and possibly ineffective due to ulimit.

ok.

> 
> >> > @@ -2719,7 +2806,11 @@ static void v9fs_remove(V9fsState *s, V9fsPDU *pdu)
> >> >         err = -EINVAL;
> >> >         goto out;
> >> >     }
> >> > -
> >> > +    /*
> >> > +     * IF the file is unlinked, we cannot reopen
> >> > +     * the file later. So don't reclaim fd
> >> > +     */
> >> > +    v9fs_mark_fids_unreclaim(s, &vs->fidp->fsmap.path);
> >>
> >> This poses a problem for the case where guest and host are both
> >> accessing the file system.  If the fd is reclaimed and the host
> >> deletes the file, then the guest cannot access its open file anymore.
> >>
> >> The same issue also affects rename and has not been covered by this patch.
> >>
> >
> > Currently virtFS don't handle the host rename/unlink. That we walk
> > a name and get the fid and then use the fid to open the file. In between
> > if the file get removed/renamed we will get an EINVAL.
> >
> > All that will go away once we switch to handle based open.
> 
> Can you explain this more?  Will multiple entities be able to safely
> use the file system (e.g. host and guest)?

handles are stable across renames. So even if host rename the file, qemu
will be able to access it. But we still won't be able to handle unlink
on host. But that is true with even other file servers. They do get
ESTALE in that case.


-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-13 20:57       ` Stefan Hajnoczi
@ 2011-03-15  8:36         ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-15  8:36 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 20:57:12 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sun, Mar 13, 2011 at 7:04 PM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> > cache=none implies the file are opened in the host with O_SYNC open flag
> >>
> >> O_SYNC does not bypass the host page cache.  It ensures that writes
> >> only complete once data has been written to the disk.
> >>
> >> O_DIRECT is a hint to bypass the host page cache when possible.
> >>
> >> A boolean on|off option would be nicer than an option that takes the
> >> special string "none".  For example, direct=on|off.  It also makes the
> >> code nicer by using bools instead of strdup strings that get leaked.
> >>
> >
> > What i wanted is the O_SYNC behavior. Well the comment should be updated. I
> > want to make sure that we don't have dirty data in host page cache after
> > a write. It is always good to make read hit the page cache
> 
> I have sent a patch to clean up the -virtfs option parsing, you are
> CCed.  I think it will make it easier to add a new sync=on|off option.

Absolutely. So what i will do is i will carry the patch in the series
and later will drop the same once your changes get pushed upstream. 

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-14 10:20       ` Stefan Hajnoczi
@ 2011-03-15  9:19         ` Aneesh Kumar K. V
  2011-03-15 11:11           ` Stefan Hajnoczi
  0 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-15  9:19 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Mon, 14 Mar 2011 10:20:57 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sun, Mar 13, 2011 at 7:04 PM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> > cache=none implies the file are opened in the host with O_SYNC open flag
> >>
> >> O_SYNC does not bypass the host page cache.  It ensures that writes
> >> only complete once data has been written to the disk.
> >>
> >> O_DIRECT is a hint to bypass the host page cache when possible.
> >>
> >> A boolean on|off option would be nicer than an option that takes the
> >> special string "none".  For example, direct=on|off.  It also makes the
> >> code nicer by using bools instead of strdup strings that get leaked.
> >>
> >
> > What i wanted is the O_SYNC behavior. Well the comment should be updated. I
> > want to make sure that we don't have dirty data in host page cache after
> > a write. It is always good to make read hit the page cache
> 
> Why silently enforce O_SYNC on the server side?  The client does not
> know whether or not O_SYNC is in effect, cannot take advantage of that
> knowledge, and cannot control it.
> 
> I think a more useful solution is a 9p client mount option called
> "sync" that caused the client to always add O_SYNC and skip syncfs.
> The whole stack becomes aware of O_SYNC and clients are in control
> over whether or not they need O_SYNC semantics.

The cache=none specifically enables us to ignore the tsyncfs request on
host. tsyncfs on host can be really slow in certain setup.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
  2011-03-13 20:53     ` Stefan Hajnoczi
  2011-03-14 10:23       ` Stefan Hajnoczi
@ 2011-03-15  9:20       ` Aneesh Kumar K. V
  2011-03-15 10:38         ` Stefan Hajnoczi
  1 sibling, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-15  9:20 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Sun, 13 Mar 2011 20:53:41 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Sun, Mar 13, 2011 at 7:06 PM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > On Sun, 13 Mar 2011 15:46:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> > @@ -185,17 +188,22 @@ typedef struct V9fsXattr
> >> >     int flags;
> >> >  } V9fsXattr;
> >> >
> >> > +typedef struct V9fsfidmap {
> >>
> >> V9fsFidMap (naming convention)
> >>
> >> > +    union {
> >> > +        int fd;
> >> > +        DIR *dir;
> >> > +        V9fsXattr xattr;
> >> > +    } fs;
> >>
> >> The name "fs" is not meaningful.
> >>
> >> > +    int fid_type;
> >> > +    V9fsString path;
> >> > +    int flags;
> >> > +} V9fsFidMap;
> >> > +
> >> >  struct V9fsFidState
> >> >  {
> >> > -    int fid_type;
> >> >     int32_t fid;
> >> > -    V9fsString path;
> >> > -    union {
> >> > -       int fd;
> >> > -       DIR *dir;
> >> > -       V9fsXattr xattr;
> >> > -    } fs;
> >> >     uid_t uid;
> >> > +    V9fsFidMap fsmap;
> >>
> >> This name is confusing.  A "map" is usually a container that stores
> >> key/value pairs.  V9fsFidMapEntry would be clearer.  But then I
> >> thought that is what V9fsFidState is?
> >
> > I am bad at naming. I wanted to indicate something that can be shared
> > across multiple fids and also indicate the local file system
> > "mapping"/data. I will take any suggestion.
> 
> Where does sharing happen, I didn't notice any code that shares fds
> between fids?

That patch is not yet there. We can only share fd if they open flags
match. Hence making sure we open files on host with limited set of open
flags which enables us much better sharing.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
  2011-03-15  9:20       ` Aneesh Kumar K. V
@ 2011-03-15 10:38         ` Stefan Hajnoczi
  2011-03-15 12:27           ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-15 10:38 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Tue, Mar 15, 2011 at 9:20 AM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Sun, 13 Mar 2011 20:53:41 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Sun, Mar 13, 2011 at 7:06 PM, Aneesh Kumar K. V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > On Sun, 13 Mar 2011 15:46:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> >> > @@ -185,17 +188,22 @@ typedef struct V9fsXattr
>> >> >     int flags;
>> >> >  } V9fsXattr;
>> >> >
>> >> > +typedef struct V9fsfidmap {
>> >>
>> >> V9fsFidMap (naming convention)
>> >>
>> >> > +    union {
>> >> > +        int fd;
>> >> > +        DIR *dir;
>> >> > +        V9fsXattr xattr;
>> >> > +    } fs;
>> >>
>> >> The name "fs" is not meaningful.
>> >>
>> >> > +    int fid_type;
>> >> > +    V9fsString path;
>> >> > +    int flags;
>> >> > +} V9fsFidMap;
>> >> > +
>> >> >  struct V9fsFidState
>> >> >  {
>> >> > -    int fid_type;
>> >> >     int32_t fid;
>> >> > -    V9fsString path;
>> >> > -    union {
>> >> > -       int fd;
>> >> > -       DIR *dir;
>> >> > -       V9fsXattr xattr;
>> >> > -    } fs;
>> >> >     uid_t uid;
>> >> > +    V9fsFidMap fsmap;
>> >>
>> >> This name is confusing.  A "map" is usually a container that stores
>> >> key/value pairs.  V9fsFidMapEntry would be clearer.  But then I
>> >> thought that is what V9fsFidState is?
>> >
>> > I am bad at naming. I wanted to indicate something that can be shared
>> > across multiple fids and also indicate the local file system
>> > "mapping"/data. I will take any suggestion.
>>
>> Where does sharing happen, I didn't notice any code that shares fds
>> between fids?
>
> That patch is not yet there. We can only share fd if they open flags
> match. Hence making sure we open files on host with limited set of open
> flags which enables us much better sharing.

Tracking open flags is fine and is needed for fd reclaim.  But
splitting V9fsFidState into the V9fsFidMap structure and all the churn
that causes to the code isn't necessary yet.  Please wait with that
until you submit patches fd sharing.  The reason I make this
suggestion is that everyone reading or working on the code until fd
sharing is added now needs to deal with the V9fsFidMap structure which
(currently) serves no purpose.

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-15  9:19         ` Aneesh Kumar K. V
@ 2011-03-15 11:11           ` Stefan Hajnoczi
  2011-03-15 12:30             ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-15 11:11 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Tue, Mar 15, 2011 at 9:19 AM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Mon, 14 Mar 2011 10:20:57 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Sun, Mar 13, 2011 at 7:04 PM, Aneesh Kumar K. V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> >> > cache=none implies the file are opened in the host with O_SYNC open flag
>> >>
>> >> O_SYNC does not bypass the host page cache.  It ensures that writes
>> >> only complete once data has been written to the disk.
>> >>
>> >> O_DIRECT is a hint to bypass the host page cache when possible.
>> >>
>> >> A boolean on|off option would be nicer than an option that takes the
>> >> special string "none".  For example, direct=on|off.  It also makes the
>> >> code nicer by using bools instead of strdup strings that get leaked.
>> >>
>> >
>> > What i wanted is the O_SYNC behavior. Well the comment should be updated. I
>> > want to make sure that we don't have dirty data in host page cache after
>> > a write. It is always good to make read hit the page cache
>>
>> Why silently enforce O_SYNC on the server side?  The client does not
>> know whether or not O_SYNC is in effect, cannot take advantage of that
>> knowledge, and cannot control it.
>>
>> I think a more useful solution is a 9p client mount option called
>> "sync" that caused the client to always add O_SYNC and skip syncfs.
>> The whole stack becomes aware of O_SYNC and clients are in control
>> over whether or not they need O_SYNC semantics.
>
> The cache=none specifically enables us to ignore the tsyncfs request on
> host. tsyncfs on host can be really slow in certain setup.

If I'm a client with the "sync" mount option all my fids are O_SYNC
and I do not need to send TSYNCFS requests to the server because my
fids are already stable.

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim
  2011-03-15 10:38         ` Stefan Hajnoczi
@ 2011-03-15 12:27           ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-15 12:27 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Tue, 15 Mar 2011 10:38:31 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Mar 15, 2011 at 9:20 AM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > On Sun, 13 Mar 2011 20:53:41 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> On Sun, Mar 13, 2011 at 7:06 PM, Aneesh Kumar K. V
> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> > On Sun, 13 Mar 2011 15:46:29 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> >> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> >> > @@ -185,17 +188,22 @@ typedef struct V9fsXattr
> >> >> >     int flags;
> >> >> >  } V9fsXattr;
> >> >> >
> >> >> > +typedef struct V9fsfidmap {
> >> >>
> >> >> V9fsFidMap (naming convention)
> >> >>
> >> >> > +    union {
> >> >> > +        int fd;
> >> >> > +        DIR *dir;
> >> >> > +        V9fsXattr xattr;
> >> >> > +    } fs;
> >> >>
> >> >> The name "fs" is not meaningful.
> >> >>
> >> >> > +    int fid_type;
> >> >> > +    V9fsString path;
> >> >> > +    int flags;
> >> >> > +} V9fsFidMap;
> >> >> > +
> >> >> >  struct V9fsFidState
> >> >> >  {
> >> >> > -    int fid_type;
> >> >> >     int32_t fid;
> >> >> > -    V9fsString path;
> >> >> > -    union {
> >> >> > -       int fd;
> >> >> > -       DIR *dir;
> >> >> > -       V9fsXattr xattr;
> >> >> > -    } fs;
> >> >> >     uid_t uid;
> >> >> > +    V9fsFidMap fsmap;
> >> >>
> >> >> This name is confusing.  A "map" is usually a container that stores
> >> >> key/value pairs.  V9fsFidMapEntry would be clearer.  But then I
> >> >> thought that is what V9fsFidState is?
> >> >
> >> > I am bad at naming. I wanted to indicate something that can be shared
> >> > across multiple fids and also indicate the local file system
> >> > "mapping"/data. I will take any suggestion.
> >>
> >> Where does sharing happen, I didn't notice any code that shares fds
> >> between fids?
> >
> > That patch is not yet there. We can only share fd if they open flags
> > match. Hence making sure we open files on host with limited set of open
> > flags which enables us much better sharing.
> 
> Tracking open flags is fine and is needed for fd reclaim.  But
> splitting V9fsFidState into the V9fsFidMap structure and all the churn
> that causes to the code isn't necessary yet.  Please wait with that
> until you submit patches fd sharing.  The reason I make this
> suggestion is that everyone reading or working on the code until fd
> sharing is added now needs to deal with the V9fsFidMap structure which
> (currently) serves no purpose.

taken. will remove the patch from the series.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-15 11:11           ` Stefan Hajnoczi
@ 2011-03-15 12:30             ` Aneesh Kumar K. V
  2011-03-16  8:59               ` Stefan Hajnoczi
  0 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2011-03-15 12:30 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: aliguori, qemu-devel

On Tue, 15 Mar 2011 11:11:46 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> On Tue, Mar 15, 2011 at 9:19 AM, Aneesh Kumar K. V
> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > On Mon, 14 Mar 2011 10:20:57 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> On Sun, Mar 13, 2011 at 7:04 PM, Aneesh Kumar K. V
> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> > On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
> >> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
> >> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
> >> >> > cache=none implies the file are opened in the host with O_SYNC open flag
> >> >>
> >> >> O_SYNC does not bypass the host page cache.  It ensures that writes
> >> >> only complete once data has been written to the disk.
> >> >>
> >> >> O_DIRECT is a hint to bypass the host page cache when possible.
> >> >>
> >> >> A boolean on|off option would be nicer than an option that takes the
> >> >> special string "none".  For example, direct=on|off.  It also makes the
> >> >> code nicer by using bools instead of strdup strings that get leaked.
> >> >>
> >> >
> >> > What i wanted is the O_SYNC behavior. Well the comment should be updated. I
> >> > want to make sure that we don't have dirty data in host page cache after
> >> > a write. It is always good to make read hit the page cache
> >>
> >> Why silently enforce O_SYNC on the server side?  The client does not
> >> know whether or not O_SYNC is in effect, cannot take advantage of that
> >> knowledge, and cannot control it.
> >>
> >> I think a more useful solution is a 9p client mount option called
> >> "sync" that caused the client to always add O_SYNC and skip syncfs.
> >> The whole stack becomes aware of O_SYNC and clients are in control
> >> over whether or not they need O_SYNC semantics.
> >
> > The cache=none specifically enables us to ignore the tsyncfs request on
> > host. tsyncfs on host can be really slow in certain setup.
> 
> If I'm a client with the "sync" mount option all my fids are O_SYNC
> and I do not need to send TSYNCFS requests to the server because my
> fids are already stable.

Having sync mount option is useful, Infact for dotu we already default
O_SYNC on the client side because we don't have tsyncfs. But being able
to avoid the tfsyncfs flush from the server point of view also is
nice. Consider a setup where one doesn't have control on the guest
mount option but can control the qemu export options.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache
  2011-03-15 12:30             ` Aneesh Kumar K. V
@ 2011-03-16  8:59               ` Stefan Hajnoczi
  0 siblings, 0 replies; 36+ messages in thread
From: Stefan Hajnoczi @ 2011-03-16  8:59 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: aliguori, qemu-devel

On Tue, Mar 15, 2011 at 12:30 PM, Aneesh Kumar K. V
<aneesh.kumar@linux.vnet.ibm.com> wrote:
> On Tue, 15 Mar 2011 11:11:46 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> On Tue, Mar 15, 2011 at 9:19 AM, Aneesh Kumar K. V
>> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> > On Mon, 14 Mar 2011 10:20:57 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> >> On Sun, Mar 13, 2011 at 7:04 PM, Aneesh Kumar K. V
>> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> >> > On Sun, 13 Mar 2011 17:23:50 +0000, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>> >> >> On Sat, Mar 5, 2011 at 5:52 PM, Aneesh Kumar K.V
>> >> >> <aneesh.kumar@linux.vnet.ibm.com> wrote:
>> >> >> > cache=none implies the file are opened in the host with O_SYNC open flag
>> >> >>
>> >> >> O_SYNC does not bypass the host page cache.  It ensures that writes
>> >> >> only complete once data has been written to the disk.
>> >> >>
>> >> >> O_DIRECT is a hint to bypass the host page cache when possible.
>> >> >>
>> >> >> A boolean on|off option would be nicer than an option that takes the
>> >> >> special string "none".  For example, direct=on|off.  It also makes the
>> >> >> code nicer by using bools instead of strdup strings that get leaked.
>> >> >>
>> >> >
>> >> > What i wanted is the O_SYNC behavior. Well the comment should be updated. I
>> >> > want to make sure that we don't have dirty data in host page cache after
>> >> > a write. It is always good to make read hit the page cache
>> >>
>> >> Why silently enforce O_SYNC on the server side?  The client does not
>> >> know whether or not O_SYNC is in effect, cannot take advantage of that
>> >> knowledge, and cannot control it.
>> >>
>> >> I think a more useful solution is a 9p client mount option called
>> >> "sync" that caused the client to always add O_SYNC and skip syncfs.
>> >> The whole stack becomes aware of O_SYNC and clients are in control
>> >> over whether or not they need O_SYNC semantics.
>> >
>> > The cache=none specifically enables us to ignore the tsyncfs request on
>> > host. tsyncfs on host can be really slow in certain setup.
>>
>> If I'm a client with the "sync" mount option all my fids are O_SYNC
>> and I do not need to send TSYNCFS requests to the server because my
>> fids are already stable.
>
> Having sync mount option is useful, Infact for dotu we already default
> O_SYNC on the client side because we don't have tsyncfs. But being able
> to avoid the tfsyncfs flush from the server point of view also is
> nice. Consider a setup where one doesn't have control on the guest
> mount option but can control the qemu export options.

If the guest mount is sync it should not send syncfs because all its
files are opened O_SYNC and do not need syncing.  Toggling this option
on the host side has no effect either way.

If the guest mount is not sync then setting the option on the host
side overrides the guest and forces O_SYNC.  The result will likely be
slower writes but nop syncfs.

In both cases, the guest (and the application) is where O_SYNC matters
from a data integrity perspective.  The guest always has control over
whether it opens files with O_SYNC.

So this host side control only enables you to force O_SYNC when the
guest doesn't expect it, which does not seem like a useful feature to
me.  What does this buy us?

Stefan

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2011-03-16  8:59 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-05 17:52 [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Aneesh Kumar K.V
2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 2/8] hw/9pfs: Add file descriptor reclaim support Aneesh Kumar K.V
2011-03-13 16:08   ` Stefan Hajnoczi
2011-03-13 18:57     ` Aneesh Kumar K. V
2011-03-14 10:13       ` Stefan Hajnoczi
2011-03-15  8:35         ` Aneesh Kumar K. V
2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 3/8] hw/9pfs: Use v9fs_do_close instead of close Aneesh Kumar K.V
2011-03-13 16:10   ` Stefan Hajnoczi
2011-03-13 18:58     ` Aneesh Kumar K. V
2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 4/8] hw/9pfs: Implement syncfs Aneesh Kumar K.V
2011-03-13 16:24   ` Stefan Hajnoczi
2011-03-13 18:59     ` Aneesh Kumar K. V
2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 5/8] hw/9pfs: Add open flag to fid Aneesh Kumar K.V
2011-03-13 16:38   ` Stefan Hajnoczi
2011-03-13 19:01     ` Aneesh Kumar K. V
2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 6/8] hw/9pfs: Add directory reclaim support Aneesh Kumar K.V
2011-03-13 16:42   ` Stefan Hajnoczi
2011-03-13 19:02     ` Aneesh Kumar K. V
2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 7/8] hw/9pfs: Add new virtfs option cache=none to skip host page cache Aneesh Kumar K.V
2011-03-13 17:23   ` Stefan Hajnoczi
2011-03-13 19:04     ` Aneesh Kumar K. V
2011-03-13 20:57       ` Stefan Hajnoczi
2011-03-15  8:36         ` Aneesh Kumar K. V
2011-03-14 10:20       ` Stefan Hajnoczi
2011-03-15  9:19         ` Aneesh Kumar K. V
2011-03-15 11:11           ` Stefan Hajnoczi
2011-03-15 12:30             ` Aneesh Kumar K. V
2011-03-16  8:59               ` Stefan Hajnoczi
2011-03-05 17:52 ` [Qemu-devel] [PATCH -V3 8/8] hw/9pfs: Skip file system sync if we have specified cache=none option Aneesh Kumar K.V
2011-03-13 15:46 ` [Qemu-devel] [PATCH -V3 1/8] hw/9pfs: Add V9fsfidmap in preparation for adding fd reclaim Stefan Hajnoczi
2011-03-13 19:06   ` Aneesh Kumar K. V
2011-03-13 20:53     ` Stefan Hajnoczi
2011-03-14 10:23       ` Stefan Hajnoczi
2011-03-15  9:20       ` Aneesh Kumar K. V
2011-03-15 10:38         ` Stefan Hajnoczi
2011-03-15 12:27           ` Aneesh Kumar K. V

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).