From: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
To: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: aliguori@us.ibm.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 3/6] hw/9pfs: Add file descriptor reclaim support
Date: Thu, 09 Jun 2011 18:27:51 -0700 [thread overview]
Message-ID: <4DF17317.10606@linux.vnet.ibm.com> (raw)
In-Reply-To: <1307380618-1963-3-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
On 06/06/2011 10:16 AM, Aneesh Kumar K.V wrote:
> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
Minor comments below; I think this looks good.
Reviewed-by : Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
> ---
> hw/9pfs/codir.c | 4 +-
> hw/9pfs/cofile.c | 19 ++++-
> hw/9pfs/virtio-9p-coth.h | 4 +-
> hw/9pfs/virtio-9p-device.c | 1 +
> hw/9pfs/virtio-9p.c | 193 +++++++++++++++++++++++++++++++++++++++++++-
> hw/9pfs/virtio-9p.h | 23 +++++-
> 6 files changed, 229 insertions(+), 15 deletions(-)
>
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index 783d279..3347038 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -101,12 +101,10 @@ int v9fs_co_opendir(V9fsState *s, V9fsFidState *fidp)
> return err;
> }
>
> -int v9fs_co_closedir(V9fsState *s, V9fsFidState *fidp)
> +int v9fs_co_closedir(V9fsState *s, DIR *dir)
> {
> int err;
> - DIR *dir;
>
> - dir = fidp->fs.dir;
> v9fs_co_run_in_worker(
> {
> err = s->ops->closedir(&s->ctx, dir);
> diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c
> index e388146..0caf1e3 100644
> --- a/hw/9pfs/cofile.c
> +++ b/hw/9pfs/cofile.c
> @@ -58,6 +58,12 @@ int v9fs_co_open(V9fsState *s, V9fsFidState *fidp, int flags)
> err = 0;
> }
> });
> + if (!err) {
> + total_open_fd++;
> + if (total_open_fd> open_fd_hw) {
> + v9fs_reclaim_fd(s);
> + }
> + }
> return err;
> }
>
> @@ -79,15 +85,19 @@ int v9fs_co_open2(V9fsState *s, V9fsFidState *fidp, char *fullname, gid_t gid,
> err = -errno;
> }
> });
> + if (!err) {
> + total_open_fd++;
> + if (total_open_fd> open_fd_hw) {
> + v9fs_reclaim_fd(s);
> + }
> + }
> return err;
> }
>
> -int v9fs_co_close(V9fsState *s, V9fsFidState *fidp)
> +int v9fs_co_close(V9fsState *s, int fd)
> {
> - int fd;
> int err;
>
> - fd = fidp->fs.fd;
> v9fs_co_run_in_worker(
> {
> err = s->ops->close(&s->ctx, fd);
> @@ -95,6 +105,9 @@ int v9fs_co_close(V9fsState *s, V9fsFidState *fidp)
> err = -errno;
> }
> });
> + if (!err) {
> + total_open_fd--;
> + }
> return err;
> }
>
> diff --git a/hw/9pfs/virtio-9p-coth.h b/hw/9pfs/virtio-9p-coth.h
> index 48defb7..b7be9b5 100644
> --- a/hw/9pfs/virtio-9p-coth.h
> +++ b/hw/9pfs/virtio-9p-coth.h
> @@ -83,8 +83,8 @@ extern int v9fs_co_open2(V9fsState *, V9fsFidState *, char *, gid_t, int, int);
> extern int v9fs_co_lsetxattr(V9fsState *, V9fsString *, V9fsString *,
> void *, size_t, int);
> extern int v9fs_co_lremovexattr(V9fsState *, V9fsString *, V9fsString *);
> -extern int v9fs_co_closedir(V9fsState *, V9fsFidState *);
> -extern int v9fs_co_close(V9fsState *, V9fsFidState *);
> +extern int v9fs_co_closedir(V9fsState *, DIR *);
> +extern int v9fs_co_close(V9fsState *, int);
> extern int v9fs_co_fsync(V9fsState *, V9fsFidState *, int);
> extern int v9fs_co_symlink(V9fsState *, V9fsFidState *, const char *,
> const char *, gid_t);
> diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
> index 7811103..70629b2 100644
> --- a/hw/9pfs/virtio-9p-device.c
> +++ b/hw/9pfs/virtio-9p-device.c
> @@ -171,6 +171,7 @@ static PCIDeviceInfo virtio_9p_info = {
> static void virtio_9p_register_devices(void)
> {
> pci_qdev_register(&virtio_9p_info);
> + virtio_9p_set_fd_limit();
> }
>
> device_init(virtio_9p_register_devices)
> diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
> index f3127a5..21e07fb 100644
> --- a/hw/9pfs/virtio-9p.c
> +++ b/hw/9pfs/virtio-9p.c
> @@ -22,6 +22,9 @@
> #include "virtio-9p-coth.h"
>
> int debug_9p_pdu;
> +int open_fd_hw;
> +int total_open_fd;
> +static int open_fd_rc;
>
> enum {
> Oread = 0x00,
> @@ -234,11 +237,40 @@ static size_t v9fs_string_size(V9fsString *str)
>
> static V9fsFidState *get_fid(V9fsState *s, int32_t fid)
> {
> + int err;
> V9fsFidState *f;
>
> for (f = s->fid_list; f; f = f->next) {
> if (f->fid == fid&& !f->clunked) {
> + /*
> + * Update the fid ref upfront so that
> + * we don't get reclaimed when we yield
> + * in open later.
> + */
> f->ref++;
> +
> + /*
> + * check whether we need to reopen the
> + * file. We might have closed the fd
> + * while trying to free up some file
> + * descriptors.
> + */
> + if (f->fid_type == P9_FID_FILE) {
> + if (f->fs.fd == -1) {
> + do {
> + err = v9fs_co_open(s, f, f->open_flags);
> + } while (err == -EINTR);
> + if (err< 0) {
> + f->ref--;
> + return NULL;
> + }
> + }
> + }
> + /*
> + * Mark the fid as referenced so that the LRU
> + * reclaim won't close the file descriptor
> + */
> + f->flags |= FID_REFERENCED;
> return f;
> }
> }
> @@ -259,6 +291,11 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
> f->fid = fid;
> f->fid_type = P9_FID_NONE;
> f->ref = 1;
> + /*
> + * Mark the fid as referenced so that the LRU
> + * reclaim won't close the file descriptor
> + */
> + f->flags |= FID_REFERENCED;
> f->next = s->fid_list;
> s->fid_list = f;
>
> @@ -304,9 +341,12 @@ static int release_fid(V9fsState *s, V9fsFidState *fidp)
> int retval = 0;
>
> if (fidp->fid_type == P9_FID_FILE) {
> - retval = v9fs_co_close(s, fidp);
> + /* If we reclaimed the fd no need to close */
> + if (fidp->fs.fd != -1) {
> + retval = v9fs_co_close(s, fidp->fs.fd);
> + }
> } else if (fidp->fid_type == P9_FID_DIR) {
> - retval = v9fs_co_closedir(s, fidp);
> + retval = v9fs_co_closedir(s, fidp->fs.dir);
> } else if (fidp->fid_type == P9_FID_XATTR) {
> retval = v9fs_xattr_fid_clunk(s, fidp);
> }
> @@ -319,7 +359,10 @@ static void put_fid(V9fsState *s, V9fsFidState *fidp)
> {
> BUG_ON(!fidp->ref);
> fidp->ref--;
> - if (!fidp->ref&& fidp->clunked) {
> + /*
> + * Don't free the fid if it is in reclaim list
> + */
> + if (!fidp->ref&& fidp->clunked&& !fidp->rclm_lst) {
> release_fid(s, fidp);
> }
> }
> @@ -343,6 +386,105 @@ static int free_fid(V9fsState *s, int32_t fid)
> return 0;
> }
>
> +void v9fs_reclaim_fd(V9fsState *s)
> +{
> + int reclaim_count = 0;
> + V9fsFidState *f, head_fid, *reclaim_list = NULL;
> +
> + /*
> + * We don't want multiple coroutine to do reclaim
> + */
> + if (s->reclaim_in_prgs) {
> + return;
> + }
> + s->reclaim_in_prgs = 1;
> +
> + head_fid.next = s->fid_list;
> + for (f = s->fid_list; f; f = f->next) {
> + /*
> + * Unlink fids cannot be reclaimed. Check
> + * for them and skip them. Also skip fids
> + * currently being operated on.
> + */
> + if (f->ref || f->flags& FID_NON_RECLAIMABLE) {
> + continue;
> + }
> + /*
> + * if it is a recently referenced fid
> + * we leave the fid untouched and clear the
> + * reference bit. We come back to it later
> + * in the next iteration. (a simple LRU without
> + * moving list elements around)
> + */
> + if (f->flags& FID_REFERENCED) {
> + f->flags&= ~FID_REFERENCED;
> + continue;
> + }
> + /*
> + * Add fids to reclaim list.
> + */
> + if (f->fid_type == P9_FID_FILE) {
> + if (f->fs.fd != -1) {
> + f->rclm_lst = reclaim_list;
> + reclaim_list = f;
> + f->fs_reclaim.fd = f->fs.fd;
> + f->fs.fd = -1;
> + reclaim_count++;
> + }
> + }
> + if (reclaim_count>= open_fd_rc) {
> + break;
> + }
> + }
> + /*
> + * Now close the fid in reclaim list. Free them if they
> + * are already clunked.
> + */
> + while (reclaim_list) {
> + f = reclaim_list;
> + reclaim_list = f->rclm_lst;
> + if (f->clunked) {
> + release_fid(s, f);
> + } else {
> + if (f->fid_type == P9_FID_FILE) {
> + v9fs_co_close(s, f->fs_reclaim.fd);
> + }
> + f->rclm_lst = NULL;
> + }
> + }
> + s->reclaim_in_prgs = 0;
> +}
> +
> +static int v9fs_mark_fids_unreclaim(V9fsState *s, V9fsString *str)
> +{
> + int err;
> + V9fsFidState *fidp, head_fid;
> +
> + head_fid.next = s->fid_list;
> + for (fidp = s->fid_list; fidp; fidp = fidp->next) {
> + if (!strcmp(fidp->path.data, str->data)) {
&& !( fidp->flags && FID_NON_RECLAIMABLE) To avoid unnecessarily
resetting the flag.
> + /* Mark the fid non reclaimable. */
> + fidp->flags |= FID_NON_RECLAIMABLE;
> + /* reopen the file if already closed */
> + if (fidp->fs.fd == -1) {
> + do {
> + err = v9fs_co_open(s, fidp, fidp->open_flags);
> + } while (err == -EINTR);
> + if (err< 0) {
> + return -1;
> + }
> + /*
> + * Go back to head of fid list because
> + * the list could have got updated when
> + * switched to the worker thread
> + */
> + fidp =&head_fid;
> + }
> + }
> + }
> + return 0;
> +}
> +
> #define P9_QID_TYPE_DIR 0x80
> #define P9_QID_TYPE_SYMLINK 0x02
>
> @@ -1388,6 +1530,14 @@ static void v9fs_open(void *opaque)
> goto out;
> }
> fidp->fid_type = P9_FID_FILE;
> + fidp->open_flags = flags;
> + if (flags& O_EXCL) {
> + /*
> + * We let the host file system do O_EXCL check
> + * We should not reclaim such fd
> + */
> + fidp->flags |= FID_NON_RECLAIMABLE;
> + }
> iounit = get_iounit(s,&fidp->path);
> offset += pdu_marshal(pdu, offset, "Qd",&qid, iounit);
> err = offset;
> @@ -1432,6 +1582,14 @@ static void v9fs_lcreate(void *opaque)
> goto out;
> }
> fidp->fid_type = P9_FID_FILE;
> + fidp->open_flags = flags;
> + if (flags& O_EXCL) {
> + /*
> + * We let the host file system do O_EXCL check
> + * We should not reclaim such fd
> + */
> + fidp->flags |= FID_NON_RECLAIMABLE;
> + }
> iounit = get_iounit(pdu->s,&fullname);
>
> err = v9fs_co_lstat(pdu->s,&fullname,&stbuf);
> @@ -1993,6 +2151,14 @@ static void v9fs_create(void *opaque)
> goto out;
> }
> fidp->fid_type = P9_FID_FILE;
> + fidp->open_flags = omode_to_uflags(mode);
> + if (fidp->open_flags& O_EXCL) {
> + /*
> + * We let the host file system do O_EXCL check
> + * We should not reclaim such fd
> + */
> + fidp->flags |= FID_NON_RECLAIMABLE;
> + }
> }
> err = v9fs_co_lstat(pdu->s,&fullname,&stbuf);
> if (err< 0) {
> @@ -2126,11 +2292,19 @@ static void v9fs_remove(void *opaque)
> err = -EINVAL;
> goto out_nofid;
> }
> + /*
> + * IF the file is unlinked, we cannot reopen
> + * the file later. So don't reclaim fd
> + */
> + err = v9fs_mark_fids_unreclaim(pdu->s,&fidp->path);
> + if (err< 0) {
> + goto out_err;
> + }
> err = v9fs_co_remove(pdu->s,&fidp->path);
> if (!err) {
> err = offset;
> }
> -
> +out_err:
> /* For TREMOVE we need to clunk the fid even on failed remove */
> free_fid(pdu->s, fidp->fid);
> put_fid(pdu->s, fidp);
> @@ -2826,3 +3000,14 @@ void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
> }
> free_pdu(s, pdu);
> }
> +
> +void virtio_9p_set_fd_limit(void)
> +{
> + struct rlimit rlim;
> + if (getrlimit(RLIMIT_NOFILE,&rlim)< 0) {
> + fprintf(stderr, "Failed to get the resource limit\n");
> + exit(1);
> + }
> + open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
> + open_fd_rc = rlim.rlim_cur/2;
Why not call it LW?
> +}
> diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h
> index e16e5f4..36aa4a5 100644
> --- a/hw/9pfs/virtio-9p.h
> +++ b/hw/9pfs/virtio-9p.h
> @@ -5,6 +5,7 @@
> #include<dirent.h>
> #include<sys/time.h>
> #include<utime.h>
> +#include<sys/resource.h>
> #include "hw/virtio.h"
> #include "fsdev/file-op-9p.h"
>
> @@ -101,6 +102,9 @@ enum p9_proto_version {
> #define P9_NOTAG (u16)(~0)
> #define P9_NOFID (u32)(~0)
> #define P9_MAXWELEM 16
> +
> +#define FID_REFERENCED 0x1
> +#define FID_NON_RECLAIMABLE 0x2
> static inline const char *rpath(FsContext *ctx, const char *path, char *buffer)
> {
> snprintf(buffer, PATH_MAX, "%s/%s", ctx->fs_root, path);
> @@ -198,14 +202,21 @@ struct V9fsFidState
> int32_t fid;
> V9fsString path;
> union {
> - int fd;
> - DIR *dir;
> - V9fsXattr xattr;
> + int fd;
> + DIR *dir;
> + V9fsXattr xattr;
> } fs;
> + union {
> + int fd;
> + DIR *dir;
> + } fs_reclaim;
> + int flags;
> + int open_flags;
> uid_t uid;
> int ref;
> int clunked;
> V9fsFidState *next;
> + V9fsFidState *rclm_lst;
> };
>
> typedef struct V9fsState
> @@ -222,6 +233,7 @@ typedef struct V9fsState
> size_t config_size;
> enum p9_proto_version proto_version;
> int32_t msize;
> + int reclaim_in_prgs;
> } V9fsState;
>
> typedef struct V9fsStatState {
> @@ -354,6 +366,9 @@ typedef struct V9fsGetlock
> V9fsString client_id;
> } V9fsGetlock;
>
> +extern int open_fd_hw;
> +extern int total_open_fd;
> +
> size_t pdu_packunpack(void *addr, struct iovec *sg, int sg_count,
> size_t offset, size_t size, int pack);
>
> @@ -364,4 +379,6 @@ static inline size_t do_pdu_unpack(void *dst, struct iovec *sg, int sg_count,
> }
>
> extern void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq);
> +extern void virtio_9p_set_fd_limit(void);
> +extern void v9fs_reclaim_fd(V9fsState *s);
> #endif
next prev parent reply other threads:[~2011-06-10 1:28 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-06 17:16 [Qemu-devel] [PATCH 1/6] hw/9pfs: Add reference counting for fid Aneesh Kumar K.V
2011-06-06 17:16 ` [Qemu-devel] [PATCH 2/6] hw/9pfs: Don't free fid in clunk Aneesh Kumar K.V
2011-06-10 0:27 ` Venkateswararao Jujjuri
2011-06-10 6:19 ` Aneesh Kumar K.V
2011-06-06 17:16 ` [Qemu-devel] [PATCH 3/6] hw/9pfs: Add file descriptor reclaim support Aneesh Kumar K.V
2011-06-10 1:27 ` Venkateswararao Jujjuri [this message]
2011-06-06 17:16 ` [Qemu-devel] [PATCH 4/6] hw/9pfs: init fid list properly Aneesh Kumar K.V
2011-06-06 17:16 ` [Qemu-devel] [PATCH 5/6] hw/9pfs: Use v9fs_do_close instead of close Aneesh Kumar K.V
2011-06-10 1:32 ` Venkateswararao Jujjuri
2011-06-06 17:16 ` [Qemu-devel] [PATCH 6/6] hw/9pfs: Add directory reclaim support Aneesh Kumar K.V
2011-06-10 1:36 ` Venkateswararao Jujjuri
2011-06-09 23:10 ` [Qemu-devel] [PATCH 1/6] hw/9pfs: Add reference counting for fid Venkateswararao Jujjuri
2011-06-10 6:27 ` Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DF17317.10606@linux.vnet.ibm.com \
--to=jvrao@linux.vnet.ibm.com \
--cc=aliguori@us.ibm.com \
--cc=aneesh.kumar@linux.vnet.ibm.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.