All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff Garzik <jeff@garzik.org>
To: Pete Zaitcev <zaitcev@redhat.com>
Cc: Project Hail List <hail-devel@vger.kernel.org>
Subject: Re: [patch tabled 6/8] Add filesystem back-end
Date: Mon, 13 Dec 2010 16:30:59 -0500	[thread overview]
Message-ID: <4D069093.2060108@garzik.org> (raw)
In-Reply-To: <20101128184108.2172f590@lembas.zaitcev.lan>

On 11/28/2010 08:41 PM, Pete Zaitcev wrote:
> This patch adds the first new back-end and makes some changes to the way
> nodes are added, to make the invariants of storage_node more sensible.
>
> The filesystem back-end itself is not intended for production use,
> so it makes no attempt to run any asynchronous transfers.
>
> We also add a test. Note that this differs from the preliminary versions
> of this patch. We used to add both chunk and fs back-ends, so that tabled
> replicated to both. This makes sense as a test of the store path, but on
> retrieval tabled randomly selects any one of the available storage nodes
> holding the object, which creates gaps in test coverage in any given run.
> Therefore, we now test the two back-end types sequentially.
>
> Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
>
> ---
>   server/Makefile.am   |    2
>   server/stor_chunk.c  |   21 -
>   server/stor_fs.c     |  498 +++++++++++++++++++++++++++++++++++++++++
>   server/storage.c     |  157 ++++++++++--
>   server/storparse.c   |   97 +++++++
>   server/tabled.h      |   31 ++
>   test/Makefile.am     |    3
>   test/be_fs-test.conf |    5
>   test/combo-redux     |   74 ++++++
>   test/prep-db         |    4
>   test/start-daemon    |    1
>   test/stop-daemon     |    9
>   12 files changed, 835 insertions(+), 67 deletions(-)
>
> commit bccedeedabbe713e4053afa185314b3f57f3d204
> Author: Pete Zaitcev <zaitcev@yahoo.com>
> Date:   Sun Nov 28 17:58:05 2010 -0700
>
>      Add fs back-end, with a test.
>
> diff --git a/server/Makefile.am b/server/Makefile.am
> index 52beec4..71bcb35 100644
> --- a/server/Makefile.am
> +++ b/server/Makefile.am
> @@ -6,7 +6,7 @@ sbin_PROGRAMS	= tabled tdbadm
>   tabled_SOURCES	= tabled.h		\
>   		  bucket.c cldu.c config.c metarep.c object.c replica.c \
>   		  server.c status.c storage.c storparse.c \
> -		  stor_chunk.c util.c
> +		  stor_chunk.c stor_fs.c util.c
>   tabled_LDADD	= ../lib/libtdb.a		\
>   		  @HAIL_LIBS@ @PCRE_LIBS@ @GLIB_LIBS@ \
>   		  @CRYPTO_LIBS@ @DB4_LIBS@ @EVENT_LIBS@ @SSL_LIBS@
> diff --git a/server/stor_chunk.c b/server/stor_chunk.c
> index 815adcf..7462a9c 100644
> --- a/server/stor_chunk.c
> +++ b/server/stor_chunk.c
> @@ -31,8 +31,7 @@
>   #include<netdb.h>
>   #include "tabled.h"
>
> -static const char stor_key_fmt[] = "%016llx";
> -#define STOR_KEY_SLEN  16
> +static const char stor_key_fmt[] = STOR_KEY_FMT;
>
>   static int stor_new_stc(struct storage_node *stn, struct st_client **stcp)
>   {
> @@ -66,24 +65,6 @@ static int stor_new_stc(struct storage_node *stn, struct st_client **stcp)
>   	return 0;
>   }
>
> -static void stor_read_event(int fd, short events, void *userdata)
> -{
> -	struct open_chunk *cep = userdata;
> -
> -	cep->r_armed = false;		/* no EV_PERSIST */
> -	if (cep->ocb)
> -		(*cep->ocb)(cep);
> -}
> -
> -static void stor_write_event(int fd, short events, void *userdata)
> -{
> -	struct open_chunk *cep = userdata;
> -
> -	cep->w_armed = false;		/* no EV_PERSIST */
> -	if (cep->ocb)
> -		(*cep->ocb)(cep);
> -}
> -
>   /*
>    * Open *cep using stn, set up chunk session if needed.
>    */
> diff --git a/server/stor_fs.c b/server/stor_fs.c
> new file mode 100644
> index 0000000..b433a67
> --- /dev/null
> +++ b/server/stor_fs.c
> @@ -0,0 +1,498 @@
> +
> +/*
> + * Copyright 2010 Red Hat, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; see the file COPYING.  If not, write to
> + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
> + *
> + */
> +
> +#define _GNU_SOURCE
> +#include "tabled-config.h"
> +
> +#include<sys/types.h>
> +#include<sys/stat.h>
> +#include<errno.h>
> +#include<fcntl.h>
> +#include<syslog.h>
> +#include<string.h>
> +#include<glib.h>
> +#include<event.h>
> +#include "tabled.h"
> +
> +static const char stor_key_fmt[] = STOR_KEY_FMT;
> +
> +static char *fs_obj_pathname(const char *base, uint64_t key)
> +{
> +	enum { PREFIX_LEN = 3 };
> +	char prefix[PREFIX_LEN + 1];
> +	char stckey[STOR_KEY_SLEN+1];
> +	char *s;
> +	int rc;
> +
> +	/* we know that stckey is going to be longer than PREFIX_LEN */
> +	sprintf(stckey, stor_key_fmt, (unsigned long long) key);
> +	memcpy(prefix, stckey, PREFIX_LEN);
> +	prefix[PREFIX_LEN] = 0;
> +
> +	rc = asprintf(&s, "%s/%s/%s", base, prefix, stckey + PREFIX_LEN);
> +	if (rc<  0)
> +		goto err_out;
> +
> +	return s;
> +
> +err_out:
> +	return NULL;
> +}
> +
> +static char *fs_ctl_pathname(const char *base, const char *file)
> +{
> +	char *s;
> +	int rc;
> +
> +	rc = asprintf(&s, "%s/%s", base, file);
> +	if (rc<  0)
> +		return NULL;
> +	return s;
> +}
> +
> +static int fs_obj_mkpath(const char *path)
> +{
> +	struct stat statb;
> +	char *s;
> +	int rc;
> +
> +	/* one dir is enough */
> +	/* not using dirname because on some platforms it modifies its arg. */
> +	s = strrchr(path, '/');
> +	if (s == NULL)
> +		return -EINVAL;
> +	s = strndup(path, s-path);
> +	if (!s)
> +		return -ENOMEM;
> +
> +	/* create subdir on the fly, if it does not already exist */
> +	if (stat(s,&statb)<  0) {
> +		rc = errno;
> +		if (rc != ENOENT)
> +			goto err_out;
> +		if (mkdir(s, 0777)<  0) {
> +			rc = errno;
> +			/*
> +			 * Directory already exists, perhaps
> +			 * because we raced with another thread.
> +			 */
> +			if (rc != EEXIST)
> +				goto err_out;
> +		}
> +	} else {
> +		if (!S_ISDIR(statb.st_mode)) {
> +			rc = EINVAL;
> +			goto err_out;
> +		}
> +	}
> +
> +	free(s);
> +	return 0;
> +
> +err_out:
> +	free(s);
> +	return -rc;
> +}
> +
> +static int fs_open(struct open_chunk *cep, struct storage_node *stn,
> +		   struct event_base *ev_base)
> +{
> +	if (cep->node)
> +		return -EBUSY;
> +
> +	if (!stn->basepath) {
> +		applog(LOG_WARNING,
> +		       "No base path for Posix chunk, nid %u", stn->id);
> +		return -EINVAL;
> +	}
> +
> +	cep->evbase = ev_base;
> +	cep->node = stor_node_get(stn);
> +	cep->pfd = -1;
> +
> +	return 0;
> +}
> +
> +static int fs_open_read(struct open_chunk *cep,
> +			void (*cb)(struct open_chunk *),
> +			uint64_t key, uint64_t *psize)
> +{
> +	char *objpath;
> +	struct stat statb;
> +	uint64_t size;
> +	int rc;
> +
> +	if (!cep->node || cep->key)
> +		return -EBUSY;
> +
> +	objpath = fs_obj_pathname(cep->node->basepath, key);
> +	if (!objpath) {
> +		applog(LOG_WARNING, "No core");
> +		return -ENOMEM;
> +	}
> +
> +	rc = open(objpath, O_RDONLY);
> +	if (rc == -1) {
> +		rc = errno;
> +		applog(LOG_WARNING, "Cannot open file %s oid %llX: %s",
> +		       objpath, (long long) key, strerror(rc));
> +		free(objpath);
> +		return -rc;
> +	}
> +	cep->pfd = rc;
> +
> +	if (fstat(cep->pfd,&statb)<  0) {
> +		rc = errno;
> +		applog(LOG_WARNING, "Cannot stat file %s: %s",
> +		       objpath, strerror(rc));
> +		close(cep->pfd);
> +		cep->pfd = -1;
> +		free(objpath);
> +		return -rc;
> +	}
> +	size = statb.st_size;
> +
> +	*psize = size;
> +	cep->size = size;
> +	cep->done = 0;
> +	cep->key = key;
> +	cep->ocb = cb;
> +
> +	/*
> +	 * We cannot call cep->ocb directly. Instead, we steal the
> +	 * arm-disarm mechanism from chunk. This works because in Linux
> +	 * regular files can be polled and always return ready.
> +	 */
> +	event_set(&cep->revt, cep->pfd, EV_READ, stor_read_event, cep);
> +	event_base_set(cep->evbase,&cep->revt);
> +
> +	free(objpath);
> +	return 0;
> +}
> +
> +static void fs_close(struct open_chunk *cep)
> +{
> +	if (cep->node) {
> +		stor_node_put(cep->node);
> +		cep->node = NULL;
> +		if (cep->pfd != -1) {
> +			close(cep->pfd);
> +			cep->pfd = -1;
> +		}
> +	}
> +
> +	cep->done = 0;
> +	cep->size = 0;
> +
> +	if (cep->r_armed) {
> +		event_del(&cep->revt);
> +		cep->r_armed = false;
> +	}
> +
> +	if (cep->w_armed) {
> +		event_del(&cep->wevt);
> +		cep->w_armed = false;
> +	}
> +
> +	cep->key = 0;
> +}
> +
> +static void fs_abort(struct open_chunk *cep)
> +{
> +	if (cep->r_armed) {
> +		event_del(&cep->revt);
> +		cep->r_armed = false;
> +	}
> +	if (cep->w_armed) {
> +		event_del(&cep->wevt);
> +		cep->w_armed = false;
> +	}
> +	/* XXX delete the unfinished object under write */
> +	cep->key = 0;
> +}
> +
> +static int fs_put_start(struct open_chunk *cep,
> +			void (*cb)(struct open_chunk *),
> +			uint64_t key, uint64_t size)
> +{
> +	char *objpath;
> +	int rc;
> +
> +	if (!cep->node || cep->key)
> +		return -EBUSY;
> +
> +	objpath = fs_obj_pathname(cep->node->basepath, key);
> +	if (!objpath) {
> +		applog(LOG_WARNING, "No core");
> +		return -ENOMEM;
> +	}
> +
> +	rc = fs_obj_mkpath(objpath);
> +	if (rc) {
> +		applog(LOG_WARNING, "Cannot create a directory for %s: %s",
> +		       objpath, strerror(-rc));
> +		free(objpath);
> +		return rc;
> +	}
> +
> +	rc = open(objpath, O_WRONLY|O_TRUNC|O_CREAT, 0666);
> +	if (rc == -1) {
> +		rc = errno;
> +		applog(LOG_WARNING, "Cannot create file %s: %s",
> +		       objpath, strerror(rc));
> +		free(objpath);
> +		return -rc;
> +	}


Current chunkd intentionally prevents two random users from PUTting the
same object.  The FS backend should do the same.

  reply	other threads:[~2010-12-13 21:30 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-29  1:41 [patch tabled 6/8] Add filesystem back-end Pete Zaitcev
2010-12-13 21:30 ` Jeff Garzik [this message]
2010-12-14  1:30   ` Pete Zaitcev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4D069093.2060108@garzik.org \
    --to=jeff@garzik.org \
    --cc=hail-devel@vger.kernel.org \
    --cc=zaitcev@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.