* [PATCH] Add complex block layout discovery and mapping daemon
@ 2010-08-11 19:42 Jim Rees
[not found] ` <20100811194253.GA11453-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Jim Rees @ 2010-08-11 19:42 UTC (permalink / raw)
To: bhalevy; +Cc: linux-nfs
This is a replacement for the patch I sent 21 July, incorporating feedback
from list members. Thank you for your comments.
I have tried to settle on "blkmapd" as the name and use it consistently for
the executable, service name, syslog, etc.
I did not change atomicio.c. That's because this is a copy of the file by
the same name in both idmapd and spnfsd. There is a patch in the works to
move this to the support library. I think the right thing to do is move
that patch forward, then fix atomicio.
Signed-off-by: Jim Rees <rees@umich.edu>
---
configure.ac | 4 +
utils/Makefile.am | 4 +
utils/blkmapd/Makefile.am | 63 +++++
utils/blkmapd/atomicio.c | 54 ++++
utils/blkmapd/cfg.c | 248 +++++++++++++++++
utils/blkmapd/cfg.h | 47 +++
utils/blkmapd/device-discovery.c | 502 +++++++++++++++++++++++++++++++++
utils/blkmapd/device-discovery.h | 162 +++++++++++
utils/blkmapd/device-inq.c | 235 ++++++++++++++++
utils/blkmapd/device-process.c | 394 ++++++++++++++++++++++++++
utils/blkmapd/dm-device.c | 509 ++++++++++++++++++++++++++++++++++
utils/blkmapd/etc/blkmapd.conf | 10 +
utils/blkmapd/etc/initd/initd.redhat | 76 +++++
13 files changed, 2308 insertions(+), 0 deletions(-)
create mode 100644 utils/blkmapd/Makefile.am
create mode 100644 utils/blkmapd/atomicio.c
create mode 100644 utils/blkmapd/cfg.c
create mode 100644 utils/blkmapd/cfg.h
create mode 100644 utils/blkmapd/device-discovery.c
create mode 100644 utils/blkmapd/device-discovery.h
create mode 100644 utils/blkmapd/device-inq.c
create mode 100644 utils/blkmapd/device-process.c
create mode 100644 utils/blkmapd/dm-device.c
create mode 100644 utils/blkmapd/etc/blkmapd.conf
create mode 100644 utils/blkmapd/etc/initd/initd.redhat
diff --git a/configure.ac b/configure.ac
index 4d12715..f57cd45 100644
--- a/configure.ac
+++ b/configure.ac
@@ -64,12 +64,15 @@ AC_ARG_ENABLE(nfsv4,
enable_nfsv4=yes)
if test "$enable_nfsv4" = yes; then
AC_DEFINE(NFS4_SUPPORTED, 1, [Define this if you want NFSv4 support compiled in])
+ BLKMAPD=blkmapd
IDMAPD=idmapd
SPNFSD=spnfsd
else
enable_nfsv4=
+ BLKMAPD=
IDMAPD=
fi
+ AC_SUBST(BLKMAPD)
AC_SUBST(IDMAPD)
AC_SUBST(enable_nfsv4)
AM_CONDITIONAL(CONFIG_NFSV4, [test "$enable_nfsv4" = "yes"])
@@ -429,6 +432,7 @@ AC_CONFIG_FILES([
tools/mountstats/Makefile
tools/nfs-iostat/Makefile
utils/Makefile
+ utils/blkmapd/Makefile
utils/exportfs/Makefile
utils/gssd/Makefile
utils/idmapd/Makefile
diff --git a/utils/Makefile.am b/utils/Makefile.am
index c777d21..c33835a 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -10,6 +10,10 @@ if CONFIG_NFSV4
OPTDIRS += spnfsd
endif
+if CONFIG_NFSV4
+OPTDIRS += blkmapd
+endif
+
if CONFIG_GSS
OPTDIRS += gssd
endif
diff --git a/utils/blkmapd/Makefile.am b/utils/blkmapd/Makefile.am
new file mode 100644
index 0000000..81cc420
--- /dev/null
+++ b/utils/blkmapd/Makefile.am
@@ -0,0 +1,63 @@
+## Process this file with automake to produce Makefile.in
+
+# No man page is shipped yet; once blkmapd.man exists enable: man8_MANS = blkmapd.man
+
+AM_CFLAGS += -D_LARGEFILE64_SOURCE
+KPREFIX = @kprefix@
+sbin_PROGRAMS = blkmapd
+
+blkmapd_SOURCES = \
+ atomicio.c \
+ cfg.c \
+ device-discovery.c \
+ device-inq.c \
+ device-process.c \
+ dm-device.c \
+ \
+ cfg.h \
+ device-discovery.h
+
+blkmapd_LDADD = -ldevmapper ../../support/nfs/libnfs.a
+
+MAINTAINERCLEANFILES = Makefile.in
+
+#######################################################################
+# The following allows the current practice of having
+# daemons renamed during the install to include RPCPREFIX
+# and the KPREFIX
+# This could all be done much easier with program_transform_name
+# ( program_transform_name = s/^/$(RPCPREFIX)$(KPREFIX)/ )
+# but that also renames the man pages, which the current
+# practice does not do.
+# Rename each installed daemon to $(RPCPREFIX)$(KPREFIX)<name>.
+# When both prefixes are empty the source and destination are the same
+# file and "mv" would fail, so the rename is skipped in that case.  Any
+# real rename failure aborts the install.
+install-exec-hook:
+	(cd $(DESTDIR)$(sbindir) && \
+	  for p in $(sbin_PROGRAMS); do \
+	    src=$$p$(EXEEXT); \
+	    dst=$(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT); \
+	    test "$$src" = "$$dst" || mv -f "$$src" "$$dst" || exit 1; \
+	  done)
+# Remove the prefixed daemon names created by install-exec-hook.
+uninstall-hook:
+	cd $(DESTDIR)$(sbindir) && \
+	for prog in $(sbin_PROGRAMS); do \
+	  rm -f $(RPCPREFIX)$(KPREFIX)$$prog$(EXEEXT); \
+	done
+
+
+# XXX This makes some assumptions about what automake does.
+# XXX But there is no install-man-hook or install-man-local.
+# NOTE(review): these overrides depend on automake's install-man8 and
+# uninstall-man8 targets, which presumably are only generated once a
+# man8_MANS variable is defined -- confirm before relying on "make install".
+install-man: install-man8 install-man-links
+uninstall-man: uninstall-man8 uninstall-man-links
+
+# Create a $(RPCPREFIX)<page>.8 symlink next to every installed page.
+# With an empty RPCPREFIX the link name equals the page name, so the
+# rm/ln pair would replace the page by a symlink to itself: do nothing
+# in that case.  The list goes through a shell variable so that an empty
+# page list never produces "for m in ; do", which some shells reject.
+install-man-links:
+	list='$(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS)'; \
+	test -n "$(RPCPREFIX)" || exit 0; \
+	for m in $$list; do \
+	  inst=`echo $$m | sed -e 's/man$$/8/'`; \
+	  (cd $(DESTDIR)$(man8dir) && \
+	    rm -f $(RPCPREFIX)$$inst && \
+	    $(LN_S) $$inst $(RPCPREFIX)$$inst) || exit 1; \
+	done
+
+# Remove the links created by install-man-links.
+uninstall-man-links:
+	list='$(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS)'; \
+	for m in $$list; do \
+	  inst=`echo $$m | sed -e 's/man$$/8/'`; \
+	  rm -f $(DESTDIR)$(man8dir)/$(RPCPREFIX)$$inst; \
+	done
+
diff --git a/utils/blkmapd/atomicio.c b/utils/blkmapd/atomicio.c
new file mode 100644
index 0000000..8db626e
--- /dev/null
+++ b/utils/blkmapd/atomicio.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2002 Marius Aamodt Eriksen <marius@monkey.org>
+ * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+
+/*
+ * ensure all of data on socket comes through. f==read || f==write
+ */
+ssize_t atomicio(ssize_t(*f) (int, void *, size_t), int fd, void *_s, size_t n)
+{
+	char *cursor = _s;
+	ssize_t done = 0;
+
+	/*
+	 * Keep calling f (read or write) until n bytes have been moved.
+	 * EINTR/EAGAIN are retried; any other error or EOF ends the
+	 * transfer, returning the partial count if something was already
+	 * transferred and the raw result (-1 or 0) otherwise.
+	 */
+	while (n > done) {
+		ssize_t got = f(fd, cursor + done, n - done);
+
+		if (got == -1 && (errno == EINTR || errno == EAGAIN))
+			continue;
+		if (got == -1 || got == 0)
+			return done != 0 ? done : got;
+		done += got;
+	}
+	return done;
+}
diff --git a/utils/blkmapd/cfg.c b/utils/blkmapd/cfg.c
new file mode 100644
index 0000000..dab9d0f
--- /dev/null
+++ b/utils/blkmapd/cfg.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <linux/errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+#include "device-discovery.h"
+#include "cfg.h"
+
+char *conf_path = "/etc/blkmapd.conf";
+
+struct scan_root_list *scan_root_list_head;
+
+/* Free every configured scan root together with its device-name list. */
+void bl_release_list(void)
+{
+	struct scan_root_list *cur;
+	struct scan_device_list *dev;
+
+	while ((cur = scan_root_list_head) != NULL) {
+		/* Pop and free the devices hanging off this root. */
+		while ((dev = cur->disk) != NULL) {
+			cur->disk = dev->next;
+			free(dev->name);
+			free(dev);
+		}
+		scan_root_list_head = cur->next;
+		free(cur->name);
+		free(cur);
+	}
+}
+
+/*
+ * Allocate a scan root for directory "name" and push it on the head of
+ * scan_root_list_head.  The name is copied with strdup().
+ *
+ * all_disk is stored as given; bl_devicescan() treats a non-zero value
+ * as "scan every entry of the directory".
+ *
+ * Returns the new entry, or NULL (after logging) when out of memory.
+ */
+struct scan_root_list *bl_alloc_root_list(char *name, int all_disk)
+{
+	struct scan_root_list *root;
+
+	root = malloc(sizeof(*root));
+	if (!root)
+		goto nomem;
+
+	root->name = strdup(name);
+	if (!root->name)
+		goto nomem;
+	root->all_disk = all_disk;
+	/*
+	 * malloc() does not zero memory and bl_insert_device_list() walks
+	 * root->disk, so the device list must start out empty.
+	 */
+	root->disk = NULL;
+	root->next = scan_root_list_head;
+	scan_root_list_head = root;
+	return root;
+
+nomem:
+	BL_LOG_ERR("%s: Out of memory!\n", __func__);
+	free(root);
+	return NULL;
+}
+
+/*
+ * Copy "name" into a new device entry and prepend it to root->disk.
+ * Returns the entry, or NULL (after logging) when out of memory.
+ */
+struct scan_device_list *bl_alloc_device_list(struct scan_root_list *root,
+					      char *name)
+{
+	struct scan_device_list *entry;
+
+	entry = malloc(sizeof(struct scan_device_list));
+	if (entry != NULL) {
+		entry->name = strdup(name);
+		if (entry->name != NULL) {
+			entry->next = root->disk;
+			root->disk = entry;
+			return entry;
+		}
+	}
+
+	/* Either allocation failed: report it and undo the first one. */
+	BL_LOG_ERR("%s: Out of memory!\n", __func__);
+	if (entry != NULL)
+		free(entry);
+	return NULL;
+}
+
+/*
+ * Return the entry of root's device list whose name equals "name",
+ * allocating and linking a new one if there is none yet.
+ */
+struct scan_device_list *bl_insert_device_list(struct scan_root_list *root,
+					       char *name)
+{
+	struct scan_device_list *cur;
+
+	for (cur = root->disk; cur != NULL; cur = cur->next) {
+		if (cur->name != NULL && strcmp(cur->name, name) == 0)
+			return cur;
+	}
+	return bl_alloc_device_list(root, name);
+}
+
+/*
+ * Return the scan root named "name" if it is already on
+ * scan_root_list_head (its all_disk flag is left untouched), otherwise
+ * allocate a new one with the given all_disk value.
+ */
+struct scan_root_list *bl_insert_root_list(char *name, int all_disk)
+{
+	struct scan_root_list *cur;
+
+	for (cur = scan_root_list_head; cur != NULL; cur = cur->next) {
+		if (cur->name != NULL && strcmp(cur->name, name) == 0)
+			return cur;
+	}
+	return bl_alloc_root_list(name, all_disk);
+}
+
+/*
+ * Parse one configuration line and record it in the scan lists.
+ *
+ *   "<dir>/" or "<dir>/*"  -> scan root <dir>, all entries scanned
+ *   "<dir>/<prefix>[*]"    -> scan root <dir>, device prefix <prefix>
+ *   blank lines and lines starting with '#' are ignored
+ *
+ * "line" itself is never modified; the work is done on a private copy,
+ * which is released before returning (the list helpers keep their own
+ * copies of the names).  *bl_root receives the root the line refers to.
+ *
+ * Returns 0 on success or for an ignored line, -1 for a malformed line
+ * and -ENOMEM when memory is exhausted.
+ */
+int bl_parse_line(char *line, struct scan_root_list **bl_root)
+{
+	char *copy, *root, *device, *end;
+	int rv = 0;
+
+	copy = strdup(line);
+	if (!copy) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+	root = copy;
+	end = root + strlen(root);
+
+	/* Skip comments */
+	if (*root == '#')
+		goto out;
+
+	/* Trim leading space (cast: isspace() needs an unsigned char value) */
+	while (*root != '\0' && isspace((unsigned char)*root))
+		root++;
+	if (*root == '\0')
+		goto out;
+
+	/* Trim trailing space and set "end" to last char */
+	while ((isspace((unsigned char)*end) || (*end == '\0')) && (end > root))
+		end--;
+
+	/* For lines ending with '/' or '/','*': add as a dir root */
+	if ((*end == '/') ||
+	    ((*end == '*') && (end - root >= 1) && (*(end - 1) == '/'))) {
+		if (*end == '*')
+			end--;
+		if (*end == '/')
+			end--;
+		*(end + 1) = '\0';
+		*bl_root = bl_insert_root_list(root, 1);
+		if (*bl_root == NULL)
+			rv = -ENOMEM;
+		goto out;
+	}
+
+	/* Other lines: add as a device; split at the last '/' */
+	device = end;
+	while ((*device != '/') && (device > root))
+		device--;
+	if (device == root) {
+		BL_LOG_ERR("%s: invalid config line\n", __func__);
+		rv = -1;
+		goto out;
+	}
+	*device = '\0';
+	*bl_root = bl_insert_root_list(root, 0);
+	if (*bl_root == NULL) {
+		rv = -ENOMEM;
+		goto out;
+	}
+	/* A trailing '*' is only a wildcard marker, not part of the prefix */
+	if (*end == '*')
+		end--;
+	*(end + 1) = '\0';
+	if (bl_insert_device_list(*bl_root, device + 1) == NULL)
+		rv = -ENOMEM;
+
+out:
+	free(copy);
+	return rv;
+}
+
+/*
+ * Discard the current scan lists and install the built-in defaults
+ * "/dev/sd*" and "/dev/mapper/".  Returns the result of the last
+ * bl_parse_line() call made; a negative result stops early.
+ */
+int bl_set_default_conf(void)
+{
+	static char *const defaults[] = { "/dev/sd*", "/dev/mapper/" };
+	struct scan_root_list *root = NULL;
+	unsigned int i;
+	int rv = 0;
+
+	bl_release_list();
+	for (i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++) {
+		rv = bl_parse_line(defaults[i], &root);
+		if (rv < 0)
+			break;
+	}
+	return rv;
+}
+
+/*
+ * Parse a whole configuration held in the NUL-terminated buffer "buf",
+ * one line at a time.  Newlines in "buf" are overwritten with NULs.
+ * A final line that is not newline-terminated is parsed as well.
+ *
+ * Returns 0 on success or the first negative bl_parse_line() result.
+ */
+int bl_parse_conf(char *buf)
+{
+	char *tmp, *line = buf, *end = buf + strlen(buf);
+	struct scan_root_list *bl_root = NULL;
+	int rv;
+
+	for (tmp = buf; tmp < end; tmp++) {
+		if (*tmp != '\n')
+			continue;
+		*tmp = '\0';
+		rv = bl_parse_line(line, &bl_root);
+		if (rv < 0)
+			return rv;
+		line = tmp + 1;
+	}
+
+	/* Text left after the last newline is a line too. */
+	if (line < end) {
+		rv = bl_parse_line(line, &bl_root);
+		if (rv < 0)
+			return rv;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the scan lists from the file named by conf_path, or from the
+ * built-in defaults when that file cannot be opened.
+ *
+ * Returns the last parse result, or -EINVAL when no scan root ended up
+ * configured at all.
+ */
+int bl_cfg_init(void)
+{
+	struct scan_root_list *root = NULL;
+	char buf[PATH_MAX];
+	int rv = 0;
+	FILE *f = fopen(conf_path, "r");
+
+	if (f != NULL) {
+		while (fgets(buf, sizeof buf, f) != NULL) {
+			rv = bl_parse_line(buf, &root);
+			if (rv < 0)
+				break;
+		}
+		fclose(f);
+	} else {
+		rv = bl_set_default_conf();
+	}
+
+	return scan_root_list_head ? rv : -EINVAL;
+}
diff --git a/utils/blkmapd/cfg.h b/utils/blkmapd/cfg.h
new file mode 100644
index 0000000..b9bf930
--- /dev/null
+++ b/utils/blkmapd/cfg.h
@@ -0,0 +1,47 @@
+/*
+ * cfg.h
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef BL_CFG_H
+#define BL_CFG_H
+
+extern char *conf_path;
+extern struct scan_root_list *scan_root_list_head;
+
+struct scan_device_list { /* one device-name prefix to scan for under a scan root */
+ struct scan_device_list *next; /* next entry of the same root's list */
+ char *name; /* strdup()ed name prefix, matched against the start of directory entry names */
+};
+
+struct scan_root_list { /* one directory that is searched for block devices */
+ struct scan_root_list *next; /* next root on scan_root_list_head */
+ unsigned int all_disk; /* non-zero: scan every entry; zero: only entries matching "disk" */
+ char *name; /* strdup()ed directory path */
+ struct scan_device_list *disk; /* head of this root's device-name list */
+};
+
+int bl_cfg_init(void);
+
+#endif
diff --git a/utils/blkmapd/device-discovery.c b/utils/blkmapd/device-discovery.c
new file mode 100644
index 0000000..f42ddc8
--- /dev/null
+++ b/utils/blkmapd/device-discovery.c
@@ -0,0 +1,502 @@
+/*
+ * device-discovery.c: main function, discovering device and processing
+ * pipe request from kernel.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <linux/kdev_t.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <errno.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+#include "device-discovery.h"
+#include "cfg.h"
+
+#define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/bl_device_pipe"
+#define PID_FILE "/var/run/blkmapd.pid"
+
+struct bl_disk *visible_disk_list;
+
+/*
+ * Look up "filepath" in the path list "paths".
+ * Returns the entry whose full_path is identical, or NULL if none is.
+ */
+struct bl_disk_path *bl_get_path(const char *filepath,
+				 struct bl_disk_path *paths)
+{
+	struct bl_disk_path *cur;
+
+	for (cur = paths; cur != NULL; cur = cur->next) {
+		if (strcmp(cur->full_path, filepath) == 0)
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Return 1 when path->full_path begins with valid_path->full_path,
+ * i.e. "path" looks like a partition of (or is the same node as)
+ * "valid_path"; return 0 otherwise.
+ */
+int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path)
+{
+	size_t prefix_len = strlen(valid_path->full_path);
+
+	return strncmp(valid_path->full_path, path->full_path,
+		       prefix_len) == 0;
+}
+
+/*
+ * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO,
+ * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to
+ * create pseudo device. So if state is higher, the device path needs to
+ * be updated.
+ * If device-mapper multipath support is a must, pseudo devices should
+ * exist for each multipath device. If not, active device path will be
+ * chosen for device creation.
+ * Treat partition as invalid path.
+ */
+int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state,
+		   struct bl_disk *disk)
+{
+	struct bl_disk_path *current = disk->valid_path;
+
+	/*
+	 * Returns 0 (keep the current valid path) only when there is one,
+	 * its state is at least "state", and "path" is a partition of it;
+	 * returns 1 (switch to "path") in every other case.
+	 */
+	if (current != NULL && current->state >= state &&
+	    bl_is_partition(current, path))
+		return 0;
+
+	return 1;
+}
+
+/* Free visible_disk_list: every disk, its path entries and its serial. */
+void bl_release_disk(void)
+{
+	struct bl_disk *d;
+	struct bl_disk_path *p;
+
+	while ((d = visible_disk_list) != NULL) {
+		while ((p = d->paths) != NULL) {
+			d->paths = p->next;
+			free(p->full_path);
+			free(p);
+		}
+		if (d->serial)
+			free(d->serial);
+		visible_disk_list = d->next;
+		free(d);
+	}
+}
+
+/*
+ * Probe the device node "filepath" and record it in visible_disk_list.
+ *
+ * Nodes that cannot be opened or that report a zero size are ignored.
+ * A node whose serial matches an already known disk is added as an
+ * additional path of that disk (and may become its valid path, see
+ * bl_update_path()); otherwise a new disk entry is created.
+ *
+ * The serial returned by bldev_read_serial() is owned by this function
+ * until it is stored in a new disk entry; in every other case it is
+ * released with free(), the same way bl_release_disk() releases it.
+ */
+void bl_add_disk(char *filepath)
+{
+	struct bl_disk *disk = NULL;
+	int fd = 0;
+	struct stat sb;
+	off_t size = 0;
+	unsigned long sectors = 0;
+	struct bl_serial *serial = NULL;
+	enum bl_path_state_e ap_state = BL_PATH_STATE_PASSIVE;
+	struct bl_disk_path *diskpath = NULL, *path = NULL;
+	dev_t dev;
+
+	BL_LOG_ERR("%s: %s\n", __func__, filepath);
+
+	fd = open(filepath, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return;
+
+	if (fstat(fd, &sb)) {
+		close(fd);
+		return;
+	}
+
+	/*
+	 * BLKGETSIZE stores an unsigned long, so it must not be pointed at
+	 * an off_t, which may have a different width.
+	 * NOTE(review): BLKGETSIZE counts 512-byte sectors while st_size is
+	 * in bytes -- confirm which unit the users of disk->size expect.
+	 */
+	if (sb.st_size)
+		size = sb.st_size;
+	else if (ioctl(fd, BLKGETSIZE, &sectors) == 0)
+		size = sectors;
+
+	if (!size) {
+		close(fd);
+		return;
+	}
+
+	dev = sb.st_rdev;
+	serial = bldev_read_serial(fd, filepath);
+
+	for (disk = visible_disk_list; disk != NULL; disk = disk->next) {
+		/* Already scanned or a partition?
+		 * XXX: if released each time, maybe not need to compare
+		 */
+		if (!serial || !disk->serial)
+			continue;	/* nothing to compare against */
+		if ((serial->len == disk->serial->len) &&
+		    !memcmp(serial->data, disk->serial->data, serial->len)) {
+			diskpath = bl_get_path(filepath, disk->paths);
+			break;
+		}
+	}
+
+	if (disk && diskpath) {
+		/* This exact path is already recorded. */
+		close(fd);
+		free(serial);
+		return;
+	}
+
+	bldev_read_ap_state(fd, &ap_state);
+	close(fd);
+
+	/*
+	 * Not sure how to identify a pseudo device created by
+	 * device-mapper, so leave /dev/mapper for now.
+	 */
+	if (strncmp(filepath, "/dev/mapper", 11) == 0)
+		ap_state = BL_PATH_STATE_PSEUDO;
+
+	/* add path */
+	path = malloc(sizeof(struct bl_disk_path));
+	if (!path) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		goto out_err;
+	}
+	path->next = NULL;
+	path->state = ap_state;
+	path->full_path = strdup(filepath);
+	if (!path->full_path)
+		goto out_err;
+
+	if (!disk) {		/* add disk */
+		disk = malloc(sizeof(struct bl_disk));
+		if (!disk) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			goto out_err;
+		}
+		disk->next = visible_disk_list;
+		disk->dev = dev;
+		disk->size = size;
+		disk->serial = serial;	/* ownership moves to the disk */
+		disk->valid_path = path;
+		disk->paths = path;
+		visible_disk_list = disk;
+	} else {
+		/* The known disk keeps its own serial; drop this copy. */
+		free(serial);
+		path->next = disk->paths;
+		disk->paths = path;
+		/* check whether we need to update disk info */
+		if (bl_update_path(path, path->state, disk)) {
+			disk->dev = dev;
+			disk->size = size;
+			disk->valid_path = path;
+		}
+	}
+	return;
+
+out_err:
+	free(serial);
+	if (path) {
+		free(path->full_path);
+		free(path);
+	}
+	return;
+}
+
+/*
+ * Decide whether directory entry "filename" of scan root "root" should
+ * be probed, and hand "<root->name>/<filename>" to bl_add_disk() if so.
+ *
+ * "." and "..", over-long paths and /dev/sg* nodes are skipped.  When
+ * root->all_disk is zero the entry must start with one of the root's
+ * configured device names (e.g. name "sd" matches "sda").
+ */
+void bl_devicescan(const char *filename, struct scan_root_list *root)
+{
+	char filepath[PATH_MAX];
+	struct scan_device_list *device;
+	int len;
+
+	if (!strcmp(filename, ".") || !strcmp(filename, ".."))
+		return;
+
+	/*
+	 * snprintf() bounds the write itself; subtracting lengths from
+	 * PATH_MAX can wrap around for a very long root name.
+	 */
+	len = snprintf(filepath, sizeof(filepath), "%s/%s",
+		       root->name, filename);
+	if (len < 0 || (size_t)len >= sizeof(filepath)) {
+		BL_LOG_ERR("%s: name too long\n", __func__);
+		return;
+	}
+
+	/*
+	 * sg device is not a real device, but a device created according
+	 * to each scsi device. It won't be used for pseudo device creation,
+	 * so sg devices are never scanned.
+	 */
+	if (!strncmp(filepath, "/dev/sg", 7))
+		return;
+
+	if (!root->all_disk) {
+		/*
+		 * strncmp() stops at the end of either string, unlike
+		 * memcmp(), which would read past a short filename.
+		 */
+		for (device = root->disk; device; device = device->next) {
+			if (device->name &&
+			    !strncmp(filename, device->name,
+				     strlen(device->name)))
+				break;
+		}
+		if (!device)
+			return;
+	}
+
+	bl_add_disk(filepath);
+}
+
+/*
+ * Rebuild visible_disk_list from scratch by walking every configured
+ * scan root and offering each directory entry to bl_devicescan().
+ * Roots that cannot be opened are skipped.  Always returns 0.
+ */
+int bl_discover_devices(void)
+{
+	struct scan_root_list *root;
+	struct dirent *entry;
+	DIR *dir;
+
+	/* release previous list */
+	bl_release_disk();
+
+	for (root = scan_root_list_head; root != NULL; root = root->next) {
+		dir = opendir(root->name);
+		if (dir == NULL)
+			continue;
+
+		while ((entry = readdir(dir)) != NULL)
+			bl_devicescan(entry->d_name, root);
+
+		closedir(dir);
+	}
+
+	return 0;
+}
+
+/*
+ * Process one kernel request read from the pipe "fd" and write the
+ * reply back to it.
+ *
+ * A request is a struct pipefs_hdr followed by (totallen - header size)
+ * payload bytes.  The reply is the same header with status set to
+ * BL_DEVICE_REQUEST_PROC or BL_DEVICE_REQUEST_ERR; a successful
+ * BL_DEVICE_MOUNT reply additionally carries the 32-bit major and minor
+ * numbers of the created device.
+ *
+ * return 0: request processed and answered;
+ * return < 0: error (-ENOMEM, -EIO or -EINVAL), no reply was written.
+ */
+int bl_disk_inquiry_process(int fd)
+{
+	int ret = 0;
+	struct pipefs_hdr *head = NULL, *tmp;
+	char *buf = NULL;
+	uint32_t major, minor;
+	uint16_t buflen;
+	unsigned int len = 0;
+
+	head = calloc(1, sizeof(struct pipefs_hdr));
+	if (!head) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* read request */
+	if (atomicio(read, fd, head, sizeof(*head)) != sizeof(*head)) {
+		/* Note that an error in this or the next read is pretty
+		 * catastrophic, as there is no good way to resync into
+		 * the pipe's stream.
+		 */
+		BL_LOG_ERR("Read pipefs head error!\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	/* A length shorter than the header would wrap buflen around. */
+	if (head->totallen < sizeof(*head)) {
+		BL_LOG_ERR("Invalid pipefs message length!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	buflen = head->totallen - sizeof(*head);
+	/* malloc(0) may legitimately return NULL; never ask for 0 bytes. */
+	buf = malloc(buflen ? buflen : 1);
+	if (!buf) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (atomicio(read, fd, buf, buflen) != buflen) {
+		BL_LOG_ERR("Read pipefs content error!\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	head->status = BL_DEVICE_REQUEST_PROC;
+	switch (head->type) {
+	case BL_DEVICE_MOUNT:
+		if (!process_deviceinfo(buf, buflen, &major, &minor)) {
+			/*
+			 * Fall out of the switch so that the error status
+			 * is still written back to the pipe.
+			 */
+			head->status = BL_DEVICE_REQUEST_ERR;
+			break;
+		}
+		tmp = realloc(head, sizeof(major) + sizeof(minor) +
+			      sizeof(struct pipefs_hdr));
+		if (!tmp) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			ret = -ENOMEM;
+			goto out;
+		}
+		head = tmp;
+		memcpy((char *)head + sizeof(struct pipefs_hdr),
+		       &major, sizeof(major));
+		memcpy((char *)head + sizeof(struct pipefs_hdr) + sizeof(major),
+		       &minor, sizeof(minor));
+		len = sizeof(major) + sizeof(minor);
+		break;
+	case BL_DEVICE_UMOUNT:
+		/* The payload is read as a uint64_t; make sure it is there. */
+		if (buflen < sizeof(uint64_t) ||
+		    !dm_device_remove_all((uint64_t *) buf))
+			head->status = BL_DEVICE_REQUEST_ERR;
+		bl_discover_devices();
+		break;
+	default:
+		head->status = BL_DEVICE_REQUEST_ERR;
+	}
+
+	head->totallen = sizeof(struct pipefs_hdr) + len;
+	/* write to pipefs */
+	if (atomicio((void *)write, fd, head, head->totallen)
+	    != head->totallen) {
+		BL_LOG_ERR("Write pipefs error!\n");
+		ret = -EIO;
+	}
+
+out:
+	free(buf);
+	free(head);
+	return ret;
+}
+
+/* TODO: set bl_process_stop to 1 in command */
+unsigned int bl_process_stop;
+
+/*
+ * Serve kernel requests arriving on "fd" until the pipe has been idle
+ * for BL_DEVICE_DISCOVERY_INTERVAL seconds.
+ *
+ * return 1: bl_process_stop was set;
+ * return 0: the idle timeout expired (caller should rediscover devices);
+ * return < 0: select() failed, value is -errno.
+ *
+ * NOTE(review): a negative result of bl_disk_inquiry_process() is reset
+ * on the next loop iteration and so never reaches the caller -- confirm
+ * that this is intended.
+ */
+int bl_run_disk_inquiry_process(int fd)
+{
+	fd_set rset;
+	struct timeval tv;
+	int ret;
+
+	bl_process_stop = 0;
+
+	for (;;) {
+		if (bl_process_stop)
+			return 1;
+		FD_ZERO(&rset);
+		FD_SET(fd, &rset);
+		ret = 0;
+		/*
+		 * Both fields must be set on every pass: tv_usec was left
+		 * uninitialised before, and select() may modify the struct.
+		 */
+		tv.tv_sec = BL_DEVICE_DISCOVERY_INTERVAL;
+		tv.tv_usec = 0;
+		switch (select(fd + 1, &rset, NULL, NULL, &tv)) {
+		case -1:
+			if (errno == EINTR)
+				continue;
+			else {
+				ret = -errno;
+				goto out;
+			}
+		case 0:
+			goto out;
+		default:
+			if (FD_ISSET(fd, &rset))
+				ret = bl_disk_inquiry_process(fd);
+		}
+	}
+out:
+	return ret;
+}
+
+/*
+ * Daemon entry point.
+ *
+ * Options: -c <file>  use <file> instead of the default config path
+ *          -f         stay in the foreground (do not daemonize)
+ *
+ * Creates and locks PID_FILE, opens the rpc_pipefs pipe, loads the scan
+ * configuration and then alternates forever between device discovery
+ * and serving kernel requests.  Every startup failure returns -1; once
+ * the pid file has been created it is removed again on failure, because
+ * a left-over file makes the existence check below refuse every later
+ * start.
+ */
+int main(int argc, char **argv)
+{
+	int fd, pidfd, opt, fg = 0, ret = 1;
+	int len;
+	struct stat statbuf;
+	char pidbuf[64];
+
+	while ((opt = getopt(argc, argv, "c:f")) != -1) {
+		switch (opt) {
+		case 'c':
+			conf_path = optarg;
+			break;
+		case 'f':
+			fg = 1;
+			break;
+		}
+	}
+
+	if (!stat(PID_FILE, &statbuf)) {
+		fprintf(stderr, "Pid file already existed\n");
+		return -1;
+	}
+
+	if (!fg && daemon(0, 0) != 0) {
+		fprintf(stderr, "Daemonize failed\n");
+		return -1;
+	}
+
+	/* Log under the daemon facility instead of passing 0. */
+	openlog("blkmapd", LOG_PID, LOG_DAEMON);
+	pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
+	if (pidfd < 0) {
+		BL_LOG_ERR("Create pid file failed\n");
+		return -1;
+	}
+
+	if (lockf(pidfd, F_TLOCK, 0) < 0) {
+		/* Someone else holds the lock: the file is theirs, keep it. */
+		BL_LOG_ERR("Lock pid file failed\n");
+		close(pidfd);
+		return -1;
+	}
+	len = snprintf(pidbuf, sizeof(pidbuf), "%d\n", (int)getpid());
+	if (ftruncate(pidfd, 0) < 0 || write(pidfd, pidbuf, len) != len) {
+		BL_LOG_ERR("Write pid file failed\n");
+		goto out_pid;
+	}
+	/* pidfd stays open on purpose: closing it would drop the lock. */
+
+	/* open pipe file */
+	fd = open(BL_PIPE_FILE, O_RDWR);
+	if (fd < 0) {
+		BL_LOG_ERR("open pipe file error\n");
+		goto out_pid;
+	}
+
+	ret = bl_cfg_init();
+	if (ret < 0) {
+		if (ret == -ENOENT)
+			BL_LOG_WARNING("Config file not exist, use default\n");
+		else {
+			BL_LOG_ERR("Open/read Block pNFS config file error\n");
+			close(fd);
+			goto out_pid;
+		}
+	}
+
+	while (1) {
+		/* discover device when needed */
+		bl_discover_devices();
+
+		ret = bl_run_disk_inquiry_process(fd);
+		if (ret < 0) {
+			/* what should we do with process error? */
+			BL_LOG_ERR("inquiry process return %d\n", ret);
+		}
+	}
+
+out_pid:
+	unlink(PID_FILE);
+	close(pidfd);
+	return -1;
+}
diff --git a/utils/blkmapd/device-discovery.h b/utils/blkmapd/device-discovery.h
new file mode 100644
index 0000000..9f87ebe
--- /dev/null
+++ b/utils/blkmapd/device-discovery.h
@@ -0,0 +1,162 @@
+/*
+ * device-discovery.h
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef BL_DEVICE_DISCOVERY_H
+#define BL_DEVICE_DISCOVERY_H
+
+#define BL_DEVICE_DISCOVERY_INTERVAL 60
+
+#include <stdint.h>
+#include <syslog.h>
+
+enum blk_vol_type { /* kinds of volume a struct bl_volume can describe */
+ BLOCK_VOLUME_SIMPLE = 0, /* maps to a single LU */
+ BLOCK_VOLUME_SLICE = 1, /* slice of another volume */
+ BLOCK_VOLUME_CONCAT = 2, /* concatenation of multiple volumes */
+ BLOCK_VOLUME_STRIPE = 3, /* striped across multiple volumes */
+ BLOCK_VOLUME_PSEUDO = 4, /* not described in this header -- TODO document */
+};
+
+/* All disk offset/lengths are stored in 512-byte sectors */
+struct bl_volume {
+ uint32_t bv_type; /* presumably an enum blk_vol_type value -- confirm */
+ off_t bv_size; /* in 512-byte sectors, per the note above this struct */
+ struct bl_volume **bv_vols; /* component volumes; presumably bv_vol_n entries -- confirm */
+ int bv_vol_n;
+ union { /* which member is valid depends on the volume type, see below */
+ dev_t bv_dev; /*for BLOCK_VOLUME_SIMPLE(PSEUDO) */
+ off_t bv_stripe_unit; /*for BLOCK_VOLUME_STRIPE(CONCAT) */
+ off_t bv_offset; /*for BLOCK_VOLUME_SLICE */
+ } param;
+};
+
+struct bl_sig_comp { /* one component of a volume signature */
+ int64_t bs_offset; /* In bytes */
+ uint32_t bs_length; /* In bytes */
+ char *bs_string; /* presumably the bs_length bytes expected at bs_offset -- confirm */
+};
+
+/* Maximum number of signatures components in a simple volume */
+# define BLOCK_MAX_SIG_COMP 16
+
+struct bl_sig { /* signature made of up to BLOCK_MAX_SIG_COMP components */
+ int si_num_comps; /* presumably the number of si_comps[] entries in use -- confirm */
+ struct bl_sig_comp si_comps[BLOCK_MAX_SIG_COMP];
+};
+
+/*
+ * Multipath support: ACTIVE or PSEUDO device is valid,
+ * PASSIVE is a standby for ACTIVE.
+ */
+enum bl_path_state_e { /* numeric order matters: bl_update_path() compares states with >= */
+ BL_PATH_STATE_PASSIVE = 1, /* initial value used by bl_add_disk() before probing */
+ BL_PATH_STATE_ACTIVE = 2,
+ BL_PATH_STATE_PSEUDO = 3, /* assigned by bl_add_disk() to paths under /dev/mapper */
+};
+
+struct bl_serial { /* device serial as returned by bldev_read_serial() */
+ int len; /* number of bytes of data compared by bl_add_disk() */
+ char *data; /* serial bytes; compared with memcmp(), not as a C string */
+};
+
+struct bl_disk_path { /* one device node through which a disk is reachable */
+ struct bl_disk_path *next; /* next path of the same disk */
+ char *full_path; /* strdup()ed device node path */
+ enum bl_path_state_e state;
+};
+
+struct bl_disk { /* one discovered disk, identified by its serial */
+ struct bl_disk *next; /* next disk on visible_disk_list */
+ struct bl_serial *serial; /* released with free() in bl_release_disk() */
+ dev_t dev; /* st_rdev of the path last chosen as valid_path */
+ off_t size; /* size recorded for the path last chosen as valid_path */
+ struct bl_disk_path *valid_path; /* preferred path; points into the paths list */
+ struct bl_disk_path *paths; /* every known path to this disk */
+};
+
+struct bl_dev_id { /* NOTE(review): looks like a device identification descriptor header -- confirm against device-inq.c */
+ unsigned char type;
+ unsigned char ids;
+ unsigned char reserve;
+ unsigned char len; /* presumably the number of bytes in data[] -- confirm */
+ char data[0]; /* zero-length array: variable-size payload follows the header */
+};
+
+struct pipefs_hdr { /* header of every message exchanged over the pipe; read and written as raw bytes */
+ uint32_t msgid;
+ uint8_t type; /* BL_DEVICE_MOUNT or BL_DEVICE_UMOUNT */
+ uint8_t flags;
+ uint16_t totallen; /* length of entire message, including hdr */
+ uint32_t status; /* one of the BL_DEVICE_REQUEST_* values */
+};
+
+#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */
+#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices */
+#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */
+#define BL_DEVICE_REQUEST_PROC 0x1 /* User process succeeds */
+#define BL_DEVICE_REQUEST_ERR 0x2 /* User process fails */
+
+uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes);
+
+#define BLK_READBUF(p, e, nbytes) do { \
+ p = blk_overflow(p, e, nbytes); \
+ if (!p) {\
+ goto out_err;\
+ } \
+} while (0) /* jumps to the caller's out_err label when blk_overflow() returns NULL */
+
+#define READ32(x) (x) = ntohl(*p++) /* reads through a caller-scope pointer named p and advances it */
+
+#define READ64(x) do { \
+ (x) = (uint64_t)ntohl(*p++) << 32; \
+ (x) |= ntohl(*p++); \
+} while (0) /* high 32-bit word first, then low word; uses and advances caller-scope p */
+
+#define READ_SECTOR(x) do { \
+ READ64(tmp); \
+ if (tmp & 0x1ff) { \
+ goto out_err; \
+ } \
+ (x) = tmp >> 9; \
+} while (0) /* needs caller-scope tmp, p and out_err; rejects values that are not a multiple of 512, stores value / 512 */
+
+extern struct bl_disk *visible_disk_list;
+uint64_t dm_device_create(struct bl_volume *vols, int num_vols);
+int dm_device_remove_all(uint64_t *dev);
+uint64_t process_deviceinfo(const char *dev_addr_buf,
+ unsigned int dev_addr_len,
+ uint32_t *major, uint32_t *minor);
+
+extern ssize_t atomicio(ssize_t(*f) (int, void *, size_t),
+ int fd, void *_s, size_t n);
+extern struct bl_serial *bldev_read_serial(int fd, const char *filename);
+extern void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out);
+extern int bl_discover_devices(void);
+
+/* Thin syslog() wrappers, one per priority; arguments are printf-style. */
+#define BL_LOG_WARNING(...) syslog(LOG_WARNING, __VA_ARGS__)
+#define BL_LOG_ERR(...) syslog(LOG_ERR, __VA_ARGS__)
+#define BL_LOG_DEBUG(...) syslog(LOG_DEBUG, __VA_ARGS__)
+#endif
diff --git a/utils/blkmapd/device-inq.c b/utils/blkmapd/device-inq.c
new file mode 100644
index 0000000..ff38fd6
--- /dev/null
+++ b/utils/blkmapd/device-inq.c
@@ -0,0 +1,235 @@
+/*
+ * device-inq.c: inquire SCSI device information.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * This program refers to "SCSI Primary Commands - 3 (SPC-3)
+ * at http://www.t10.org and sg_inq.c in sg3_utils-1.26 for
+ * Linux OS SCSI subsystem, by D. Gilbert.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <errno.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+#include "device-discovery.h"
+
+#define DEF_ALLOC_LEN 255
+#define MX_ALLOC_LEN (0xc000 + 0x80)
+
+/*
+ * Allocate a bl_serial together with a private copy of the len bytes at
+ * bytes in one malloc() block; the data pointer is aimed just past the
+ * struct.  Returns NULL when the allocation fails.  Release the result
+ * with bl_free_scsi_string().
+ */
+struct bl_serial *bl_create_scsi_string(int len, const char *bytes)
+{
+	struct bl_serial *serial = malloc(sizeof(*serial) + len);
+
+	if (!serial)
+		return NULL;
+
+	serial->len = len;
+	serial->data = (char *)(serial + 1);
+	memcpy(serial->data, bytes, len);
+	return serial;
+}
+
+/* Release a string made by bl_create_scsi_string(); NULL is accepted. */
+void bl_free_scsi_string(struct bl_serial *str)
+{
+	/* free(NULL) is defined to do nothing, so no guard is required */
+	free(str);
+}
+
+/*
+ * True when a completed SG_IO request reports no error: the masked SCSI
+ * status bits, the host status and the low nibble of the driver status
+ * must all be zero.
+ */
+#define sg_io_ok(io_hdr) \
+ ((((io_hdr).status & 0x7e) == 0) && \
+ ((io_hdr).host_status == 0) && \
+ (((io_hdr).driver_status & 0x0f) == 0))
+
+/* Value stored in sg_io_hdr.timeout for every INQUIRY issued by this file. */
+static int sg_timeout = 1 * 1000;
+
+/*
+ * Send one SCSI INQUIRY to fd through the SG_IO ioctl and receive up to
+ * len bytes of the response in buffer.  A non-negative page selects that
+ * VPD page; a negative page leaves the command as a standard INQUIRY.
+ * Returns 0 on success, -1 if the ioctl fails or the request finished
+ * with an error status.
+ */
+static int bldev_inquire_page(int fd, int page, char *buffer, int len)
+{
+	unsigned char sense[28];
+	unsigned char cdb[6] = { INQUIRY, 0, 0, 0, 0, 0 };
+	struct sg_io_hdr hdr;
+
+	if (page >= 0) {
+		cdb[1] = 1;	/* request a VPD page ... */
+		cdb[2] = page;	/* ... namely this one */
+	}
+	/* allocation length, most significant byte first */
+	cdb[3] = (unsigned char)((len >> 8) & 0xff);
+	cdb[4] = (unsigned char)(len & 0xff);
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.interface_id = 'S';
+	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+	hdr.cmdp = cdb;
+	hdr.cmd_len = sizeof(cdb);
+	hdr.dxferp = buffer;
+	hdr.dxfer_len = len;
+	hdr.sbp = sense;
+	hdr.mx_sb_len = sizeof(sense);
+	hdr.timeout = sg_timeout;
+
+	if (ioctl(fd, SG_IO, &hdr) < 0)
+		return -1;
+
+	return sg_io_ok(hdr) ? 0 : -1;
+}
+
+/*
+ * Read a whole VPD page from fd into a freshly allocated buffer.
+ *
+ * A first INQUIRY of DEF_ALLOC_LEN bytes fetches the page header; if the
+ * header announces a longer page the buffer is grown and the INQUIRY is
+ * repeated with the full length.
+ *
+ * fd:     open descriptor of the SCSI device
+ * page:   VPD page code to request
+ * buffer: receives the allocated buffer (NULL if the first allocation
+ *         failed)
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure and -1 on any
+ * other error.  This function never frees *buffer: whatever the return
+ * value, the caller must free(*buffer) when it is non-NULL.
+ */
+int bldev_inquire_pages(int fd, int page, char **buffer)
+{
+	const unsigned char *hdr;
+	char *tmp;
+	int status;
+	int len;
+
+	*buffer = calloc(DEF_ALLOC_LEN, sizeof(char));
+	if (!*buffer) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	status = bldev_inquire_page(fd, page, *buffer, DEF_ALLOC_LEN);
+	if (status)
+		goto out;
+
+	status = -1;
+	/*
+	 * Look at the header through unsigned bytes: plain char may be
+	 * signed, in which case a length byte >= 0x80 would be
+	 * sign-extended and yield a wrong (even negative) page length.
+	 */
+	hdr = (const unsigned char *)*buffer;
+	if (hdr[1] != page)
+		goto out;
+
+	/* bytes 2-3: big-endian page length; +4 for the header itself */
+	len = (hdr[2] << 8) + hdr[3] + 4;
+	if (len > MX_ALLOC_LEN) {
+		BL_LOG_ERR("SCSI response length too long: %d\n", len);
+		goto out;
+	}
+	if (len > DEF_ALLOC_LEN) {
+		tmp = realloc(*buffer, len);
+		if (!tmp) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			status = -ENOMEM;
+			goto out;
+		}
+		*buffer = tmp;
+		status = bldev_inquire_page(fd, page, *buffer, len);
+		if (status)
+			goto out;
+	}
+	status = 0;
+ out:
+	return status;
+}
+
+/*
+ * Report the path state of the device open on fd.
+ * EMC multipath devices expose it through VPD page 0xc0; for every other
+ * device (the page cannot be read) ACTIVE is reported for now.
+ */
+void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out)
+{
+	char *page;
+
+	*ap_state_out = BL_PATH_STATE_ACTIVE;
+
+	/* bldev_inquire_pages() always stores into page, even on failure */
+	if (bldev_inquire_pages(fd, 0xc0, &page) == 0 && page[4] < 0x02)
+		*ap_state_out = BL_PATH_STATE_PASSIVE;
+
+	if (page)
+		free(page);
+}
+
+/*
+ * Build an identifier for the device open on fd from its Device
+ * Identification VPD page (0x83).
+ *
+ * The page is a sequence of descriptors, each a struct bl_dev_id header
+ * followed by header.len payload bytes.  Identifier types 0..3 are
+ * accepted and the highest type seen wins (3 > 2 > 1 > 0).  When the
+ * page cannot be read or holds no usable descriptor, filename itself is
+ * used as the identifier.
+ *
+ * Returns a string to be released with bl_free_scsi_string(), or NULL if
+ * memory is exhausted.
+ */
+struct bl_serial *bldev_read_serial(int fd, const char *filename)
+{
+	/* signed on purpose: keeps the loop bound from wrapping around */
+	const int hdr_len = sizeof(struct bl_dev_id);
+	struct bl_serial *serial_out = NULL;
+	int status, pos, len;
+	char *buffer = NULL;
+	struct bl_dev_id *dev_root, *dev_id;
+	unsigned int current_id = 0;
+
+	status = bldev_inquire_pages(fd, 0x83, &buffer);
+	if (status)
+		goto out;
+
+	dev_root = (struct bl_dev_id *)buffer;
+
+	/*
+	 * NOTE(review): this is only the low byte of the 16-bit page length
+	 * (the high byte lands in dev_root->reserve), so descriptors beyond
+	 * the first 255 bytes are never examined -- confirm that is intended.
+	 */
+	len = dev_root->len;
+	pos = 0;
+	while (pos + hdr_len <= len) {
+		dev_id = (struct bl_dev_id *)&(dev_root->data[pos]);
+		/*
+		 * Step to the next descriptor right away so that no path
+		 * through the loop body (notably the "continue" below) can
+		 * leave pos unchanged and spin forever.  A descriptor occupies
+		 * the full header plus its payload.
+		 */
+		pos += hdr_len + dev_id->len;
+
+		if ((dev_id->ids & 0xf) < current_id)
+			continue;
+		switch (dev_id->ids & 0xf) {
+			/* We process SCSI ID with four ID cases: 0, 1, 2 and 3.
+			 * When more than one ID is available, priority is
+			 * 3>2>1>0.
+			 */
+		case 2:	/* EUI-64 based */
+			if ((dev_id->len != 8) && (dev_id->len != 12) &&
+			    (dev_id->len != 16)) {
+				BL_LOG_ERR("EUI-64 only decodes 8, "
+					   "12 and 16\n");
+				break;
+			}
+			/* fall through */
+		case 3:	/* NAA */
+			/* TODO: NAA validity judgement too complicated,
+			 * so just ignore it here.
+			 */
+			if ((dev_id->type & 0xf) != 1) {
+				BL_LOG_ERR("Binary code_set expected\n");
+				break;
+			}
+			/* fall through */
+		case 0:	/* vendor specific */
+		case 1:	/* T10 vendor identification */
+			current_id = dev_id->ids & 0xf;
+			if (serial_out)
+				bl_free_scsi_string(serial_out);
+			serial_out = bl_create_scsi_string(dev_id->len,
+							   dev_id->data);
+			break;
+		default:
+			break;
+		}
+		/* nothing can outrank an NAA identifier */
+		if (current_id == 3)
+			break;
+	}
+ out:
+	if (!serial_out)
+		serial_out = bl_create_scsi_string(strlen(filename), filename);
+	if (buffer)
+		free(buffer);
+	return serial_out;
+}
diff --git a/utils/blkmapd/device-process.c b/utils/blkmapd/device-process.c
new file mode 100644
index 0000000..9e91840
--- /dev/null
+++ b/utils/blkmapd/device-process.c
@@ -0,0 +1,394 @@
+/*
+ * device-process.c: detailed processing of device information sent
+ * from kernel.
+ *
+ * Copyright (c) 2006 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <andros@citi.umich.edu>
+ * Fred Isaman <iisaman@umich.edu>
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ *
+ * Used codes in linux/fs/nfs/blocklayout/blocklayoutdev.c.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <libdevmapper.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/user.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <linux/kdev_t.h>
+#include "device-discovery.h"
+
+/*
+ * Bounds check used by the decoder: confirm that nbytes, rounded up to
+ * whole 32-bit words, can be read starting at p without running past
+ * end.  Returns p when the range fits and NULL otherwise.
+ */
+uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes)
+{
+	size_t nwords = (nbytes + 3) >> 2;
+	uint32_t *limit = p + nwords;
+
+	/* limit < p rejects lengths so large that the pointer wrapped */
+	return (limit > end || limit < p) ? NULL : p;
+}
+
+/*
+ * Decode a volume signature from the buffer at *pp (bounded by end) into
+ * sig.
+ *
+ * The wire form is a component count followed, per component, by a
+ * 64-bit offset, a 32-bit length and that many signature bytes padded to
+ * a 32-bit boundary.  bs_string is pointed straight into the decode
+ * buffer, so it is NOT NUL-terminated and is only valid while that
+ * buffer is.
+ *
+ * Returns 0 and advances *pp on success, -EIO on malformed input.
+ */
+static int decode_blk_signature(uint32_t **pp, uint32_t *end,
+				struct bl_sig *sig)
+{
+	int i;
+	/*
+	 * Unsigned on purpose: a length with the top bit set must not turn
+	 * negative, because after conversion to size_t the rounding in
+	 * blk_overflow() wraps to zero words and the check would pass.
+	 */
+	uint32_t tmp;
+	uint32_t *p = *pp;
+
+	BLK_READBUF(p, end, 4);
+	READ32(sig->si_num_comps);
+	if (sig->si_num_comps == 0) {
+		BL_LOG_ERR("0 components in sig\n");
+		goto out_err;
+	}
+	if (sig->si_num_comps >= BLOCK_MAX_SIG_COMP) {
+		BL_LOG_ERR("number of sig comps %i >= BLOCK_MAX_SIG_COMP\n",
+			   sig->si_num_comps);
+		goto out_err;
+	}
+	for (i = 0; i < sig->si_num_comps; i++) {
+		BLK_READBUF(p, end, 12);
+		READ64(sig->si_comps[i].bs_offset);
+		READ32(tmp);
+		sig->si_comps[i].bs_length = tmp;
+		BLK_READBUF(p, end, tmp);
+		/* Note we rely here on fact that sig is used immediately
+		 * for mapping, then thrown away.
+		 */
+		sig->si_comps[i].bs_string = (char *)p;
+		/* %.*s: the signature is not NUL-terminated */
+		BL_LOG_ERR("%s: si_comps[%d]: bs_length %d, bs_string %.*s\n",
+			   __func__, i, sig->si_comps[i].bs_length,
+			   (int)tmp, sig->si_comps[i].bs_string);
+		p += ((tmp + 3) >> 2);
+	}
+	*pp = p;
+	return 0;
+ out_err:
+	return -EIO;
+}
+
+/*
+ * Read comp->bs_length bytes at byte offset bs_offset of dev_name and
+ * compare them with the expected signature comp->bs_string.
+ *
+ * Returns 0 when the bytes match.  Any non-zero value means "no match":
+ * -1 if the device could not be opened, seeked or read (or memory ran
+ * out), otherwise the non-zero memcmp() result.
+ */
+int
+read_cmp_blk_sig(const char *dev_name, struct bl_sig_comp *comp,
+		 int64_t bs_offset)
+{
+	int fd, ret = -1;
+	char *sig = NULL;
+
+	fd = open(dev_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0) {
+		BL_LOG_ERR("%s could not be opened for read\n", dev_name);
+		goto error;
+	}
+
+	sig = (char *)malloc(comp->bs_length);
+	if (!sig) {
+		BL_LOG_ERR("%s: Out of memory\n", __func__);
+		goto error;
+	}
+
+	if (lseek64(fd, bs_offset, SEEK_SET) == -1) {
+		BL_LOG_ERR("File %s lseek error\n", dev_name);
+		goto error;
+	}
+
+	if (atomicio(read, fd, sig, comp->bs_length) != comp->bs_length) {
+		BL_LOG_ERR("File %s read error\n", dev_name);
+		goto error;
+	}
+
+	/*
+	 * Neither buffer is NUL-terminated (sig holds exactly bs_length
+	 * bytes), so bound both with an explicit precision instead of
+	 * letting %s run off the end.
+	 */
+	BL_LOG_ERR
+	    ("%s: %s sig: %.*s, bs_string: %.*s, bs_length: %d, bs_offset: %lld\n",
+	     __func__, dev_name, (int)comp->bs_length, sig,
+	     (int)comp->bs_length, comp->bs_string, comp->bs_length,
+	     (long long)bs_offset);
+	ret = memcmp(sig, comp->bs_string, comp->bs_length);
+
+ error:
+	if (sig)
+		free(sig);
+	if (fd >= 0)
+		close(fd);
+	return ret;
+}
+
+/*
+ * Check every component of sig against the contents of disk, read
+ * through disk->valid_path.  Returns 1 only if all components match and
+ * 0 as soon as one does not (or cannot be read).
+ */
+static int verify_sig(struct bl_disk *disk, struct bl_sig *sig)
+{
+	int idx;
+
+	for (idx = 0; idx < sig->si_num_comps; idx++) {
+		struct bl_sig_comp *comp = &sig->si_comps[idx];
+		int64_t offset = comp->bs_offset;
+
+		/* a negative offset is rebased by the disk size times 512 */
+		if (offset < 0)
+			offset += (((int64_t) disk->size) << 9);
+		BL_LOG_ERR("%s: bs_offset: %lld\n",
+			   __func__, (long long) offset);
+		if (read_cmp_blk_sig(disk->valid_path->full_path,
+				     comp, offset))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * map_sig_to_device()
+ * Given a signature, walk the list of visible disks searching for
+ * a match. Returns True if mapping was done, False otherwise.
+ *
+ * On a match vol->param.bv_dev and vol->bv_size are filled in from the
+ * matching disk.
+ */
+static int map_sig_to_device(struct bl_sig *sig, struct bl_volume *vol)
+{
+	int mapped = 0;
+	struct bl_disk *disk;
+
+	/* trace the candidates that are about to be examined */
+	for (disk = visible_disk_list; disk; disk = disk->next)
+		BL_LOG_ERR("%s: visible_disk_list: %s\n", __func__,
+			   disk->valid_path->full_path);
+
+	/* scan disk list to find out match device */
+	for (disk = visible_disk_list; disk; disk = disk->next) {
+		/* FIXME: should we use better algorithm for disk scan? */
+		mapped = verify_sig(disk, sig);
+		if (mapped) {
+			vol->param.bv_dev = disk->dev;
+			vol->bv_size = disk->size;
+			break;
+		}
+	}
+	return mapped;
+}
+
+/*
+ * Decode the list of sub-volume indices that belongs to vols[working].
+ * Each index is XDR encoded and must name a volume decoded earlier
+ * (0 <= index < working); it is turned into a pointer stored in that
+ * volume's bv_vols array.
+ * Returns 0 and advances *pp on success, -EIO on short or invalid input.
+ */
+static int set_vol_array(uint32_t **pp, uint32_t *end,
+			 struct bl_volume *vols, int working)
+{
+	uint32_t *p = *pp;
+	struct bl_volume *vol = &vols[working];
+	struct bl_volume **slots = vol->bv_vols;
+	int slot, ref;
+
+	for (slot = 0; slot < vol->bv_vol_n; slot++) {
+		BLK_READBUF(p, end, 4);
+		READ32(ref);
+		if (ref < 0 || ref >= working) {
+			BL_LOG_ERR("set_vol_array: Id %i out of range\n",
+				   ref);
+			goto out_err;
+		}
+		slots[slot] = &vols[ref];
+	}
+	*pp = p;
+	return 0;
+ out_err:
+	return -EIO;
+}
+
+/* Add up bv_size over all bv_vol_n sub-volumes referenced by vol. */
+static uint64_t sum_subvolume_sizes(struct bl_volume *vol)
+{
+	uint64_t total = 0;
+	int idx = 0;
+
+	while (idx < vol->bv_vol_n) {
+		total += vol->bv_vols[idx]->bv_size;
+		idx++;
+	}
+	return total;
+}
+
+/*
+ * Decode one volume description from *pp into vols[i].
+ *
+ * pp, end:   decode cursor and end of the buffer
+ * vols:      array of volumes; entries below i are already decoded
+ * i:         index of the volume to fill in
+ * array_cnt: receives the number of bv_vols slots this volume consumed
+ *            (0 for a simple volume); untouched on most error paths
+ *
+ * Returns 0 and advances *pp on success, -ENXIO when no visible disk
+ * carries the signature of a simple volume, -EIO on malformed input.
+ */
+static int decode_blk_volume(uint32_t **pp, uint32_t *end,
+			     struct bl_volume *vols, int i, int *array_cnt)
+{
+	int status = 0, j;
+	struct bl_sig sig;
+	uint32_t *p = *pp;
+	struct bl_volume *vol = &vols[i];
+	uint64_t tmp, tmp_size;	/* tmp is used by READ_SECTOR() */
+	off_t chunksize;
+
+	BLK_READBUF(p, end, 4);
+	READ32(vol->bv_type);
+	switch (vol->bv_type) {
+	case BLOCK_VOLUME_SIMPLE:
+		*array_cnt = 0;
+		status = decode_blk_signature(&p, end, &sig);
+		if (status)
+			return status;
+		status = map_sig_to_device(&sig, vol);
+		if (!status) {
+			BL_LOG_ERR("Could not find disk for device\n");
+			return -ENXIO;
+		}
+		status = 0;
+		break;
+	case BLOCK_VOLUME_SLICE:
+		BLK_READBUF(p, end, 16);
+		READ_SECTOR(vol->param.bv_offset);
+		READ_SECTOR(vol->bv_size);
+		*array_cnt = vol->bv_vol_n = 1;
+		status = set_vol_array(&p, end, vols, i);
+		break;
+	case BLOCK_VOLUME_STRIPE:
+		BLK_READBUF(p, end, 8);
+		READ_SECTOR(vol->param.bv_stripe_unit);
+		/* stripe unit: non-zero power of two, at least one page */
+		chunksize = vol->param.bv_stripe_unit;
+		if ((chunksize == 0) ||
+		    ((chunksize & (chunksize - 1)) != 0) ||
+		    (chunksize < (PAGE_SIZE >> 9)))
+			return -EIO;
+		BLK_READBUF(p, end, 4);
+		READ32(vol->bv_vol_n);
+		if (!vol->bv_vol_n)
+			return -EIO;
+		*array_cnt = vol->bv_vol_n;
+		status = set_vol_array(&p, end, vols, i);
+		if (status)
+			return status;
+		for (j = 1; j < vol->bv_vol_n; j++) {
+			if (vol->bv_vols[j]->bv_size !=
+			    vol->bv_vols[0]->bv_size) {
+				BL_LOG_ERR("varying subvol size\n");
+				return -EIO;
+			}
+		}
+		/*
+		 * Make sure total size only includes addressable areas:
+		 * round down to a whole number of stripe units.  Plain
+		 * 64-bit arithmetic is used because div() takes int
+		 * arguments and would truncate large sector counts.
+		 *
+		 * NOTE(review): the size is derived from one sub-volume and
+		 * is not multiplied by bv_vol_n -- confirm this is what
+		 * dm_device_create() expects for a striped table.
+		 */
+		tmp_size = vol->bv_vols[0]->bv_size;
+		tmp_size /= vol->param.bv_stripe_unit;
+		vol->bv_size = tmp_size * vol->param.bv_stripe_unit;
+		break;
+	case BLOCK_VOLUME_CONCAT:
+		BLK_READBUF(p, end, 4);
+		READ32(vol->bv_vol_n);
+		if (!vol->bv_vol_n)
+			return -EIO;
+		*array_cnt = vol->bv_vol_n;
+		status = set_vol_array(&p, end, vols, i);
+		if (status)
+			return status;
+		vol->bv_size = sum_subvolume_sizes(vol);
+		break;
+	default:
+		BL_LOG_ERR("Unknown volume type %i\n", vol->bv_type);
+		/* also the landing point of BLK_READBUF()/READ_SECTOR() */
+ out_err:
+		return -EIO;
+	}
+	*pp = p;
+	return status;
+}
+
+/*
+ * Decode a device description and build the matching device-mapper
+ * device.
+ *
+ * dev_addr_buf, dev_addr_len: encoded list of volumes
+ * major, minor:               receive the numbers of the created device;
+ *                             written only when decoding succeeded
+ *
+ * When a simple volume cannot be matched to a visible disk, device
+ * discovery is re-run and decoding restarts from scratch, a limited
+ * number of times with a one second pause.
+ *
+ * Returns the device number from dm_device_create(), or 0 on failure.
+ */
+uint64_t process_deviceinfo(const char *dev_addr_buf,
+			    unsigned int dev_addr_len,
+			    uint32_t *major, uint32_t *minor)
+{
+	int num_vols, i, status, count;
+	uint32_t *p, *end;
+	struct bl_volume *vols = NULL, **arrays = NULL, **arrays_ptr = NULL;
+	uint64_t dev = 0;
+	int tried = 0;
+
+ restart:
+	p = (uint32_t *) dev_addr_buf;
+	end = (uint32_t *) ((char *)p + dev_addr_len);
+	/* Decode block volume */
+	BLK_READBUF(p, end, 4);
+	READ32(num_vols);
+	if (num_vols <= 0) {
+		BL_LOG_WARNING("Error: number of vols: %d\n", num_vols);
+		goto out_err;
+	}
+
+	vols = (struct bl_volume *)malloc(num_vols * sizeof(struct bl_volume));
+	if (!vols) {
+		BL_LOG_ERR("%s: Out of memory\n", __func__);
+		goto out_err;
+	}
+
+	/* Each volume in vols array needs its own array. Save time by
+	 * allocating them all in one large hunk. Because each volume
+	 * array can only reference previous volumes, and because once
+	 * a concat or stripe references a volume, it may never be
+	 * referenced again, the volume arrays are guaranteed to fit
+	 * in the surprisingly small space allocated.
+	 */
+	arrays =
+	    (struct bl_volume **)malloc(num_vols * 2 *
+					sizeof(struct bl_volume *));
+	if (!arrays) {
+		BL_LOG_ERR("%s: Out of memory\n", __func__);
+		goto out_err;
+	}
+
+	arrays_ptr = arrays;
+
+	for (i = 0; i < num_vols; i++) {
+		vols[i].bv_vols = arrays_ptr;
+		status = decode_blk_volume(&p, end, vols, i, &count);
+		if (status == -ENXIO && (tried <= 5)) {
+			sleep(1);
+			BL_LOG_DEBUG("%s: discover again!\n", __func__);
+			bl_discover_devices();
+			tried++;
+			/*
+			 * Forget the pointers once freed: if the restarted
+			 * pass bails out before re-allocating both, the
+			 * cleanup at out_err must not free them again.
+			 */
+			free(vols);
+			vols = NULL;
+			free(arrays);
+			arrays = NULL;
+			goto restart;
+		}
+		if (status)
+			goto out_err;
+		arrays_ptr += count;
+	}
+
+	/* trailing bytes mean the description was not fully understood */
+	if (p != end) {
+		BL_LOG_ERR("p is not equal to end!\n");
+		goto out_err;
+	}
+
+	dev = dm_device_create(vols, num_vols);
+	*major = MAJOR(dev);
+	*minor = MINOR(dev);
+ out_err:
+	if (vols)
+		free(vols);
+	if (arrays)
+		free(arrays);
+	return dev;
+}
diff --git a/utils/blkmapd/dm-device.c b/utils/blkmapd/dm-device.c
new file mode 100644
index 0000000..8162706
--- /dev/null
+++ b/utils/blkmapd/dm-device.c
@@ -0,0 +1,509 @@
+/*
+ * dm-device.c: create or remove device via device mapper API.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <libdevmapper.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/kdev_t.h>
+#include "device-discovery.h"
+
+/* Size of the buffers that hold "pnfs_vol_N" device names. */
+#define DM_DEV_NAME_LEN 256
+
+#ifndef DM_MAX_TYPE_NAME
+#define DM_MAX_TYPE_NAME 16
+#endif
+
+#define DM_PARAMS_LEN 512 /* XXX: is this enough for target? */
+#define DM_DIR "/dev/mapper"
+/* sizeof(DM_DIR): the 11 characters plus the terminating NUL */
+#define DM_DIR_LEN 12
+/* True for volume types whose param.bv_dev names an existing device. */
+#define TYPE_HAS_DEV(type) (((type) == BLOCK_VOLUME_SIMPLE) || \
+ ((type) == BLOCK_VOLUME_PSEUDO))
+
+/*
+ * One target line of a device-mapper table, kept in a singly linked
+ * list.  dm_single_device_create() hands offset, size, target_type and
+ * params to dm_task_add_target() for each entry in list order.
+ */
+struct bl_dm_table {
+ uint64_t offset;
+ uint64_t size;
+ char target_type[DM_MAX_TYPE_NAME];
+ char params[DM_PARAMS_LEN];
+ struct bl_dm_table *next;
+};
+
+/*
+ * Entry of the global bl_tree_head list: associates a device number with
+ * the libdevmapper dependency tree created for it by bl_dm_create_tree().
+ */
+struct bl_dm_tree {
+ uint64_t dev;
+ struct dm_tree *tree;
+ struct bl_dm_tree *next;
+};
+
+/* Allocate one zero-filled table entry; returns NULL when out of memory. */
+static inline struct bl_dm_table *bl_dm_table_alloc(void)
+{
+	struct bl_dm_table *entry = calloc(1, sizeof(*entry));
+
+	return entry;
+}
+
+/* Free every entry of the list starting at bl_table_head (may be NULL). */
+void bl_dm_table_free(struct bl_dm_table *bl_table_head)
+{
+	struct bl_dm_table *entry, *following;
+
+	for (entry = bl_table_head; entry; entry = following) {
+		/* remember the successor before the entry disappears */
+		following = entry->next;
+		free(entry);
+	}
+}
+
+/* Append table to the end of the list whose head pointer is *bl_table_head. */
+void add_to_bl_dm_table(struct bl_dm_table **bl_table_head,
+			struct bl_dm_table *table)
+{
+	struct bl_dm_table **link = bl_table_head;
+
+	/* walk to the first empty link: the head itself or a next field */
+	while (*link)
+		link = &(*link)->next;
+	*link = table;
+}
+
+struct bl_dm_tree *bl_tree_head;
+
+/* Return the bl_tree_head entry recorded for dev, or NULL if there is none. */
+struct bl_dm_tree *find_bl_dm_tree(uint64_t dev)
+{
+	struct bl_dm_tree *entry;
+
+	for (entry = bl_tree_head; entry; entry = entry->next) {
+		if (entry->dev == dev)
+			break;
+	}
+	return entry;
+}
+
+/*
+ * Unlink and free the first bl_tree_head entry recorded for dev.  The
+ * dm_tree it points to is not released here.  Does nothing when dev is
+ * not in the list.
+ */
+void del_from_bl_dm_tree(uint64_t dev)
+{
+	struct bl_dm_tree **link = &bl_tree_head;
+	struct bl_dm_tree *victim;
+
+	while ((victim = *link) != NULL) {
+		if (victim->dev == dev) {
+			/* bypass the entry, whether it is the head or not */
+			*link = victim->next;
+			free(victim);
+			return;
+		}
+		link = &victim->next;
+	}
+}
+
+/* Append tree to the end of the global bl_tree_head list. */
+void add_to_bl_dm_tree(struct bl_dm_tree *tree)
+{
+	struct bl_dm_tree **link = &bl_tree_head;
+
+	while (*link)
+		link = &(*link)->next;
+	*link = tree;
+}
+
+/*
+ * Create the device-mapper device dev_name from the table entries in the
+ * list p, one target per entry.
+ * Returns the device number of the new device, or 0 when creation failed.
+ */
+uint64_t dm_single_device_create(const char *dev_name, struct bl_dm_table * p)
+{
+	struct dm_info info;
+	struct dm_task *task;
+	struct bl_dm_table *row;
+	int ok;
+
+	task = dm_task_create(DM_DEVICE_CREATE);
+	if (!task) {
+		BL_LOG_ERR("Create dm_task for %s failed\n", dev_name);
+		return 0;
+	}
+
+	ok = dm_task_set_name(task, dev_name);
+
+	/* stop adding targets at the first one that is refused */
+	for (row = p; ok && row; row = row->next)
+		ok = dm_task_add_target(task, row->offset, row->size,
+					row->target_type, row->params);
+
+	if (ok)
+		ok = dm_task_run(task) &&
+		    dm_task_get_info(task, &info) && info.exists;
+	if (ok)
+		dm_task_update_nodes();
+
+	dm_task_destroy(task);
+
+	if (!ok) {
+		BL_LOG_ERR("Create device %s failed\n", dev_name);
+		return 0;
+	}
+	return MKDEV(info.major, info.minor);
+}
+
+/*
+ * Remove the device-mapper device called dev_name.
+ * Returns 1 when the removal task ran successfully, 0 when naming or
+ * running the task failed, and -ENODEV when no task could be created.
+ */
+int dm_device_remove_byname(const char *dev_name)
+{
+	struct dm_task *task = dm_task_create(DM_DEVICE_REMOVE);
+	int ok = 0;
+
+	if (!task)
+		return -ENODEV;
+
+	if (dm_task_set_name(task, dev_name))
+		ok = dm_task_run(task) != 0;
+
+	dm_task_update_nodes();
+	dm_task_destroy(task);
+
+	return ok;
+}
+
+/*
+ * Remove the device-mapper device whose device number is dev.
+ *
+ * The name is looked up in the DM_DEVICE_LIST result and handed to
+ * dm_device_remove_byname().
+ *
+ * Returns -ENODEV when no task could be created.  Otherwise the value is
+ * the result of the last step performed (dm_task_run() or
+ * dm_device_remove_byname()), so it is not a uniform status code.
+ */
+int dm_device_remove(uint64_t dev)
+{
+	struct dm_task *dmt;
+	struct dm_names *dmnames;
+	unsigned int next;
+	char *name = NULL;
+	int ret = -1;
+
+	/* Look for dev_name via dev, if dev_name could be transferred here,
+	   we could jump to DM_DEVICE_REMOVE directly */
+	dmt = dm_task_create(DM_DEVICE_LIST);
+	if (!dmt) {
+		BL_LOG_ERR("dm_task creation failed\n");
+		return -ENODEV;
+	}
+
+	ret = dm_task_run(dmt);
+	if (!ret) {
+		BL_LOG_ERR("dm_task_run failed\n");
+		goto out;
+	}
+
+	dmnames = dm_task_get_names(dmt);
+	if (!dmnames || !dmnames->dev) {
+		BL_LOG_ERR("dm_task_get_names failed\n");
+		goto out;
+	}
+
+	/*
+	 * Entries are chained by byte offsets and the last one has
+	 * next == 0.  The offset, not the pointer, has to end the loop:
+	 * the pointer never becomes NULL, so testing it would spin on the
+	 * last entry forever when dev is not listed.
+	 */
+	do {
+		if (dmnames->dev == dev) {
+			name = dmnames->name;
+			break;
+		}
+		next = dmnames->next;
+		dmnames = (struct dm_names *)((char *)dmnames + next);
+	} while (next);
+
+	if (!name) {
+		BL_LOG_ERR("Could not find device\n");
+		goto out;
+	}
+
+	dm_task_update_nodes();
+
+	/*
+	 * name points into memory owned by dmt, so the removal has to
+	 * happen before the task is destroyed.
+	 */
+	ret = dm_device_remove_byname(name);
+ out:
+	dm_task_destroy(dmt);
+	return ret;
+}
+
+static unsigned long dev_count;
+
+/*
+ * Remove the devices named pnfs_vol_<n> for n running from end - 2 down
+ * to start.  Nothing happens when start is not below dev_count or the
+ * range is empty.
+ */
+void dm_devicelist_remove(unsigned long start, unsigned long end)
+{
+	char dev_name[DM_DEV_NAME_LEN];
+	unsigned long idx;
+
+	if (start >= dev_count || end <= 1 || start >= end - 1)
+		return;
+
+	idx = end - 1;
+	while (idx > start) {
+		idx--;
+		sprintf(dev_name, "pnfs_vol_%lu", idx);
+		dm_device_remove_byname(dev_name);
+	}
+}
+
+/* Free the dependency tree recorded for dev and drop its list entry. */
+void bl_dm_remove_tree(uint64_t dev)
+{
+	struct bl_dm_tree *entry = find_bl_dm_tree(dev);
+
+	if (entry) {
+		dm_tree_free(entry->tree);
+		del_from_bl_dm_tree(dev);
+	}
+}
+
+/*
+ * Build a libdevmapper dependency tree for dev and record it in the
+ * global list.  Silently does nothing if dev already has an entry or if
+ * any step fails.
+ */
+void bl_dm_create_tree(uint64_t dev)
+{
+	struct bl_dm_tree *entry = NULL;
+	struct dm_tree *tree;
+
+	if (find_bl_dm_tree(dev))
+		return;		/* XXX: error? */
+
+	tree = dm_tree_create();
+	if (!tree)
+		return;
+
+	/* only allocate the list entry once the device is in the tree */
+	if (dm_tree_add_dev(tree, MAJOR(dev), MINOR(dev)))
+		entry = malloc(sizeof(*entry));
+	if (!entry) {
+		dm_tree_free(tree);
+		return;
+	}
+
+	entry->dev = dev;
+	entry->tree = tree;
+	entry->next = NULL;
+	add_to_bl_dm_tree(entry);
+}
+
+/*
+ * Look up the device number of the device-mapper device dev_name.
+ * Returns the device number, or 0 on any failure -- the same convention
+ * as dm_single_device_create().  (Returning -ENODEV through the unsigned
+ * return type would look like a huge but valid device number.)
+ */
+uint64_t dm_device_nametodev(char *dev_name)
+{
+	struct dm_task *dmt;
+	int ret = 0;
+	struct dm_info dminfo;
+
+	dmt = dm_task_create(DM_DEVICE_INFO);
+	if (!dmt)
+		return 0;
+
+	ret = dm_task_set_name(dmt, dev_name) &&
+	    dm_task_run(dmt) && dm_task_get_info(dmt, &dminfo);
+
+	dm_task_destroy(dmt);
+
+	if (!ret)
+		return 0;
+
+	return MKDEV(dminfo.major, dminfo.minor);
+}
+
+/*
+ * Tear down the device recorded for *dev together with the devices below
+ * it in its dependency tree.
+ *
+ * dev points at two consecutive 32-bit values: the major number followed
+ * by the minor number.
+ *
+ * Returns 0 when the device is unknown or has no tree node or uuid,
+ * otherwise the result of dm_tree_deactivate_children().
+ */
+int dm_device_remove_all(uint64_t *dev)
+{
+	struct bl_dm_tree *entry;
+	struct dm_tree_node *node = NULL;
+	const char *uuid = NULL;
+	uint32_t parts[2];	/* [0] = major, [1] = minor */
+	uint64_t bl_dev;
+	int ret;
+
+	memcpy(parts, dev, sizeof(parts));
+	bl_dev = MKDEV(parts[0], parts[1]);
+
+	entry = find_bl_dm_tree(bl_dev);
+	if (entry)
+		node = dm_tree_find_node(entry->tree,
+					 MAJOR(bl_dev), MINOR(bl_dev));
+	if (node)
+		uuid = dm_tree_node_get_uuid(node);
+	if (!uuid)
+		return 0;
+
+	dm_device_remove(bl_dev);
+	ret = dm_tree_deactivate_children(node, uuid, strlen(uuid));
+	dm_task_update_nodes();
+	bl_dm_remove_tree(bl_dev);
+	return ret;
+}
+
+/* TODO: check the value for DM_DEV_NAME_LEN, DM_TYPE_LEN, DM_PARAMS_LEN */
+/*
+ * Create the device-mapper devices needed for the decoded volumes.
+ *
+ * vols[0..num_vols-1] are walked in order.  Simple volumes already have a
+ * device.  For each slice, stripe or concat volume a table is built and
+ * a device named pnfs_vol_<n> is created; the volume is then retyped to
+ * BLOCK_VOLUME_PSEUDO with param.bv_dev set to the new device so that
+ * later volumes can refer to it.
+ *
+ * Returns the device number of the last volume and records a dependency
+ * tree for it, or returns 0 if anything failed.  Failure never returns
+ * the number of an intermediate device.
+ */
+uint64_t dm_device_create(struct bl_volume *vols, int num_vols)
+{
+	uint64_t size, dev = 0;
+	unsigned long count = dev_count;
+	int number = 0, i, pos;
+	struct bl_volume *node;
+	char *tmp;
+	struct bl_dm_table *table = NULL;
+	struct bl_dm_table *bl_table_head = NULL;
+	unsigned int len;
+	char dev_name[DM_DEV_NAME_LEN];
+
+	/* Create pseudo device here */
+	while (number < num_vols) {
+		node = &vols[number];
+		switch (node->bv_type) {
+		case BLOCK_VOLUME_SIMPLE:
+			/* Do not need to create device here */
+			dev = node->param.bv_dev;
+			goto continued;
+		case BLOCK_VOLUME_SLICE:
+			table = bl_dm_table_alloc();
+			if (!table)
+				goto out_err;
+			table->offset = 0;
+			table->size = node->bv_size;
+			strcpy(table->target_type, "linear");
+			if (!TYPE_HAS_DEV(node->bv_vols[0]->bv_type)) {
+				free(table);
+				goto out_err;
+			}
+			dev = node->bv_vols[0]->param.bv_dev;
+			tmp = table->params;
+			if (!dm_format_dev(tmp, DM_PARAMS_LEN,
+					   MAJOR(dev), MINOR(dev))) {
+				free(table);
+				goto out_err;
+			}
+			tmp += strlen(tmp);
+			sprintf(tmp, " %lu", node->param.bv_offset);
+			add_to_bl_dm_table(&bl_table_head, table);
+			break;
+		case BLOCK_VOLUME_STRIPE:
+			table = bl_dm_table_alloc();
+			if (!table)
+				goto out_err;
+			table->offset = 0;
+			table->size = node->bv_size;
+			strcpy(table->target_type, "striped");
+			sprintf(table->params, "%d %lu %n", node->bv_vol_n,
+				node->param.bv_stripe_unit, &pos);
+			/* Repeatedly copy subdev to params */
+			tmp = table->params + pos;
+			len = DM_PARAMS_LEN - pos;
+			for (i = 0; i < node->bv_vol_n; i++) {
+				if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) {
+					free(table);
+					goto out_err;
+				}
+				dev = node->bv_vols[i]->param.bv_dev;
+				if (!dm_format_dev(tmp, len, MAJOR(dev),
+						   MINOR(dev))) {
+					free(table);
+					goto out_err;
+				}
+				pos = strlen(tmp);
+				/* need room for " 0 " and the final NUL */
+				if (len < (unsigned int)pos + 4) {
+					free(table);
+					goto out_err;
+				}
+				tmp += pos;
+				len -= pos;
+				sprintf(tmp, " %d ", 0);
+				tmp += 3;
+				len -= 3;
+			}
+			add_to_bl_dm_table(&bl_table_head, table);
+			break;
+		case BLOCK_VOLUME_CONCAT:
+			size = 0;
+			for (i = 0; i < node->bv_vol_n; i++) {
+				table = bl_dm_table_alloc();
+				if (!table)
+					goto out_err;
+				table->offset = size;
+				table->size = node->bv_vols[i]->bv_size;
+				if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) {
+					free(table);
+					goto out_err;
+				}
+				strcpy(table->target_type, "linear");
+				tmp = table->params;
+				dev = node->bv_vols[i]->param.bv_dev;
+				if (!dm_format_dev(tmp, DM_PARAMS_LEN,
+						   MAJOR(dev), MINOR(dev))) {
+					free(table);
+					goto out_err;
+				}
+				tmp += strlen(tmp);
+				sprintf(tmp, " %d", 0);
+				size += table->size;
+				add_to_bl_dm_table(&bl_table_head, table);
+			}
+			break;
+		default:
+			/* Delete previous temporary devices */
+			dm_devicelist_remove(count, dev_count);
+			goto out_err;
+		}		/* end of switch */
+		/* Create dev_name here. Name of device is pnfs_vol_XXX */
+		snprintf(dev_name, sizeof(dev_name), "pnfs_vol_%lu",
+			 dev_count++);
+
+		dev = dm_single_device_create(dev_name, bl_table_head);
+		if (!dev) {
+			/* Delete previous temporary devices */
+			dm_devicelist_remove(count, dev_count);
+			goto out_err;
+		}
+		node->param.bv_dev = dev;
+		/* TODO: extend use with PSEUDO later */
+		node->bv_type = BLOCK_VOLUME_PSEUDO;
+ continued:
+		number++;
+		if (bl_table_head)
+			bl_dm_table_free(bl_table_head);
+		bl_table_head = NULL;
+	}
+	goto out;
+ out_err:
+	/*
+	 * dev may still hold the number of a sub-device picked up while a
+	 * table was being built; it must not be reported as the result.
+	 */
+	dev = 0;
+ out:
+	if (bl_table_head)
+		bl_dm_table_free(bl_table_head);
+	bl_table_head = NULL;
+	if (dev)
+		bl_dm_create_tree(dev);
+	return dev;
+}
diff --git a/utils/blkmapd/etc/blkmapd.conf b/utils/blkmapd/etc/blkmapd.conf
new file mode 100644
index 0000000..da70d94
--- /dev/null
+++ b/utils/blkmapd/etc/blkmapd.conf
@@ -0,0 +1,10 @@
+# This is an example config file
+
+# Look at all /dev/sd* devices
+# /dev/sd or /dev/sd*
+/dev/sd*
+
+# Look at all /dev/mapper/* devices
+# /dev/mapper/* or
+# /dev/mapper/
+/dev/mapper/*
diff --git a/utils/blkmapd/etc/initd/initd.redhat b/utils/blkmapd/etc/initd/initd.redhat
new file mode 100644
index 0000000..d6a77e8
--- /dev/null
+++ b/utils/blkmapd/etc/initd/initd.redhat
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# description: Starts and stops the pNFS block-layout device discovery service
+#
+# processname: blkmapd
+# pidfile: /var/run/blkmapd.pid
+# config: /etc/blkmapd.conf
+
+# Source function library.
+if [ -f /etc/init.d/functions ] ; then
+ . /etc/init.d/functions
+elif [ -f /etc/rc.d/init.d/functions ] ; then
+ . /etc/rc.d/init.d/functions
+else
+ exit 0
+fi
+
+PATH=/sbin:/bin:/usr/sbin:/usr/bin
+
+RETVAL=0
+
+# Load the block layout driver, launch blkmapd and, when that worked,
+# create the subsys lock file.  Returns the exit status of "daemon".
+start()
+{
+	echo -n $"Starting pNFS block-layout device discovery service: "
+	modprobe -q blocklayoutdriver
+	daemon /usr/sbin/blkmapd
+	RETVAL=$?
+	[ $RETVAL -eq 0 ] && touch /var/lock/subsys/blkmapd
+	echo
+	return $RETVAL
+}
+
+# Stop blkmapd and clean up its pid file; the subsys lock is removed only
+# when killproc succeeded.  Returns the exit status of killproc.
+stop()
+{
+	echo -n $"Stopping pNFS block-layout device discovery service: "
+	killproc blkmapd 2> /dev/null
+	# Capture the status right away: any command run in between
+	# (such as the rm below) would overwrite $?.
+	RETVAL=$?
+	rm -f /var/run/blkmapd.pid
+	if [ $RETVAL -eq 0 ]; then
+		rm -f /var/lock/subsys/blkmapd
+		echo_success
+	else
+		echo_failure
+	fi
+	echo
+	return $RETVAL
+}
+
+# Stop the service, then start it again; the result is that of start().
+restart()
+{
+ stop
+ start
+}
+
+# Dispatch on the requested action.
+case "$1" in
+	start|stop|restart)
+		# each of these actions has a function of the same name
+		"$1"
+		;;
+	status)
+		status blkmapd
+		;;
+	*)
+		echo $"Usage: $0 {start|stop|restart|status}"
+		exit 1
+esac
+
+exit $RETVAL
--
1.7.0.4
^ permalink raw reply related [flat|nested] 5+ messages in thread[parent not found: <20100811194253.GA11453-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org>]
* Re: [PATCH] Add complex block layout discovery and mapping daemon [not found] ` <20100811194253.GA11453-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org> @ 2010-08-12 13:42 ` Benny Halevy 2010-08-12 13:44 ` Benny Halevy 0 siblings, 1 reply; 5+ messages in thread From: Benny Halevy @ 2010-08-12 13:42 UTC (permalink / raw) To: Jim Rees; +Cc: linux-nfs Thanks! merged. Benny On Aug. 11, 2010, 22:42 +0300, Jim Rees <rees@umich.edu> wrote: > This is a replacement for the patch I sent 21 July, incorporating feedback > from list members. Thank you for your comments. > > I have tried to settle on "blkmapd" as the name and use it consistently for > the executable, service name, syslog, etc. > > I did not change atomicio.c. That's because this is a copy of the file by > the same name in both idmapd and spnfsd. There is a patch in the works to > move this to the support library. I think the right thing to do is move > that patch forward, then fix atomicio. > > Signed-off-by: Jim Rees <rees@umich.edu> > --- > configure.ac | 4 + > utils/Makefile.am | 4 + > utils/blkmapd/Makefile.am | 63 +++++ > utils/blkmapd/atomicio.c | 54 ++++ > utils/blkmapd/cfg.c | 248 +++++++++++++++++ > utils/blkmapd/cfg.h | 47 +++ > utils/blkmapd/device-discovery.c | 502 +++++++++++++++++++++++++++++++++ > utils/blkmapd/device-discovery.h | 162 +++++++++++ > utils/blkmapd/device-inq.c | 235 ++++++++++++++++ > utils/blkmapd/device-process.c | 394 ++++++++++++++++++++++++++ > utils/blkmapd/dm-device.c | 509 ++++++++++++++++++++++++++++++++++ > utils/blkmapd/etc/blkmapd.conf | 10 + > utils/blkmapd/etc/initd/initd.redhat | 76 +++++ > 13 files changed, 2308 insertions(+), 0 deletions(-) > create mode 100644 utils/blkmapd/Makefile.am > create mode 100644 utils/blkmapd/atomicio.c > create mode 100644 utils/blkmapd/cfg.c > create mode 100644 utils/blkmapd/cfg.h > create mode 100644 utils/blkmapd/device-discovery.c > create mode 100644 utils/blkmapd/device-discovery.h > create mode 100644 utils/blkmapd/device-inq.c > 
create mode 100644 utils/blkmapd/device-process.c > create mode 100644 utils/blkmapd/dm-device.c > create mode 100644 utils/blkmapd/etc/blkmapd.conf > create mode 100644 utils/blkmapd/etc/initd/initd.redhat > > diff --git a/configure.ac b/configure.ac > index 4d12715..f57cd45 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -64,12 +64,15 @@ AC_ARG_ENABLE(nfsv4, > enable_nfsv4=yes) > if test "$enable_nfsv4" = yes; then > AC_DEFINE(NFS4_SUPPORTED, 1, [Define this if you want NFSv4 support compiled in]) > + BLKMAPD=blkmapd > IDMAPD=idmapd > SPNFSD=spnfsd > else > enable_nfsv4= > + BLKMAPD= > IDMAPD= > fi > + AC_SUBST(BLKMAPD) > AC_SUBST(IDMAPD) > AC_SUBST(enable_nfsv4) > AM_CONDITIONAL(CONFIG_NFSV4, [test "$enable_nfsv4" = "yes"]) > @@ -429,6 +432,7 @@ AC_CONFIG_FILES([ > tools/mountstats/Makefile > tools/nfs-iostat/Makefile > utils/Makefile > + utils/blkmapd/Makefile > utils/exportfs/Makefile > utils/gssd/Makefile > utils/idmapd/Makefile > diff --git a/utils/Makefile.am b/utils/Makefile.am > index c777d21..c33835a 100644 > --- a/utils/Makefile.am > +++ b/utils/Makefile.am > @@ -10,6 +10,10 @@ if CONFIG_NFSV4 > OPTDIRS += spnfsd > endif > > +if CONFIG_NFSV4 > +OPTDIRS += blkmapd > +endif > + > if CONFIG_GSS > OPTDIRS += gssd > endif > diff --git a/utils/blkmapd/Makefile.am b/utils/blkmapd/Makefile.am > new file mode 100644 > index 0000000..81cc420 > --- /dev/null > +++ b/utils/blkmapd/Makefile.am > @@ -0,0 +1,63 @@ > +## Process this file with automake to produce Makefile.in > + > +#man8_MANS = blkmapd.man > + > +AM_CFLAGS += -D_LARGEFILE64_SOURCE > +KPREFIX = @kprefix@ > +sbin_PROGRAMS = blkmapd > + > +blkmapd_SOURCES = \ > + atomicio.c \ > + cfg.c \ > + device-discovery.c \ > + device-inq.c \ > + device-process.c \ > + dm-device.c \ > + \ > + cfg.h \ > + device-discovery.h > + > +blkmapd_LDADD = -ldevmapper ../../support/nfs/libnfs.a > + > +MAINTAINERCLEANFILES = Makefile.in > + > +####################################################################### > +# The 
following allows the current practice of having > +# daemons renamed during the install to include RPCPREFIX > +# and the KPREFIX > +# This could all be done much easier with program_transform_name > +# ( program_transform_name = s/^/$(RPCPREFIX)$(KPREFIX)/ ) > +# but that also renames the man pages, which the current > +# practice does not do. > +install-exec-hook: > + (cd $(DESTDIR)$(sbindir) && \ > + for p in $(sbin_PROGRAMS); do \ > + mv -f $$p$(EXEEXT) $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\ > + done) > +uninstall-hook: > + (cd $(DESTDIR)$(sbindir) && \ > + for p in $(sbin_PROGRAMS); do \ > + rm -f $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\ > + done) > + > + > +# XXX This makes some assumptions about what automake does. > +# XXX But there is no install-man-hook or install-man-local. > +install-man: install-man8 install-man-links > +uninstall-man: uninstall-man8 uninstall-man-links > + > +install-man-links: > + (cd $(DESTDIR)$(man8dir) && \ > + for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ > + inst=`echo $$m | sed -e 's/man$$/8/'`; \ > + rm -f $(RPCPREFIX)$$inst ; \ > + $(LN_S) $$inst $(RPCPREFIX)$$inst ; \ > + done) > + > +uninstall-man-links: > + (cd $(DESTDIR)$(man8dir) && \ > + for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ > + inst=`echo $$m | sed -e 's/man$$/8/'`; \ > + rm -f $(RPCPREFIX)$$inst ; \ > + done) > + > diff --git a/utils/blkmapd/atomicio.c b/utils/blkmapd/atomicio.c > new file mode 100644 > index 0000000..8db626e > --- /dev/null > +++ b/utils/blkmapd/atomicio.c > @@ -0,0 +1,54 @@ > +/* > + * Copyright (c) 2002 Marius Aamodt Eriksen <marius@monkey.org> > + * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. 
Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#include <sys/types.h> > +#include <unistd.h> > +#include <errno.h> > + > +/* > + * ensure all of data on socket comes through. f==read || f==write > + */ > +ssize_t atomicio(ssize_t(*f) (int, void *, size_t), int fd, void *_s, size_t n) > +{ > + char *s = _s; > + ssize_t res, pos = 0; > + > + while (n > pos) { > + res = (f) (fd, s + pos, n - pos); > + switch (res) { > + case -1: > + if (errno == EINTR || errno == EAGAIN) > + continue; > + case 0: > + if (pos != 0) > + return pos; > + return res; > + default: > + pos += res; > + } > + } > + return pos; > +} > diff --git a/utils/blkmapd/cfg.c b/utils/blkmapd/cfg.c > new file mode 100644 > index 0000000..dab9d0f > --- /dev/null > +++ b/utils/blkmapd/cfg.c > @@ -0,0 +1,248 @@ > +/* > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. 
> + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> + */ > + > +#include <sys/param.h> > +#include <sys/stat.h> > +#include <linux/errno.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <unistd.h> > +#include <string.h> > +#include <fcntl.h> > +#include <ctype.h> > + > +#include "device-discovery.h" > +#include "cfg.h" > + > +char *conf_path = "/etc/blkmapd.conf"; > + > +struct scan_root_list *scan_root_list_head; > + > +void bl_release_list(void) > +{ > + struct scan_root_list *root = scan_root_list_head; > + struct scan_device_list *disk; > + > + while (root) { > + disk = root->disk; > + while (disk) { > + root->disk = disk->next; > + free(disk->name); > + free(disk); > + disk = root->disk; > + } > + scan_root_list_head = root->next; > + free(root->name); > + free(root); > + root = scan_root_list_head; > + } > +} > + > +struct scan_root_list *bl_alloc_root_list(char *name, int all_disk) > +{ > + struct scan_root_list *root; > + > + root = malloc(sizeof(struct scan_root_list)); > + if (!root) > + goto nomem; > + > + root->name = strdup(name); > + if (!root->name) > + goto nomem; > + root->next = scan_root_list_head; > + root->all_disk = all_disk; > + scan_root_list_head = root; > + return root; > + > + nomem: > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + if (root) > + free(root); > + return NULL; > +} > + > +struct scan_device_list *bl_alloc_device_list(struct scan_root_list *root, > + char *name) > +{ > + struct scan_device_list *device; > + > + device = malloc(sizeof(struct scan_device_list)); > + if (!device) > + goto nomem; > + > + device->name = strdup(name); > + if (!device->name) > + goto nomem; > + device->next = root->disk; > + root->disk = device; > + return device; > + > + nomem: > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + if (device) > + free(device); > + return NULL; > +} > + > +struct scan_device_list *bl_insert_device_list(struct scan_root_list *root, > + char *name) > +{ > + struct scan_device_list *device = root->disk; > + > + /* Check whether this device has been 
inserted */ > + while (device) { > + if (device->name && !strcmp(device->name, name)) > + return device; > + device = device->next; > + } > + > + return bl_alloc_device_list(root, name); > +} > + > +struct scan_root_list *bl_insert_root_list(char *name, int all_disk) > +{ > + struct scan_root_list *root = scan_root_list_head; > + > + /* Check whether this root has been inserted */ > + while (root) { > + if (root->name && !strcmp(root->name, name)) > + return root; > + root = root->next; > + } > + > + return bl_alloc_root_list(name, all_disk); > +} > + > +int bl_parse_line(char *line, struct scan_root_list **bl_root) > +{ > + char *root, *device, *end; > + > + root = strdup(line); > + end = root + strlen(line); > + > + /* Skip comments */ > + if (*root == '#') > + return 0; > + > + /* Trim leading space */ > + while (*root != '\0' && isspace(*root)) > + root++; > + if (*root == '\0') > + return 0; > + > + /* Trim trailing space and set "end" to last char */ > + while ((isspace(*end) || (*end == '\0')) && (end > root)) > + end--; > + > + /* For lines ending with '/' or '/','*': add as a dir root */ > + if ((*end == '/') || > + ((*end == '*') && (end - root >= 1) && (*(end - 1) == '/'))) { > + if (*end == '*') > + end--; > + if (*end == '/') > + end--; > + *(end + 1) = '\0'; > + *bl_root = bl_insert_root_list(root, 1); > + return 0; > + } > + > + /* Other lines: add as a device */ > + device = end; > + while ((*device != '/') && (device > root)) > + device--; > + if (device == root) { > + BL_LOG_ERR("%s: invalid config line\n", __func__); > + return -1; > + } > + *device = '\0'; > + *bl_root = bl_insert_root_list(root, 0); > + if (*bl_root == NULL) > + return -ENOMEM; > + if (*end == '*') > + end--; > + *(end + 1) = '\0'; > + if (bl_insert_device_list(*bl_root, device + 1) == NULL) > + return -ENOMEM; > + > + return 0; > +} > + > +int bl_set_default_conf(void) > +{ > + struct scan_root_list *root = NULL; > + int rv; > + > + bl_release_list(); > + rv = 
bl_parse_line("/dev/sd*", &root); > + if (rv < 0) > + return rv; > + rv = bl_parse_line("/dev/mapper/", &root); > + return rv; > +} > + > +int bl_parse_conf(char *buf) > +{ > + char *tmp = buf, *line = buf, *end = buf + strlen(buf); > + struct scan_root_list *bl_root = NULL; > + int rv; > + > + while (tmp < end) { > + if (*tmp == '\n') { > + *tmp = '\0'; > + rv = bl_parse_line(line, &bl_root); > + if (rv < 0) > + return rv; > + line = tmp + 1; > + } > + tmp++; > + } > + > + return 0; > +} > + > +int bl_cfg_init(void) > +{ > + struct scan_root_list *root = NULL; > + FILE *f = NULL; > + char buf[PATH_MAX]; > + int rv = 0; > + > + f = fopen(conf_path, "r"); > + if (f == NULL) > + rv = bl_set_default_conf(); > + else { > + while (fgets(buf, sizeof buf, f) != NULL) { > + rv = bl_parse_line(buf, &root); > + if (rv < 0) > + break; > + } > + } > + if (!scan_root_list_head) > + rv = -EINVAL; > + > + if (f) > + fclose(f); > + return rv; > +} > diff --git a/utils/blkmapd/cfg.h b/utils/blkmapd/cfg.h > new file mode 100644 > index 0000000..b9bf930 > --- /dev/null > +++ b/utils/blkmapd/cfg.h > @@ -0,0 +1,47 @@ > +/* > + * bl-cfg.h > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. 
> + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#ifndef BL_CFG_H > +#define BL_CFG_H > + > +extern char *conf_path; > +extern struct scan_root_list *scan_root_list_head; > + > +struct scan_device_list { > + struct scan_device_list *next; > + char *name; > +}; > + > +struct scan_root_list { > + struct scan_root_list *next; > + unsigned int all_disk; > + char *name; > + struct scan_device_list *disk; > +}; > + > +int bl_cfg_init(void); > + > +#endif > diff --git a/utils/blkmapd/device-discovery.c b/utils/blkmapd/device-discovery.c > new file mode 100644 > index 0000000..f42ddc8 > --- /dev/null > +++ b/utils/blkmapd/device-discovery.c > @@ -0,0 +1,502 @@ > +/* > + * device-discovery.c: main function, discovering device and processing > + * pipe request from kernel. > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. 
Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#include <stdlib.h> > +#include <stdio.h> > +#include <string.h> > +#include <dirent.h> > +#include <ctype.h> > +#include <linux/kdev_t.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/ioctl.h> > +#include <sys/mount.h> > +#include <sys/select.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <libgen.h> > +#include <errno.h> > +#include <scsi/scsi.h> > +#include <scsi/scsi_ioctl.h> > +#include <scsi/sg.h> > +#include "device-discovery.h" > +#include "cfg.h" > + > +#define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/bl_device_pipe" > +#define PID_FILE "/var/run/blkmapd.pid" > + > +struct bl_disk *visible_disk_list; > + > +struct bl_disk_path *bl_get_path(const char *filepath, > + struct bl_disk_path *paths) > +{ > + struct bl_disk_path *tmp = paths; > + while (tmp) { > + if (!strcmp(tmp->full_path, filepath)) > + break; > + tmp = tmp->next; > + } > + return tmp; > +} > + > +/* Check whether valid_path is a substring(partition) of path */ > +int 
bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path) > +{ > + if (!strncmp(valid_path->full_path, path->full_path, > + strlen(valid_path->full_path))) > + return 1; > + > + return 0; > +} > + > +/* > + * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO, > + * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to > + * create pseudo device. So if state is higher, the device path needs to > + * be updated. > + * If device-mapper multipath support is a must, pseudo devices should > + * exist for each multipath device. If not, active device path will be > + * chosen for device creation. > + * Treat partition as invalid path. > + */ > +int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state, > + struct bl_disk *disk) > +{ > + struct bl_disk_path *valid_path = disk->valid_path; > + > + if (valid_path) { > + if (valid_path->state >= state) { > + if (bl_is_partition(valid_path, path)) > + return 0; > + } > + } > + return 1; > +} > + > +void bl_release_disk(void) > +{ > + struct bl_disk *disk; > + struct bl_disk_path *path = NULL; > + > + while (visible_disk_list) { > + disk = visible_disk_list; > + path = disk->paths; > + while (path) { > + disk->paths = path->next; > + free(path->full_path); > + free(path); > + path = disk->paths; > + } > + if (disk->serial) > + free(disk->serial); > + visible_disk_list = disk->next; > + free(disk); > + } > +} > + > +void bl_add_disk(char *filepath) > +{ > + struct bl_disk *disk = NULL; > + int fd = 0; > + struct stat sb; > + off_t size = 0; > + struct bl_serial *serial = NULL; > + enum bl_path_state_e ap_state = BL_PATH_STATE_PASSIVE; > + struct bl_disk_path *diskpath = NULL, *path = NULL; > + dev_t dev; > + > + BL_LOG_ERR("%s: %s\n", __func__, filepath); > + > + fd = open(filepath, O_RDONLY | O_LARGEFILE); > + if (fd < 0) > + return; > + > + if (fstat(fd, &sb)) { > + close(fd); > + return; > + } > + > + if (!sb.st_size) > + ioctl(fd, BLKGETSIZE, &size); > + else 
> + size = sb.st_size; > + > + if (!size) { > + close(fd); > + return; > + } > + > + dev = sb.st_rdev; > + serial = bldev_read_serial(fd, filepath); > + > + for (disk = visible_disk_list; disk != NULL; disk = disk->next) { > + /* Already scanned or a partition? > + * XXX: if released each time, maybe not need to compare > + */ > + if ((serial->len == disk->serial->len) && > + !memcmp(serial->data, disk->serial->data, serial->len)) { > + diskpath = bl_get_path(filepath, disk->paths); > + break; > + } > + } > + > + if (disk && diskpath) { > + close(fd); > + return; > + } > + > + bldev_read_ap_state(fd, &ap_state); > + close(fd); > + > + /* > + * Not sure how to identify a pseudo device created by > + * device-mapper, so leave /dev/mapper for now. > + */ > + if (strncmp(filepath, "/dev/mapper", 11) == 0) > + ap_state = BL_PATH_STATE_PSEUDO; > + > + /* add path */ > + path = malloc(sizeof(struct bl_disk_path)); > + if (!path) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + goto out_err; > + } > + path->next = NULL; > + path->state = ap_state; > + path->full_path = strdup(filepath); > + if (!path->full_path) > + goto out_err; > + > + if (!disk) { /* add disk */ > + disk = malloc(sizeof(struct bl_disk)); > + if (!disk) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + goto out_err; > + } > + disk->next = visible_disk_list; > + disk->dev = dev; > + disk->size = size; > + disk->serial = serial; > + disk->valid_path = path; > + disk->paths = path; > + visible_disk_list = disk; > + } else { > + path->next = disk->paths; > + disk->paths = path; > + /* check whether we need to update disk info */ > + if (bl_update_path(path, path->state, disk)) { > + disk->dev = dev; > + disk->size = size; > + disk->valid_path = path; > + } > + } > + return; > + > + out_err: > + if (path) { > + if (path->full_path) > + free(path->full_path); > + free(path); > + } > + return; > +} > + > +void bl_devicescan(const char *filename, struct scan_root_list *root) > +{ > + /* scan all 
disks */ > + char filepath[PATH_MAX]; > + struct scan_device_list *device; > + > + if (!strcmp(filename, ".") || !strcmp(filename, "..")) > + return; > + > + memset(filepath, 0, sizeof(filepath)); > + if (strlen(filename) < (PATH_MAX - strlen(root->name) - 2)) > + sprintf(filepath, "%s/%s", root->name, filename); > + else { > + BL_LOG_ERR("%s: name too long\n", __func__); > + return; > + } > + if (root->all_disk) > + goto valid; > + > + device = root->disk; > + while (device) { > + /* If device->name is a subset of filename, this disk should be > + * valid for scanning. > + * For example, device->name is "sd", filename is "sda". > + */ > + if (device->name > + && !memcmp(filename, device->name, strlen(device->name))) > + goto valid; > + device = device->next; > + } > + > + return; > + > + valid: > + /* > + * sg device is not a real device, but a device created according > + * to each scsi device. It won't be used for pseudo device creation. > + * I moved it here, so that sg devices will not be scanned. 
> + */ > + if (!strncmp(filepath, "/dev/sg", 7)) > + return; > + bl_add_disk(filepath); > + return; > +} > + > +int bl_discover_devices(void) > +{ > + DIR *dir; > + struct dirent *dp; > + struct scan_root_list *root = scan_root_list_head; > + > + /* release previous list */ > + bl_release_disk(); > + > + /* scan all disks */ > + while (root) { > + dir = opendir(root->name); > + if (dir == NULL) { > + root = root->next; > + continue; > + } > + > + while ((dp = readdir(dir)) != NULL) > + bl_devicescan(dp->d_name, root); > + > + root = root->next; > + closedir(dir); > + } > + > + return 0; > +} > + > +/* process kernel request > + * return 0: request processed, and no more request waiting; > + * return 1: request processed, and more requests waiting; > + * return < 0: error > + */ > +int bl_disk_inquiry_process(int fd) > +{ > + int ret = 0; > + struct pipefs_hdr *head = NULL, *tmp; > + char *buf = NULL; > + uint32_t major, minor; > + uint16_t buflen; > + unsigned int len = 0; > + > + head = calloc(1, sizeof(struct pipefs_hdr)); > + if (!head) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + return -ENOMEM; > + } > + > + /* read request */ > + if (atomicio(read, fd, head, sizeof(*head)) != sizeof(*head)) { > + /* Note that an error in this or the next read is pretty > + * catastrophic, as there is no good way to resync into > + * the pipe's stream. 
> + */ > + BL_LOG_ERR("Read pipefs head error!\n"); > + ret = -EIO; > + goto out; > + } > + > + buflen = head->totallen - sizeof(*head); > + buf = malloc(buflen); > + if (!buf) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + ret = -ENOMEM; > + goto out; > + } > + > + if (atomicio(read, fd, buf, buflen) != buflen) { > + BL_LOG_ERR("Read pipefs content error!\n"); > + ret = -EIO; > + goto out; > + } > + > + head->status = BL_DEVICE_REQUEST_PROC; > + switch (head->type) { > + case BL_DEVICE_MOUNT: > + if (!process_deviceinfo(buf, buflen, &major, &minor)) { > + head->status = BL_DEVICE_REQUEST_ERR; > + goto out; > + } > + tmp = realloc(head, sizeof(major) + sizeof(minor) + > + sizeof(struct pipefs_hdr)); > + if (!tmp) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + ret = -ENOMEM; > + goto out; > + } > + head = tmp; > + memcpy((void *)head + sizeof(struct pipefs_hdr), > + &major, sizeof(major)); > + memcpy((void *)head + sizeof(struct pipefs_hdr) + sizeof(major), > + &minor, sizeof(minor)); > + len = sizeof(major) + sizeof(minor); > + break; > + case BL_DEVICE_UMOUNT: > + if (!dm_device_remove_all((uint64_t *) buf)) > + head->status = BL_DEVICE_REQUEST_ERR; > + bl_discover_devices(); > + break; > + default: > + head->status = BL_DEVICE_REQUEST_ERR; > + } > + > + head->totallen = sizeof(struct pipefs_hdr) + len; > + /* write to pipefs */ > + if (atomicio((void *)write, fd, head, head->totallen) > + != head->totallen) { > + BL_LOG_ERR("Write pipefs error!\n"); > + ret = -EIO; > + } > + > + out: > + if (buf) > + free(buf); > + if (head) > + free(head); > + return ret; > +} > + > +/* TODO: set bl_process_stop to 1 in command */ > +unsigned int bl_process_stop; > + > +int bl_run_disk_inquiry_process(int fd) > +{ > + fd_set rset; > + struct timeval tv; > + int ret; > + > + bl_process_stop = 0; > + > + for (;;) { > + if (bl_process_stop) > + return 1; > + FD_ZERO(&rset); > + FD_SET(fd, &rset); > + ret = 0; > + tv.tv_sec = BL_DEVICE_DISCOVERY_INTERVAL; > + 
switch (select(fd + 1, &rset, NULL, NULL, &tv)) { > + case -1: > + if (errno == EINTR) > + continue; > + else { > + ret = -errno; > + goto out; > + } > + case 0: > + goto out; > + default: > + if (FD_ISSET(fd, &rset)) > + ret = bl_disk_inquiry_process(fd); > + } > + } > + out: > + return ret; > +} > + > +/* Daemon */ > +int main(int argc, char **argv) > +{ > + int fd, opt, fg = 0, ret = 1; > + struct stat statbuf; > + char pidbuf[64]; > + > + while ((opt = getopt(argc, argv, "c:f")) != -1) { > + switch (opt) { > + case 'c': > + conf_path = optarg; > + break; > + case 'f': > + fg = 1; > + break; > + } > + } > + > + if (!stat(PID_FILE, &statbuf)) { > + fprintf(stderr, "Pid file already existed\n"); > + return -1; > + } > + > + if (!fg && daemon(0, 0) != 0) { > + fprintf(stderr, "Daemonize failed\n"); > + return -1; > + } > + > + openlog("blkmapd", LOG_PID, 0); > + fd = open(PID_FILE, O_WRONLY | O_CREAT, 0644); > + if (fd < 0) { > + BL_LOG_ERR("Create pid file failed\n"); > + return -1; > + } > + > + if (lockf(fd, F_TLOCK, 0) < 0) { > + BL_LOG_ERR("Lock pid file failed\n"); > + close(fd); > + return -1; > + } > + ftruncate(fd, 0); > + sprintf(pidbuf, "%d\n", getpid()); > + write(fd, pidbuf, strlen(pidbuf)); > + > + /* open pipe file */ > + fd = open(BL_PIPE_FILE, O_RDWR); > + if (fd < 0) { > + BL_LOG_ERR("open pipe file error\n"); > + return -1; > + } > + > + ret = bl_cfg_init(); > + if (ret < 0) { > + if (ret == -ENOENT) > + BL_LOG_WARNING("Config file not exist, use default\n"); > + else { > + BL_LOG_ERR("Open/read Block pNFS config file error\n"); > + return -1; > + } > + } > + > + while (1) { > + /* discover device when needed */ > + bl_discover_devices(); > + > + ret = bl_run_disk_inquiry_process(fd); > + if (ret < 0) { > + /* what should we do with process error? 
*/ > + BL_LOG_ERR("inquiry process return %d\n", ret); > + } > + } > + close(fd); > + return ret; > +} > diff --git a/utils/blkmapd/device-discovery.h b/utils/blkmapd/device-discovery.h > new file mode 100644 > index 0000000..9f87ebe > --- /dev/null > +++ b/utils/blkmapd/device-discovery.h > @@ -0,0 +1,162 @@ > +/* > + * bl-device-discovery.h > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> + */ > +#ifndef BL_DEVICE_DISCOVERY_H > +#define BL_DEVICE_DISCOVERY_H > + > +#define BL_DEVICE_DISCOVERY_INTERVAL 60 > + > +#include <stdint.h> > +#include <syslog.h> > + > +enum blk_vol_type { > + BLOCK_VOLUME_SIMPLE = 0, /* maps to a single LU */ > + BLOCK_VOLUME_SLICE = 1, /* slice of another volume */ > + BLOCK_VOLUME_CONCAT = 2, /* concatenation of multiple volumes */ > + BLOCK_VOLUME_STRIPE = 3, /* striped across multiple volumes */ > + BLOCK_VOLUME_PSEUDO = 4, > +}; > + > +/* All disk offset/lengths are stored in 512-byte sectors */ > +struct bl_volume { > + uint32_t bv_type; > + off_t bv_size; > + struct bl_volume **bv_vols; > + int bv_vol_n; > + union { > + dev_t bv_dev; /*for BLOCK_VOLUME_SIMPLE(PSEUDO) */ > + off_t bv_stripe_unit; /*for BLOCK_VOLUME_STRIPE(CONCAT) */ > + off_t bv_offset; /*for BLOCK_VOLUME_SLICE */ > + } param; > +}; > + > +struct bl_sig_comp { > + int64_t bs_offset; /* In bytes */ > + uint32_t bs_length; /* In bytes */ > + char *bs_string; > +}; > + > +/* Maximum number of signatures components in a simple volume */ > +# define BLOCK_MAX_SIG_COMP 16 > + > +struct bl_sig { > + int si_num_comps; > + struct bl_sig_comp si_comps[BLOCK_MAX_SIG_COMP]; > +}; > + > +/* > + * Multipath support: ACTIVE or PSEUDO device is valid, > + * PASSIVE is a standby for ACTIVE. 
> + */
/* State recorded for one path to a disk (see bldev_read_ap_state()). */
enum bl_path_state_e {
	BL_PATH_STATE_PASSIVE = 1,
	BL_PATH_STATE_ACTIVE = 2,
	BL_PATH_STATE_PSEUDO = 3,
};

/* Length-counted byte string; data is not NUL-terminated. */
struct bl_serial {
	int len;
	char *data;
};

/* One node of a singly linked list of paths to the same disk. */
struct bl_disk_path {
	struct bl_disk_path *next;
	char *full_path;
	enum bl_path_state_e state;
};

/* One node of the singly linked list of visible disks. */
struct bl_disk {
	struct bl_disk *next;
	struct bl_serial *serial;
	dev_t dev;
	off_t size;		/* verify_sig() shifts this left by 9, i.e. treats it as 512-byte sectors */
	struct bl_disk_path *valid_path;
	struct bl_disk_path *paths;
};

/*
 * Four one-byte header fields followed by a variable-length payload.
 * bldev_read_serial() overlays this on INQUIRY page 0x83 data, both for
 * the page header and for each identifier inside it.
 */
struct bl_dev_id {
	unsigned char type;
	unsigned char ids;
	unsigned char reserve;
	unsigned char len;
	char data[0];
};

struct pipefs_hdr {
	uint32_t msgid;
	uint8_t type;
	uint8_t flags;
	uint16_t totallen;	/* length of entire message, including hdr */
	uint32_t status;
};

#define BL_DEVICE_UMOUNT		0x0	/* Umount--delete devices */
#define BL_DEVICE_MOUNT			0x1	/* Mount--create devices */
#define BL_DEVICE_REQUEST_INIT		0x0	/* Start request */
#define BL_DEVICE_REQUEST_PROC		0x1	/* User process succeeds */
#define BL_DEVICE_REQUEST_ERR		0x2	/* User process fails */

uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes);

/*
 * Bounds-check helper: jumps to the caller's local `out_err` label when
 * blk_overflow() rejects a read of nbytes at p.
 */
#define BLK_READBUF(p, e, nbytes)  do { \
	p = blk_overflow(p, e, nbytes); \
	if (!p) {\
		goto out_err;\
	} \
} while (0)

/* The READ* macros consume big-endian words through a local `uint32_t *p`. */
#define READ32(x)         (x) = ntohl(*p++)

#define READ64(x)         do {                  \
	(x) = (uint64_t)ntohl(*p++) << 32;           \
	(x) |= ntohl(*p++);                     \
} while (0)

/*
 * Reads a 64-bit byte count into the caller's local `tmp`, rejects values
 * that are not a multiple of 512 (goto out_err) and stores tmp / 512 in x.
 */
#define READ_SECTOR(x)     do { \
	READ64(tmp); \
	if (tmp & 0x1ff) { \
		goto out_err; \
	} \
	(x) = tmp >> 9; \
} while (0)

extern struct bl_disk *visible_disk_list;
uint64_t dm_device_create(struct bl_volume *vols, int num_vols);
int dm_device_remove_all(uint64_t *dev);
uint64_t process_deviceinfo(const char *dev_addr_buf,
			    unsigned int dev_addr_len,
			    uint32_t *major, uint32_t *minor);

extern ssize_t atomicio(ssize_t(*f) (int, void *, size_t),
			int fd, void *_s, size_t n);
extern struct bl_serial *bldev_read_serial(int fd, const char *filename);
extern void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out);
extern int bl_discover_devices(void);

/* Thin syslog wrappers; `fmt...` is the GNU named-variadic macro form. */
#define BL_LOG_WARNING(fmt...)		syslog(LOG_WARNING, fmt)
#define BL_LOG_ERR(fmt...)		syslog(LOG_ERR, fmt)
#define BL_LOG_DEBUG(fmt...)		syslog(LOG_DEBUG, fmt)
#endif
> diff --git a/utils/blkmapd/device-inq.c b/utils/blkmapd/device-inq.c > new file mode 100644 > index 0000000..ff38fd6 > --- /dev/null > +++ b/utils/blkmapd/device-inq.c > @@ -0,0 +1,235 @@ > +/* > + * device-inq.c: inquire SCSI device information. > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * This program refers to "SCSI Primary Commands - 3 (SPC-3) > + * at http://www.t10.org and sg_inq.c in sg3_utils-1.26 for > + * Linux OS SCSI subsystem, by D. Gilbert. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#include <stdlib.h> > +#include <stdio.h> > +#include <string.h> > +#include <dirent.h> > +#include <ctype.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/ioctl.h> > +#include <sys/mount.h> > +#include <sys/select.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <libgen.h> > +#include <errno.h> > +#include <scsi/scsi.h> > +#include <scsi/scsi_ioctl.h> > +#include <scsi/sg.h> > +#include "device-discovery.h" > + > +#define DEF_ALLOC_LEN 255 > +#define MX_ALLOC_LEN (0xc000 + 0x80) > + > +struct bl_serial *bl_create_scsi_string(int len, const char *bytes) > +{ > + struct bl_serial *s; > + s = malloc(sizeof(*s) + len); > + if (s) { > + s->data = (char *)&s[1]; > + s->len = len; > + memcpy(s->data, bytes, len); > + } > + return s; > +} > + > +void bl_free_scsi_string(struct bl_serial *str) > +{ > + if (str) > + free(str); > +} > + > +#define sg_io_ok(io_hdr) \ > + ((((io_hdr).status & 0x7e) == 0) && \ > + ((io_hdr).host_status == 0) && \ > + (((io_hdr).driver_status & 0x0f) == 0)) > + > +static int sg_timeout = 1 * 1000; > + > +static int bldev_inquire_page(int fd, int page, char *buffer, int len) > +{ > + unsigned char cmd[] = { INQUIRY, 0, 0, 0, 0, 0 }; > + unsigned char sense_b[28]; > + struct sg_io_hdr io_hdr; > + if (page >= 0) { > + cmd[1] = 1; > + cmd[2] = page; > + } > + cmd[3] = (unsigned char)((len >> 8) & 0xff); > + cmd[4] = (unsigned char)(len & 0xff); > + > + memset(&io_hdr, 0, sizeof(struct 
sg_io_hdr)); > + io_hdr.interface_id = 'S'; > + io_hdr.cmd_len = sizeof(cmd); > + io_hdr.mx_sb_len = sizeof(sense_b); > + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; > + io_hdr.dxfer_len = len; > + io_hdr.dxferp = buffer; > + io_hdr.cmdp = cmd; > + io_hdr.sbp = sense_b; > + io_hdr.timeout = sg_timeout; > + if (ioctl(fd, SG_IO, &io_hdr) < 0) > + return -1; > + > + if (sg_io_ok(io_hdr)) > + return 0; > + return -1; > +} > + > +int bldev_inquire_pages(int fd, int page, char **buffer) > +{ > + int status = 0; > + char *tmp; > + int len; > + > + *buffer = calloc(DEF_ALLOC_LEN, sizeof(char)); > + if (!*buffer) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + return -ENOMEM; > + } > + > + status = bldev_inquire_page(fd, page, *buffer, DEF_ALLOC_LEN); > + if (status) > + goto out; > + > + status = -1; > + if ((*(*buffer + 1) & 0xff) != page) > + goto out; > + > + len = (*(*buffer + 2) << 8) + *(*buffer + 3) + 4; > + if (len > MX_ALLOC_LEN) { > + BL_LOG_ERR("SCSI response length too long: %d\n", len); > + goto out; > + } > + if (len > DEF_ALLOC_LEN) { > + tmp = realloc(*buffer, len); > + if (!tmp) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + status = -ENOMEM; > + goto out; > + } > + *buffer = tmp; > + status = bldev_inquire_page(fd, page, *buffer, len); > + if (status) > + goto out; > + } > + status = 0; > + out: > + return status; > +} > + > +/* For EMC multipath devices, use VPD page (0xc0) to get status. 
> + * For other devices, return ACTIVE for now > + */ > +void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out) > +{ > + int status = 0; > + char *buffer; > + > + *ap_state_out = BL_PATH_STATE_ACTIVE; > + > + status = bldev_inquire_pages(fd, 0xc0, &buffer); > + if (status) > + goto out; > + > + if (buffer[4] < 0x02) > + *ap_state_out = BL_PATH_STATE_PASSIVE; > + out: > + if (buffer) > + free(buffer); > + return; > +} > + > +struct bl_serial *bldev_read_serial(int fd, const char *filename) > +{ > + struct bl_serial *serial_out = NULL; > + int status = 0, pos, len; > + char *buffer; > + struct bl_dev_id *dev_root, *dev_id; > + unsigned int current_id = 0; > + > + status = bldev_inquire_pages(fd, 0x83, &buffer); > + if (status) > + goto out; > + > + dev_root = (struct bl_dev_id *)buffer; > + > + pos = 0; > + current_id = 0; > + len = dev_root->len; > + while (pos < (len - sizeof(struct bl_dev_id) + sizeof(unsigned char))) { > + dev_id = (struct bl_dev_id *)&(dev_root->data[pos]); > + if ((dev_id->ids & 0xf) < current_id) > + continue; > + switch (dev_id->ids & 0xf) { > + /* We process SCSI ID with four ID cases: 0, 1, 2 and 3. > + * When more than one ID is available, priority is > + * 3>2>1>0. > + */ > + case 2: /* EUI-64 based */ > + if ((dev_id->len != 8) && (dev_id->len != 12) && > + (dev_id->len != 16)) { > + BL_LOG_ERR("EUI-64 only decodes 8, " > + "12 and 16\n"); > + break; > + } > + case 3: /* NAA */ > + /* TODO: NAA validity judgement too complicated, > + * so just ingore it here. 
> + */ > + if ((dev_id->type & 0xf) != 1) { > + BL_LOG_ERR("Binary code_set expected\n"); > + break; > + } > + case 0: /* vendor specific */ > + case 1: /* T10 vendor identification */ > + current_id = dev_id->ids & 0xf; > + if (serial_out) > + bl_free_scsi_string(serial_out); > + serial_out = bl_create_scsi_string(dev_id->len, > + dev_id->data); > + break; > + default: > + break; > + } > + if (current_id == 3) > + break; > + pos += (dev_id->len + sizeof(struct bl_dev_id) - > + sizeof(unsigned char)); > + } > + out: > + if (!serial_out) > + serial_out = bl_create_scsi_string(strlen(filename), filename); > + if (buffer) > + free(buffer); > + return serial_out; > +} > diff --git a/utils/blkmapd/device-process.c b/utils/blkmapd/device-process.c > new file mode 100644 > index 0000000..9e91840 > --- /dev/null > +++ b/utils/blkmapd/device-process.c > @@ -0,0 +1,394 @@ > +/* > + * device-process.c: detailed processing of device information sent > + * from kernel. > + * > + * Copyright (c) 2006 The Regents of the University of Michigan. > + * All rights reserved. > + * > + * Andy Adamson <andros@citi.umich.edu> > + * Fred Isaman <iisaman@umich.edu> > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * > + * Used codes in linux/fs/nfs/blocklayout/blocklayoutdev.c. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. 
> + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#include <libdevmapper.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/user.h> > +#include <fcntl.h> > +#include <errno.h> > +#include <arpa/inet.h> > +#include <linux/kdev_t.h> > +#include "device-discovery.h" > + > +uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes) > +{ > + uint32_t *q = p + ((nbytes + 3) >> 2); > + if (q > end || q < p) > + return NULL; > + return p; > +} > + > +static int decode_blk_signature(uint32_t **pp, uint32_t *end, > + struct bl_sig *sig) > +{ > + int i, tmp; > + uint32_t *p = *pp; > + > + BLK_READBUF(p, end, 4); > + READ32(sig->si_num_comps); > + if (sig->si_num_comps == 0) { > + BL_LOG_ERR("0 components in sig\n"); > + goto out_err; > + } > + if (sig->si_num_comps >= BLOCK_MAX_SIG_COMP) { > + BL_LOG_ERR("number of sig comps %i >= BLOCK_MAX_SIG_COMP\n", > + sig->si_num_comps); > + goto out_err; > + } > + for (i = 0; i < sig->si_num_comps; i++) { > + BLK_READBUF(p, end, 12); > + READ64(sig->si_comps[i].bs_offset); > + READ32(tmp); > + sig->si_comps[i].bs_length = tmp; > + BLK_READBUF(p, end, tmp); > + /* Note we rely here on fact that sig 
is used immediately > + * for mapping, then thrown away. > + */ > + sig->si_comps[i].bs_string = (char *)p; > + BL_LOG_ERR("%s: si_comps[%d]: bs_length %d, bs_string %s\n", > + __func__, i, sig->si_comps[i].bs_length, > + sig->si_comps[i].bs_string); > + p += ((tmp + 3) >> 2); > + } > + *pp = p; > + return 0; > + out_err: > + return -EIO; > +} > + > +/* Read signature from device > + * return 0: read successfully > + * return -1: error > + */ > +int > +read_cmp_blk_sig(const char *dev_name, struct bl_sig_comp *comp, > + int64_t bs_offset) > +{ > + int fd, ret = -1; > + char *sig = NULL; > + > + fd = open(dev_name, O_RDONLY | O_LARGEFILE); > + if (fd < 0) { > + BL_LOG_ERR("%s could not be opened for read\n", dev_name); > + goto error; > + } > + > + sig = (char *)malloc(comp->bs_length); > + if (!sig) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto error; > + } > + > + if (lseek64(fd, bs_offset, SEEK_SET) == -1) { > + BL_LOG_ERR("File %s lseek error\n", dev_name); > + goto error; > + } > + > + if (atomicio(read, fd, sig, comp->bs_length) != comp->bs_length) { > + BL_LOG_ERR("File %s read error\n", dev_name); > + goto error; > + } > + > + BL_LOG_ERR > + ("%s: %s sig: %s, bs_string: %s, bs_length: %d, bs_offset: %lld\n", > + __func__, dev_name, sig, comp->bs_string, comp->bs_length, > + (long long)bs_offset); > + ret = memcmp(sig, comp->bs_string, comp->bs_length); > + > + error: > + if (sig) > + free(sig); > + if (fd >= 0) > + close(fd); > + return ret; > +} > + > +/* > + * All signatures in sig must be found on disk for verification. > + * Returns True if sig matches, False otherwise. 
> + */ > +static int verify_sig(struct bl_disk *disk, struct bl_sig *sig) > +{ > + struct bl_sig_comp *comp; > + int i, ret; > + int64_t bs_offset; > + > + for (i = 0; i < sig->si_num_comps; i++) { > + comp = &sig->si_comps[i]; > + bs_offset = comp->bs_offset; > + if (bs_offset < 0) > + bs_offset += (((int64_t) disk->size) << 9); > + BL_LOG_ERR("%s: bs_offset: %lld\n", > + __func__, (long long) bs_offset); > + ret = read_cmp_blk_sig(disk->valid_path->full_path, > + comp, bs_offset); > + if (ret) > + return 0; > + } > + return 1; > +} > + > +/* > + * map_sig_to_device() > + * Given a signature, walk the list of visible disks searching for > + * a match. Returns True if mapping was done, False otherwise. > + * > + * While we're at it, fill in the vol->bv_size. > + */ > +static int map_sig_to_device(struct bl_sig *sig, struct bl_volume *vol) > +{ > + int mapped = 0; > + struct bl_disk *disk = visible_disk_list; > + char *filepath = 0; > + struct bl_disk *lolDisk = disk; > + > + while (lolDisk) { > + BL_LOG_ERR("%s: visible_disk_list: %s\n", __func__, > + lolDisk->valid_path->full_path); > + lolDisk = lolDisk->next; > + } > + > + /* scan disk list to find out match device */ > + while (disk) { > + /* FIXME: should we use better algorithm for disk scan? */ > + mapped = verify_sig(disk, sig); > + if (mapped) { > + vol->param.bv_dev = disk->dev; > + filepath = disk->valid_path->full_path; > + vol->bv_size = disk->size; > + break; > + } > + disk = disk->next; > + } > + return mapped; > +} > + > +/* We are given an array of XDR encoded array indices, each of which should > + * refer to a previously decoded device. Translate into a list of pointers > + * to the appropriate pnfs_blk_volume's. 
> + */ > +static int set_vol_array(uint32_t **pp, uint32_t *end, > + struct bl_volume *vols, int working) > +{ > + int i, index; > + uint32_t *p = *pp; > + struct bl_volume **array = vols[working].bv_vols; > + for (i = 0; i < vols[working].bv_vol_n; i++) { > + BLK_READBUF(p, end, 4); > + READ32(index); > + if ((index < 0) || (index >= working)) { > + BL_LOG_ERR("set_vol_array: Id %i out of range\n", > + index); > + goto out_err; > + } > + array[i] = &vols[index]; > + } > + *pp = p; > + return 0; > + out_err: > + return -EIO; > +} > + > +static uint64_t sum_subvolume_sizes(struct bl_volume *vol) > +{ > + int i; > + uint64_t sum = 0; > + for (i = 0; i < vol->bv_vol_n; i++) > + sum += vol->bv_vols[i]->bv_size; > + return sum; > +} > + > +static int decode_blk_volume(uint32_t **pp, uint32_t *end, > + struct bl_volume *vols, int i, int *array_cnt) > +{ > + int status = 0, j; > + struct bl_sig sig; > + uint32_t *p = *pp; > + struct bl_volume *vol = &vols[i]; > + uint64_t tmp, tmp_size; > + div_t d; > + > + BLK_READBUF(p, end, 4); > + READ32(vol->bv_type); > + switch (vol->bv_type) { > + case BLOCK_VOLUME_SIMPLE: > + *array_cnt = 0; > + status = decode_blk_signature(&p, end, &sig); > + if (status) > + return status; > + status = map_sig_to_device(&sig, vol); > + if (!status) { > + BL_LOG_ERR("Could not find disk for device\n"); > + return -ENXIO; > + } > + status = 0; > + break; > + case BLOCK_VOLUME_SLICE: > + BLK_READBUF(p, end, 16); > + READ_SECTOR(vol->param.bv_offset); > + READ_SECTOR(vol->bv_size); > + *array_cnt = vol->bv_vol_n = 1; > + status = set_vol_array(&p, end, vols, i); > + break; > + case BLOCK_VOLUME_STRIPE: > + BLK_READBUF(p, end, 8); > + READ_SECTOR(vol->param.bv_stripe_unit); > + off_t chunksize = vol->param.bv_stripe_unit; > + if ((chunksize == 0) || > + ((chunksize & (chunksize - 1)) != 0) || > + (chunksize < (PAGE_SIZE >> 9))) > + return -EIO; > + BLK_READBUF(p, end, 4); > + READ32(vol->bv_vol_n); > + if (!vol->bv_vol_n) > + return -EIO; > + 
*array_cnt = vol->bv_vol_n; > + status = set_vol_array(&p, end, vols, i); > + if (status) > + return status; > + for (j = 1; j < vol->bv_vol_n; j++) { > + if (vol->bv_vols[j]->bv_size != > + vol->bv_vols[0]->bv_size) { > + BL_LOG_ERR("varying subvol size\n"); > + return -EIO; > + } > + } > + /* Make sure total size only includes addressable areas */ > + tmp_size = vol->bv_vols[0]->bv_size; > + d = div(tmp_size, (uint32_t) vol->param.bv_stripe_unit); > + tmp_size = d.quot; > + vol->bv_size = tmp_size * vol->param.bv_stripe_unit; > + break; > + case BLOCK_VOLUME_CONCAT: > + BLK_READBUF(p, end, 4); > + READ32(vol->bv_vol_n); > + if (!vol->bv_vol_n) > + return -EIO; > + *array_cnt = vol->bv_vol_n; > + status = set_vol_array(&p, end, vols, i); > + if (status) > + return status; > + vol->bv_size = sum_subvolume_sizes(vol); > + break; > + default: > + BL_LOG_ERR("Unknown volume type %i\n", vol->bv_type); > + out_err: > + return -EIO; > + } > + *pp = p; > + return status; > +} > + > +uint64_t process_deviceinfo(const char *dev_addr_buf, > + unsigned int dev_addr_len, > + uint32_t *major, uint32_t *minor) > +{ > + int num_vols, i, status, count; > + uint32_t *p, *end; > + struct bl_volume *vols = NULL, **arrays = NULL, **arrays_ptr = NULL; > + uint64_t dev = 0; > + int tried = 0; > + > + restart: > + p = (uint32_t *) dev_addr_buf; > + end = (uint32_t *) ((char *)p + dev_addr_len); > + /* Decode block volume */ > + BLK_READBUF(p, end, 4); > + READ32(num_vols); > + if (num_vols <= 0) { > + BL_LOG_WARNING("Error: number of vols: %d\n", num_vols); > + goto out_err; > + } > + > + vols = (struct bl_volume *)malloc(num_vols * sizeof(struct bl_volume)); > + if (!vols) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto out_err; > + } > + > + /* Each volume in vols array needs its own array. Save time by > + * allocating them all in one large hunk. 
Because each volume > + * array can only reference previous volumes, and because once > + * a concat or stripe references a volume, it may never be > + * referenced again, the volume arrays are guaranteed to fit > + * in the suprisingly small space allocated. > + */ > + arrays = > + (struct bl_volume **)malloc(num_vols * 2 * > + sizeof(struct bl_volume *)); > + if (!arrays) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto out_err; > + } > + > + arrays_ptr = arrays; > + > + for (i = 0; i < num_vols; i++) { > + vols[i].bv_vols = arrays_ptr; > + status = decode_blk_volume(&p, end, vols, i, &count); > + if (status == -ENXIO && (tried <= 5)) { > + sleep(1); > + BL_LOG_DEBUG("%s: discover again!\n", __func__); > + bl_discover_devices(); > + tried++; > + free(vols); > + free(arrays); > + goto restart; > + } > + if (status) > + goto out_err; > + arrays_ptr += count; > + } > + > + if (p != end) { > + BL_LOG_ERR("p is not equal to end!\n"); > + goto out_err; > + } > + > + dev = dm_device_create(vols, num_vols); > + *major = MAJOR(dev); > + *minor = MINOR(dev); > + out_err: > + if (vols) > + free(vols); > + if (arrays) > + free(arrays); > + return dev; > +} > diff --git a/utils/blkmapd/dm-device.c b/utils/blkmapd/dm-device.c > new file mode 100644 > index 0000000..8162706 > --- /dev/null > +++ b/utils/blkmapd/dm-device.c > @@ -0,0 +1,509 @@ > +/* > + * dm-device.c: create or remove device via device mapper API. > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. 
Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#include <libdevmapper.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <errno.h> > +#include <linux/kdev_t.h> > +#include "device-discovery.h" > + > +#define DM_DEV_NAME_LEN 256 > + > +#ifndef DM_MAX_TYPE_NAME > +#define DM_MAX_TYPE_NAME 16 > +#endif > + > +#define DM_PARAMS_LEN 512 /* XXX: is this enough for target? 
*/ > +#define DM_DIR "/dev/mapper" > +#define DM_DIR_LEN12 > +#define TYPE_HAS_DEV(type) ((type == BLOCK_VOLUME_SIMPLE) || \ > + (type == BLOCK_VOLUME_PSEUDO)) > + > +struct bl_dm_table { > + uint64_t offset; > + uint64_t size; > + char target_type[DM_MAX_TYPE_NAME]; > + char params[DM_PARAMS_LEN]; > + struct bl_dm_table *next; > +}; > + > +struct bl_dm_tree { > + uint64_t dev; > + struct dm_tree *tree; > + struct bl_dm_tree *next; > +}; > + > +static inline struct bl_dm_table *bl_dm_table_alloc(void) > +{ > + return (struct bl_dm_table *)calloc(1, sizeof(struct bl_dm_table)); > +} > + > +void bl_dm_table_free(struct bl_dm_table *bl_table_head) > +{ > + struct bl_dm_table *p = bl_table_head; > + while (bl_table_head) { > + p = bl_table_head->next; > + free(bl_table_head); > + bl_table_head = p; > + } > +} > + > +void add_to_bl_dm_table(struct bl_dm_table **bl_table_head, > + struct bl_dm_table *table) > +{ > + struct bl_dm_table *pre; > + if (!*bl_table_head) { > + *bl_table_head = table; > + return; > + } > + pre = *bl_table_head; > + while (pre->next) > + pre = pre->next; > + pre->next = table; > + return; > +} > + > +struct bl_dm_tree *bl_tree_head; > + > +struct bl_dm_tree *find_bl_dm_tree(uint64_t dev) > +{ > + struct bl_dm_tree *p = bl_tree_head; > + while (p) { > + if (p->dev == dev) > + return p; > + p = p->next; > + } > + return NULL; > +} > + > +void del_from_bl_dm_tree(uint64_t dev) > +{ > + struct bl_dm_tree *pre = bl_tree_head; > + struct bl_dm_tree *p; > + > + p = pre; > + while (p) { > + if (p->dev == dev) { > + pre->next = p->next; > + if (p == bl_tree_head) > + bl_tree_head = bl_tree_head->next; > + free(p); > + break; > + } > + pre = p; > + p = pre->next; > + } > +} > + > +void add_to_bl_dm_tree(struct bl_dm_tree *tree) > +{ > + struct bl_dm_tree *pre; > + if (!bl_tree_head) { > + bl_tree_head = tree; > + return; > + } > + pre = bl_tree_head; > + while (pre->next) > + pre = pre->next; > + pre->next = tree; > + return; > +} > + > +/* Create device 
via device mapper > + * return 0 when creation failed > + * return dev no for created device > + */ > +uint64_t dm_single_device_create(const char *dev_name, struct bl_dm_table * p) > +{ > + struct dm_task *dmt; > + struct dm_info dminfo; > + int ret = 0; > + > + dmt = dm_task_create(DM_DEVICE_CREATE); > + if (!dmt) { > + BL_LOG_ERR("Create dm_task for %s failed\n", dev_name); > + return 0; > + } > + ret = dm_task_set_name(dmt, dev_name); > + if (!ret) > + goto err_out; > + > + while (p) { > + ret = dm_task_add_target(dmt, p->offset, p->size, > + p->target_type, p->params); > + if (!ret) > + goto err_out; > + p = p->next; > + } > + > + ret = dm_task_run(dmt) && > + dm_task_get_info(dmt, &dminfo) && dminfo.exists; > + > + if (!ret) > + goto err_out; > + > + dm_task_update_nodes(); > + > + err_out: > + dm_task_destroy(dmt); > + > + if (!ret) { > + BL_LOG_ERR("Create device %s failed\n", dev_name); > + return 0; > + } > + return MKDEV(dminfo.major, dminfo.minor); > +} > + > +int dm_device_remove_byname(const char *dev_name) > +{ > + struct dm_task *dmt; > + int ret = 0; > + > + dmt = dm_task_create(DM_DEVICE_REMOVE); > + if (!dmt) > + return -ENODEV; > + > + ret = dm_task_set_name(dmt, dev_name) && dm_task_run(dmt); > + > + dm_task_update_nodes(); > + > + if (dmt) > + dm_task_destroy(dmt); > + > + return ret; > +} > + > +int dm_device_remove(uint64_t dev) > +{ > + struct dm_task *dmt; > + struct dm_names *dmnames; > + char *names = NULL; > + int ret = -1; > + > + /* Look for dev_name via dev, if dev_name could be transferred here, > + we could jump to DM_DEVICE_REMOVE directly */ > + dmt = dm_task_create(DM_DEVICE_LIST); > + if (!dmt) { > + BL_LOG_ERR("dm_task creation failed\n"); > + return -ENODEV; > + } > + > + ret = dm_task_run(dmt); > + if (!ret) { > + BL_LOG_ERR("dm_task_run failed\n"); > + goto error; > + } > + > + dmnames = dm_task_get_names(dmt); > + if (!dmnames || !dmnames->dev) { > + BL_LOG_ERR("dm_task_get_names failed\n"); > + goto error; > + } > + > + 
do { > + if (dmnames->dev == dev) { > + names = dmnames->name; > + break; > + } > + dmnames = (void *)dmnames + dmnames->next; > + } while (dmnames); > + > + if (!names) { > + BL_LOG_ERR("Could not find device\n"); > + goto error; > + } > + > + dm_task_update_nodes(); > + > + error: > + dm_task_destroy(dmt); > + > + /* Start to remove device */ > + if (names) > + ret = dm_device_remove_byname(names); > + return ret; > +} > + > +static unsigned long dev_count; > + > +void dm_devicelist_remove(unsigned long start, unsigned long end) > +{ > + char dev_name[DM_DEV_NAME_LEN]; > + unsigned long count; > + > + if ((start >= dev_count) || (end <= 1) || (start >= end - 1)) > + return; > + > + for (count = end - 1; count > start; count--) { > + sprintf(dev_name, "pnfs_vol_%lu", count - 1); > + dm_device_remove_byname(dev_name); > + } > + > + return; > +} > + > +void bl_dm_remove_tree(uint64_t dev) > +{ > + struct bl_dm_tree *p; > + > + p = find_bl_dm_tree(dev); > + if (!p) > + return; > + > + dm_tree_free(p->tree); > + del_from_bl_dm_tree(dev); > +} > + > +void bl_dm_create_tree(uint64_t dev) > +{ > + struct dm_tree *tree; > + struct bl_dm_tree *bl_tree; > + > + bl_tree = find_bl_dm_tree(dev); > + if (bl_tree) > + return; /* XXX: error? 
*/ > + > + tree = dm_tree_create(); > + if (!tree) > + return; > + > + if (!dm_tree_add_dev(tree, MAJOR(dev), MINOR(dev))) { > + dm_tree_free(tree); > + return; > + } > + > + bl_tree = malloc(sizeof(struct bl_dm_tree)); > + if (!bl_tree) { > + dm_tree_free(tree); > + return; > + } > + > + bl_tree->dev = dev; > + bl_tree->tree = tree; > + bl_tree->next = NULL; > + add_to_bl_dm_tree(bl_tree); > + > + return; > +} > + > +uint64_t dm_device_nametodev(char *dev_name) > +{ > + struct dm_task *dmt; > + int ret = 0; > + struct dm_info dminfo; > + > + dmt = dm_task_create(DM_DEVICE_INFO); > + if (!dmt) > + return -ENODEV; > + > + ret = dm_task_set_name(dmt, dev_name) && > + dm_task_run(dmt) && dm_task_get_info(dmt, &dminfo); > + > + if (dmt) > + dm_task_destroy(dmt); > + > + if (!ret) > + return 0; > + > + return MKDEV(dminfo.major, dminfo.minor); > +} > + > +int dm_device_remove_all(uint64_t *dev) > +{ > + struct bl_dm_tree *p; > + struct dm_tree_node *node; > + const char *uuid; > + int ret = 0; > + uint32_t major, minor; > + uint64_t bl_dev; > + > + memcpy(&major, dev, sizeof(uint32_t)); > + memcpy(&minor, (void *)dev + sizeof(uint32_t), sizeof(uint32_t)); > + bl_dev = MKDEV(major, minor); > + p = find_bl_dm_tree(bl_dev); > + if (!p) > + return ret; > + > + node = dm_tree_find_node(p->tree, MAJOR(bl_dev), MINOR(bl_dev)); > + if (!node) > + return ret; > + > + uuid = dm_tree_node_get_uuid(node); > + if (!uuid) > + return ret; > + > + dm_device_remove(bl_dev); > + ret = dm_tree_deactivate_children(node, uuid, strlen(uuid)); > + dm_task_update_nodes(); > + bl_dm_remove_tree(bl_dev); > + return ret; > +} > + > +/* TODO: check the value for DM_DEV_NAME_LEN, DM_TYPE_LEN, DM_PARAMS_LEN */ > +uint64_t dm_device_create(struct bl_volume *vols, int num_vols) > +{ > + uint64_t size, dev = 0; > + unsigned long count = dev_count; > + int number = 0, i, pos; > + struct bl_volume *node; > + char *tmp; > + struct bl_dm_table *table = NULL; > + struct bl_dm_table *bl_table_head = NULL; > 
+ unsigned int len; > + char *dev_name = NULL; > + /* Create pseudo device here */ > + while (number < num_vols) { > + node = &vols[number]; > + switch (node->bv_type) { > + case BLOCK_VOLUME_SIMPLE: > + /* Do not need to create device here */ > + dev = node->param.bv_dev; > + goto continued; > + case BLOCK_VOLUME_SLICE: > + table = bl_dm_table_alloc(); > + if (!table) > + goto out; > + table->offset = 0; > + table->size = node->bv_size; > + strcpy(table->target_type, "linear"); > + if (!TYPE_HAS_DEV(node->bv_vols[0]->bv_type)) { > + free(table); > + goto out; > + } > + dev = node->bv_vols[0]->param.bv_dev; > + tmp = table->params; > + if (!dm_format_dev(tmp, DM_PARAMS_LEN, > + MAJOR(dev), MINOR(dev))) { > + free(table); > + goto out; > + } > + tmp += strlen(tmp); > + sprintf(tmp, " %lu", node->param.bv_offset); > + add_to_bl_dm_table(&bl_table_head, table); > + break; > + case BLOCK_VOLUME_STRIPE: > + table = bl_dm_table_alloc(); > + if (!table) > + goto out; > + table->offset = 0; > + table->size = node->bv_size; > + strcpy(table->target_type, "striped"); > + sprintf(table->params, "%d %lu %n", node->bv_vol_n, > + node->param.bv_stripe_unit, &pos); > + /* Repeatedly copy subdev to params */ > + tmp = table->params + pos; > + len = DM_PARAMS_LEN - pos; > + for (i = 0; i < node->bv_vol_n; i++) { > + if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) { > + free(table); > + goto out; > + } > + dev = node->bv_vols[i]->param.bv_dev; > + if (!dm_format_dev(tmp, len, MAJOR(dev), > + MINOR(dev))) { > + free(table); > + goto out; > + } > + pos = strlen(tmp); > + tmp += pos; > + len -= pos; > + sprintf(tmp, " %d ", 0); > + tmp += 3; > + len -= 3; > + } > + add_to_bl_dm_table(&bl_table_head, table); > + break; > + case BLOCK_VOLUME_CONCAT: > + size = 0; > + for (i = 0; i < node->bv_vol_n; i++) { > + table = bl_dm_table_alloc(); > + if (!table) > + goto out; > + table->offset = size; > + table->size = node->bv_vols[i]->bv_size; > + if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) { > 
+ free(table); > + goto out; > + } > + strcpy(table->target_type, "linear"); > + tmp = table->params; > + dev = node->bv_vols[i]->param.bv_dev; > + if (!dm_format_dev(tmp, DM_PARAMS_LEN, > + MAJOR(dev), MINOR(dev))) { > + free(table); > + goto out; > + } > + tmp += strlen(tmp); > + sprintf(tmp, " %d", 0); > + size += table->size; > + add_to_bl_dm_table(&bl_table_head, table); > + } > + break; > + default: > + /* Delete previous temporary devices */ > + dm_devicelist_remove(count, dev_count); > + goto out; > + } /* end of swtich */ > + /* Create dev_name here. Name of device is pnfs_vol_XXX */ > + if (dev_name) > + free(dev_name); > + dev_name = (char *)calloc(DM_DEV_NAME_LEN, sizeof(char)); > + if (!dev_name) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto out; > + } > + sprintf(dev_name, "pnfs_vol_%lu", dev_count++); > + > + dev = dm_single_device_create(dev_name, bl_table_head); > + if (!dev) { > + /* Delete previous temporary devices */ > + dm_devicelist_remove(count, dev_count); > + goto out; > + } > + node->param.bv_dev = dev; > + /* TODO: extend use with PSEUDO later */ > + node->bv_type = BLOCK_VOLUME_PSEUDO; > + continued: > + number++; > + if (bl_table_head) > + bl_dm_table_free(bl_table_head); > + bl_table_head = NULL; > + } > + out: > + if (bl_table_head) > + bl_dm_table_free(bl_table_head); > + bl_table_head = NULL; > + if (dev) > + bl_dm_create_tree(dev); > + if (dev_name) > + free(dev_name); > + return dev; > +} > diff --git a/utils/blkmapd/etc/blkmapd.conf b/utils/blkmapd/etc/blkmapd.conf > new file mode 100644 > index 0000000..da70d94 > --- /dev/null > +++ b/utils/blkmapd/etc/blkmapd.conf > @@ -0,0 +1,10 @@ > +# This is an example config file > + > +# Look at all /dev/sd* devices > +# /dev/sd or /dev/sd* > +/dev/sd* > + > +# Look at all /dev/mapper/* devices > +# /dev/mapper/* or > +# /dev/mapper/ > +/dev/mapper/* > diff --git a/utils/blkmapd/etc/initd/initd.redhat b/utils/blkmapd/etc/initd/initd.redhat > new file mode 100644 > index 
0000000..d6a77e8 > --- /dev/null > +++ b/utils/blkmapd/etc/initd/initd.redhat > @@ -0,0 +1,76 @@ > +#!/bin/sh > +# > +# description: Starts and stops the pNFS block-layout device discovery service > +# > +# processname: blkmapd > +# pidfile: /var/run/blkmapd.pid > +# config: /etc/blkmapd.conf > + > +# Source function library. > +if [ -f /etc/init.d/functions ] ; then > + . /etc/init.d/functions > +elif [ -f /etc/rc.d/init.d/functions ] ; then > + . /etc/rc.d/init.d/functions > +else > + exit 0 > +fi > + > +PATH=/sbin:/bin:/usr/sbin:/usr/bin > + > +RETVAL=0 > + > +start() > +{ > + echo -n $"Starting pNFS block-layout device discovery service: " > + modprobe -q blocklayoutdriver > + daemon /usr/sbin/blkmapd > + RETVAL=$? > + if [ $RETVAL -eq 0 ]; then > + touch /var/lock/subsys/blkmapd > + fi > + echo > + return $RETVAL > +} > + > +stop() > +{ > + echo -n $"Stopping pNFS block-layout device discovery service: " > + killproc blkmapd 2> /dev/null > + rm -f /var/run/blkmapd.pid > + RETVAL=$? > + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/blkmapd > + if [ $RETVAL -eq 0 ]; then > + echo_success > + else > + echo_failure > + fi > + echo > + return $RETVAL > +} > + > +restart() > +{ > + stop > + start > +} > + > +case "$1" in > + start) > + start > + ;; > + stop) > + stop > + ;; > + restart) > + stop > + start > + ;; > + status) > + status blkmapd > + ;; > + *) > + echo $"Usage: $0 {start|stop|restart|status}" > + exit 1 > +esac > + > +exit $RETVAL ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Add complex block layout discovery and mapping daemon 2010-08-12 13:42 ` Benny Halevy @ 2010-08-12 13:44 ` Benny Halevy 0 siblings, 0 replies; 5+ messages in thread From: Benny Halevy @ 2010-08-12 13:44 UTC (permalink / raw) To: Jim Rees; +Cc: linux-nfs On 2010-08-12 16:42, Benny Halevy wrote: > Thanks! merged. That's in git://linux-nfs.org/~bhalevy/pnfs-nfs-utils.git master > > Benny > > On Aug. 11, 2010, 22:42 +0300, Jim Rees <rees@umich.edu> wrote: >> This is a replacement for the patch I sent 21 July, incorporating feedback >> from list members. Thank you for your comments. >> >> I have tried to settle on "blkmapd" as the name and use it consistently for >> the executable, service name, syslog, etc. >> >> I did not change atomicio.c. That's because this is a copy of the file by >> the same name in both idmapd and spnfsd. There is a patch in the works to >> move this to the support library. I think the right thing to do is move >> that patch forward, then fix atomicio. >> >> Signed-off-by: Jim Rees <rees@umich.edu> >> --- >> configure.ac | 4 + >> utils/Makefile.am | 4 + >> utils/blkmapd/Makefile.am | 63 +++++ >> utils/blkmapd/atomicio.c | 54 ++++ >> utils/blkmapd/cfg.c | 248 +++++++++++++++++ >> utils/blkmapd/cfg.h | 47 +++ >> utils/blkmapd/device-discovery.c | 502 +++++++++++++++++++++++++++++++++ >> utils/blkmapd/device-discovery.h | 162 +++++++++++ >> utils/blkmapd/device-inq.c | 235 ++++++++++++++++ >> utils/blkmapd/device-process.c | 394 ++++++++++++++++++++++++++ >> utils/blkmapd/dm-device.c | 509 ++++++++++++++++++++++++++++++++++ >> utils/blkmapd/etc/blkmapd.conf | 10 + >> utils/blkmapd/etc/initd/initd.redhat | 76 +++++ >> 13 files changed, 2308 insertions(+), 0 deletions(-) >> create mode 100644 utils/blkmapd/Makefile.am >> create mode 100644 utils/blkmapd/atomicio.c >> create mode 100644 utils/blkmapd/cfg.c >> create mode 100644 utils/blkmapd/cfg.h >> create mode 100644 utils/blkmapd/device-discovery.c >> create mode 100644 
utils/blkmapd/device-discovery.h >> create mode 100644 utils/blkmapd/device-inq.c >> create mode 100644 utils/blkmapd/device-process.c >> create mode 100644 utils/blkmapd/dm-device.c >> create mode 100644 utils/blkmapd/etc/blkmapd.conf >> create mode 100644 utils/blkmapd/etc/initd/initd.redhat >> >> diff --git a/configure.ac b/configure.ac >> index 4d12715..f57cd45 100644 >> --- a/configure.ac >> +++ b/configure.ac >> @@ -64,12 +64,15 @@ AC_ARG_ENABLE(nfsv4, >> enable_nfsv4=yes) >> if test "$enable_nfsv4" = yes; then >> AC_DEFINE(NFS4_SUPPORTED, 1, [Define this if you want NFSv4 support compiled in]) >> + BLKMAPD=blkmapd >> IDMAPD=idmapd >> SPNFSD=spnfsd >> else >> enable_nfsv4= >> + BLKMAPD= >> IDMAPD= >> fi >> + AC_SUBST(BLKMAPD) >> AC_SUBST(IDMAPD) >> AC_SUBST(enable_nfsv4) >> AM_CONDITIONAL(CONFIG_NFSV4, [test "$enable_nfsv4" = "yes"]) >> @@ -429,6 +432,7 @@ AC_CONFIG_FILES([ >> tools/mountstats/Makefile >> tools/nfs-iostat/Makefile >> utils/Makefile >> + utils/blkmapd/Makefile >> utils/exportfs/Makefile >> utils/gssd/Makefile >> utils/idmapd/Makefile >> diff --git a/utils/Makefile.am b/utils/Makefile.am >> index c777d21..c33835a 100644 >> --- a/utils/Makefile.am >> +++ b/utils/Makefile.am >> @@ -10,6 +10,10 @@ if CONFIG_NFSV4 >> OPTDIRS += spnfsd >> endif >> >> +if CONFIG_NFSV4 >> +OPTDIRS += blkmapd >> +endif >> + >> if CONFIG_GSS >> OPTDIRS += gssd >> endif >> diff --git a/utils/blkmapd/Makefile.am b/utils/blkmapd/Makefile.am >> new file mode 100644 >> index 0000000..81cc420 >> --- /dev/null >> +++ b/utils/blkmapd/Makefile.am >> @@ -0,0 +1,63 @@ >> +## Process this file with automake to produce Makefile.in >> + >> +#man8_MANS = blkmapd.man >> + >> +AM_CFLAGS += -D_LARGEFILE64_SOURCE >> +KPREFIX = @kprefix@ >> +sbin_PROGRAMS = blkmapd >> + >> +blkmapd_SOURCES = \ >> + atomicio.c \ >> + cfg.c \ >> + device-discovery.c \ >> + device-inq.c \ >> + device-process.c \ >> + dm-device.c \ >> + \ >> + cfg.h \ >> + device-discovery.h >> + >> +blkmapd_LDADD = 
-ldevmapper ../../support/nfs/libnfs.a >> + >> +MAINTAINERCLEANFILES = Makefile.in >> + >> +####################################################################### >> +# The following allows the current practice of having >> +# daemons renamed during the install to include RPCPREFIX >> +# and the KPREFIX >> +# This could all be done much easier with program_transform_name >> +# ( program_transform_name = s/^/$(RPCPREFIX)$(KPREFIX)/ ) >> +# but that also renames the man pages, which the current >> +# practice does not do. >> +install-exec-hook: >> + (cd $(DESTDIR)$(sbindir) && \ >> + for p in $(sbin_PROGRAMS); do \ >> + mv -f $$p$(EXEEXT) $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\ >> + done) >> +uninstall-hook: >> + (cd $(DESTDIR)$(sbindir) && \ >> + for p in $(sbin_PROGRAMS); do \ >> + rm -f $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\ >> + done) >> + >> + >> +# XXX This makes some assumptions about what automake does. >> +# XXX But there is no install-man-hook or install-man-local. >> +install-man: install-man8 install-man-links >> +uninstall-man: uninstall-man8 uninstall-man-links >> + >> +install-man-links: >> + (cd $(DESTDIR)$(man8dir) && \ >> + for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ >> + inst=`echo $$m | sed -e 's/man$$/8/'`; \ >> + rm -f $(RPCPREFIX)$$inst ; \ >> + $(LN_S) $$inst $(RPCPREFIX)$$inst ; \ >> + done) >> + >> +uninstall-man-links: >> + (cd $(DESTDIR)$(man8dir) && \ >> + for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ >> + inst=`echo $$m | sed -e 's/man$$/8/'`; \ >> + rm -f $(RPCPREFIX)$$inst ; \ >> + done) >> + >> diff --git a/utils/blkmapd/atomicio.c b/utils/blkmapd/atomicio.c >> new file mode 100644 >> index 0000000..8db626e >> --- /dev/null >> +++ b/utils/blkmapd/atomicio.c >> @@ -0,0 +1,54 @@ >> +/* >> + * Copyright (c) 2002 Marius Aamodt Eriksen <marius@monkey.org> >> + * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved. >> + * All rights reserved. 
>> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#include <sys/types.h> >> +#include <unistd.h> >> +#include <errno.h> >> + >> +/* >> + * ensure all of data on socket comes through. 
f==read || f==write >> + */ >> +ssize_t atomicio(ssize_t(*f) (int, void *, size_t), int fd, void *_s, size_t n) >> +{ >> + char *s = _s; >> + ssize_t res, pos = 0; >> + >> + while (n > pos) { >> + res = (f) (fd, s + pos, n - pos); >> + switch (res) { >> + case -1: >> + if (errno == EINTR || errno == EAGAIN) >> + continue; >> + case 0: >> + if (pos != 0) >> + return pos; >> + return res; >> + default: >> + pos += res; >> + } >> + } >> + return pos; >> +} >> diff --git a/utils/blkmapd/cfg.c b/utils/blkmapd/cfg.c >> new file mode 100644 >> index 0000000..dab9d0f >> --- /dev/null >> +++ b/utils/blkmapd/cfg.c >> @@ -0,0 +1,248 @@ >> +/* >> + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
>> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#include <sys/param.h> >> +#include <sys/stat.h> >> +#include <linux/errno.h> >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <unistd.h> >> +#include <string.h> >> +#include <fcntl.h> >> +#include <ctype.h> >> + >> +#include "device-discovery.h" >> +#include "cfg.h" >> + >> +char *conf_path = "/etc/blkmapd.conf"; >> + >> +struct scan_root_list *scan_root_list_head; >> + >> +void bl_release_list(void) >> +{ >> + struct scan_root_list *root = scan_root_list_head; >> + struct scan_device_list *disk; >> + >> + while (root) { >> + disk = root->disk; >> + while (disk) { >> + root->disk = disk->next; >> + free(disk->name); >> + free(disk); >> + disk = root->disk; >> + } >> + scan_root_list_head = root->next; >> + free(root->name); >> + free(root); >> + root = scan_root_list_head; >> + } >> +} >> + >> +struct scan_root_list *bl_alloc_root_list(char *name, int all_disk) >> +{ >> + struct scan_root_list *root; >> + >> + root = malloc(sizeof(struct scan_root_list)); >> + if (!root) >> + goto nomem; >> + >> + root->name = strdup(name); >> + if (!root->name) >> + goto nomem; >> + root->next = scan_root_list_head; >> + root->all_disk = all_disk; >> + scan_root_list_head = root; >> + return root; >> + >> + nomem: >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + if (root) >> + free(root); >> + return NULL; >> +} >> + >> +struct scan_device_list *bl_alloc_device_list(struct scan_root_list *root, >> + char *name) >> +{ 
>> + struct scan_device_list *device; >> + >> + device = malloc(sizeof(struct scan_device_list)); >> + if (!device) >> + goto nomem; >> + >> + device->name = strdup(name); >> + if (!device->name) >> + goto nomem; >> + device->next = root->disk; >> + root->disk = device; >> + return device; >> + >> + nomem: >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + if (device) >> + free(device); >> + return NULL; >> +} >> + >> +struct scan_device_list *bl_insert_device_list(struct scan_root_list *root, >> + char *name) >> +{ >> + struct scan_device_list *device = root->disk; >> + >> + /* Check whether this device has been inserted */ >> + while (device) { >> + if (device->name && !strcmp(device->name, name)) >> + return device; >> + device = device->next; >> + } >> + >> + return bl_alloc_device_list(root, name); >> +} >> + >> +struct scan_root_list *bl_insert_root_list(char *name, int all_disk) >> +{ >> + struct scan_root_list *root = scan_root_list_head; >> + >> + /* Check whether this root has been inserted */ >> + while (root) { >> + if (root->name && !strcmp(root->name, name)) >> + return root; >> + root = root->next; >> + } >> + >> + return bl_alloc_root_list(name, all_disk); >> +} >> + >> +int bl_parse_line(char *line, struct scan_root_list **bl_root) >> +{ >> + char *root, *device, *end; >> + >> + root = strdup(line); >> + end = root + strlen(line); >> + >> + /* Skip comments */ >> + if (*root == '#') >> + return 0; >> + >> + /* Trim leading space */ >> + while (*root != '\0' && isspace(*root)) >> + root++; >> + if (*root == '\0') >> + return 0; >> + >> + /* Trim trailing space and set "end" to last char */ >> + while ((isspace(*end) || (*end == '\0')) && (end > root)) >> + end--; >> + >> + /* For lines ending with '/' or '/','*': add as a dir root */ >> + if ((*end == '/') || >> + ((*end == '*') && (end - root >= 1) && (*(end - 1) == '/'))) { >> + if (*end == '*') >> + end--; >> + if (*end == '/') >> + end--; >> + *(end + 1) = '\0'; >> + *bl_root = 
bl_insert_root_list(root, 1); >> + return 0; >> + } >> + >> + /* Other lines: add as a device */ >> + device = end; >> + while ((*device != '/') && (device > root)) >> + device--; >> + if (device == root) { >> + BL_LOG_ERR("%s: invalid config line\n", __func__); >> + return -1; >> + } >> + *device = '\0'; >> + *bl_root = bl_insert_root_list(root, 0); >> + if (*bl_root == NULL) >> + return -ENOMEM; >> + if (*end == '*') >> + end--; >> + *(end + 1) = '\0'; >> + if (bl_insert_device_list(*bl_root, device + 1) == NULL) >> + return -ENOMEM; >> + >> + return 0; >> +} >> + >> +int bl_set_default_conf(void) >> +{ >> + struct scan_root_list *root = NULL; >> + int rv; >> + >> + bl_release_list(); >> + rv = bl_parse_line("/dev/sd*", &root); >> + if (rv < 0) >> + return rv; >> + rv = bl_parse_line("/dev/mapper/", &root); >> + return rv; >> +} >> + >> +int bl_parse_conf(char *buf) >> +{ >> + char *tmp = buf, *line = buf, *end = buf + strlen(buf); >> + struct scan_root_list *bl_root = NULL; >> + int rv; >> + >> + while (tmp < end) { >> + if (*tmp == '\n') { >> + *tmp = '\0'; >> + rv = bl_parse_line(line, &bl_root); >> + if (rv < 0) >> + return rv; >> + line = tmp + 1; >> + } >> + tmp++; >> + } >> + >> + return 0; >> +} >> + >> +int bl_cfg_init(void) >> +{ >> + struct scan_root_list *root = NULL; >> + FILE *f = NULL; >> + char buf[PATH_MAX]; >> + int rv = 0; >> + >> + f = fopen(conf_path, "r"); >> + if (f == NULL) >> + rv = bl_set_default_conf(); >> + else { >> + while (fgets(buf, sizeof buf, f) != NULL) { >> + rv = bl_parse_line(buf, &root); >> + if (rv < 0) >> + break; >> + } >> + } >> + if (!scan_root_list_head) >> + rv = -EINVAL; >> + >> + if (f) >> + fclose(f); >> + return rv; >> +} >> diff --git a/utils/blkmapd/cfg.h b/utils/blkmapd/cfg.h >> new file mode 100644 >> index 0000000..b9bf930 >> --- /dev/null >> +++ b/utils/blkmapd/cfg.h >> @@ -0,0 +1,47 @@ >> +/* >> + * bl-cfg.h >> + * >> + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> >> + * All 
rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
>> + */ >> +#ifndef BL_CFG_H >> +#define BL_CFG_H >> + >> +extern char *conf_path; >> +extern struct scan_root_list *scan_root_list_head; >> + >> +struct scan_device_list { >> + struct scan_device_list *next; >> + char *name; >> +}; >> + >> +struct scan_root_list { >> + struct scan_root_list *next; >> + unsigned int all_disk; >> + char *name; >> + struct scan_device_list *disk; >> +}; >> + >> +int bl_cfg_init(void); >> + >> +#endif >> diff --git a/utils/blkmapd/device-discovery.c b/utils/blkmapd/device-discovery.c >> new file mode 100644 >> index 0000000..f42ddc8 >> --- /dev/null >> +++ b/utils/blkmapd/device-discovery.c >> @@ -0,0 +1,502 @@ >> +/* >> + * device-discovery.c: main function, discovering device and processing >> + * pipe request from kernel. >> + * >> + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
>> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#include <stdlib.h> >> +#include <stdio.h> >> +#include <string.h> >> +#include <dirent.h> >> +#include <ctype.h> >> +#include <linux/kdev_t.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <sys/ioctl.h> >> +#include <sys/mount.h> >> +#include <sys/select.h> >> +#include <fcntl.h> >> +#include <unistd.h> >> +#include <libgen.h> >> +#include <errno.h> >> +#include <scsi/scsi.h> >> +#include <scsi/scsi_ioctl.h> >> +#include <scsi/sg.h> >> +#include "device-discovery.h" >> +#include "cfg.h" >> + >> +#define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/bl_device_pipe" >> +#define PID_FILE "/var/run/blkmapd.pid" >> + >> +struct bl_disk *visible_disk_list; >> + >> +struct bl_disk_path *bl_get_path(const char *filepath, >> + struct bl_disk_path *paths) >> +{ >> + struct bl_disk_path *tmp = paths; >> + while (tmp) { >> + if (!strcmp(tmp->full_path, filepath)) >> + break; >> + tmp = tmp->next; >> + } >> + return tmp; >> +} >> + >> +/* Check whether valid_path is a substring(partition) of path */ >> +int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path) >> +{ >> + if (!strncmp(valid_path->full_path, path->full_path, >> + strlen(valid_path->full_path))) >> + return 1; >> + >> + return 0; >> +} >> + >> +/* >> + * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO, >> + * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to >> + * create pseudo device. 
So if state is higher, the device path needs to >> + * be updated. >> + * If device-mapper multipath support is a must, pseudo devices should >> + * exist for each multipath device. If not, active device path will be >> + * chosen for device creation. >> + * Treat partition as invalid path. >> + */ >> +int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state, >> + struct bl_disk *disk) >> +{ >> + struct bl_disk_path *valid_path = disk->valid_path; >> + >> + if (valid_path) { >> + if (valid_path->state >= state) { >> + if (bl_is_partition(valid_path, path)) >> + return 0; >> + } >> + } >> + return 1; >> +} >> + >> +void bl_release_disk(void) >> +{ >> + struct bl_disk *disk; >> + struct bl_disk_path *path = NULL; >> + >> + while (visible_disk_list) { >> + disk = visible_disk_list; >> + path = disk->paths; >> + while (path) { >> + disk->paths = path->next; >> + free(path->full_path); >> + free(path); >> + path = disk->paths; >> + } >> + if (disk->serial) >> + free(disk->serial); >> + visible_disk_list = disk->next; >> + free(disk); >> + } >> +} >> + >> +void bl_add_disk(char *filepath) >> +{ >> + struct bl_disk *disk = NULL; >> + int fd = 0; >> + struct stat sb; >> + off_t size = 0; >> + struct bl_serial *serial = NULL; >> + enum bl_path_state_e ap_state = BL_PATH_STATE_PASSIVE; >> + struct bl_disk_path *diskpath = NULL, *path = NULL; >> + dev_t dev; >> + >> + BL_LOG_ERR("%s: %s\n", __func__, filepath); >> + >> + fd = open(filepath, O_RDONLY | O_LARGEFILE); >> + if (fd < 0) >> + return; >> + >> + if (fstat(fd, &sb)) { >> + close(fd); >> + return; >> + } >> + >> + if (!sb.st_size) >> + ioctl(fd, BLKGETSIZE, &size); >> + else >> + size = sb.st_size; >> + >> + if (!size) { >> + close(fd); >> + return; >> + } >> + >> + dev = sb.st_rdev; >> + serial = bldev_read_serial(fd, filepath); >> + >> + for (disk = visible_disk_list; disk != NULL; disk = disk->next) { >> + /* Already scanned or a partition? 
>> + * XXX: if released each time, maybe not need to compare >> + */ >> + if ((serial->len == disk->serial->len) && >> + !memcmp(serial->data, disk->serial->data, serial->len)) { >> + diskpath = bl_get_path(filepath, disk->paths); >> + break; >> + } >> + } >> + >> + if (disk && diskpath) { >> + close(fd); >> + return; >> + } >> + >> + bldev_read_ap_state(fd, &ap_state); >> + close(fd); >> + >> + /* >> + * Not sure how to identify a pseudo device created by >> + * device-mapper, so leave /dev/mapper for now. >> + */ >> + if (strncmp(filepath, "/dev/mapper", 11) == 0) >> + ap_state = BL_PATH_STATE_PSEUDO; >> + >> + /* add path */ >> + path = malloc(sizeof(struct bl_disk_path)); >> + if (!path) { >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + goto out_err; >> + } >> + path->next = NULL; >> + path->state = ap_state; >> + path->full_path = strdup(filepath); >> + if (!path->full_path) >> + goto out_err; >> + >> + if (!disk) { /* add disk */ >> + disk = malloc(sizeof(struct bl_disk)); >> + if (!disk) { >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + goto out_err; >> + } >> + disk->next = visible_disk_list; >> + disk->dev = dev; >> + disk->size = size; >> + disk->serial = serial; >> + disk->valid_path = path; >> + disk->paths = path; >> + visible_disk_list = disk; >> + } else { >> + path->next = disk->paths; >> + disk->paths = path; >> + /* check whether we need to update disk info */ >> + if (bl_update_path(path, path->state, disk)) { >> + disk->dev = dev; >> + disk->size = size; >> + disk->valid_path = path; >> + } >> + } >> + return; >> + >> + out_err: >> + if (path) { >> + if (path->full_path) >> + free(path->full_path); >> + free(path); >> + } >> + return; >> +} >> + >> +void bl_devicescan(const char *filename, struct scan_root_list *root) >> +{ >> + /* scan all disks */ >> + char filepath[PATH_MAX]; >> + struct scan_device_list *device; >> + >> + if (!strcmp(filename, ".") || !strcmp(filename, "..")) >> + return; >> + >> + memset(filepath, 0, 
sizeof(filepath)); >> + if (strlen(filename) < (PATH_MAX - strlen(root->name) - 2)) >> + sprintf(filepath, "%s/%s", root->name, filename); >> + else { >> + BL_LOG_ERR("%s: name too long\n", __func__); >> + return; >> + } >> + if (root->all_disk) >> + goto valid; >> + >> + device = root->disk; >> + while (device) { >> + /* If device->name is a subset of filename, this disk should be >> + * valid for scanning. >> + * For example, device->name is "sd", filename is "sda". >> + */ >> + if (device->name >> + && !memcmp(filename, device->name, strlen(device->name))) >> + goto valid; >> + device = device->next; >> + } >> + >> + return; >> + >> + valid: >> + /* >> + * sg device is not a real device, but a device created according >> + * to each scsi device. It won't be used for pseudo device creation. >> + * I moved it here, so that sg devices will not be scanned. >> + */ >> + if (!strncmp(filepath, "/dev/sg", 7)) >> + return; >> + bl_add_disk(filepath); >> + return; >> +} >> + >> +int bl_discover_devices(void) >> +{ >> + DIR *dir; >> + struct dirent *dp; >> + struct scan_root_list *root = scan_root_list_head; >> + >> + /* release previous list */ >> + bl_release_disk(); >> + >> + /* scan all disks */ >> + while (root) { >> + dir = opendir(root->name); >> + if (dir == NULL) { >> + root = root->next; >> + continue; >> + } >> + >> + while ((dp = readdir(dir)) != NULL) >> + bl_devicescan(dp->d_name, root); >> + >> + root = root->next; >> + closedir(dir); >> + } >> + >> + return 0; >> +} >> + >> +/* process kernel request >> + * return 0: request processed, and no more request waiting; >> + * return 1: request processed, and more requests waiting; >> + * return < 0: error >> + */ >> +int bl_disk_inquiry_process(int fd) >> +{ >> + int ret = 0; >> + struct pipefs_hdr *head = NULL, *tmp; >> + char *buf = NULL; >> + uint32_t major, minor; >> + uint16_t buflen; >> + unsigned int len = 0; >> + >> + head = calloc(1, sizeof(struct pipefs_hdr)); >> + if (!head) { >> + BL_LOG_ERR("%s: 
Out of memory!\n", __func__); >> + return -ENOMEM; >> + } >> + >> + /* read request */ >> + if (atomicio(read, fd, head, sizeof(*head)) != sizeof(*head)) { >> + /* Note that an error in this or the next read is pretty >> + * catastrophic, as there is no good way to resync into >> + * the pipe's stream. >> + */ >> + BL_LOG_ERR("Read pipefs head error!\n"); >> + ret = -EIO; >> + goto out; >> + } >> + >> + buflen = head->totallen - sizeof(*head); >> + buf = malloc(buflen); >> + if (!buf) { >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + if (atomicio(read, fd, buf, buflen) != buflen) { >> + BL_LOG_ERR("Read pipefs content error!\n"); >> + ret = -EIO; >> + goto out; >> + } >> + >> + head->status = BL_DEVICE_REQUEST_PROC; >> + switch (head->type) { >> + case BL_DEVICE_MOUNT: >> + if (!process_deviceinfo(buf, buflen, &major, &minor)) { >> + head->status = BL_DEVICE_REQUEST_ERR; >> + goto out; >> + } >> + tmp = realloc(head, sizeof(major) + sizeof(minor) + >> + sizeof(struct pipefs_hdr)); >> + if (!tmp) { >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + ret = -ENOMEM; >> + goto out; >> + } >> + head = tmp; >> + memcpy((void *)head + sizeof(struct pipefs_hdr), >> + &major, sizeof(major)); >> + memcpy((void *)head + sizeof(struct pipefs_hdr) + sizeof(major), >> + &minor, sizeof(minor)); >> + len = sizeof(major) + sizeof(minor); >> + break; >> + case BL_DEVICE_UMOUNT: >> + if (!dm_device_remove_all((uint64_t *) buf)) >> + head->status = BL_DEVICE_REQUEST_ERR; >> + bl_discover_devices(); >> + break; >> + default: >> + head->status = BL_DEVICE_REQUEST_ERR; >> + } >> + >> + head->totallen = sizeof(struct pipefs_hdr) + len; >> + /* write to pipefs */ >> + if (atomicio((void *)write, fd, head, head->totallen) >> + != head->totallen) { >> + BL_LOG_ERR("Write pipefs error!\n"); >> + ret = -EIO; >> + } >> + >> + out: >> + if (buf) >> + free(buf); >> + if (head) >> + free(head); >> + return ret; >> +} >> + >> +/* TODO: set 
bl_process_stop to 1 in command */ >> +unsigned int bl_process_stop; >> + >> +int bl_run_disk_inquiry_process(int fd) >> +{ >> + fd_set rset; >> + struct timeval tv; >> + int ret; >> + >> + bl_process_stop = 0; >> + >> + for (;;) { >> + if (bl_process_stop) >> + return 1; >> + FD_ZERO(&rset); >> + FD_SET(fd, &rset); >> + ret = 0; >> + tv.tv_sec = BL_DEVICE_DISCOVERY_INTERVAL; >> + switch (select(fd + 1, &rset, NULL, NULL, &tv)) { >> + case -1: >> + if (errno == EINTR) >> + continue; >> + else { >> + ret = -errno; >> + goto out; >> + } >> + case 0: >> + goto out; >> + default: >> + if (FD_ISSET(fd, &rset)) >> + ret = bl_disk_inquiry_process(fd); >> + } >> + } >> + out: >> + return ret; >> +} >> + >> +/* Daemon */ >> +int main(int argc, char **argv) >> +{ >> + int fd, opt, fg = 0, ret = 1; >> + struct stat statbuf; >> + char pidbuf[64]; >> + >> + while ((opt = getopt(argc, argv, "c:f")) != -1) { >> + switch (opt) { >> + case 'c': >> + conf_path = optarg; >> + break; >> + case 'f': >> + fg = 1; >> + break; >> + } >> + } >> + >> + if (!stat(PID_FILE, &statbuf)) { >> + fprintf(stderr, "Pid file already existed\n"); >> + return -1; >> + } >> + >> + if (!fg && daemon(0, 0) != 0) { >> + fprintf(stderr, "Daemonize failed\n"); >> + return -1; >> + } >> + >> + openlog("blkmapd", LOG_PID, 0); >> + fd = open(PID_FILE, O_WRONLY | O_CREAT, 0644); >> + if (fd < 0) { >> + BL_LOG_ERR("Create pid file failed\n"); >> + return -1; >> + } >> + >> + if (lockf(fd, F_TLOCK, 0) < 0) { >> + BL_LOG_ERR("Lock pid file failed\n"); >> + close(fd); >> + return -1; >> + } >> + ftruncate(fd, 0); >> + sprintf(pidbuf, "%d\n", getpid()); >> + write(fd, pidbuf, strlen(pidbuf)); >> + >> + /* open pipe file */ >> + fd = open(BL_PIPE_FILE, O_RDWR); >> + if (fd < 0) { >> + BL_LOG_ERR("open pipe file error\n"); >> + return -1; >> + } >> + >> + ret = bl_cfg_init(); >> + if (ret < 0) { >> + if (ret == -ENOENT) >> + BL_LOG_WARNING("Config file not exist, use default\n"); >> + else { >> + BL_LOG_ERR("Open/read 
Block pNFS config file error\n"); >> + return -1; >> + } >> + } >> + >> + while (1) { >> + /* discover device when needed */ >> + bl_discover_devices(); >> + >> + ret = bl_run_disk_inquiry_process(fd); >> + if (ret < 0) { >> + /* what should we do with process error? */ >> + BL_LOG_ERR("inquiry process return %d\n", ret); >> + } >> + } >> + close(fd); >> + return ret; >> +} >> diff --git a/utils/blkmapd/device-discovery.h b/utils/blkmapd/device-discovery.h >> new file mode 100644 >> index 0000000..9f87ebe >> --- /dev/null >> +++ b/utils/blkmapd/device-discovery.h >> @@ -0,0 +1,162 @@ >> +/* >> + * bl-device-discovery.h >> + * >> + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
>> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> +#ifndef BL_DEVICE_DISCOVERY_H >> +#define BL_DEVICE_DISCOVERY_H >> + >> +#define BL_DEVICE_DISCOVERY_INTERVAL 60 >> + >> +#include <stdint.h> >> +#include <syslog.h> >> + >> +enum blk_vol_type { >> + BLOCK_VOLUME_SIMPLE = 0, /* maps to a single LU */ >> + BLOCK_VOLUME_SLICE = 1, /* slice of another volume */ >> + BLOCK_VOLUME_CONCAT = 2, /* concatenation of multiple volumes */ >> + BLOCK_VOLUME_STRIPE = 3, /* striped across multiple volumes */ >> + BLOCK_VOLUME_PSEUDO = 4, >> +}; >> + >> +/* All disk offset/lengths are stored in 512-byte sectors */ >> +struct bl_volume { >> + uint32_t bv_type; >> + off_t bv_size; >> + struct bl_volume **bv_vols; >> + int bv_vol_n; >> + union { >> + dev_t bv_dev; /*for BLOCK_VOLUME_SIMPLE(PSEUDO) */ >> + off_t bv_stripe_unit; /*for BLOCK_VOLUME_STRIPE(CONCAT) */ >> + off_t bv_offset; /*for BLOCK_VOLUME_SLICE */ >> + } param; >> +}; >> + >> +struct bl_sig_comp { >> + int64_t bs_offset; /* In bytes */ >> + uint32_t bs_length; /* In bytes */ >> + char *bs_string; >> +}; >> + >> +/* Maximum number of signatures components in a simple volume */ >> +# define BLOCK_MAX_SIG_COMP 16 >> + >> +struct bl_sig { >> + int si_num_comps; >> + struct bl_sig_comp si_comps[BLOCK_MAX_SIG_COMP]; >> +}; >> + >> +/* >> + * Multipath support: ACTIVE or PSEUDO device is valid, >> + * PASSIVE is a standby for ACTIVE. 
>> + */ >> +enum bl_path_state_e { >> + BL_PATH_STATE_PASSIVE = 1, >> + BL_PATH_STATE_ACTIVE = 2, >> + BL_PATH_STATE_PSEUDO = 3, >> +}; >> + >> +struct bl_serial { >> + int len; >> + char *data; >> +}; >> + >> +struct bl_disk_path { >> + struct bl_disk_path *next; >> + char *full_path; >> + enum bl_path_state_e state; >> +}; >> + >> +struct bl_disk { >> + struct bl_disk *next; >> + struct bl_serial *serial; >> + dev_t dev; >> + off_t size; >> + struct bl_disk_path *valid_path; >> + struct bl_disk_path *paths; >> +}; >> + >> +struct bl_dev_id { >> + unsigned char type; >> + unsigned char ids; >> + unsigned char reserve; >> + unsigned char len; >> + char data[0]; >> +}; >> + >> +struct pipefs_hdr { >> + uint32_t msgid; >> + uint8_t type; >> + uint8_t flags; >> + uint16_t totallen; /* length of entire message, including hdr */ >> + uint32_t status; >> +}; >> + >> +#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ >> +#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices */ >> +#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ >> +#define BL_DEVICE_REQUEST_PROC 0x1 /* User process succeeds */ >> +#define BL_DEVICE_REQUEST_ERR 0x2 /* User process fails */ >> + >> +uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes); >> + >> +#define BLK_READBUF(p, e, nbytes) do { \ >> + p = blk_overflow(p, e, nbytes); \ >> + if (!p) {\ >> + goto out_err;\ >> + } \ >> +} while (0) >> + >> +#define READ32(x) (x) = ntohl(*p++) >> + >> +#define READ64(x) do { \ >> + (x) = (uint64_t)ntohl(*p++) << 32; \ >> + (x) |= ntohl(*p++); \ >> +} while (0) >> + >> +#define READ_SECTOR(x) do { \ >> + READ64(tmp); \ >> + if (tmp & 0x1ff) { \ >> + goto out_err; \ >> + } \ >> + (x) = tmp >> 9; \ >> +} while (0) >> + >> +extern struct bl_disk *visible_disk_list; >> +uint64_t dm_device_create(struct bl_volume *vols, int num_vols); >> +int dm_device_remove_all(uint64_t *dev); >> +uint64_t process_deviceinfo(const char *dev_addr_buf, >> + unsigned int dev_addr_len, >> + uint32_t 
*major, uint32_t *minor); >> + >> +extern ssize_t atomicio(ssize_t(*f) (int, void *, size_t), >> + int fd, void *_s, size_t n); >> +extern struct bl_serial *bldev_read_serial(int fd, const char *filename); >> +extern void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out); >> +extern int bl_discover_devices(void); >> + >> +#define BL_LOG_WARNING(fmt...) syslog(LOG_WARNING, fmt) >> +#define BL_LOG_ERR(fmt...) syslog(LOG_ERR, fmt) >> +#define BL_LOG_DEBUG(fmt...) syslog(LOG_DEBUG, fmt) >> +#endif >> diff --git a/utils/blkmapd/device-inq.c b/utils/blkmapd/device-inq.c >> new file mode 100644 >> index 0000000..ff38fd6 >> --- /dev/null >> +++ b/utils/blkmapd/device-inq.c >> @@ -0,0 +1,235 @@ >> +/* >> + * device-inq.c: inquire SCSI device information. >> + * >> + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> >> + * All rights reserved. >> + * >> + * This program refers to "SCSI Primary Commands - 3 (SPC-3) >> + * at http://www.t10.org and sg_inq.c in sg3_utils-1.26 for >> + * Linux OS SCSI subsystem, by D. Gilbert. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
>> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> +#include <stdlib.h> >> +#include <stdio.h> >> +#include <string.h> >> +#include <dirent.h> >> +#include <ctype.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <sys/ioctl.h> >> +#include <sys/mount.h> >> +#include <sys/select.h> >> +#include <fcntl.h> >> +#include <unistd.h> >> +#include <libgen.h> >> +#include <errno.h> >> +#include <scsi/scsi.h> >> +#include <scsi/scsi_ioctl.h> >> +#include <scsi/sg.h> >> +#include "device-discovery.h" >> + >> +#define DEF_ALLOC_LEN 255 >> +#define MX_ALLOC_LEN (0xc000 + 0x80) >> + >> +struct bl_serial *bl_create_scsi_string(int len, const char *bytes) >> +{ >> + struct bl_serial *s; >> + s = malloc(sizeof(*s) + len); >> + if (s) { >> + s->data = (char *)&s[1]; >> + s->len = len; >> + memcpy(s->data, bytes, len); >> + } >> + return s; >> +} >> + >> +void bl_free_scsi_string(struct bl_serial *str) >> +{ >> + if (str) >> + free(str); >> +} >> + >> +#define sg_io_ok(io_hdr) \ >> + ((((io_hdr).status & 0x7e) == 0) && \ >> + ((io_hdr).host_status == 0) && \ >> + (((io_hdr).driver_status & 0x0f) == 0)) >> + >> +static int sg_timeout = 1 * 1000; >> + >> +static int bldev_inquire_page(int fd, int page, char *buffer, int len) >> +{ >> + unsigned char cmd[] = { INQUIRY, 0, 0, 0, 0, 0 }; >> + unsigned char sense_b[28]; >> + struct sg_io_hdr io_hdr; >> + if (page >= 0) { >> + cmd[1] = 1; >> + cmd[2] = page; >> + } >> + cmd[3] = (unsigned char)((len >> 8) & 0xff); >> + cmd[4] = 
(unsigned char)(len & 0xff); >> + >> + memset(&io_hdr, 0, sizeof(struct sg_io_hdr)); >> + io_hdr.interface_id = 'S'; >> + io_hdr.cmd_len = sizeof(cmd); >> + io_hdr.mx_sb_len = sizeof(sense_b); >> + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; >> + io_hdr.dxfer_len = len; >> + io_hdr.dxferp = buffer; >> + io_hdr.cmdp = cmd; >> + io_hdr.sbp = sense_b; >> + io_hdr.timeout = sg_timeout; >> + if (ioctl(fd, SG_IO, &io_hdr) < 0) >> + return -1; >> + >> + if (sg_io_ok(io_hdr)) >> + return 0; >> + return -1; >> +} >> + >> +int bldev_inquire_pages(int fd, int page, char **buffer) >> +{ >> + int status = 0; >> + char *tmp; >> + int len; >> + >> + *buffer = calloc(DEF_ALLOC_LEN, sizeof(char)); >> + if (!*buffer) { >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + return -ENOMEM; >> + } >> + >> + status = bldev_inquire_page(fd, page, *buffer, DEF_ALLOC_LEN); >> + if (status) >> + goto out; >> + >> + status = -1; >> + if ((*(*buffer + 1) & 0xff) != page) >> + goto out; >> + >> + len = (*(*buffer + 2) << 8) + *(*buffer + 3) + 4; >> + if (len > MX_ALLOC_LEN) { >> + BL_LOG_ERR("SCSI response length too long: %d\n", len); >> + goto out; >> + } >> + if (len > DEF_ALLOC_LEN) { >> + tmp = realloc(*buffer, len); >> + if (!tmp) { >> + BL_LOG_ERR("%s: Out of memory!\n", __func__); >> + status = -ENOMEM; >> + goto out; >> + } >> + *buffer = tmp; >> + status = bldev_inquire_page(fd, page, *buffer, len); >> + if (status) >> + goto out; >> + } >> + status = 0; >> + out: >> + return status; >> +} >> + >> +/* For EMC multipath devices, use VPD page (0xc0) to get status. 
>> + * For other devices, return ACTIVE for now >> + */ >> +void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out) >> +{ >> + int status = 0; >> + char *buffer; >> + >> + *ap_state_out = BL_PATH_STATE_ACTIVE; >> + >> + status = bldev_inquire_pages(fd, 0xc0, &buffer); >> + if (status) >> + goto out; >> + >> + if (buffer[4] < 0x02) >> + *ap_state_out = BL_PATH_STATE_PASSIVE; >> + out: >> + if (buffer) >> + free(buffer); >> + return; >> +} >> + >> +struct bl_serial *bldev_read_serial(int fd, const char *filename) >> +{ >> + struct bl_serial *serial_out = NULL; >> + int status = 0, pos, len; >> + char *buffer; >> + struct bl_dev_id *dev_root, *dev_id; >> + unsigned int current_id = 0; >> + >> + status = bldev_inquire_pages(fd, 0x83, &buffer); >> + if (status) >> + goto out; >> + >> + dev_root = (struct bl_dev_id *)buffer; >> + >> + pos = 0; >> + current_id = 0; >> + len = dev_root->len; >> + while (pos < (len - sizeof(struct bl_dev_id) + sizeof(unsigned char))) { >> + dev_id = (struct bl_dev_id *)&(dev_root->data[pos]); >> + if ((dev_id->ids & 0xf) < current_id) >> + continue; >> + switch (dev_id->ids & 0xf) { >> + /* We process SCSI ID with four ID cases: 0, 1, 2 and 3. >> + * When more than one ID is available, priority is >> + * 3>2>1>0. >> + */ >> + case 2: /* EUI-64 based */ >> + if ((dev_id->len != 8) && (dev_id->len != 12) && >> + (dev_id->len != 16)) { >> + BL_LOG_ERR("EUI-64 only decodes 8, " >> + "12 and 16\n"); >> + break; >> + } >> + case 3: /* NAA */ >> + /* TODO: NAA validity judgement too complicated, >> + * so just ingore it here. 
>> + */ >> + if ((dev_id->type & 0xf) != 1) { >> + BL_LOG_ERR("Binary code_set expected\n"); >> + break; >> + } >> + case 0: /* vendor specific */ >> + case 1: /* T10 vendor identification */ >> + current_id = dev_id->ids & 0xf; >> + if (serial_out) >> + bl_free_scsi_string(serial_out); >> + serial_out = bl_create_scsi_string(dev_id->len, >> + dev_id->data); >> + break; >> + default: >> + break; >> + } >> + if (current_id == 3) >> + break; >> + pos += (dev_id->len + sizeof(struct bl_dev_id) - >> + sizeof(unsigned char)); >> + } >> + out: >> + if (!serial_out) >> + serial_out = bl_create_scsi_string(strlen(filename), filename); >> + if (buffer) >> + free(buffer); >> + return serial_out; >> +} >> diff --git a/utils/blkmapd/device-process.c b/utils/blkmapd/device-process.c >> new file mode 100644 >> index 0000000..9e91840 >> --- /dev/null >> +++ b/utils/blkmapd/device-process.c >> @@ -0,0 +1,394 @@ >> +/* >> + * device-process.c: detailed processing of device information sent >> + * from kernel. >> + * >> + * Copyright (c) 2006 The Regents of the University of Michigan. >> + * All rights reserved. >> + * >> + * Andy Adamson <andros@citi.umich.edu> >> + * Fred Isaman <iisaman@umich.edu> >> + * >> + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> >> + * >> + * Used codes in linux/fs/nfs/blocklayout/blocklayoutdev.c. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. 
>> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#include <libdevmapper.h> >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <string.h> >> +#include <unistd.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <sys/user.h> >> +#include <fcntl.h> >> +#include <errno.h> >> +#include <arpa/inet.h> >> +#include <linux/kdev_t.h> >> +#include "device-discovery.h" >> + >> +uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes) >> +{ >> + uint32_t *q = p + ((nbytes + 3) >> 2); >> + if (q > end || q < p) >> + return NULL; >> + return p; >> +} >> + >> +static int decode_blk_signature(uint32_t **pp, uint32_t *end, >> + struct bl_sig *sig) >> +{ >> + int i, tmp; >> + uint32_t *p = *pp; >> + >> + BLK_READBUF(p, end, 4); >> + READ32(sig->si_num_comps); >> + if (sig->si_num_comps == 0) { >> + BL_LOG_ERR("0 components in sig\n"); >> + goto out_err; >> + } >> + if (sig->si_num_comps >= BLOCK_MAX_SIG_COMP) { >> + BL_LOG_ERR("number of sig comps %i >= BLOCK_MAX_SIG_COMP\n", >> + sig->si_num_comps); >> + goto out_err; >> + } >> + for (i = 0; i < sig->si_num_comps; i++) { >> + BLK_READBUF(p, end, 12); >> + READ64(sig->si_comps[i].bs_offset); >> + READ32(tmp); >> + sig->si_comps[i].bs_length = tmp; >> + 
BLK_READBUF(p, end, tmp); >> + /* Note we rely here on fact that sig is used immediately >> + * for mapping, then thrown away. >> + */ >> + sig->si_comps[i].bs_string = (char *)p; >> + BL_LOG_ERR("%s: si_comps[%d]: bs_length %d, bs_string %s\n", >> + __func__, i, sig->si_comps[i].bs_length, >> + sig->si_comps[i].bs_string); >> + p += ((tmp + 3) >> 2); >> + } >> + *pp = p; >> + return 0; >> + out_err: >> + return -EIO; >> +} >> + >> +/* Read signature from device >> + * return 0: read successfully >> + * return -1: error >> + */ >> +int >> +read_cmp_blk_sig(const char *dev_name, struct bl_sig_comp *comp, >> + int64_t bs_offset) >> +{ >> + int fd, ret = -1; >> + char *sig = NULL; >> + >> + fd = open(dev_name, O_RDONLY | O_LARGEFILE); >> + if (fd < 0) { >> + BL_LOG_ERR("%s could not be opened for read\n", dev_name); >> + goto error; >> + } >> + >> + sig = (char *)malloc(comp->bs_length); >> + if (!sig) { >> + BL_LOG_ERR("%s: Out of memory\n", __func__); >> + goto error; >> + } >> + >> + if (lseek64(fd, bs_offset, SEEK_SET) == -1) { >> + BL_LOG_ERR("File %s lseek error\n", dev_name); >> + goto error; >> + } >> + >> + if (atomicio(read, fd, sig, comp->bs_length) != comp->bs_length) { >> + BL_LOG_ERR("File %s read error\n", dev_name); >> + goto error; >> + } >> + >> + BL_LOG_ERR >> + ("%s: %s sig: %s, bs_string: %s, bs_length: %d, bs_offset: %lld\n", >> + __func__, dev_name, sig, comp->bs_string, comp->bs_length, >> + (long long)bs_offset); >> + ret = memcmp(sig, comp->bs_string, comp->bs_length); >> + >> + error: >> + if (sig) >> + free(sig); >> + if (fd >= 0) >> + close(fd); >> + return ret; >> +} >> + >> +/* >> + * All signatures in sig must be found on disk for verification. >> + * Returns True if sig matches, False otherwise. 
>> + */ >> +static int verify_sig(struct bl_disk *disk, struct bl_sig *sig) >> +{ >> + struct bl_sig_comp *comp; >> + int i, ret; >> + int64_t bs_offset; >> + >> + for (i = 0; i < sig->si_num_comps; i++) { >> + comp = &sig->si_comps[i]; >> + bs_offset = comp->bs_offset; >> + if (bs_offset < 0) >> + bs_offset += (((int64_t) disk->size) << 9); >> + BL_LOG_ERR("%s: bs_offset: %lld\n", >> + __func__, (long long) bs_offset); >> + ret = read_cmp_blk_sig(disk->valid_path->full_path, >> + comp, bs_offset); >> + if (ret) >> + return 0; >> + } >> + return 1; >> +} >> + >> +/* >> + * map_sig_to_device() >> + * Given a signature, walk the list of visible disks searching for >> + * a match. Returns True if mapping was done, False otherwise. >> + * >> + * While we're at it, fill in the vol->bv_size. >> + */ >> +static int map_sig_to_device(struct bl_sig *sig, struct bl_volume *vol) >> +{ >> + int mapped = 0; >> + struct bl_disk *disk = visible_disk_list; >> + char *filepath = 0; >> + struct bl_disk *lolDisk = disk; >> + >> + while (lolDisk) { >> + BL_LOG_ERR("%s: visible_disk_list: %s\n", __func__, >> + lolDisk->valid_path->full_path); >> + lolDisk = lolDisk->next; >> + } >> + >> + /* scan disk list to find out match device */ >> + while (disk) { >> + /* FIXME: should we use better algorithm for disk scan? */ >> + mapped = verify_sig(disk, sig); >> + if (mapped) { >> + vol->param.bv_dev = disk->dev; >> + filepath = disk->valid_path->full_path; >> + vol->bv_size = disk->size; >> + break; >> + } >> + disk = disk->next; >> + } >> + return mapped; >> +} >> + >> +/* We are given an array of XDR encoded array indices, each of which should >> + * refer to a previously decoded device. Translate into a list of pointers >> + * to the appropriate pnfs_blk_volume's. 
>> + */ >> +static int set_vol_array(uint32_t **pp, uint32_t *end, >> + struct bl_volume *vols, int working) >> +{ >> + int i, index; >> + uint32_t *p = *pp; >> + struct bl_volume **array = vols[working].bv_vols; >> + for (i = 0; i < vols[working].bv_vol_n; i++) { >> + BLK_READBUF(p, end, 4); >> + READ32(index); >> + if ((index < 0) || (index >= working)) { >> + BL_LOG_ERR("set_vol_array: Id %i out of range\n", >> + index); >> + goto out_err; >> + } >> + array[i] = &vols[index]; >> + } >> + *pp = p; >> + return 0; >> + out_err: >> + return -EIO; >> +} >> + >> +static uint64_t sum_subvolume_sizes(struct bl_volume *vol) >> +{ >> + int i; >> + uint64_t sum = 0; >> + for (i = 0; i < vol->bv_vol_n; i++) >> + sum += vol->bv_vols[i]->bv_size; >> + return sum; >> +} >> + >> +static int decode_blk_volume(uint32_t **pp, uint32_t *end, >> + struct bl_volume *vols, int i, int *array_cnt) >> +{ >> + int status = 0, j; >> + struct bl_sig sig; >> + uint32_t *p = *pp; >> + struct bl_volume *vol = &vols[i]; >> + uint64_t tmp, tmp_size; >> + div_t d; >> + >> + BLK_READBUF(p, end, 4); >> + READ32(vol->bv_type); >> + switch (vol->bv_type) { >> + case BLOCK_VOLUME_SIMPLE: >> + *array_cnt = 0; >> + status = decode_blk_signature(&p, end, &sig); >> + if (status) >> + return status; >> + status = map_sig_to_device(&sig, vol); >> + if (!status) { >> + BL_LOG_ERR("Could not find disk for device\n"); >> + return -ENXIO; >> + } >> + status = 0; >> + break; >> + case BLOCK_VOLUME_SLICE: >> + BLK_READBUF(p, end, 16); >> + READ_SECTOR(vol->param.bv_offset); >> + READ_SECTOR(vol->bv_size); >> + *array_cnt = vol->bv_vol_n = 1; >> + status = set_vol_array(&p, end, vols, i); >> + break; >> + case BLOCK_VOLUME_STRIPE: >> + BLK_READBUF(p, end, 8); >> + READ_SECTOR(vol->param.bv_stripe_unit); >> + off_t chunksize = vol->param.bv_stripe_unit; >> + if ((chunksize == 0) || >> + ((chunksize & (chunksize - 1)) != 0) || >> + (chunksize < (PAGE_SIZE >> 9))) >> + return -EIO; >> + BLK_READBUF(p, end, 4); >> + 
READ32(vol->bv_vol_n); >> + if (!vol->bv_vol_n) >> + return -EIO; >> + *array_cnt = vol->bv_vol_n; >> + status = set_vol_array(&p, end, vols, i); >> + if (status) >> + return status; >> + for (j = 1; j < vol->bv_vol_n; j++) { >> + if (vol->bv_vols[j]->bv_size != >> + vol->bv_vols[0]->bv_size) { >> + BL_LOG_ERR("varying subvol size\n"); >> + return -EIO; >> + } >> + } >> + /* Make sure total size only includes addressable areas */ >> + tmp_size = vol->bv_vols[0]->bv_size; >> + d = div(tmp_size, (uint32_t) vol->param.bv_stripe_unit); >> + tmp_size = d.quot; >> + vol->bv_size = tmp_size * vol->param.bv_stripe_unit; >> + break; >> + case BLOCK_VOLUME_CONCAT: >> + BLK_READBUF(p, end, 4); >> + READ32(vol->bv_vol_n); >> + if (!vol->bv_vol_n) >> + return -EIO; >> + *array_cnt = vol->bv_vol_n; >> + status = set_vol_array(&p, end, vols, i); >> + if (status) >> + return status; >> + vol->bv_size = sum_subvolume_sizes(vol); >> + break; >> + default: >> + BL_LOG_ERR("Unknown volume type %i\n", vol->bv_type); >> + out_err: >> + return -EIO; >> + } >> + *pp = p; >> + return status; >> +} >> + >> +uint64_t process_deviceinfo(const char *dev_addr_buf, >> + unsigned int dev_addr_len, >> + uint32_t *major, uint32_t *minor) >> +{ >> + int num_vols, i, status, count; >> + uint32_t *p, *end; >> + struct bl_volume *vols = NULL, **arrays = NULL, **arrays_ptr = NULL; >> + uint64_t dev = 0; >> + int tried = 0; >> + >> + restart: >> + p = (uint32_t *) dev_addr_buf; >> + end = (uint32_t *) ((char *)p + dev_addr_len); >> + /* Decode block volume */ >> + BLK_READBUF(p, end, 4); >> + READ32(num_vols); >> + if (num_vols <= 0) { >> + BL_LOG_WARNING("Error: number of vols: %d\n", num_vols); >> + goto out_err; >> + } >> + >> + vols = (struct bl_volume *)malloc(num_vols * sizeof(struct bl_volume)); >> + if (!vols) { >> + BL_LOG_ERR("%s: Out of memory\n", __func__); >> + goto out_err; >> + } >> + >> + /* Each volume in vols array needs its own array. 
Save time by >> + * allocating them all in one large hunk. Because each volume >> + * array can only reference previous volumes, and because once >> + * a concat or stripe references a volume, it may never be >> + * referenced again, the volume arrays are guaranteed to fit >> + * in the suprisingly small space allocated. >> + */ >> + arrays = >> + (struct bl_volume **)malloc(num_vols * 2 * >> + sizeof(struct bl_volume *)); >> + if (!arrays) { >> + BL_LOG_ERR("%s: Out of memory\n", __func__); >> + goto out_err; >> + } >> + >> + arrays_ptr = arrays; >> + >> + for (i = 0; i < num_vols; i++) { >> + vols[i].bv_vols = arrays_ptr; >> + status = decode_blk_volume(&p, end, vols, i, &count); >> + if (status == -ENXIO && (tried <= 5)) { >> + sleep(1); >> + BL_LOG_DEBUG("%s: discover again!\n", __func__); >> + bl_discover_devices(); >> + tried++; >> + free(vols); >> + free(arrays); >> + goto restart; >> + } >> + if (status) >> + goto out_err; >> + arrays_ptr += count; >> + } >> + >> + if (p != end) { >> + BL_LOG_ERR("p is not equal to end!\n"); >> + goto out_err; >> + } >> + >> + dev = dm_device_create(vols, num_vols); >> + *major = MAJOR(dev); >> + *minor = MINOR(dev); >> + out_err: >> + if (vols) >> + free(vols); >> + if (arrays) >> + free(arrays); >> + return dev; >> +} >> diff --git a/utils/blkmapd/dm-device.c b/utils/blkmapd/dm-device.c >> new file mode 100644 >> index 0000000..8162706 >> --- /dev/null >> +++ b/utils/blkmapd/dm-device.c >> @@ -0,0 +1,509 @@ >> +/* >> + * dm-device.c: create or remove device via device mapper API. >> + * >> + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. 
Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >> + */ >> +#include <libdevmapper.h> >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <string.h> >> +#include <sys/types.h> >> +#include <sys/stat.h> >> +#include <fcntl.h> >> +#include <errno.h> >> +#include <linux/kdev_t.h> >> +#include "device-discovery.h" >> + >> +#define DM_DEV_NAME_LEN 256 >> + >> +#ifndef DM_MAX_TYPE_NAME >> +#define DM_MAX_TYPE_NAME 16 >> +#endif >> + >> +#define DM_PARAMS_LEN 512 /* XXX: is this enough for target? 
*/ >> +#define DM_DIR "/dev/mapper" >> +#define DM_DIR_LEN12 >> +#define TYPE_HAS_DEV(type) ((type == BLOCK_VOLUME_SIMPLE) || \ >> + (type == BLOCK_VOLUME_PSEUDO)) >> + >> +struct bl_dm_table { >> + uint64_t offset; >> + uint64_t size; >> + char target_type[DM_MAX_TYPE_NAME]; >> + char params[DM_PARAMS_LEN]; >> + struct bl_dm_table *next; >> +}; >> + >> +struct bl_dm_tree { >> + uint64_t dev; >> + struct dm_tree *tree; >> + struct bl_dm_tree *next; >> +}; >> + >> +static inline struct bl_dm_table *bl_dm_table_alloc(void) >> +{ >> + return (struct bl_dm_table *)calloc(1, sizeof(struct bl_dm_table)); >> +} >> + >> +void bl_dm_table_free(struct bl_dm_table *bl_table_head) >> +{ >> + struct bl_dm_table *p = bl_table_head; >> + while (bl_table_head) { >> + p = bl_table_head->next; >> + free(bl_table_head); >> + bl_table_head = p; >> + } >> +} >> + >> +void add_to_bl_dm_table(struct bl_dm_table **bl_table_head, >> + struct bl_dm_table *table) >> +{ >> + struct bl_dm_table *pre; >> + if (!*bl_table_head) { >> + *bl_table_head = table; >> + return; >> + } >> + pre = *bl_table_head; >> + while (pre->next) >> + pre = pre->next; >> + pre->next = table; >> + return; >> +} >> + >> +struct bl_dm_tree *bl_tree_head; >> + >> +struct bl_dm_tree *find_bl_dm_tree(uint64_t dev) >> +{ >> + struct bl_dm_tree *p = bl_tree_head; >> + while (p) { >> + if (p->dev == dev) >> + return p; >> + p = p->next; >> + } >> + return NULL; >> +} >> + >> +void del_from_bl_dm_tree(uint64_t dev) >> +{ >> + struct bl_dm_tree *pre = bl_tree_head; >> + struct bl_dm_tree *p; >> + >> + p = pre; >> + while (p) { >> + if (p->dev == dev) { >> + pre->next = p->next; >> + if (p == bl_tree_head) >> + bl_tree_head = bl_tree_head->next; >> + free(p); >> + break; >> + } >> + pre = p; >> + p = pre->next; >> + } >> +} >> + >> +void add_to_bl_dm_tree(struct bl_dm_tree *tree) >> +{ >> + struct bl_dm_tree *pre; >> + if (!bl_tree_head) { >> + bl_tree_head = tree; >> + return; >> + } >> + pre = bl_tree_head; >> + while 
(pre->next) >> + pre = pre->next; >> + pre->next = tree; >> + return; >> +} >> + >> +/* Create device via device mapper >> + * return 0 when creation failed >> + * return dev no for created device >> + */ >> +uint64_t dm_single_device_create(const char *dev_name, struct bl_dm_table * p) >> +{ >> + struct dm_task *dmt; >> + struct dm_info dminfo; >> + int ret = 0; >> + >> + dmt = dm_task_create(DM_DEVICE_CREATE); >> + if (!dmt) { >> + BL_LOG_ERR("Create dm_task for %s failed\n", dev_name); >> + return 0; >> + } >> + ret = dm_task_set_name(dmt, dev_name); >> + if (!ret) >> + goto err_out; >> + >> + while (p) { >> + ret = dm_task_add_target(dmt, p->offset, p->size, >> + p->target_type, p->params); >> + if (!ret) >> + goto err_out; >> + p = p->next; >> + } >> + >> + ret = dm_task_run(dmt) && >> + dm_task_get_info(dmt, &dminfo) && dminfo.exists; >> + >> + if (!ret) >> + goto err_out; >> + >> + dm_task_update_nodes(); >> + >> + err_out: >> + dm_task_destroy(dmt); >> + >> + if (!ret) { >> + BL_LOG_ERR("Create device %s failed\n", dev_name); >> + return 0; >> + } >> + return MKDEV(dminfo.major, dminfo.minor); >> +} >> + >> +int dm_device_remove_byname(const char *dev_name) >> +{ >> + struct dm_task *dmt; >> + int ret = 0; >> + >> + dmt = dm_task_create(DM_DEVICE_REMOVE); >> + if (!dmt) >> + return -ENODEV; >> + >> + ret = dm_task_set_name(dmt, dev_name) && dm_task_run(dmt); >> + >> + dm_task_update_nodes(); >> + >> + if (dmt) >> + dm_task_destroy(dmt); >> + >> + return ret; >> +} >> + >> +int dm_device_remove(uint64_t dev) >> +{ >> + struct dm_task *dmt; >> + struct dm_names *dmnames; >> + char *names = NULL; >> + int ret = -1; >> + >> + /* Look for dev_name via dev, if dev_name could be transferred here, >> + we could jump to DM_DEVICE_REMOVE directly */ >> + dmt = dm_task_create(DM_DEVICE_LIST); >> + if (!dmt) { >> + BL_LOG_ERR("dm_task creation failed\n"); >> + return -ENODEV; >> + } >> + >> + ret = dm_task_run(dmt); >> + if (!ret) { >> + BL_LOG_ERR("dm_task_run 
failed\n"); >> + goto error; >> + } >> + >> + dmnames = dm_task_get_names(dmt); >> + if (!dmnames || !dmnames->dev) { >> + BL_LOG_ERR("dm_task_get_names failed\n"); >> + goto error; >> + } >> + >> + do { >> + if (dmnames->dev == dev) { >> + names = dmnames->name; >> + break; >> + } >> + dmnames = (void *)dmnames + dmnames->next; >> + } while (dmnames); >> + >> + if (!names) { >> + BL_LOG_ERR("Could not find device\n"); >> + goto error; >> + } >> + >> + dm_task_update_nodes(); >> + >> + error: >> + dm_task_destroy(dmt); >> + >> + /* Start to remove device */ >> + if (names) >> + ret = dm_device_remove_byname(names); >> + return ret; >> +} >> + >> +static unsigned long dev_count; >> + >> +void dm_devicelist_remove(unsigned long start, unsigned long end) >> +{ >> + char dev_name[DM_DEV_NAME_LEN]; >> + unsigned long count; >> + >> + if ((start >= dev_count) || (end <= 1) || (start >= end - 1)) >> + return; >> + >> + for (count = end - 1; count > start; count--) { >> + sprintf(dev_name, "pnfs_vol_%lu", count - 1); >> + dm_device_remove_byname(dev_name); >> + } >> + >> + return; >> +} >> + >> +void bl_dm_remove_tree(uint64_t dev) >> +{ >> + struct bl_dm_tree *p; >> + >> + p = find_bl_dm_tree(dev); >> + if (!p) >> + return; >> + >> + dm_tree_free(p->tree); >> + del_from_bl_dm_tree(dev); >> +} >> + >> +void bl_dm_create_tree(uint64_t dev) >> +{ >> + struct dm_tree *tree; >> + struct bl_dm_tree *bl_tree; >> + >> + bl_tree = find_bl_dm_tree(dev); >> + if (bl_tree) >> + return; /* XXX: error? 
*/ >> + >> + tree = dm_tree_create(); >> + if (!tree) >> + return; >> + >> + if (!dm_tree_add_dev(tree, MAJOR(dev), MINOR(dev))) { >> + dm_tree_free(tree); >> + return; >> + } >> + >> + bl_tree = malloc(sizeof(struct bl_dm_tree)); >> + if (!bl_tree) { >> + dm_tree_free(tree); >> + return; >> + } >> + >> + bl_tree->dev = dev; >> + bl_tree->tree = tree; >> + bl_tree->next = NULL; >> + add_to_bl_dm_tree(bl_tree); >> + >> + return; >> +} >> + >> +uint64_t dm_device_nametodev(char *dev_name) >> +{ >> + struct dm_task *dmt; >> + int ret = 0; >> + struct dm_info dminfo; >> + >> + dmt = dm_task_create(DM_DEVICE_INFO); >> + if (!dmt) >> + return -ENODEV; >> + >> + ret = dm_task_set_name(dmt, dev_name) && >> + dm_task_run(dmt) && dm_task_get_info(dmt, &dminfo); >> + >> + if (dmt) >> + dm_task_destroy(dmt); >> + >> + if (!ret) >> + return 0; >> + >> + return MKDEV(dminfo.major, dminfo.minor); >> +} >> + >> +int dm_device_remove_all(uint64_t *dev) >> +{ >> + struct bl_dm_tree *p; >> + struct dm_tree_node *node; >> + const char *uuid; >> + int ret = 0; >> + uint32_t major, minor; >> + uint64_t bl_dev; >> + >> + memcpy(&major, dev, sizeof(uint32_t)); >> + memcpy(&minor, (void *)dev + sizeof(uint32_t), sizeof(uint32_t)); >> + bl_dev = MKDEV(major, minor); >> + p = find_bl_dm_tree(bl_dev); >> + if (!p) >> + return ret; >> + >> + node = dm_tree_find_node(p->tree, MAJOR(bl_dev), MINOR(bl_dev)); >> + if (!node) >> + return ret; >> + >> + uuid = dm_tree_node_get_uuid(node); >> + if (!uuid) >> + return ret; >> + >> + dm_device_remove(bl_dev); >> + ret = dm_tree_deactivate_children(node, uuid, strlen(uuid)); >> + dm_task_update_nodes(); >> + bl_dm_remove_tree(bl_dev); >> + return ret; >> +} >> + >> +/* TODO: check the value for DM_DEV_NAME_LEN, DM_TYPE_LEN, DM_PARAMS_LEN */ >> +uint64_t dm_device_create(struct bl_volume *vols, int num_vols) >> +{ >> + uint64_t size, dev = 0; >> + unsigned long count = dev_count; >> + int number = 0, i, pos; >> + struct bl_volume *node; >> + char *tmp; 
>> + struct bl_dm_table *table = NULL; >> + struct bl_dm_table *bl_table_head = NULL; >> + unsigned int len; >> + char *dev_name = NULL; >> + /* Create pseudo device here */ >> + while (number < num_vols) { >> + node = &vols[number]; >> + switch (node->bv_type) { >> + case BLOCK_VOLUME_SIMPLE: >> + /* Do not need to create device here */ >> + dev = node->param.bv_dev; >> + goto continued; >> + case BLOCK_VOLUME_SLICE: >> + table = bl_dm_table_alloc(); >> + if (!table) >> + goto out; >> + table->offset = 0; >> + table->size = node->bv_size; >> + strcpy(table->target_type, "linear"); >> + if (!TYPE_HAS_DEV(node->bv_vols[0]->bv_type)) { >> + free(table); >> + goto out; >> + } >> + dev = node->bv_vols[0]->param.bv_dev; >> + tmp = table->params; >> + if (!dm_format_dev(tmp, DM_PARAMS_LEN, >> + MAJOR(dev), MINOR(dev))) { >> + free(table); >> + goto out; >> + } >> + tmp += strlen(tmp); >> + sprintf(tmp, " %lu", node->param.bv_offset); >> + add_to_bl_dm_table(&bl_table_head, table); >> + break; >> + case BLOCK_VOLUME_STRIPE: >> + table = bl_dm_table_alloc(); >> + if (!table) >> + goto out; >> + table->offset = 0; >> + table->size = node->bv_size; >> + strcpy(table->target_type, "striped"); >> + sprintf(table->params, "%d %lu %n", node->bv_vol_n, >> + node->param.bv_stripe_unit, &pos); >> + /* Repeatedly copy subdev to params */ >> + tmp = table->params + pos; >> + len = DM_PARAMS_LEN - pos; >> + for (i = 0; i < node->bv_vol_n; i++) { >> + if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) { >> + free(table); >> + goto out; >> + } >> + dev = node->bv_vols[i]->param.bv_dev; >> + if (!dm_format_dev(tmp, len, MAJOR(dev), >> + MINOR(dev))) { >> + free(table); >> + goto out; >> + } >> + pos = strlen(tmp); >> + tmp += pos; >> + len -= pos; >> + sprintf(tmp, " %d ", 0); >> + tmp += 3; >> + len -= 3; >> + } >> + add_to_bl_dm_table(&bl_table_head, table); >> + break; >> + case BLOCK_VOLUME_CONCAT: >> + size = 0; >> + for (i = 0; i < node->bv_vol_n; i++) { >> + table = 
bl_dm_table_alloc(); >> + if (!table) >> + goto out; >> + table->offset = size; >> + table->size = node->bv_vols[i]->bv_size; >> + if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) { >> + free(table); >> + goto out; >> + } >> + strcpy(table->target_type, "linear"); >> + tmp = table->params; >> + dev = node->bv_vols[i]->param.bv_dev; >> + if (!dm_format_dev(tmp, DM_PARAMS_LEN, >> + MAJOR(dev), MINOR(dev))) { >> + free(table); >> + goto out; >> + } >> + tmp += strlen(tmp); >> + sprintf(tmp, " %d", 0); >> + size += table->size; >> + add_to_bl_dm_table(&bl_table_head, table); >> + } >> + break; >> + default: >> + /* Delete previous temporary devices */ >> + dm_devicelist_remove(count, dev_count); >> + goto out; >> + } /* end of swtich */ >> + /* Create dev_name here. Name of device is pnfs_vol_XXX */ >> + if (dev_name) >> + free(dev_name); >> + dev_name = (char *)calloc(DM_DEV_NAME_LEN, sizeof(char)); >> + if (!dev_name) { >> + BL_LOG_ERR("%s: Out of memory\n", __func__); >> + goto out; >> + } >> + sprintf(dev_name, "pnfs_vol_%lu", dev_count++); >> + >> + dev = dm_single_device_create(dev_name, bl_table_head); >> + if (!dev) { >> + /* Delete previous temporary devices */ >> + dm_devicelist_remove(count, dev_count); >> + goto out; >> + } >> + node->param.bv_dev = dev; >> + /* TODO: extend use with PSEUDO later */ >> + node->bv_type = BLOCK_VOLUME_PSEUDO; >> + continued: >> + number++; >> + if (bl_table_head) >> + bl_dm_table_free(bl_table_head); >> + bl_table_head = NULL; >> + } >> + out: >> + if (bl_table_head) >> + bl_dm_table_free(bl_table_head); >> + bl_table_head = NULL; >> + if (dev) >> + bl_dm_create_tree(dev); >> + if (dev_name) >> + free(dev_name); >> + return dev; >> +} >> diff --git a/utils/blkmapd/etc/blkmapd.conf b/utils/blkmapd/etc/blkmapd.conf >> new file mode 100644 >> index 0000000..da70d94 >> --- /dev/null >> +++ b/utils/blkmapd/etc/blkmapd.conf >> @@ -0,0 +1,10 @@ >> +# This is an example config file >> + >> +# Look at all /dev/sd* devices >> +# /dev/sd 
or /dev/sd* >> +/dev/sd* >> + >> +# Look at all /dev/mapper/* devices >> +# /dev/mapper/* or >> +# /dev/mapper/ >> +/dev/mapper/* >> diff --git a/utils/blkmapd/etc/initd/initd.redhat b/utils/blkmapd/etc/initd/initd.redhat >> new file mode 100644 >> index 0000000..d6a77e8 >> --- /dev/null >> +++ b/utils/blkmapd/etc/initd/initd.redhat >> @@ -0,0 +1,76 @@ >> +#!/bin/sh >> +# >> +# description: Starts and stops the iSCSI initiator >> +# >> +# processname: blkmapd >> +# pidfile: /var/run/blkmapd.pid >> +# config: /etc/blkmapd.conf >> + >> +# Source function library. >> +if [ -f /etc/init.d/functions ] ; then >> + . /etc/init.d/functions >> +elif [ -f /etc/rc.d/init.d/functions ] ; then >> + . /etc/rc.d/init.d/functions >> +else >> + exit 0 >> +fi >> + >> +PATH=/sbin:/bin:/usr/sbin:/usr/bin >> + >> +RETVAL=0 >> + >> +start() >> +{ >> + echo -n $"Starting pNFS block-layout device discovery service: " >> + modprobe -q blocklayoutdriver >> + daemon /usr/sbin/blkmapd >> + RETVAL=$? >> + if [ $RETVAL -eq 0 ]; then >> + touch /var/lock/subsys/blkmapd >> + fi >> + echo >> + return $RETVAL >> +} >> + >> +stop() >> +{ >> + echo -n $"Stopping pNFS block-layout device discovery service: " >> + killproc blkmapd 2> /dev/null >> + rm -f /var/run/blkmapd.pid >> + RETVAL=$? 
>> + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/blkmapd >> + if [ $RETVAL -eq 0 ]; then >> + echo_success >> + else >> + echo_failure >> + fi >> + echo >> + return $RETVAL >> +} >> + >> +restart() >> +{ >> + stop >> + start >> +} >> + >> +case "$1" in >> + start) >> + start >> + ;; >> + stop) >> + stop >> + ;; >> + restart) >> + stop >> + start >> + ;; >> + status) >> + status blkmapd >> + ;; >> + *) >> + echo $"Usage: $0 {start|stop|restart|status}" >> + exit 1 >> +esac >> + >> +exit $RETVAL > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] Add complex block layout discovery and mapping daemon
@ 2010-07-21 22:31 Jim Rees
[not found] ` <20100721223119.GA6618-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Jim Rees @ 2010-07-21 22:31 UTC (permalink / raw)
To: bhalevy; +Cc: linux-nfs
Signed-off-by: Haiying Tang <Tang_Haiying@emc.com>
Signed-off-by: Eric Anderle <eanderle@umich.edu>
Signed-off-by: Jim Rees <rees@umich.edu>
---
configure.ac | 4 +
utils/Makefile.am | 4 +
utils/blkmapd/Makefile.am | 63 ++++
utils/blkmapd/atomicio.c | 58 ++++
utils/blkmapd/cfg.c | 272 +++++++++++++++++
utils/blkmapd/cfg.h | 48 +++
utils/blkmapd/device-discovery.c | 542 ++++++++++++++++++++++++++++++++++
utils/blkmapd/device-discovery.h | 162 ++++++++++
utils/blkmapd/device-inq.c | 235 +++++++++++++++
utils/blkmapd/device-process.c | 391 ++++++++++++++++++++++++
utils/blkmapd/dm-device.c | 509 +++++++++++++++++++++++++++++++
utils/blkmapd/etc/initd/initd.redhat | 76 +++++
utils/blkmapd/etc/pnfs-block.conf | 10 +
13 files changed, 2374 insertions(+), 0 deletions(-)
create mode 100644 utils/blkmapd/Makefile.am
create mode 100644 utils/blkmapd/atomicio.c
create mode 100644 utils/blkmapd/cfg.c
create mode 100644 utils/blkmapd/cfg.h
create mode 100644 utils/blkmapd/device-discovery.c
create mode 100644 utils/blkmapd/device-discovery.h
create mode 100644 utils/blkmapd/device-inq.c
create mode 100644 utils/blkmapd/device-process.c
create mode 100644 utils/blkmapd/dm-device.c
create mode 100644 utils/blkmapd/etc/initd/initd.redhat
create mode 100644 utils/blkmapd/etc/pnfs-block.conf
diff --git a/configure.ac b/configure.ac
index 4d12715..f57cd45 100644
--- a/configure.ac
+++ b/configure.ac
@@ -64,12 +64,15 @@ AC_ARG_ENABLE(nfsv4,
enable_nfsv4=yes)
if test "$enable_nfsv4" = yes; then
AC_DEFINE(NFS4_SUPPORTED, 1, [Define this if you want NFSv4 support compiled in])
+ BLKMAPD=blkmapd
IDMAPD=idmapd
SPNFSD=spnfsd
else
enable_nfsv4=
+ BLKMAPD=
IDMAPD=
fi
+ AC_SUBST(BLKMAPD)
AC_SUBST(IDMAPD)
AC_SUBST(enable_nfsv4)
AM_CONDITIONAL(CONFIG_NFSV4, [test "$enable_nfsv4" = "yes"])
@@ -429,6 +432,7 @@ AC_CONFIG_FILES([
tools/mountstats/Makefile
tools/nfs-iostat/Makefile
utils/Makefile
+ utils/blkmapd/Makefile
utils/exportfs/Makefile
utils/gssd/Makefile
utils/idmapd/Makefile
diff --git a/utils/Makefile.am b/utils/Makefile.am
index c777d21..c33835a 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -10,6 +10,10 @@ if CONFIG_NFSV4
OPTDIRS += spnfsd
endif
+if CONFIG_NFSV4
+OPTDIRS += blkmapd
+endif
+
if CONFIG_GSS
OPTDIRS += gssd
endif
diff --git a/utils/blkmapd/Makefile.am b/utils/blkmapd/Makefile.am
new file mode 100644
index 0000000..e8c9fc0
--- /dev/null
+++ b/utils/blkmapd/Makefile.am
@@ -0,0 +1,63 @@
+## Process this file with automake to produce Makefile.in
+
+#man8_MANS = blkmapd.man
+
+# RPCPREFIX and KPREFIX are prepended to the installed program name by
+# install-exec-hook below (blkmapd -> $(RPCPREFIX)$(KPREFIX)blkmapd).
+RPCPREFIX = rpc.
+KPREFIX = @kprefix@
+sbin_PROGRAMS = blkmapd
+
+blkmapd_SOURCES = \
+ atomicio.c \
+ cfg.c \
+ device-discovery.c \
+ device-inq.c \
+ device-process.c \
+ dm-device.c \
+ \
+ cfg.h \
+ device-discovery.h
+
+# Link against libdevmapper and the in-tree NFS support library.
+blkmapd_LDADD = -ldevmapper ../../support/nfs/libnfs.a
+
+MAINTAINERCLEANFILES = Makefile.in
+
+#######################################################################
+# The following allows the current practice of having
+# daemons renamed during the install to include RPCPREFIX
+# and the KPREFIX
+# This could all be done much easier with program_transform_name
+# ( program_transform_name = s/^/$(RPCPREFIX)$(KPREFIX)/ )
+# but that also renames the man pages, which the current
+# practice does not do.
+install-exec-hook:
+ (cd $(DESTDIR)$(sbindir) && \
+ for p in $(sbin_PROGRAMS); do \
+ mv -f $$p$(EXEEXT) $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\
+ done)
+uninstall-hook:
+ (cd $(DESTDIR)$(sbindir) && \
+ for p in $(sbin_PROGRAMS); do \
+ rm -f $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\
+ done)
+
+
+# XXX This makes some assumptions about what automake does.
+# XXX But there is no install-man-hook or install-man-local.
+# NOTE(review): man8_MANS is commented out above, so the man page loops
+# below iterate over nothing; confirm that automake still generates the
+# install-man8/uninstall-man8 targets these rules depend on in that case.
+install-man: install-man8 install-man-links
+uninstall-man: uninstall-man8 uninstall-man-links
+
+# For every section 8 man page, create an $(RPCPREFIX)-prefixed symlink
+# next to the installed page (foo.man is installed as foo.8).
+install-man-links:
+ (cd $(DESTDIR)$(man8dir) && \
+ for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \
+ inst=`echo $$m | sed -e 's/man$$/8/'`; \
+ rm -f $(RPCPREFIX)$$inst ; \
+ $(LN_S) $$inst $(RPCPREFIX)$$inst ; \
+ done)
+
+# Remove the symlinks created by install-man-links.
+uninstall-man-links:
+ (cd $(DESTDIR)$(man8dir) && \
+ for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \
+ inst=`echo $$m | sed -e 's/man$$/8/'`; \
+ rm -f $(RPCPREFIX)$$inst ; \
+ done)
+
diff --git a/utils/blkmapd/atomicio.c b/utils/blkmapd/atomicio.c
new file mode 100644
index 0000000..3c3c864
--- /dev/null
+++ b/utils/blkmapd/atomicio.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2002 Marius Aamodt Eriksen <marius@monkey.org>
+ * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+/*
+ * ensure all of data on socket comes through. f==read || f==write
+ *
+ * Calls f repeatedly on fd until n bytes starting at _s have been
+ * transferred.  EINTR and EAGAIN are retried.  On any other error, or
+ * when f returns 0, the number of bytes transferred so far is returned
+ * if it is non-zero; otherwise the result of f (-1 or 0) is returned.
+ */
+ssize_t atomicio(ssize_t(*f) (int, void *, size_t), int fd, void *_s, size_t n)
+{
+ char *s = _s;
+ ssize_t res, pos = 0;
+
+ while (n > pos) {
+ res = (f) (fd, s + pos, n - pos);
+ switch (res) {
+ case -1:
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
+ /* FALLTHROUGH: a hard error is handled like end-of-file */
+ case 0:
+ if (pos != 0)
+ return pos;
+ return res;
+ default:
+ pos += res;
+ }
+ }
+ return pos;
+}
diff --git a/utils/blkmapd/cfg.c b/utils/blkmapd/cfg.c
new file mode 100644
index 0000000..b303352
--- /dev/null
+++ b/utils/blkmapd/cfg.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "device-discovery.h"
+#include "cfg.h"
+
+struct scan_root_list *scan_root_list_head;
+
+/*
+ * Free every scan root, together with the device entries hanging off it,
+ * and leave scan_root_list_head empty.
+ */
+void bl_release_list(void)
+{
+	struct scan_root_list *cur_root, *next_root;
+	struct scan_device_list *cur_disk, *next_disk;
+
+	for (cur_root = scan_root_list_head; cur_root; cur_root = next_root) {
+		/* Release the devices of this root first */
+		for (cur_disk = cur_root->disk; cur_disk; cur_disk = next_disk) {
+			next_disk = cur_disk->next;
+			free(cur_disk->name);
+			free(cur_disk);
+		}
+		/* Remember the successor before the root itself goes away */
+		next_root = cur_root->next;
+		free(cur_root->name);
+		free(cur_root);
+	}
+	scan_root_list_head = NULL;
+}
+
+/*
+ * Allocate a scan root named by the first len bytes of name and push it
+ * onto the front of scan_root_list_head.  The new root starts with an
+ * empty device list and all_disk cleared.
+ *
+ * Returns the new root, or NULL if memory could not be allocated.
+ */
+struct scan_root_list *bl_alloc_root_list(char *name, unsigned int len)
+{
+	struct scan_root_list *root;
+
+	root = malloc(sizeof(struct scan_root_list));
+	if (!root) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		return NULL;
+	}
+
+	root->name = malloc(len + 1);
+	if (!root->name) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		free(root);
+		return NULL;
+	}
+	strncpy(root->name, name, len);
+	root->name[len] = '\0';
+	root->all_disk = 0;
+	/*
+	 * malloc() does not zero memory.  The device list is walked by
+	 * bl_release_list() and extended by bl_alloc_device_list(), so it
+	 * must start out as a properly terminated (empty) list.
+	 */
+	root->disk = NULL;
+	root->next = scan_root_list_head;
+	scan_root_list_head = root;
+
+	return root;
+}
+
+/*
+ * Create a device entry named by the first len bytes of name and push it
+ * onto the front of root's device list.  On allocation failure an error
+ * is logged and the list is left untouched.
+ */
+void bl_alloc_device_list(struct scan_root_list *root, char *name,
+			  unsigned int len)
+{
+	struct scan_device_list *entry;
+
+	entry = malloc(sizeof(struct scan_device_list));
+	if (entry == NULL) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		return;
+	}
+
+	entry->name = malloc(len + 1);
+	if (entry->name == NULL) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		free(entry);
+		return;
+	}
+
+	/* Copy at most len bytes and terminate the copy explicitly */
+	strncpy(entry->name, name, len);
+	entry->name[len] = '\0';
+
+	entry->next = root->disk;
+	root->disk = entry;
+}
+
+/*
+ * Replace the current scan configuration with the built-in default:
+ * devices under /dev whose name starts with "sd", plus everything
+ * under /dev/mapper.
+ */
+void bl_set_default_conf(void)
+{
+	struct scan_root_list *dev_root, *mapper_root;
+
+	bl_release_list();
+
+	dev_root = bl_alloc_root_list("/dev", 4);
+	if (dev_root != NULL)
+		bl_alloc_device_list(dev_root, "sd", 2);
+
+	mapper_root = bl_alloc_root_list("/dev/mapper", 11);
+	if (mapper_root != NULL)
+		mapper_root->all_disk = 1;
+}
+
+/*
+ * Add the device named by the first len bytes of name to root's device
+ * list, unless an entry with exactly that name is already present.
+ *
+ * name points into the configuration line and is NOT NUL-terminated at
+ * len (it may be followed by '*' or trailing blanks), so the comparison
+ * has to be bounded by len rather than done with strcmp().
+ */
+void bl_insert_device_list(struct scan_root_list *root, char *name,
+			   unsigned int len)
+{
+	struct scan_device_list *device;
+
+	/* Check whether this device has been inserted */
+	for (device = root->disk; device; device = device->next) {
+		if (device->name && (strlen(device->name) == len) &&
+		    !strncmp(device->name, name, len))
+			return;
+	}
+
+	bl_alloc_device_list(root, name, len);
+}
+
+/*
+ * Return the scan root named by the first len bytes of name, creating
+ * it if it does not exist yet.  Returns NULL if a new root could not be
+ * allocated.
+ *
+ * name points into the configuration line and is NOT NUL-terminated at
+ * len (the device part of the line follows it), so the comparison has
+ * to be bounded by len; a plain strcmp() never matches and would create
+ * a duplicate root for every device line.
+ */
+struct scan_root_list *bl_insert_root_list(char *name, unsigned int len)
+{
+	struct scan_root_list *root;
+
+	/* Check whether this root has been inserted */
+	for (root = scan_root_list_head; root; root = root->next) {
+		if ((strlen(root->name) == len) &&
+		    !strncmp(root->name, name, len))
+			return root;
+	}
+
+	return bl_alloc_root_list(name, len);
+}
+
+/*
+ * Parse one configuration line of len bytes.
+ *
+ * Blank lines and lines whose first non-blank character is '#' are
+ * ignored.  A line ending in "/" or "/" followed by "*" names a directory
+ * whose entries are all scanned.  Any other line is split at its last
+ * '/' into a directory (scan root) and a device name prefix; a trailing
+ * '*' on the device name is dropped.
+ *
+ * *bl_root is set to the scan root the line refers to (NULL if it could
+ * not be allocated); it is left untouched for ignored lines.
+ */
+void bl_parse_line(char *line, size_t len, struct scan_root_list **bl_root)
+{
+	char *root;
+	char *device;
+	char *end;
+
+	/* Skip leading blanks; test the bound before dereferencing */
+	root = line;
+	while ((root < line + len) && ((*root == ' ') || (*root == '\t')))
+		root++;
+
+	/* Nothing left, or a (possibly indented) comment */
+	if ((root == line + len) || (*root == '#'))
+		return;
+
+	/*
+	 * Find the last significant character.  root < line + len holds
+	 * here, so starting at the last byte of the line is safe and does
+	 * not depend on what the caller stored at line[len].
+	 */
+	end = line + len - 1;
+	while ((end > root) &&
+	       ((*end == '\n') || (*end == ' ') || (*end == '\t') ||
+		(*end == '\0')))
+		end--;
+
+	/* Lines ending with "/" or "/" "*": add as a directory root */
+	if ((*end == '/') ||
+	    ((*end == '*') && (end - root >= 1) && (*(end - 1) == '/'))) {
+		if (*end == '*')
+			end--;
+		*bl_root = bl_insert_root_list(root, end - root + 1);
+		if (*bl_root)
+			(*bl_root)->all_disk = 1;
+		return;
+	}
+
+	/* Other lines: add as a device below the directory before the last '/' */
+	device = end;
+	while ((*device != '/') && (device > root))
+		device--;
+	if (device == root)
+		return;
+	*bl_root = bl_insert_root_list(root, device - root + 1);
+	if (*end == '*')
+		end--;
+	if (*bl_root)
+		bl_insert_device_list(*bl_root, device + 1, end - device);
+}
+
+/*
+ * Split the size bytes at buf into lines and hand each one to
+ * bl_parse_line().  Newlines in buf are overwritten with '\0'.
+ *
+ * A final line that is not terminated by a newline is parsed as well.
+ * buf is not required to have room for a terminator after its last
+ * byte, so that line is copied into a NUL-terminated scratch buffer
+ * first.
+ */
+void bl_parse_conf(char *buf, size_t size)
+{
+	char *tmp = buf, *line = buf, *end = buf + size;
+	struct scan_root_list *bl_root = NULL;
+	char *last;
+	size_t left;
+
+	while (tmp < end) {
+		if (*tmp == '\n') {
+			*tmp = '\0';
+			bl_parse_line(line, tmp - line, &bl_root);
+			line = tmp + 1;
+		}
+		tmp++;
+	}
+
+	/* Trailing text without a newline would otherwise be dropped */
+	left = end - line;
+	if (left > 0) {
+		last = malloc(left + 1);
+		if (!last) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			return;
+		}
+		memcpy(last, line, left);
+		last[left] = '\0';
+		bl_parse_line(last, left, &bl_root);
+		free(last);
+	}
+}
+
+/*
+ * Load the scan configuration from bl_conf_path.
+ *
+ * Returns 0 when the file was read and produced at least one scan root,
+ * -ENOENT when the file could not be stat()ed (the built-in default
+ * configuration is installed in that case), -ENOMEM when the read buffer
+ * could not be allocated, -EINVAL when the file yielded no scan root,
+ * and -EPERM when the file is empty or cannot be opened or read.
+ */
+int bl_cfg_init(void)
+{
+	struct stat st;
+	size_t nbytes;
+	char *data;
+	int cfg_fd;
+	int status;
+
+	if (stat(bl_conf_path, &st) != 0) {
+		bl_set_default_conf();
+		return -ENOENT;
+	}
+
+	nbytes = st.st_size;
+	if (nbytes == 0)
+		return -EPERM;
+
+	cfg_fd = open(bl_conf_path, O_RDONLY, 0);
+	if (cfg_fd == -1) {
+		BL_LOG_ERR("File %s open failed\n", bl_conf_path);
+		return -EPERM;
+	}
+
+	data = calloc(nbytes, sizeof(char));
+	if (data == NULL) {
+		close(cfg_fd);
+		BL_LOG_ERR("%s: Out of memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	if (atomicio(read, cfg_fd, data, nbytes) != nbytes) {
+		close(cfg_fd);
+		BL_LOG_ERR("Read file %s failed\n", bl_conf_path);
+		free(data);
+		return -EPERM;
+	}
+	close(cfg_fd);
+
+	bl_parse_conf(data, nbytes);
+	status = scan_root_list_head ? 0 : -EINVAL;
+
+	free(data);
+	return status;
+}
diff --git a/utils/blkmapd/cfg.h b/utils/blkmapd/cfg.h
new file mode 100644
index 0000000..8d7bcf4
--- /dev/null
+++ b/utils/blkmapd/cfg.h
@@ -0,0 +1,48 @@
+/*
+ * bl-cfg.h
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef BL_CFG_H
+#define BL_CFG_H
+
+#define bl_conf_path "/etc/pnfs-block.conf"
+
+extern struct scan_root_list *scan_root_list_head;
+
+/* One device name prefix (e.g. "sd") to look for below a scan root. */
+struct scan_device_list {
+ struct scan_device_list *next;
+ /* heap-allocated, NUL-terminated name prefix */
+ char *name;
+};
+
+/* One directory to scan for block devices. */
+struct scan_root_list {
+ struct scan_root_list *next;
+ /* non-zero: consider every entry of the directory, ignoring 'disk' */
+ unsigned int all_disk;
+ /* heap-allocated, NUL-terminated directory path */
+ char *name;
+ /* device name prefixes accepted when all_disk is 0 */
+ struct scan_device_list *disk;
+};
+
+int bl_cfg_init(void);
+
+#endif
diff --git a/utils/blkmapd/device-discovery.c b/utils/blkmapd/device-discovery.c
new file mode 100644
index 0000000..79cb2b5
--- /dev/null
+++ b/utils/blkmapd/device-discovery.c
@@ -0,0 +1,542 @@
+/*
+ * device-discovery.c: main function, discovering device and processing
+ * pipe request from kernel.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _LARGEFILE64_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <linux/kdev_t.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <errno.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+#include "device-discovery.h"
+#include "cfg.h"
+
+#define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/bl_device_pipe"
+#define PID_FILE "/var/run/pnfs-block.pid"
+
+struct bl_disk *visible_disk_list;
+
+/*
+ * Look up filepath in the path list starting at paths.
+ * Returns the entry whose full_path equals filepath, or NULL if there
+ * is none.
+ */
+struct bl_disk_path *bl_get_path(const char *filepath,
+				 struct bl_disk_path *paths)
+{
+	struct bl_disk_path *cur;
+
+	for (cur = paths; cur != NULL; cur = cur->next) {
+		if (strcmp(cur->full_path, filepath) == 0)
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Return 1 if valid_path's full_path is a leading substring of path's
+ * full_path (i.e. path looks like a partition of valid_path), else 0.
+ */
+int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path)
+{
+	size_t prefix_len = strlen(valid_path->full_path);
+
+	return strncmp(valid_path->full_path, path->full_path,
+		       prefix_len) == 0 ? 1 : 0;
+}
+
+/*
+ * Decide whether a newly found path should replace disk->valid_path.
+ *
+ * For multipath devices, the device state can be PASSIVE/ACTIVE/PSEUDO,
+ * where PSEUDO > ACTIVE > PASSIVE.  The device with the highest state is
+ * used to create the pseudo device, so a path with a higher state has to
+ * take over.
+ * If device-mapper multipath support is a must, pseudo devices should
+ * exist for each multipath device.  If not, the active device path will
+ * be chosen for device creation.
+ * A partition of the current valid path is treated as an invalid path.
+ *
+ * Returns 0 when the current valid path is kept, 1 when the disk should
+ * be updated to use the new path.
+ */
+int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state,
+		   struct bl_disk *disk)
+{
+	struct bl_disk_path *current = disk->valid_path;
+
+	/* No valid path yet: the new one wins */
+	if (current == NULL)
+		return 1;
+
+	/* New path has a strictly higher state: it wins */
+	if (current->state < state)
+		return 1;
+
+	/* Same or lower state: keep the old path only for partitions of it */
+	return bl_is_partition(current, path) ? 0 : 1;
+}
+
+/*
+ * Free every disk on visible_disk_list, including its paths and serial
+ * number, and reset the list to empty.
+ */
+void bl_release_disk(void)
+{
+	struct bl_disk *cur, *next_disk;
+	struct bl_disk_path *p, *next_path;
+
+	for (cur = visible_disk_list; cur != NULL; cur = next_disk) {
+		for (p = cur->paths; p != NULL; p = next_path) {
+			next_path = p->next;
+			free(p->full_path);
+			free(p);
+		}
+		if (cur->serial != NULL)
+			free(cur->serial);
+		/* Grab the successor before the node is released */
+		next_disk = cur->next;
+		free(cur);
+	}
+
+	visible_disk_list = NULL;
+}
+
+/*
+ * Probe the block device at filepath and record it on visible_disk_list.
+ *
+ * Disks are identified by their serial number.  If a disk with the same
+ * serial is already known, filepath is added as another path to it and
+ * may become its valid path (see bl_update_path()); otherwise a new disk
+ * entry is created.  Devices that cannot be opened, report a size of 0
+ * or yield no serial number are ignored, as are paths that have already
+ * been recorded.
+ *
+ * The serial returned by bldev_read_serial() is owned by this function
+ * until it is stored in a new disk entry; on every other path it is
+ * released here with free(), as the disk teardown code does.
+ */
+void bl_add_disk(char *filepath)
+{
+	struct bl_disk *disk = NULL;
+	struct bl_disk *tmp = visible_disk_list;
+	int fd = 0;
+	struct stat sb;
+	off_t size = 0;
+	struct bl_serial *serial = NULL;
+	enum bl_path_state_e ap_state = BL_PATH_STATE_PASSIVE;
+	struct bl_disk_path *diskpath = NULL, *path = NULL;
+	dev_t dev;
+
+	fd = open(filepath, O_RDONLY | O_LARGEFILE);
+	if (fd < 0)
+		return;
+
+	if (fstat(fd, &sb)) {
+		close(fd);
+		return;
+	}
+
+	/*
+	 * NOTE(review): BLKGETSIZE reports 512-byte sectors while st_size
+	 * is in bytes -- confirm which unit users of disk->size expect.
+	 */
+	if (!sb.st_size)
+		ioctl(fd, BLKGETSIZE, &size);
+	else
+		size = sb.st_size;
+
+	if (!size) {
+		close(fd);
+		return;
+	}
+
+	dev = sb.st_rdev;
+
+	serial = bldev_read_serial(fd, filepath);
+	if (!serial) {
+		/* Without a serial the disk cannot be identified */
+		close(fd);
+		return;
+	}
+
+	while (tmp) {
+		/*Already scanned or a partition?
+		 *XXX: if released each time, maybe not need to compare
+		 */
+		if (tmp->serial && (serial->len == tmp->serial->len) &&
+		    (memcmp(serial->data, tmp->serial->data, serial->len) ==
+		     0)) {
+			diskpath = bl_get_path(filepath, tmp->paths);
+			break;
+		}
+		tmp = tmp->next;
+	}
+
+	if (tmp && diskpath) {
+		/* This exact path is already recorded */
+		close(fd);
+		free(serial);
+		return;
+	}
+
+	bldev_read_ap_state(fd, &ap_state);
+	close(fd);
+
+	/*
+	 * Not sure how to identify a pseudo device created by
+	 * device-mapper, so leave /dev/mapper for now.
+	 */
+	if (strncmp(filepath, "/dev/mapper", 11) == 0)
+		ap_state = BL_PATH_STATE_PSEUDO;
+
+	/*add path */
+	path = malloc(sizeof(struct bl_disk_path));
+	if (!path) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		goto out_err;
+	}
+	path->next = NULL;
+	path->state = ap_state;
+	path->full_path = strdup(filepath);
+	if (!path->full_path)
+		goto out_err;
+
+	if (!tmp) {		/*add disk */
+		disk = malloc(sizeof(struct bl_disk));
+		if (!disk) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			goto out_err;
+		}
+		disk->next = visible_disk_list;
+		disk->dev = dev;
+		disk->size = size;
+		disk->serial = serial;	/* ownership moves to the disk */
+		disk->valid_path = path;
+		disk->paths = path;
+		visible_disk_list = disk;
+	} else {
+		path->next = tmp->paths;
+		tmp->paths = path;
+		/*check whether we need to update disk info */
+		if (bl_update_path(path, path->state, tmp)) {
+			tmp->dev = dev;
+			tmp->size = size;
+			tmp->valid_path = path;
+		}
+		/* The existing disk keeps its own copy of the serial */
+		free(serial);
+	}
+	return;
+
+ out_err:
+	/* No disk entry exists on any error path; drop path and serial */
+	if (path) {
+		free(path->full_path);
+		free(path);
+	}
+	free(serial);
+	return;
+}
+
+/*
+ * Consider one directory entry (filename) found below a scan root and
+ * hand it to bl_add_disk() if the configuration selects it.
+ *
+ * An entry is selected when the root has all_disk set, or when its name
+ * starts with one of the root's device name prefixes (for example
+ * prefix "sd" selects "sda").  "." and ".." are skipped, and so are
+ * entries whose full path would not fit into PATH_MAX bytes.
+ */
+void bl_devicescan(const char *filename, struct scan_root_list *root)
+{
+	char filepath[PATH_MAX];
+	struct scan_device_list *device;
+	int n;
+
+	if (!strcmp(filename, ".") || !strcmp(filename, ".."))
+		return;
+
+	/*
+	 * sg device is not a real device, but a device created according
+	 * to each scsi device. It won't be used for pseudo device creation,
+	 * so /dev/sg* is never scanned.  filename is a bare directory entry
+	 * name, so the directory has to be checked separately; a root read
+	 * from the config file may carry a trailing '/'.
+	 */
+	if (!strncmp(filename, "sg", 2) &&
+	    (!strcmp(root->name, "/dev") || !strcmp(root->name, "/dev/")))
+		return;
+
+	/* snprintf() bounds the write and reports truncation */
+	n = snprintf(filepath, sizeof(filepath), "%s/%s", root->name,
+		     filename);
+	if ((n < 0) || ((size_t)n >= sizeof(filepath)))
+		return;
+
+	if (!root->all_disk) {
+		/*
+		 * If device->name is a prefix of filename, this disk should
+		 * be valid for scanning.  strncmp() stops at the end of
+		 * filename when it is shorter than the prefix.
+		 */
+		for (device = root->disk; device; device = device->next) {
+			if (device->name &&
+			    !strncmp(filename, device->name,
+				     strlen(device->name)))
+				break;
+		}
+		if (!device)
+			return;
+	}
+
+	bl_add_disk(filepath);
+}
+
+/*
+ * Delete disks with multi-paths and no pseudo device path.
+ *
+ * If only passive device or more than one active devices available,
+ * I consider it as error since multipath of device-mapper should have worked
+ * and pseudo device should have been created.
+ *
+ * The list is walked through a pointer to the link that references the
+ * current disk, so unlinking works the same way for the head and for any
+ * later node and never drops a disk that is being kept.
+ */
+void bl_del_invalid_disk(void)
+{
+	struct bl_disk **link = &visible_disk_list;
+	struct bl_disk *disk;
+	struct bl_disk_path *path;
+
+	while ((disk = *link) != NULL) {
+		if (!((disk->valid_path->state == BL_PATH_STATE_PASSIVE) ||
+		      ((disk->valid_path->state == BL_PATH_STATE_ACTIVE) &&
+		       (disk->paths->next)))) {
+			/* Keep this disk and move on to the next link */
+			link = &disk->next;
+			continue;
+		}
+
+		/* Unlink first, then release the disk and all its paths */
+		*link = disk->next;
+		while ((path = disk->paths) != NULL) {
+			disk->paths = path->next;
+			free(path->full_path);
+			free(path);
+		}
+		if (disk->serial)
+			free(disk->serial);
+		free(disk);
+	}
+}
+
+/*
+ * Rebuild visible_disk_list from scratch by scanning every configured
+ * root directory.  Roots that cannot be opened are skipped.
+ * Always returns 0.
+ */
+int bl_discover_devices(void)
+{
+	struct scan_root_list *root;
+	struct dirent *entry;
+	DIR *dir;
+
+	/* Drop the result of the previous scan */
+	bl_release_disk();
+
+	for (root = scan_root_list_head; root != NULL; root = root->next) {
+		dir = opendir(root->name);
+		if (dir == NULL)
+			continue;
+
+		for (entry = readdir(dir); entry != NULL; entry = readdir(dir))
+			bl_devicescan(entry->d_name, root);
+
+		closedir(dir);
+	}
+
+#ifdef DEL_INVALID_DISKS
+	bl_del_invalid_disk();
+#endif
+
+	return 0;
+}
+
+/* process kernel request
+ * return 0: request processed (a reply was written back to the pipe);
+ * return < 0: error (-ENOMEM, -EIO on a pipe read/write failure,
+ *             -EINVAL on a malformed header)
+ *
+ * One request consists of a struct pipefs_hdr followed by
+ * totallen - sizeof(header) bytes of payload.  The reply reuses the
+ * header with status set to BL_DEVICE_REQUEST_PROC or
+ * BL_DEVICE_REQUEST_ERR; a successful BL_DEVICE_MOUNT reply carries the
+ * 32-bit major and minor numbers of the device as payload.
+ */
+int bl_disk_inquiry_process(int fd)
+{
+	int ret = 0;
+	struct pipefs_hdr *head = NULL, *tmp;
+	char *buf = NULL;
+	uint32_t major, minor;
+	uint16_t buflen;
+	unsigned int len = 0;
+
+	head = calloc(1, sizeof(struct pipefs_hdr));
+	if (!head) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	/*read request */
+	if (atomicio(read, fd, head, sizeof(*head)) != sizeof(*head)) {
+		/* Note that an error in this or the next read is pretty
+		 * catastrophic, as there is no good way to resync into
+		 * the pipe's stream.
+		 */
+		BL_LOG_ERR("Read pipefs head error!\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	/* A length shorter than the header would wrap around below */
+	if (head->totallen < sizeof(*head)) {
+		BL_LOG_ERR("Invalid pipefs message length!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	buflen = head->totallen - sizeof(*head);
+	/* malloc(0) may return NULL, which is not an out-of-memory error */
+	buf = malloc(buflen ? buflen : 1);
+	if (!buf) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (atomicio(read, fd, buf, buflen) != buflen) {
+		BL_LOG_ERR("Read pipefs content error!\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	head->status = BL_DEVICE_REQUEST_PROC;
+	switch (head->type) {
+	case BL_DEVICE_MOUNT:
+		if (!process_deviceinfo(buf, buflen, &major, &minor)) {
+			/* Still answer, so the requester sees the failure */
+			head->status = BL_DEVICE_REQUEST_ERR;
+			break;
+		}
+		tmp = realloc(head, sizeof(major) + sizeof(minor) +
+			      sizeof(struct pipefs_hdr));
+		if (!tmp) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			ret = -ENOMEM;
+			goto out;
+		}
+		head = tmp;
+		memcpy((void *)head + sizeof(struct pipefs_hdr),
+		       &major, sizeof(major));
+		memcpy((void *)head + sizeof(struct pipefs_hdr) + sizeof(major),
+		       &minor, sizeof(minor));
+		len = sizeof(major) + sizeof(minor);
+		break;
+	case BL_DEVICE_UMOUNT:
+		/* The payload is read as one 64-bit device identifier */
+		if ((buflen < sizeof(uint64_t)) ||
+		    !dm_device_remove_all((uint64_t *) buf))
+			head->status = BL_DEVICE_REQUEST_ERR;
+		bl_discover_devices();
+		break;
+	default:
+		head->status = BL_DEVICE_REQUEST_ERR;
+	}
+
+	head->totallen = sizeof(struct pipefs_hdr) + len;
+	/* write to pipefs */
+	if (atomicio((void *)write, fd, head, head->totallen)
+	    != head->totallen) {
+		BL_LOG_ERR("Write pipefs error!\n");
+		ret = -EIO;
+	}
+
+ out:
+	if (buf)
+		free(buf);
+	if (head)
+		free(head);
+	return ret;
+}
+
+/*TODO: set bl_process_stop to 1 in command*/
+unsigned int bl_process_stop;
+
+/*
+ * Wait for requests on fd and hand each one to bl_disk_inquiry_process().
+ *
+ * Returns 1 when bl_process_stop was set, 0 when select() timed out after
+ * BL_DEVICE_DISCOVERY_INTERVAL seconds without a request, and -errno when
+ * select() failed with anything other than EINTR.  The result of an
+ * individual request is overwritten on the next loop iteration.
+ */
+int bl_run_disk_inquiry_process(int fd)
+{
+	fd_set rset;
+	struct timeval tv;
+	int ret;
+
+	bl_process_stop = 0;
+
+	for (;;) {
+		if (bl_process_stop)
+			return 1;
+		FD_ZERO(&rset);
+		FD_SET(fd, &rset);
+		ret = 0;
+		/* set both fields on every pass: tv_usec must not be left
+		 * uninitialised, and select() may modify the timeout
+		 */
+		tv.tv_sec = BL_DEVICE_DISCOVERY_INTERVAL;
+		tv.tv_usec = 0;
+		switch (select(fd + 1, &rset, NULL, NULL, &tv)) {
+		case -1:
+			if (errno == EINTR)
+				continue;
+			ret = -errno;
+			goto out;
+		case 0:
+			/* timeout: let the caller rediscover devices */
+			goto out;
+		default:
+			if (FD_ISSET(fd, &rset))
+				ret = bl_disk_inquiry_process(fd);
+		}
+	}
+ out:
+	return ret;
+}
+
+/* Daemon */
+/*
+ * Daemon entry point: refuse to start if PID_FILE exists, daemonize, create
+ * and lock the pid file, open the pipefs file, load the configuration and
+ * then alternate forever between device discovery and request processing.
+ */
+int main(void)
+{
+	int fd, pidfd, pidlen, ret = 1;
+	struct stat statbuf;
+	char pidbuf[64];
+
+	if (!stat(PID_FILE, &statbuf)) {
+		fprintf(stderr, "Pid file already existed\n");
+		return -1;
+	}
+
+	if (daemon(0, 0) != 0) {
+		fprintf(stderr, "Daemonize failed\n");
+		return -1;
+	}
+
+	openlog("pnfs-block", LOG_PID, 0);
+	pidfd = open(PID_FILE, O_WRONLY | O_CREAT, 0644);
+	if (pidfd < 0) {
+		BL_LOG_ERR("Create pid file failed\n");
+		return -1;
+	}
+
+	if (lockf(pidfd, F_TLOCK, 0) < 0) {
+		BL_LOG_ERR("Lock pid file failed\n");
+		close(pidfd);
+		return -1;
+	}
+	pidlen = snprintf(pidbuf, sizeof(pidbuf), "%d\n", (int)getpid());
+	if (ftruncate(pidfd, 0) < 0 ||
+	    write(pidfd, pidbuf, pidlen) != pidlen)
+		BL_LOG_WARNING("Write pid file failed\n");
+	/* pidfd is deliberately never closed: closing it would drop the
+	 * lockf() lock taken above
+	 */
+
+	/*open pipe file */
+	fd = open(BL_PIPE_FILE, O_RDWR);
+	if (fd < 0) {
+		BL_LOG_ERR("open pipe file error\n");
+		return -1;
+	}
+
+	ret = bl_cfg_init();
+	if (ret < 0) {
+		if (ret == -ENOENT)
+			BL_LOG_WARNING("Config file not exist, use default\n");
+		else {
+			BL_LOG_ERR("Open/read Block pNFS config file error\n");
+			return -1;
+		}
+	}
+
+	while (1) {
+		/*discover device when needed */
+		bl_discover_devices();
+
+		ret = bl_run_disk_inquiry_process(fd);
+		if (ret < 0) {
+			/* what should we do with process error? */
+			BL_LOG_ERR("inquiry process return %d\n", ret);
+		}
+	}
+	/* not reached: the loop above has no exit */
+	close(fd);
+	return ret;
+}
diff --git a/utils/blkmapd/device-discovery.h b/utils/blkmapd/device-discovery.h
new file mode 100644
index 0000000..9f87ebe
--- /dev/null
+++ b/utils/blkmapd/device-discovery.h
@@ -0,0 +1,162 @@
+/*
+ * device-discovery.h
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef BL_DEVICE_DISCOVERY_H
+#define BL_DEVICE_DISCOVERY_H
+
+#define BL_DEVICE_DISCOVERY_INTERVAL 60
+
+#include <stdint.h>
+#include <syslog.h>
+
+/* Volume kinds; the value is stored in bl_volume.bv_type. */
+enum blk_vol_type {
+ BLOCK_VOLUME_SIMPLE = 0, /* maps to a single LU */
+ BLOCK_VOLUME_SLICE = 1, /* slice of another volume */
+ BLOCK_VOLUME_CONCAT = 2, /* concatenation of multiple volumes */
+ BLOCK_VOLUME_STRIPE = 3, /* striped across multiple volumes */
+ BLOCK_VOLUME_PSEUDO = 4, /* grouped with SIMPLE by TYPE_HAS_DEV() in dm-device.c */
+};
+
+/* All disk offset/lengths are stored in 512-byte sectors */
+/* One node of the decoded volume topology (see decode_blk_volume()). */
+struct bl_volume {
+ uint32_t bv_type; /* one of enum blk_vol_type */
+ off_t bv_size; /* size of this volume */
+ struct bl_volume **bv_vols; /* sub-volumes, filled in by set_vol_array() */
+ int bv_vol_n; /* number of entries in bv_vols */
+ /* type-specific parameter; which member is valid depends on bv_type */
+ union {
+ dev_t bv_dev; /*for BLOCK_VOLUME_SIMPLE(PSEUDO) */
+ off_t bv_stripe_unit; /*for BLOCK_VOLUME_STRIPE(CONCAT) */
+ off_t bv_offset; /*for BLOCK_VOLUME_SLICE */
+ } param;
+};
+
+/* One signature component: bs_length bytes expected at bs_offset on a disk. */
+struct bl_sig_comp {
+ int64_t bs_offset; /* In bytes */ /* negative values are offset by the disk size in verify_sig() */
+ uint32_t bs_length; /* In bytes */
+ char *bs_string; /* expected bytes; decode_blk_signature() points this into the request buffer */
+};
+
+/* Maximum number of signatures components in a simple volume */
+# define BLOCK_MAX_SIG_COMP 16
+
+/* Full signature of a simple volume: every component must match (see verify_sig()). */
+struct bl_sig {
+ int si_num_comps; /* number of valid entries in si_comps */
+ struct bl_sig_comp si_comps[BLOCK_MAX_SIG_COMP];
+};
+
+/*
+ * Multipath support: ACTIVE or PSEUDO device is valid,
+ * PASSIVE is a standby for ACTIVE.
+ *
+ * bldev_read_ap_state() reports ACTIVE unless VPD page 0xc0 says otherwise.
+ */
+enum bl_path_state_e {
+ BL_PATH_STATE_PASSIVE = 1,
+ BL_PATH_STATE_ACTIVE = 2,
+ BL_PATH_STATE_PSEUDO = 3,
+};
+
+/*
+ * Length-counted device identifier.  bl_create_scsi_string() allocates the
+ * struct and the data bytes in one block and copies exactly len bytes, so
+ * data is not NUL-terminated.
+ */
+struct bl_serial {
+ int len; /* number of bytes at data */
+ char *data; /* identifier bytes */
+};
+
+/* One path to a disk; paths of the same disk are chained through next. */
+struct bl_disk_path {
+ struct bl_disk_path *next; /* next path in the singly linked list */
+ char *full_path; /* path name; opened by read_cmp_blk_sig() when used as valid_path */
+ enum bl_path_state_e state; /* multipath state of this path */
+};
+
+/* One discovered disk; visible_disk_list is a singly linked list of these. */
+struct bl_disk {
+ struct bl_disk *next; /* next disk in the list */
+ struct bl_serial *serial; /* device identifier */
+ dev_t dev; /* device number, copied into bl_volume.param.bv_dev on a match */
+ off_t size; /* in 512-byte sectors (verify_sig() shifts it left by 9 to get bytes) */
+ struct bl_disk_path *valid_path; /* path used for signature reads */
+ struct bl_disk_path *paths; /* presumably all known paths to this disk; TODO confirm in device-discovery.c */
+};
+
+/*
+ * Byte overlay used by bldev_read_serial() on the VPD page 0x83 response:
+ * once over the start of the page and once per identifier entry inside it.
+ */
+struct bl_dev_id {
+ unsigned char type; /* low nibble is checked against 1 ("binary code_set") */
+ unsigned char ids; /* low nibble selects the identifier kind (0..3) */
+ unsigned char reserve;
+ unsigned char len; /* number of bytes at data */
+ char data[0]; /* identifier bytes follow the 4-byte header */
+};
+
+/*
+ * Header that precedes every message read from or written to the pipefs
+ * file (see bl_disk_inquiry_process()).
+ */
+struct pipefs_hdr {
+ uint32_t msgid;
+ uint8_t type; /* BL_DEVICE_MOUNT or BL_DEVICE_UMOUNT */
+ uint8_t flags;
+ uint16_t totallen; /* length of entire message, including hdr */
+ uint32_t status; /* one of the BL_DEVICE_REQUEST_* values */
+};
+
+/* Values for pipefs_hdr.type */
+#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */
+#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices */
+/* Values for pipefs_hdr.status */
+#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */
+#define BL_DEVICE_REQUEST_PROC 0x1 /* User process succeeds */
+#define BL_DEVICE_REQUEST_ERR 0x2 /* User process fails */
+
+uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes);
+
+/*
+ * Decoding helpers.  They are not self-contained: READ32/READ64 advance a
+ * local cursor that must be named "p", READ_SECTOR additionally needs a
+ * local "tmp", and BLK_READBUF/READ_SECTOR jump to a local label "out_err".
+ */
+
+/* Check that nbytes are available between p and e; jump to out_err if not. */
+#define BLK_READBUF(p, e, nbytes) do { \
+ p = blk_overflow(p, e, nbytes); \
+ if (!p) {\
+ goto out_err;\
+ } \
+} while (0)
+
+/* Read one 32-bit network-order word into x and advance p. */
+#define READ32(x) (x) = ntohl(*p++)
+
+/* Read two 32-bit words (high word first) into the 64-bit x and advance p. */
+#define READ64(x) do { \
+ (x) = (uint64_t)ntohl(*p++) << 32; \
+ (x) |= ntohl(*p++); \
+} while (0)
+
+/*
+ * Read a 64-bit byte count and store it in x as 512-byte sectors; values
+ * that are not a multiple of 512 jump to out_err.
+ */
+#define READ_SECTOR(x) do { \
+ READ64(tmp); \
+ if (tmp & 0x1ff) { \
+ goto out_err; \
+ } \
+ (x) = tmp >> 9; \
+} while (0)
+
+extern struct bl_disk *visible_disk_list;
+uint64_t dm_device_create(struct bl_volume *vols, int num_vols);
+int dm_device_remove_all(uint64_t *dev);
+uint64_t process_deviceinfo(const char *dev_addr_buf,
+ unsigned int dev_addr_len,
+ uint32_t *major, uint32_t *minor);
+
+extern ssize_t atomicio(ssize_t(*f) (int, void *, size_t),
+ int fd, void *_s, size_t n);
+extern struct bl_serial *bldev_read_serial(int fd, const char *filename);
+extern void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out);
+extern int bl_discover_devices(void);
+
+/* printf-style logging wrappers around syslog() at the matching priority. */
+#define BL_LOG_WARNING(fmt...) syslog(LOG_WARNING, fmt)
+#define BL_LOG_ERR(fmt...) syslog(LOG_ERR, fmt)
+#define BL_LOG_DEBUG(fmt...) syslog(LOG_DEBUG, fmt)
+#endif
diff --git a/utils/blkmapd/device-inq.c b/utils/blkmapd/device-inq.c
new file mode 100644
index 0000000..ff38fd6
--- /dev/null
+++ b/utils/blkmapd/device-inq.c
@@ -0,0 +1,235 @@
+/*
+ * device-inq.c: inquire SCSI device information.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * This program refers to "SCSI Primary Commands - 3 (SPC-3)
+ * at http://www.t10.org and sg_inq.c in sg3_utils-1.26 for
+ * Linux OS SCSI subsystem, by D. Gilbert.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <errno.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/sg.h>
+#include "device-discovery.h"
+
+#define DEF_ALLOC_LEN 255
+#define MX_ALLOC_LEN (0xc000 + 0x80)
+
+/*
+ * Allocate a bl_serial holding a copy of the len bytes at bytes.
+ * Header and payload share one allocation, so a single free() releases
+ * both.  Returns NULL when the allocation fails.
+ */
+struct bl_serial *bl_create_scsi_string(int len, const char *bytes)
+{
+	struct bl_serial *str = malloc(sizeof(*str) + len);
+
+	if (!str)
+		return NULL;
+
+	/* the payload sits directly behind the header */
+	str->len = len;
+	str->data = (char *)(str + 1);
+	memcpy(str->data, bytes, len);
+	return str;
+}
+
+/* Release a string created by bl_create_scsi_string(); NULL is accepted. */
+void bl_free_scsi_string(struct bl_serial *str)
+{
+	/* free() already ignores NULL */
+	free(str);
+}
+
+/*
+ * True when an SG_IO request completed cleanly: the masked status and
+ * driver_status bits and the host_status of the sg_io_hdr are all zero.
+ */
+#define sg_io_ok(io_hdr) \
+ ((((io_hdr).status & 0x7e) == 0) && \
+ ((io_hdr).host_status == 0) && \
+ (((io_hdr).driver_status & 0x0f) == 0))
+
+/* Value stored in sg_io_hdr.timeout (milliseconds per the sg driver docs). */
+static int sg_timeout = 1 * 1000;
+
+/*
+ * Issue one INQUIRY through SG_IO on fd, reading up to len bytes into buffer.
+ * A non-negative page sets cmd byte 1 to 1 and byte 2 to the page code;
+ * a negative page leaves both zero.
+ * Returns 0 on success and -1 when the ioctl fails or reports an error.
+ */
+static int bldev_inquire_page(int fd, int page, char *buffer, int len)
+{
+	unsigned char cdb[] = { INQUIRY, 0, 0, 0, 0, 0 };
+	unsigned char sense[28];
+	struct sg_io_hdr hdr;
+
+	if (page >= 0) {
+		cdb[1] = 1;
+		cdb[2] = page;
+	}
+	/* allocation length, high byte first */
+	cdb[3] = (unsigned char)((len >> 8) & 0xff);
+	cdb[4] = (unsigned char)(len & 0xff);
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.interface_id = 'S';
+	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+	hdr.timeout = sg_timeout;
+	hdr.cmdp = cdb;
+	hdr.cmd_len = sizeof(cdb);
+	hdr.sbp = sense;
+	hdr.mx_sb_len = sizeof(sense);
+	hdr.dxferp = buffer;
+	hdr.dxfer_len = len;
+
+	if (ioctl(fd, SG_IO, &hdr) < 0)
+		return -1;
+
+	return sg_io_ok(hdr) ? 0 : -1;
+}
+
+/*
+ * Read a complete VPD page from fd into a freshly allocated *buffer.
+ *
+ * A first INQUIRY of DEF_ALLOC_LEN bytes fetches the page header; if the
+ * page reports a larger length the buffer is grown and the page re-read.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure and -1 on any other
+ * failure.  *buffer is NULL only when the initial allocation failed; in
+ * every other case (including errors) the caller must free it.
+ */
+int bldev_inquire_pages(int fd, int page, char **buffer)
+{
+	int status = 0;
+	char *tmp;
+	const unsigned char *resp;
+	int len;
+
+	*buffer = calloc(DEF_ALLOC_LEN, sizeof(char));
+	if (!*buffer) {
+		BL_LOG_ERR("%s: Out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	status = bldev_inquire_page(fd, page, *buffer, DEF_ALLOC_LEN);
+	if (status)
+		goto out;
+
+	/* Inspect the response as unsigned bytes: with a signed plain char
+	 * any length byte >= 0x80 would be sign-extended and corrupt len.
+	 */
+	resp = (const unsigned char *)*buffer;
+
+	status = -1;
+	if (resp[1] != page)
+		goto out;
+
+	/* bytes 2-3 hold the page length, to which the 4 header bytes are added */
+	len = (resp[2] << 8) + resp[3] + 4;
+	if (len > MX_ALLOC_LEN) {
+		BL_LOG_ERR("SCSI response length too long: %d\n", len);
+		goto out;
+	}
+	if (len > DEF_ALLOC_LEN) {
+		tmp = realloc(*buffer, len);
+		if (!tmp) {
+			BL_LOG_ERR("%s: Out of memory!\n", __func__);
+			status = -ENOMEM;
+			goto out;
+		}
+		*buffer = tmp;
+		status = bldev_inquire_page(fd, page, *buffer, len);
+		if (status)
+			goto out;
+	}
+	status = 0;
+ out:
+	return status;
+}
+
+/*
+ * Report the multipath state of the device open on fd.
+ *
+ * For EMC multipath devices VPD page 0xc0 carries the status: when the page
+ * can be read and its byte 4 is below 0x02 the path is PASSIVE.  Every
+ * other device (or a failed inquiry) is reported as ACTIVE for now.
+ */
+void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out)
+{
+	char *page = NULL;
+
+	*ap_state_out = BL_PATH_STATE_ACTIVE;
+
+	if (bldev_inquire_pages(fd, 0xc0, &page) == 0 && page[4] < 0x02)
+		*ap_state_out = BL_PATH_STATE_PASSIVE;
+
+	/* the buffer is handed back even when the inquiry failed */
+	free(page);
+}
+
+/*
+ * Build a device identifier for the device open on fd.
+ *
+ * VPD page 0x83 is scanned for identifiers of kind 0..3; when several are
+ * present the highest kind wins (3 > 2 > 1 > 0).  If the page cannot be
+ * read or holds no usable identifier, filename is used instead.
+ *
+ * Returns a bl_serial the caller frees with bl_free_scsi_string(), or NULL
+ * when memory is exhausted.
+ */
+struct bl_serial *bldev_read_serial(int fd, const char *filename)
+{
+	struct bl_serial *serial_out = NULL;
+	int status = 0, pos, len;
+	char *buffer = NULL;
+	struct bl_dev_id *dev_root, *dev_id;
+	unsigned int current_id = 0;
+	/* Kept as a signed int so that "len - hdr_len" below goes negative
+	 * for tiny pages instead of wrapping to a huge unsigned value.
+	 *
+	 * NOTE(review): with the zero-length data[] member this evaluates to
+	 * 3, while struct bl_dev_id declares a 4-byte header -- confirm the
+	 * intended stride between entries.
+	 */
+	const int hdr_len = (int)(sizeof(struct bl_dev_id) -
+				  sizeof(unsigned char));
+
+	status = bldev_inquire_pages(fd, 0x83, &buffer);
+	if (status)
+		goto out;
+
+	dev_root = (struct bl_dev_id *)buffer;
+
+	pos = 0;
+	current_id = 0;
+	len = dev_root->len;
+	while (pos < len - hdr_len) {
+		dev_id = (struct bl_dev_id *)&(dev_root->data[pos]);
+		/* Advance first, so that every path through the body
+		 * (including the "continue" below) makes progress.
+		 */
+		pos += dev_id->len + hdr_len;
+
+		if ((dev_id->ids & 0xf) < current_id)
+			continue;
+		switch (dev_id->ids & 0xf) {
+			/* We process SCSI ID with four ID cases: 0, 1, 2 and 3.
+			 * When more than one ID is available, priority is
+			 * 3>2>1>0.
+			 */
+		case 2:	/* EUI-64 based */
+			if ((dev_id->len != 8) && (dev_id->len != 12) &&
+			    (dev_id->len != 16)) {
+				BL_LOG_ERR("EUI-64 only decodes 8, "
+					   "12 and 16\n");
+				break;
+			}
+			/* fall through */
+		case 3:	/* NAA */
+			/* TODO: NAA validity judgement too complicated,
+			 * so just ignore it here.
+			 */
+			if ((dev_id->type & 0xf) != 1) {
+				BL_LOG_ERR("Binary code_set expected\n");
+				break;
+			}
+			/* fall through */
+		case 0:	/* vendor specific */
+		case 1:	/* T10 vendor identification */
+			current_id = dev_id->ids & 0xf;
+			if (serial_out)
+				bl_free_scsi_string(serial_out);
+			serial_out = bl_create_scsi_string(dev_id->len,
+							   dev_id->data);
+			break;
+		default:
+			break;
+		}
+		/* kind 3 has the highest priority: nothing can beat it */
+		if (current_id == 3)
+			break;
+	}
+ out:
+	if (!serial_out)
+		serial_out = bl_create_scsi_string(strlen(filename), filename);
+	free(buffer);
+	return serial_out;
+}
diff --git a/utils/blkmapd/device-process.c b/utils/blkmapd/device-process.c
new file mode 100644
index 0000000..6252552
--- /dev/null
+++ b/utils/blkmapd/device-process.c
@@ -0,0 +1,391 @@
+/*
+ * device-process.c: detailed processing of device information sent
+ * from kernel.
+ *
+ * Copyright (c) 2006 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <andros@citi.umich.edu>
+ * Fred Isaman <iisaman@umich.edu>
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ *
+ * Used codes in linux/fs/nfs/blocklayout/blocklayoutdev.c.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _LARGEFILE64_SOURCE
+#include <libdevmapper.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/user.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <linux/kdev_t.h>
+#include "device-discovery.h"
+
+/*
+ * Check that nbytes (rounded up to whole 32-bit words) can be read from p
+ * without passing end.  Returns p when they fit and NULL otherwise.
+ *
+ * The comparison is done on word counts rather than on a computed pointer,
+ * so neither "nbytes + 3" nor the pointer addition can wrap around for very
+ * large nbytes (e.g. a negative length converted to size_t).
+ */
+uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes)
+{
+	size_t avail, nwords;
+
+	if (p > end)
+		return NULL;
+	avail = (size_t)(end - p);
+	/* round up without adding to nbytes */
+	nwords = nbytes / 4 + (nbytes % 4 != 0);
+	if (nwords > avail)
+		return NULL;
+	return p;
+}
+
+/*
+ * Decode a volume signature from the XDR stream at *pp into sig.
+ *
+ * On success *pp is advanced past the signature and 0 is returned; on a
+ * truncated or malformed stream -EIO is returned and *pp is left untouched.
+ * The bs_string members point into the input buffer, they are not copies.
+ */
+static int decode_blk_signature(uint32_t **pp, uint32_t *end,
+				struct bl_sig *sig)
+{
+	int i, tmp;
+	uint32_t *p = *pp;
+
+	BLK_READBUF(p, end, 4);
+	READ32(sig->si_num_comps);
+	if (sig->si_num_comps == 0) {
+		BL_LOG_ERR("0 components in sig\n");
+		goto out_err;
+	}
+	/* the count arrives as an unsigned word but is stored in an int; a
+	 * negative value would skip the loop and yield an empty signature
+	 */
+	if (sig->si_num_comps < 0) {
+		BL_LOG_ERR("invalid number of sig comps %i\n",
+			   sig->si_num_comps);
+		goto out_err;
+	}
+	if (sig->si_num_comps >= BLOCK_MAX_SIG_COMP) {
+		BL_LOG_ERR("number of sig comps %i >= BLOCK_MAX_SIG_COMP\n",
+			   sig->si_num_comps);
+		goto out_err;
+	}
+	for (i = 0; i < sig->si_num_comps; i++) {
+		BLK_READBUF(p, end, 12);
+		READ64(sig->si_comps[i].bs_offset);
+		READ32(tmp);
+		if (tmp < 0) {
+			BL_LOG_ERR("%s: si_comps[%d]: invalid length\n",
+				   __func__, i);
+			goto out_err;
+		}
+		sig->si_comps[i].bs_length = tmp;
+		BLK_READBUF(p, end, tmp);
+		/* Note we rely here on fact that sig is used immediately
+		 * for mapping, then thrown away.
+		 */
+		sig->si_comps[i].bs_string = (char *)p;
+		/* the signature is not NUL-terminated: bound the print */
+		BL_LOG_ERR("%s: si_comps[%d]: bs_length %u, bs_string %.*s\n",
+			   __func__, i, sig->si_comps[i].bs_length,
+			   tmp, sig->si_comps[i].bs_string);
+		p += ((tmp + 3) >> 2);
+	}
+	*pp = p;
+	return 0;
+ out_err:
+	return -EIO;
+}
+
+/*
+ * Read comp->bs_length bytes at byte offset bs_offset from dev_name and
+ * compare them with comp->bs_string.
+ *
+ * Returns 0 when the bytes match, the non-zero memcmp() result when they
+ * differ, and -1 when the device cannot be opened, seeked or read or when
+ * memory is exhausted.
+ */
+int read_cmp_blk_sig(const char *dev_name, struct bl_sig_comp *comp,
+		     int64_t bs_offset)
+{
+	int fd, ret = -1;
+	char *sig = NULL;
+
+	BL_LOG_ERR("%s: dev_name %s\n", __func__, dev_name);
+	fd = open(dev_name, O_RDONLY | O_LARGEFILE);
+	if (fd < 0) {
+		BL_LOG_ERR("%s could not be opened for read\n", dev_name);
+		goto error;
+	}
+
+	sig = (char *)malloc(comp->bs_length);
+	if (!sig) {
+		BL_LOG_ERR("%s: Out of memory\n", __func__);
+		goto error;
+	}
+
+	if (lseek64(fd, bs_offset, SEEK_SET) == -1) {
+		BL_LOG_ERR("File %s lseek error\n", dev_name);
+		goto error;
+	}
+
+	if (atomicio(read, fd, sig, comp->bs_length) != comp->bs_length) {
+		BL_LOG_ERR("File %s read error\n", dev_name);
+		goto error;
+	}
+
+	/* Neither buffer is NUL-terminated, so bound both prints by the
+	 * signature length; int64_t is cast to match %lld.
+	 */
+	BL_LOG_ERR
+	    ("%s: sig: %.*s, bs_string: %.*s, bs_length: %u, bs_offset: %lld\n",
+	     __func__, (int)comp->bs_length, sig,
+	     (int)comp->bs_length, comp->bs_string,
+	     comp->bs_length, (long long)bs_offset);
+	ret = memcmp(sig, comp->bs_string, comp->bs_length);
+
+ error:
+	free(sig);
+	if (fd >= 0)
+		close(fd);
+	return ret;
+}
+
+/*
+ * All signatures in sig must be found on disk for verification.
+ * Returns True if sig matches, False otherwise.
+ *
+ * A negative component offset is taken relative to the end of the disk:
+ * the disk size (in 512-byte sectors) is converted to bytes and added.
+ */
+static int verify_sig(struct bl_disk *disk, struct bl_sig *sig)
+{
+	struct bl_sig_comp *comp;
+	int i, ret;
+	int64_t bs_offset;
+
+	for (i = 0; i < sig->si_num_comps; i++) {
+		comp = &sig->si_comps[i];
+		bs_offset = comp->bs_offset;
+		if (bs_offset < 0)
+			bs_offset += (((int64_t) disk->size) << 9);
+		/* cast so the argument really matches %lld */
+		BL_LOG_ERR("%s: bs_offset: %lld\n", __func__,
+			   (long long)bs_offset);
+		ret = read_cmp_blk_sig(disk->valid_path->full_path,
+				       comp, bs_offset);
+		if (ret)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * map_sig_to_device()
+ * Given a signature, walk the list of visible disks searching for
+ * a match. Returns True if mapping was done, False otherwise.
+ *
+ * While we're at it, fill in the vol->bv_size.
+ */
+static int map_sig_to_device(struct bl_sig *sig, struct bl_volume *vol)
+{
+	int mapped = 0;
+	struct bl_disk *disk;
+
+	/* log every candidate before scanning */
+	for (disk = visible_disk_list; disk; disk = disk->next)
+		BL_LOG_ERR("%s: visible_disk_list: %s\n", __func__,
+			   disk->valid_path->full_path);
+
+	/*scan disk list to find out match device */
+	for (disk = visible_disk_list; disk; disk = disk->next) {
+		/* FIXME: should we use better algorithm for disk scan? */
+		mapped = verify_sig(disk, sig);
+		if (mapped) {
+			vol->param.bv_dev = disk->dev;
+			vol->bv_size = disk->size;
+			break;
+		}
+	}
+	return mapped;
+}
+
+/*
+ * Decode vols[working].bv_vol_n XDR-encoded indices from *pp.  Each index
+ * must name a volume decoded earlier (0 <= index < working); the matching
+ * pointers are stored in vols[working].bv_vols.
+ *
+ * Returns 0 and advances *pp on success, -EIO (with *pp untouched) when
+ * the stream is too short or an index is out of range.
+ */
+static int set_vol_array(uint32_t **pp, uint32_t *end,
+			 struct bl_volume *vols, int working)
+{
+	struct bl_volume *cur = &vols[working];
+	struct bl_volume **slots = cur->bv_vols;
+	uint32_t *p = *pp;	/* name required by the READ32 macro */
+	int n, index;
+
+	for (n = 0; n < cur->bv_vol_n; n++) {
+		BLK_READBUF(p, end, 4);
+		READ32(index);
+		if (index >= working || index < 0) {
+			BL_LOG_ERR("set_vol_array: Id %i out of range\n",
+				   index);
+			goto out_err;
+		}
+		slots[n] = vols + index;
+	}
+	*pp = p;
+	return 0;
+ out_err:
+	return -EIO;
+}
+
+/* Return the sum of bv_size over all sub-volumes referenced by vol. */
+static uint64_t sum_subvolume_sizes(struct bl_volume *vol)
+{
+	struct bl_volume **sub = vol->bv_vols;
+	int left = vol->bv_vol_n;
+	uint64_t total = 0;
+
+	while (left-- > 0)
+		total += (*sub++)->bv_size;
+	return total;
+}
+
+/*
+ * Decode volume number i from the XDR stream at *pp into vols[i].
+ *
+ * *array_cnt receives the number of bv_vols slots the volume uses (0 for a
+ * simple volume).  Returns 0 and advances *pp on success, -ENXIO when no
+ * visible disk matches a simple volume's signature, and -EIO (or the error
+ * of a helper) for a malformed description.
+ */
+static int decode_blk_volume(uint32_t **pp, uint32_t *end,
+			     struct bl_volume *vols, int i, int *array_cnt)
+{
+	int status = 0, j;
+	struct bl_sig sig;
+	uint32_t *p = *pp;
+	struct bl_volume *vol = &vols[i];
+	uint64_t tmp, tmp_size;
+	off_t chunksize;
+
+	BLK_READBUF(p, end, 4);
+	READ32(vol->bv_type);
+	switch (vol->bv_type) {
+	case BLOCK_VOLUME_SIMPLE:
+		*array_cnt = 0;
+		status = decode_blk_signature(&p, end, &sig);
+		if (status)
+			return status;
+		status = map_sig_to_device(&sig, vol);
+		if (!status) {
+			BL_LOG_ERR("Could not find disk for device\n");
+			return -ENXIO;
+		}
+		status = 0;
+		break;
+	case BLOCK_VOLUME_SLICE:
+		BLK_READBUF(p, end, 16);
+		READ_SECTOR(vol->param.bv_offset);
+		READ_SECTOR(vol->bv_size);
+		*array_cnt = vol->bv_vol_n = 1;
+		status = set_vol_array(&p, end, vols, i);
+		break;
+	case BLOCK_VOLUME_STRIPE:
+		BLK_READBUF(p, end, 8);
+		READ_SECTOR(vol->param.bv_stripe_unit);
+		chunksize = vol->param.bv_stripe_unit;
+		/* must be a non-zero power of two of at least one page */
+		if ((chunksize == 0) ||
+		    ((chunksize & (chunksize - 1)) != 0) ||
+		    (chunksize < (PAGE_SIZE >> 9)))
+			return -EIO;
+		BLK_READBUF(p, end, 4);
+		READ32(vol->bv_vol_n);
+		/* stored in an int: reject zero and wrapped (negative) counts */
+		if (vol->bv_vol_n <= 0)
+			return -EIO;
+		*array_cnt = vol->bv_vol_n;
+		status = set_vol_array(&p, end, vols, i);
+		if (status)
+			return status;
+		for (j = 1; j < vol->bv_vol_n; j++) {
+			if (vol->bv_vols[j]->bv_size !=
+			    vol->bv_vols[0]->bv_size) {
+				BL_LOG_ERR("varying subvol size\n");
+				return -EIO;
+			}
+		}
+		/* Make sure total size only includes addressable areas.
+		 * Plain 64-bit arithmetic is used: div() takes int
+		 * arguments and would truncate large sector counts.
+		 */
+		tmp_size = vol->bv_vols[0]->bv_size;
+		tmp_size /= (uint64_t) vol->param.bv_stripe_unit;
+		vol->bv_size = tmp_size * vol->param.bv_stripe_unit;
+		break;
+	case BLOCK_VOLUME_CONCAT:
+		BLK_READBUF(p, end, 4);
+		READ32(vol->bv_vol_n);
+		if (vol->bv_vol_n <= 0)
+			return -EIO;
+		*array_cnt = vol->bv_vol_n;
+		status = set_vol_array(&p, end, vols, i);
+		if (status)
+			return status;
+		vol->bv_size = sum_subvolume_sizes(vol);
+		break;
+	default:
+		BL_LOG_ERR("Unknown volume type %i\n", vol->bv_type);
+ out_err:
+		return -EIO;
+	}
+	*pp = p;
+	return status;
+}
+
+/*
+ * Decode the volume topology in dev_addr_buf (dev_addr_len bytes) and
+ * create the matching device-mapper device.
+ *
+ * When a simple volume cannot be matched to a visible disk, devices are
+ * rediscovered and decoding restarts from the beginning, a limited number
+ * of times.
+ *
+ * Returns the device number from dm_device_create() and stores its major
+ * and minor numbers in *major / *minor; returns 0 when decoding fails, in
+ * which case *major and *minor are not written.
+ */
+uint64_t process_deviceinfo(const char *dev_addr_buf,
+			    unsigned int dev_addr_len,
+			    uint32_t *major, uint32_t *minor)
+{
+	int num_vols, i, status, count;
+	uint32_t *p, *end;
+	struct bl_volume *vols = NULL, **arrays = NULL, **arrays_ptr = NULL;
+	uint64_t dev = 0;
+	int tried = 0;
+
+ restart:
+	p = (uint32_t *) dev_addr_buf;
+	end = (uint32_t *) ((char *)p + dev_addr_len);
+	/* Decode block volume */
+	BLK_READBUF(p, end, 4);
+	READ32(num_vols);
+	if (num_vols <= 0) {
+		BL_LOG_WARNING("Error: number of vols: %d\n", num_vols);
+		goto out_err;
+	}
+
+	vols = (struct bl_volume *)malloc(num_vols * sizeof(struct bl_volume));
+	if (!vols) {
+		BL_LOG_ERR("%s: Out of memory\n", __func__);
+		goto out_err;
+	}
+
+	/* Each volume in vols array needs its own array. Save time by
+	 * allocating them all in one large hunk. Because each volume
+	 * array can only reference previous volumes, and because once
+	 * a concat or stripe references a volume, it may never be
+	 * referenced again, the volume arrays are guaranteed to fit
+	 * in the surprisingly small space allocated.
+	 */
+	arrays =
+	    (struct bl_volume **)malloc(num_vols * 2 *
+					sizeof(struct bl_volume *));
+	if (!arrays) {
+		BL_LOG_ERR("%s: Out of memory\n", __func__);
+		goto out_err;
+	}
+
+	arrays_ptr = arrays;
+
+	for (i = 0; i < num_vols; i++) {
+		vols[i].bv_vols = arrays_ptr;
+		status = decode_blk_volume(&p, end, vols, i, &count);
+		if (status == -ENXIO && (tried <= 5)) {
+			sleep(1);
+			BL_LOG_DEBUG("%s: discover again!\n", __func__);
+			bl_discover_devices();
+			tried++;
+			/* Clear the pointers after freeing: a failure
+			 * early in the next pass jumps to out_err, which
+			 * would otherwise free them a second time.
+			 */
+			free(vols);
+			vols = NULL;
+			free(arrays);
+			arrays = NULL;
+			goto restart;
+		}
+		if (status)
+			goto out_err;
+		arrays_ptr += count;
+	}
+
+	if (p != end) {
+		BL_LOG_ERR("p is not equal to end!\n");
+		goto out_err;
+	}
+
+	dev = dm_device_create(vols, num_vols);
+	*major = MAJOR(dev);
+	*minor = MINOR(dev);
+ out_err:
+	free(vols);
+	free(arrays);
+	return dev;
+}
diff --git a/utils/blkmapd/dm-device.c b/utils/blkmapd/dm-device.c
new file mode 100644
index 0000000..f08df7b
--- /dev/null
+++ b/utils/blkmapd/dm-device.c
@@ -0,0 +1,509 @@
+/*
+ * dm-device.c: create or remove device via device mapper API.
+ *
+ * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <libdevmapper.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/kdev_t.h>
+#include "device-discovery.h"
+
+#define DM_DEV_NAME_LEN 256
+
+#ifndef DM_MAX_TYPE_NAME
+#define DM_MAX_TYPE_NAME 16
+#endif
+
+#define DM_PARAMS_LEN 512 /*XXX: is this enough for target? */
+#define DM_DIR "/dev/mapper"
+/* sizeof(DM_DIR): the 11 characters of the path plus the terminating NUL */
+#define DM_DIR_LEN 12
+#define TYPE_HAS_DEV(type) ((type == BLOCK_VOLUME_SIMPLE) || \
+ (type == BLOCK_VOLUME_PSEUDO))
+
+/*
+ * One device-mapper target line.  The fields are passed unchanged to
+ * dm_task_add_target() by dm_single_device_create(); entries for the same
+ * device are chained through next.
+ */
+struct bl_dm_table {
+ uint64_t offset; /* start of the target within the new device */
+ uint64_t size; /* length of the target */
+ char target_type[DM_MAX_TYPE_NAME]; /* e.g. "linear" or "striped" */
+ char params[DM_PARAMS_LEN]; /* target-specific parameter string */
+ struct bl_dm_table *next;
+};
+
+/*
+ * Associates a device number with its libdevmapper dependency tree; the
+ * entries form the singly linked list headed by bl_tree_head.
+ */
+struct bl_dm_tree {
+ uint64_t dev; /* device number used as the lookup key */
+ struct dm_tree *tree; /* created by dm_tree_create(), released with dm_tree_free() */
+ struct bl_dm_tree *next;
+};
+
+/* Allocate one zero-filled table entry; returns NULL when out of memory. */
+static inline struct bl_dm_table *bl_dm_table_alloc(void)
+{
+	struct bl_dm_table *entry = calloc(1, sizeof(*entry));
+
+	return entry;
+}
+
+/* Free every entry of the table list starting at bl_table_head. */
+void bl_dm_table_free(struct bl_dm_table *bl_table_head)
+{
+	struct bl_dm_table *following;
+
+	for (; bl_table_head; bl_table_head = following) {
+		/* remember the successor before the node goes away */
+		following = bl_table_head->next;
+		free(bl_table_head);
+	}
+}
+
+/* Append table to the end of the list whose head pointer is *bl_table_head. */
+void add_to_bl_dm_table(struct bl_dm_table **bl_table_head,
+			struct bl_dm_table *table)
+{
+	struct bl_dm_table **link = bl_table_head;
+
+	/* walk to the first empty link: the head itself for an empty list */
+	while (*link)
+		link = &(*link)->next;
+	*link = table;
+}
+
+struct bl_dm_tree *bl_tree_head;
+
+/* Return the tree list entry recorded for dev, or NULL if there is none. */
+struct bl_dm_tree *find_bl_dm_tree(uint64_t dev)
+{
+	struct bl_dm_tree *entry;
+
+	for (entry = bl_tree_head; entry; entry = entry->next) {
+		if (entry->dev == dev)
+			break;
+	}
+	return entry;
+}
+
+/*
+ * Unlink and free the first tree list entry recorded for dev, if any.
+ * Only the list node is freed; the dm_tree it points to is not touched.
+ */
+void del_from_bl_dm_tree(uint64_t dev)
+{
+	struct bl_dm_tree **link = &bl_tree_head;
+	struct bl_dm_tree *victim;
+
+	while ((victim = *link) != NULL) {
+		if (victim->dev == dev) {
+			/* works for the head too: link then is &bl_tree_head */
+			*link = victim->next;
+			free(victim);
+			return;
+		}
+		link = &victim->next;
+	}
+}
+
+/* Append tree to the end of the list headed by bl_tree_head. */
+void add_to_bl_dm_tree(struct bl_dm_tree *tree)
+{
+	struct bl_dm_tree **link = &bl_tree_head;
+
+	while (*link)
+		link = &(*link)->next;
+	*link = tree;
+}
+
+/*
+ * Create the device-mapper device dev_name from the target list p.
+ * Returns the device number of the new device, or 0 when any step fails.
+ */
+uint64_t dm_single_device_create(const char *dev_name, struct bl_dm_table * p)
+{
+	struct dm_task *task;
+	struct dm_info info;
+	struct bl_dm_table *row;
+	int ok;
+
+	task = dm_task_create(DM_DEVICE_CREATE);
+	if (!task) {
+		BL_LOG_ERR("Create dm_task for %s failed\n", dev_name);
+		return 0;
+	}
+
+	ok = dm_task_set_name(task, dev_name);
+
+	/* load the targets in list order; stop at the first failure */
+	for (row = p; ok && row; row = row->next)
+		ok = dm_task_add_target(task, row->offset, row->size,
+					row->target_type, row->params);
+
+	if (ok)
+		ok = dm_task_run(task) &&
+		    dm_task_get_info(task, &info) && info.exists;
+
+	if (ok)
+		dm_task_update_nodes();
+
+	dm_task_destroy(task);
+
+	if (!ok) {
+		BL_LOG_ERR("Create device %s failed\n", dev_name);
+		return 0;
+	}
+	return MKDEV(info.major, info.minor);
+}
+
+/*
+ * Remove the device-mapper device called dev_name.
+ * Returns 1 when the removal task ran successfully, 0 when naming or
+ * running it failed, and -ENODEV when the task could not be created.
+ */
+int dm_device_remove_byname(const char *dev_name)
+{
+	struct dm_task *task = dm_task_create(DM_DEVICE_REMOVE);
+	int removed = 0;
+
+	if (!task)
+		return -ENODEV;
+
+	if (dm_task_set_name(task, dev_name) && dm_task_run(task))
+		removed = 1;
+
+	dm_task_update_nodes();
+	dm_task_destroy(task);
+
+	return removed;
+}
+
+/*
+ * Remove the device-mapper device whose device number is dev.
+ *
+ * The device list is searched for dev to obtain its name, which is then
+ * handed to dm_device_remove_byname().  Returns that function's result
+ * when the device was found; -ENODEV when the list task could not be
+ * created; otherwise the result of dm_task_run() on the list task.
+ */
+int dm_device_remove(uint64_t dev)
+{
+	struct dm_task *dmt;
+	struct dm_names *dmnames;
+	char name[DM_DEV_NAME_LEN];
+	int found = 0, n;
+	uint32_t next;
+	int ret = -1;
+
+	/* Look for dev_name via dev, if dev_name could be transferred here,
+	   we could jump to DM_DEVICE_REMOVE directly */
+	dmt = dm_task_create(DM_DEVICE_LIST);
+	if (!dmt) {
+		BL_LOG_ERR("dm_task creation failed\n");
+		return -ENODEV;
+	}
+
+	ret = dm_task_run(dmt);
+	if (!ret) {
+		BL_LOG_ERR("dm_task_run failed\n");
+		goto error;
+	}
+
+	dmnames = dm_task_get_names(dmt);
+	if (!dmnames || !dmnames->dev) {
+		BL_LOG_ERR("dm_task_get_names failed\n");
+		goto error;
+	}
+
+	/* Entries are chained by byte offsets; an offset of 0 marks the
+	 * last one, so the offset (not the pointer) ends the walk.
+	 */
+	do {
+		if (dmnames->dev == dev) {
+			/* Copy the name: it lives in memory owned by dmt,
+			 * which is destroyed before the name is used.
+			 */
+			n = snprintf(name, sizeof(name), "%s", dmnames->name);
+			if (n >= 0 && (size_t)n < sizeof(name))
+				found = 1;
+			break;
+		}
+		next = dmnames->next;
+		dmnames = (struct dm_names *)((char *)dmnames + next);
+	} while (next);
+
+	if (!found) {
+		BL_LOG_ERR("Could not find device\n");
+		goto error;
+	}
+
+	dm_task_update_nodes();
+
+ error:
+	dm_task_destroy(dmt);
+
+	/*Start to remove device */
+	if (found)
+		ret = dm_device_remove_byname(name);
+	return ret;
+}
+
+static unsigned long dev_count;
+
+/*
+ * Remove the devices "pnfs_vol_<n>" for n counting down from end - 2 to
+ * start.  Nothing is done when start is not below dev_count or when the
+ * range is empty.
+ */
+void dm_devicelist_remove(unsigned long start, unsigned long end)
+{
+	char dev_name[DM_DEV_NAME_LEN];
+	unsigned long idx;
+
+	if (start >= dev_count)
+		return;
+	if (end <= 1 || start >= end - 1)
+		return;
+
+	idx = end - 1;
+	while (idx > start) {
+		sprintf(dev_name, "pnfs_vol_%lu", idx - 1);
+		dm_device_remove_byname(dev_name);
+		idx--;
+	}
+}
+
+/* Free the dm_tree recorded for dev and drop its entry from the tree list. */
+void bl_dm_remove_tree(uint64_t dev)
+{
+	struct bl_dm_tree *entry = find_bl_dm_tree(dev);
+
+	if (entry) {
+		dm_tree_free(entry->tree);
+		del_from_bl_dm_tree(dev);
+	}
+}
+
+/*
+ * Create a dm_tree for dev and record it in the tree list.  Nothing
+ * happens when dev already has an entry; every failure is silent and
+ * leaves the list unchanged.
+ */
+void bl_dm_create_tree(uint64_t dev)
+{
+	struct dm_tree *tree;
+	struct bl_dm_tree *entry;
+
+	if (find_bl_dm_tree(dev))
+		return;		/*XXX: error? */
+
+	tree = dm_tree_create();
+	if (!tree)
+		return;
+
+	if (dm_tree_add_dev(tree, MAJOR(dev), MINOR(dev))) {
+		entry = malloc(sizeof(struct bl_dm_tree));
+		if (entry) {
+			entry->dev = dev;
+			entry->tree = tree;
+			entry->next = NULL;
+			add_to_bl_dm_tree(entry);
+			return;
+		}
+	}
+
+	/* adding the device or allocating the entry failed */
+	dm_tree_free(tree);
+}
+
+/*
+ * Look up the device number of the device-mapper device called dev_name.
+ * Returns the device number, or 0 when the lookup fails.
+ *
+ * NOTE(review): when the task cannot be created, -ENODEV is returned
+ * through the unsigned return type, so it arrives as a large non-zero
+ * value -- confirm that callers expect this.
+ */
+uint64_t dm_device_nametodev(char *dev_name)
+{
+	struct dm_task *task;
+	struct dm_info info;
+	int ok;
+
+	task = dm_task_create(DM_DEVICE_INFO);
+	if (!task)
+		return -ENODEV;
+
+	ok = dm_task_set_name(task, dev_name) &&
+	    dm_task_run(task) && dm_task_get_info(task, &info);
+
+	dm_task_destroy(task);
+
+	if (ok)
+		return MKDEV(info.major, info.minor);
+	return 0;
+}
+
+/*
+ * Remove the device described by dev together with its children.
+ *
+ * dev points at two consecutive uint32_t values, major then minor.
+ * Returns 0 when no tree, node or uuid is known for the device; otherwise
+ * the result of dm_tree_deactivate_children().
+ */
+int dm_device_remove_all(uint64_t *dev)
+{
+	struct bl_dm_tree *entry;
+	struct dm_tree_node *node;
+	const char *uuid;
+	uint32_t devno[2];	/* [0] = major, [1] = minor */
+	uint64_t bl_dev;
+	int ret;
+
+	memcpy(devno, dev, sizeof(devno));
+	bl_dev = MKDEV(devno[0], devno[1]);
+
+	entry = find_bl_dm_tree(bl_dev);
+	if (!entry)
+		return 0;
+
+	node = dm_tree_find_node(entry->tree, MAJOR(bl_dev), MINOR(bl_dev));
+	if (!node)
+		return 0;
+
+	uuid = dm_tree_node_get_uuid(node);
+	if (!uuid)
+		return 0;
+
+	/* remove the device itself first, then deactivate its children */
+	dm_device_remove(bl_dev);
+	ret = dm_tree_deactivate_children(node, uuid, strlen(uuid));
+	dm_task_update_nodes();
+	bl_dm_remove_tree(bl_dev);
+	return ret;
+}
+
+/* TODO: check the value for DM_DEV_NAME_LEN, DM_TYPE_LEN, DM_PARAMS_LEN */
+/*
+ * Create the device-mapper devices needed for the volume array @vols.
+ *
+ * The @num_vols entries are processed in order.  SIMPLE volumes already
+ * carry a device number and are used as-is.  For SLICE, STRIPE and
+ * CONCAT volumes a dm table is built from the devices of their
+ * sub-volumes, a device named "pnfs_vol_<n>" is created from that
+ * table, and the entry is rewritten as a BLOCK_VOLUME_PSEUDO volume
+ * holding the new device number.
+ *
+ * Returns the device number of the last volume processed, after
+ * recording a dm tree for it, or 0 on any failure.  On failure the
+ * "pnfs_vol_<n>" devices created earlier in the same call are removed.
+ */
+uint64_t dm_device_create(struct bl_volume *vols, int num_vols)
+{
+	uint64_t size, dev = 0;
+	/* first name index this call may use; cleanup starts here */
+	unsigned long count = dev_count;
+	int number, i, pos, n;
+	struct bl_volume *node;
+	char *tmp;
+	struct bl_dm_table *table = NULL;
+	struct bl_dm_table *bl_table_head = NULL;
+	unsigned int len;
+	char dev_name[DM_DEV_NAME_LEN];
+
+	/* Create pseudo device here */
+	for (number = 0; number < num_vols; number++) {
+		node = &vols[number];
+		switch (node->bv_type) {
+		case BLOCK_VOLUME_SIMPLE:
+			/* Do not need to create device here */
+			dev = node->param.bv_dev;
+			continue;
+		case BLOCK_VOLUME_SLICE:
+			table = bl_dm_table_alloc();
+			if (!table)
+				goto fail;
+			table->offset = 0;
+			table->size = node->bv_size;
+			strcpy(table->target_type, "linear");
+			if (!TYPE_HAS_DEV(node->bv_vols[0]->bv_type)) {
+				free(table);
+				goto fail;
+			}
+			dev = node->bv_vols[0]->param.bv_dev;
+			tmp = table->params;
+			if (!dm_format_dev(tmp, DM_PARAMS_LEN,
+					   MAJOR(dev), MINOR(dev))) {
+				free(table);
+				goto fail;
+			}
+			/* params is "<dev> <offset>"; fail if it gets truncated */
+			pos = strlen(tmp);
+			len = DM_PARAMS_LEN - pos;
+			n = snprintf(tmp + pos, len, " %lu",
+				     node->param.bv_offset);
+			if (n < 0 || (unsigned int)n >= len) {
+				free(table);
+				goto fail;
+			}
+			add_to_bl_dm_table(&bl_table_head, table);
+			break;
+		case BLOCK_VOLUME_STRIPE:
+			table = bl_dm_table_alloc();
+			if (!table)
+				goto fail;
+			table->offset = 0;
+			table->size = node->bv_size;
+			strcpy(table->target_type, "striped");
+			/* params is "<count> <stripe unit> " then "<dev> 0 " per member */
+			pos = snprintf(table->params, DM_PARAMS_LEN, "%d %lu ",
+				       node->bv_vol_n,
+				       node->param.bv_stripe_unit);
+			if (pos < 0 || pos >= (int)DM_PARAMS_LEN) {
+				free(table);
+				goto fail;
+			}
+			/* Repeatedly copy subdev to params */
+			tmp = table->params + pos;
+			len = DM_PARAMS_LEN - pos;
+			for (i = 0; i < node->bv_vol_n; i++) {
+				if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) {
+					free(table);
+					goto fail;
+				}
+				dev = node->bv_vols[i]->param.bv_dev;
+				if (!dm_format_dev(tmp, len, MAJOR(dev),
+						   MINOR(dev))) {
+					free(table);
+					goto fail;
+				}
+				pos = strlen(tmp);
+				tmp += pos;
+				len -= pos;
+				/*
+				 * Bounded write keeps the unsigned len from
+				 * wrapping when the buffer is nearly full.
+				 */
+				n = snprintf(tmp, len, " %d ", 0);
+				if (n < 0 || (unsigned int)n >= len) {
+					free(table);
+					goto fail;
+				}
+				tmp += n;
+				len -= n;
+			}
+			add_to_bl_dm_table(&bl_table_head, table);
+			break;
+		case BLOCK_VOLUME_CONCAT:
+			size = 0;
+			for (i = 0; i < node->bv_vol_n; i++) {
+				table = bl_dm_table_alloc();
+				if (!table)
+					goto fail;
+				/* each member starts where the previous one ended */
+				table->offset = size;
+				table->size = node->bv_vols[i]->bv_size;
+				if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) {
+					free(table);
+					goto fail;
+				}
+				strcpy(table->target_type, "linear");
+				tmp = table->params;
+				dev = node->bv_vols[i]->param.bv_dev;
+				if (!dm_format_dev(tmp, DM_PARAMS_LEN,
+						   MAJOR(dev), MINOR(dev))) {
+					free(table);
+					goto fail;
+				}
+				pos = strlen(tmp);
+				len = DM_PARAMS_LEN - pos;
+				n = snprintf(tmp + pos, len, " %d", 0);
+				if (n < 0 || (unsigned int)n >= len) {
+					free(table);
+					goto fail;
+				}
+				size += table->size;
+				add_to_bl_dm_table(&bl_table_head, table);
+			}
+			break;
+		default:
+			goto fail;
+		} /* end of switch */
+
+		/* Create dev_name here. Name of device is pnfs_vol_XXX */
+		snprintf(dev_name, sizeof(dev_name), "pnfs_vol_%lu",
+			 dev_count++);
+
+		dev = dm_single_device_create(dev_name, bl_table_head);
+		if (!dev) {
+			/*
+			 * dev_count already counts the name that just failed,
+			 * so this removes exactly the devices created earlier
+			 * in this call.  dev is 0, so 0 is returned.
+			 */
+			dm_devicelist_remove(count, dev_count);
+			goto out;
+		}
+		node->param.bv_dev = dev;
+		/*TODO: extend use with PSEUDO later */
+		node->bv_type = BLOCK_VOLUME_PSEUDO;
+
+		/* the table has been loaded; start the next volume clean */
+		if (bl_table_head)
+			bl_dm_table_free(bl_table_head);
+		bl_table_head = NULL;
+	}
+	goto out;
+
+ fail:
+	/*
+	 * Failure before a name was taken for the current volume.
+	 * dm_devicelist_remove(start, end) removes indices start..end - 2,
+	 * so end = dev_count + 1 covers every device created by this call.
+	 */
+	dm_devicelist_remove(count, dev_count + 1);
+	/* dev may hold a sub-volume's device number; do not return it */
+	dev = 0;
+ out:
+	if (bl_table_head)
+		bl_dm_table_free(bl_table_head);
+	bl_table_head = NULL;
+	if (dev)
+		bl_dm_create_tree(dev);
+	return dev;
+}
diff --git a/utils/blkmapd/etc/initd/initd.redhat b/utils/blkmapd/etc/initd/initd.redhat
new file mode 100644
index 0000000..a52250c
--- /dev/null
+++ b/utils/blkmapd/etc/initd/initd.redhat
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# description: Starts and stops the pNFS block-layout device discovery service
+#
+# processname: bl-device
+# pidfile: /var/run/pnfs-block.pid
+# config: /etc/pnfs-block.conf
+
+# Source function library.
+if [ -f /etc/init.d/functions ] ; then
+ . /etc/init.d/functions
+elif [ -f /etc/rc.d/init.d/functions ] ; then
+ . /etc/rc.d/init.d/functions
+else
+ exit 0
+fi
+
+PATH=/sbin:/bin:/usr/sbin:/usr/bin
+
+RETVAL=0
+
+# Load the blocklayoutdriver module, launch /usr/sbin/bl-device through
+# daemon, and create the subsys lock file when the launch succeeds.
+# Returns the exit status of daemon.
+start()
+{
+	echo -n $"Starting pNFS block-layout device discovery service: "
+	modprobe -q blocklayoutdriver
+	daemon /usr/sbin/bl-device
+	RETVAL=$?
+	[ $RETVAL -ne 0 ] || touch /var/lock/subsys/pnfs-block
+	echo
+	return $RETVAL
+}
+
+# Kill the bl-device process and remove its pid file.  The subsys lock
+# file is removed only when killproc succeeds.
+# Returns the exit status of killproc.
+stop()
+{
+	echo -n $"Stopping pNFS block-layout device discovery service: "
+	killproc bl-device 2> /dev/null
+	# Capture the status right away: taking $? after the rm below would
+	# record rm's result (always 0 with -f) and hide a failed kill.
+	RETVAL=$?
+	rm -f /var/run/pnfs-block.pid
+	if [ $RETVAL -eq 0 ]; then
+		rm -f /var/lock/subsys/pnfs-block
+		echo_success
+	else
+		echo_failure
+	fi
+	echo
+	return $RETVAL
+}
+
+# Stop the service, then start it again.
+restart()
+{
+	stop; start
+}
+
+# Dispatch on the requested action.  Each action leaves its result in
+# RETVAL, which becomes the exit status of the script.
+case "$1" in
+	start)
+		start
+		;;
+	stop)
+		stop
+		;;
+	restart)
+		restart
+		;;
+	status)
+		status pnfs-block
+		# Without this the script exits 0 whatever status reports.
+		RETVAL=$?
+		;;
+	*)
+		echo $"Usage: $0 {start|stop|restart|status}"
+		exit 1
+esac
+
+exit $RETVAL
diff --git a/utils/blkmapd/etc/pnfs-block.conf b/utils/blkmapd/etc/pnfs-block.conf
new file mode 100644
index 0000000..da70d94
--- /dev/null
+++ b/utils/blkmapd/etc/pnfs-block.conf
@@ -0,0 +1,10 @@
+# This is an example config file
+
+# Look at all /dev/sd* devices
+# /dev/sd or /dev/sd*
+/dev/sd*
+
+# Look at all /dev/mapper/* devices
+# /dev/mapper/* or
+# /dev/mapper/
+/dev/mapper/*
--
1.7.0.4
^ permalink raw reply related [flat|nested] 5+ messages in thread[parent not found: <20100721223119.GA6618-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org>]
* Re: [PATCH] Add complex block layout discovery and mapping daemon [not found] ` <20100721223119.GA6618-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org> @ 2010-07-22 19:35 ` Benny Halevy 0 siblings, 0 replies; 5+ messages in thread From: Benny Halevy @ 2010-07-22 19:35 UTC (permalink / raw) To: Jim Rees, Haiying Tang; +Cc: linux-nfs, Steve Dickson On Jul. 22, 2010, 1:31 +0300, Jim Rees <rees@umich.edu> wrote: > Signed-off-by: Haiying Tang <Tang_Haiying@emc.com> > Signed-off-by: Eric Anderle <eanderle@umich.edu> > Signed-off-by: Jim Rees <rees@umich.edu> > --- > configure.ac | 4 + > utils/Makefile.am | 4 + > utils/blkmapd/Makefile.am | 63 ++++ > utils/blkmapd/atomicio.c | 58 ++++ > utils/blkmapd/cfg.c | 272 +++++++++++++++++ > utils/blkmapd/cfg.h | 48 +++ > utils/blkmapd/device-discovery.c | 542 ++++++++++++++++++++++++++++++++++ > utils/blkmapd/device-discovery.h | 162 ++++++++++ > utils/blkmapd/device-inq.c | 235 +++++++++++++++ > utils/blkmapd/device-process.c | 391 ++++++++++++++++++++++++ > utils/blkmapd/dm-device.c | 509 +++++++++++++++++++++++++++++++ > utils/blkmapd/etc/initd/initd.redhat | 76 +++++ > utils/blkmapd/etc/pnfs-block.conf | 10 + > 13 files changed, 2374 insertions(+), 0 deletions(-) > create mode 100644 utils/blkmapd/Makefile.am > create mode 100644 utils/blkmapd/atomicio.c > create mode 100644 utils/blkmapd/cfg.c > create mode 100644 utils/blkmapd/cfg.h > create mode 100644 utils/blkmapd/device-discovery.c > create mode 100644 utils/blkmapd/device-discovery.h > create mode 100644 utils/blkmapd/device-inq.c > create mode 100644 utils/blkmapd/device-process.c > create mode 100644 utils/blkmapd/dm-device.c > create mode 100644 utils/blkmapd/etc/initd/initd.redhat > create mode 100644 utils/blkmapd/etc/pnfs-block.conf > > diff --git a/configure.ac b/configure.ac > index 4d12715..f57cd45 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -64,12 +64,15 @@ AC_ARG_ENABLE(nfsv4, > enable_nfsv4=yes) > if test "$enable_nfsv4" = yes; then > 
AC_DEFINE(NFS4_SUPPORTED, 1, [Define this if you want NFSv4 support compiled in]) > + BLKMAPD=blkmapd > IDMAPD=idmapd > SPNFSD=spnfsd > else > enable_nfsv4= > + BLKMAPD= > IDMAPD= > fi > + AC_SUBST(BLKMAPD) > AC_SUBST(IDMAPD) > AC_SUBST(enable_nfsv4) > AM_CONDITIONAL(CONFIG_NFSV4, [test "$enable_nfsv4" = "yes"]) > @@ -429,6 +432,7 @@ AC_CONFIG_FILES([ > tools/mountstats/Makefile > tools/nfs-iostat/Makefile > utils/Makefile > + utils/blkmapd/Makefile > utils/exportfs/Makefile > utils/gssd/Makefile > utils/idmapd/Makefile > diff --git a/utils/Makefile.am b/utils/Makefile.am > index c777d21..c33835a 100644 > --- a/utils/Makefile.am > +++ b/utils/Makefile.am > @@ -10,6 +10,10 @@ if CONFIG_NFSV4 > OPTDIRS += spnfsd > endif > > +if CONFIG_NFSV4 > +OPTDIRS += blkmapd > +endif > + > if CONFIG_GSS > OPTDIRS += gssd > endif > diff --git a/utils/blkmapd/Makefile.am b/utils/blkmapd/Makefile.am > new file mode 100644 > index 0000000..e8c9fc0 > --- /dev/null > +++ b/utils/blkmapd/Makefile.am > @@ -0,0 +1,63 @@ > +## Process this file with automake to produce Makefile.in > + > +#man8_MANS = blkmapd.man > + > +RPCPREFIX = rpc. > +KPREFIX = @kprefix@ > +sbin_PROGRAMS = blkmapd > + > +blkmapd_SOURCES = \ > + atomicio.c \ > + cfg.c \ > + device-discovery.c \ > + device-inq.c \ > + device-process.c \ > + dm-device.c \ > + \ > + cfg.h \ > + device-discovery.h > + > +blkmapd_LDADD = -ldevmapper ../../support/nfs/libnfs.a > + > +MAINTAINERCLEANFILES = Makefile.in > + > +####################################################################### > +# The following allows the current practice of having > +# daemons renamed during the install to include RPCPREFIX > +# and the KPREFIX > +# This could all be done much easier with program_transform_name > +# ( program_transform_name = s/^/$(RPCPREFIX)$(KPREFIX)/ ) > +# but that also renames the man pages, which the current > +# practice does not do. 
> +install-exec-hook: > + (cd $(DESTDIR)$(sbindir) && \ > + for p in $(sbin_PROGRAMS); do \ > + mv -f $$p$(EXEEXT) $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\ > + done) > +uninstall-hook: > + (cd $(DESTDIR)$(sbindir) && \ > + for p in $(sbin_PROGRAMS); do \ > + rm -f $(RPCPREFIX)$(KPREFIX)$$p$(EXEEXT) ;\ > + done) > + > + > +# XXX This makes some assumptions about what automake does. > +# XXX But there is no install-man-hook or install-man-local. > +install-man: install-man8 install-man-links > +uninstall-man: uninstall-man8 uninstall-man-links > + > +install-man-links: > + (cd $(DESTDIR)$(man8dir) && \ > + for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ > + inst=`echo $$m | sed -e 's/man$$/8/'`; \ > + rm -f $(RPCPREFIX)$$inst ; \ > + $(LN_S) $$inst $(RPCPREFIX)$$inst ; \ > + done) > + > +uninstall-man-links: > + (cd $(DESTDIR)$(man8dir) && \ > + for m in $(man8_MANS) $(dist_man8_MANS) $(nodist_man8_MANS); do \ > + inst=`echo $$m | sed -e 's/man$$/8/'`; \ > + rm -f $(RPCPREFIX)$$inst ; \ > + done) > + > diff --git a/utils/blkmapd/atomicio.c b/utils/blkmapd/atomicio.c > new file mode 100644 > index 0000000..3c3c864 > --- /dev/null > +++ b/utils/blkmapd/atomicio.c > @@ -0,0 +1,58 @@ > +/* > + * Copyright (c) 2002 Marius Aamodt Eriksen <marius@monkey.org> > + * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved. > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. 
> + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > + > +#include <sys/types.h> > +#include <unistd.h> > +#include <errno.h> > + > +#ifdef HAVE_CONFIG_H > +#include "config.h" > +#endif /* HAVE_CONFIG_H */ We don't need this in nfs-utils... > + > +/* > + * ensure all of data on socket comes through. f==read || f==write > + */ > +ssize_t atomicio(ssize_t(*f) (int, void *, size_t), int fd, void *_s, size_t n) Strong type checking won't like calling this function with write() as an argument, as it's declared with a const void *buf. > +{ > + char *s = _s; > + ssize_t res, pos = 0; > + > + while (n > pos) { > + res = (f) (fd, s + pos, n - pos); > + switch (res) { > + case -1: > + if (errno == EINTR || errno == EAGAIN) > + continue; /* FALLTHRU */ > + case 0: > + if (pos != 0) > + return pos; so it's not really atomic in this case :-/ why not return the error? > + return res; So on EOF this function returns 0 regardless of how much it read until it reached there? Oh well, this function could just return the number of bytes it read/written or -1 on error. 
> + default: > + pos += res; > + } > + } > + return pos; > +} > diff --git a/utils/blkmapd/cfg.c b/utils/blkmapd/cfg.c > new file mode 100644 > index 0000000..b303352 > --- /dev/null > +++ b/utils/blkmapd/cfg.c > @@ -0,0 +1,272 @@ > +/* > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
> + */ > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <linux/errno.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include "device-discovery.h" > +#include "cfg.h" > + > +struct scan_root_list *scan_root_list_head; > + > +void bl_release_list(void) > +{ > + struct scan_root_list *root = scan_root_list_head; > + struct scan_device_list *disk; > + > + while (root) { > + disk = root->disk; > + while (disk) { > + root->disk = disk->next; > + /*free disk */ missing space after '*' actually, this comment is useless anyway doesn't give you any more information than free(disk) :-) > + free(disk->name); > + free(disk); > + disk = root->disk; > + } > + scan_root_list_head = root->next; > + /*free root */ ditto > + free(root->name); > + free(root); > + root = scan_root_list_head; > + } > + return; this return statement is superfluous as well... > +} > + > +struct scan_root_list *bl_alloc_root_list(char *name, unsigned int len) > +{ > + struct scan_root_list *root; > + > + root = malloc(sizeof(struct scan_root_list)); > + if (!root) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + return NULL; > + } > + > + root->name = malloc(len + 1); > + if (!root->name) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + goto out; > + } > + strncpy(root->name, name, len); > + root->name[len] = '\0'; That's equivalent to root->name = strndup(name, len) > + root->next = scan_root_list_head; > + root->all_disk = 0; > + scan_root_list_head = root; > + > + return root; > + out: Since this is the error path better call the label accordingly. > + if (root) > + free(root); root will never be NULL with the current implementation. Why not move BL_LOG_ERR here and goto err also on the first failure? 
> + return NULL; > +} > + > +void bl_alloc_device_list(struct scan_root_list *root, char *name, > + unsigned int len) > +{ > + struct scan_device_list *device; > + > + device = malloc(sizeof(struct scan_device_list)); > + if (!device) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + return; > + } > + > + device->name = malloc(len + 1); > + if (!device->name) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + goto out; > + } > + strncpy(device->name, name, len); > + device->name[len] = '\0'; ditto > + device->next = root->disk; > + root->disk = device; > + return; > + out: > + if (device) > + free(device); ditto > + return; > +} > + > +void bl_set_default_conf(void) > +{ > + struct scan_root_list *root; > + > + bl_release_list(); > + > + root = bl_alloc_root_list("/dev", 4); > + if (root) > + bl_alloc_device_list(root, "sd", 2); > + > + root = bl_alloc_root_list("/dev/mapper", 11); I'd consider defining these names more formally and using either compile- (sizeof) or run-time (strlen) way to determine their length. This is too fragile and error prone in case someone changes the names in the future. > + if (root) > + root->all_disk = 1; > + return; Hmm, better return an error if allocation failed. > +} > + > +void bl_insert_device_list(struct scan_root_list *root, char *name, > + unsigned int len) > +{ > + struct scan_device_list *device = root->disk; > + /* Check whether this device has been inserted */ > + while (device) { > + if (device->name && !strcmp(device->name, name)) Can device->name ever be NULL? 
Also, name might not be null terminated, better use strncmp() but root->name might also be longer than len, so it should be: if (!strncmp(root->name, name, len) && strlen(root->name) <= len) > + return; > + device = device->next; > + } > + > + bl_alloc_device_list(root, name, len); > + > + return; ditto (return status) > +} > + > +struct scan_root_list *bl_insert_root_list(char *name, unsigned int len) > +{ > + struct scan_root_list *root = scan_root_list_head; > + > + /* Check whether this root has been inserted */ > + while (root) { > + if (!strcmp(root->name, name)) ditto > + return root; > + root = root->next; > + } > + > + root = bl_alloc_root_list(name, len); > + return root; just return the function result, not need for the intermediate assignment. > +} > + > +void bl_parse_line(char *line, size_t len, struct scan_root_list **bl_root) > +{ > + char *root; > + char *device; > + char *end; > + wanna skip leading whitespaces? > + if (*line == '#') > + return; > + > + root = line; > + while (((*root == ' ') || (*root == '\t')) && (root < line + len)) isblank(*root) > + root++; this looks like a for look to me, no? :) for (root = line; (root < line + len) && isblank(*root); root++) ; > + if (root == line + len) > + return; > + > + end = line + len; can move that before previous loop and use it there > + while (((*end == '\n') || (*end == ' ') || (*end == '\t') || > + (*end == '\0')) && (end > root)) { (*end == '\0') || isspace(*root) > + end--; > + } > + /* For lines ended up with "/" or "/""*": add as a dir root */ the comment is written in a confusing way (because of the C comment avoidance trick) how about: + /* For lines ended up with '/' or '/','*': add as a dir root */ > + if ((*end == '/') || > + ((*end == '*') && (end - root >= 1) && (*(end - 1) == '/'))) { > + if (*end == '*') > + end--; > + *bl_root = bl_insert_root_list(root, end - root + 1); > + if (*bl_root) > + (*bl_root)->all_disk = 1; how about adding the all_disk flag as a parameter? 
> + return; status? > + } > + > + /* Other lines: add as a device */ > + device = end; > + while ((*device != '/') && (device > root)) > + device--; > + if (device == root) > + return; so that's an invalid line? better print out an error > + *bl_root = bl_insert_root_list(root, device - root + 1); > + if (*end == '*') > + end--; so the terminating '*' doesn't really matter for devices, right? > + if (*bl_root) if not, you should return (an error) earlier > + bl_insert_device_list(*bl_root, device + 1, end - device); > + > + return; > +} > + > +void bl_parse_conf(char *buf, size_t size) > +{ > + char *tmp = buf, *line = buf, *end = buf + size; > + struct scan_root_list *bl_root = NULL; > + > + while (tmp < end) { > + if (*tmp == '\n') { > + *tmp = '\0'; > + bl_parse_line(line, tmp - line, &bl_root); > + line = tmp + 1; > + } so we lose the last line if it's not terminated with a newline? I wonder of just using getline wouldn't be simpler... > + tmp++; > + } > + > + return; > +} > + > +int bl_cfg_init(void) > +{ > + struct stat sb; > + size_t size; > + int fd; > + char *buf = NULL; > + int ret = -ENOENT; > + > + if (stat(bl_conf_path, &sb) == 0) { > + ret = -EPERM; > + size = sb.st_size; > + if (!size) > + goto err_out; > + > + fd = open(bl_conf_path, O_RDONLY, 0); > + if (fd == -1) { > + BL_LOG_ERR("File %s open failed\n", bl_conf_path); > + goto err_out; > + } > + > + buf = calloc(size, sizeof(char)); > + if (!buf) { > + close(fd); > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + ret = -ENOMEM; > + goto err_out; > + } > + > + if (atomicio(read, fd, buf, size) != size) { > + close(fd); > + BL_LOG_ERR("Read file %s failed\n", bl_conf_path); > + goto err_out; > + } > + > + ret = 0; > + close(fd); > + bl_parse_conf(buf, size); > + if (!scan_root_list_head) > + ret = -EINVAL; > + } else > + bl_set_default_conf(); > + err_out: > + if (buf) > + free(buf); > + return ret; > +} > diff --git a/utils/blkmapd/cfg.h b/utils/blkmapd/cfg.h > new file mode 100644 > index 
0000000..8d7bcf4 > --- /dev/null > +++ b/utils/blkmapd/cfg.h > @@ -0,0 +1,48 @@ > +/* > + * bl-cfg.h > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#ifndef BL_CFG_H > +#define BL_CFG_H > + > +#define bl_conf_path "/etc/pnfs-block.conf" I'd consider having the default in a variable and allowing to override it as an option. 
Also, since the daemon is called blkmapd it makes more sense to call the config file blkmapd.conf > + > +extern struct scan_root_list *scan_root_list_head; > + > +struct scan_device_list { > + struct scan_device_list *next; > + char *name; keeping the name length could be useful for quick comparisons (you have it anyway on insertion) > +}; > + > +struct scan_root_list { > + struct scan_root_list *next; > + unsigned int all_disk; > + char *name; ditto > + struct scan_device_list *disk; > +}; > + > +int bl_cfg_init(void); > + > +#endif > diff --git a/utils/blkmapd/device-discovery.c b/utils/blkmapd/device-discovery.c > new file mode 100644 > index 0000000..79cb2b5 > --- /dev/null > +++ b/utils/blkmapd/device-discovery.c > @@ -0,0 +1,542 @@ > +/* > + * device-discovery.c: main function, discovering device and processing > + * pipe request from kernel. > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#define _LARGEFILE64_SOURCE Is this the right place to define it? Why not in the Makefile for all files in the binary? > +#include <stdlib.h> > +#include <stdio.h> > +#include <string.h> > +#include <dirent.h> > +#include <ctype.h> > +#include <linux/kdev_t.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/ioctl.h> > +#include <sys/mount.h> > +#include <sys/select.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <libgen.h> > +#include <errno.h> > +#include <scsi/scsi.h> > +#include <scsi/scsi_ioctl.h> > +#include <scsi/sg.h> > +#include "device-discovery.h" > +#include "cfg.h" > + > +#define BL_PIPE_FILE "/var/lib/nfs/rpc_pipefs/bl_device_pipe" > +#define PID_FILE "/var/run/pnfs-block.pid" s/pnfs-block/blkmapd/ to conform with the binary name (and use a symbolic constant for it defined in some central header file if it makes sense) > + > +struct bl_disk *visible_disk_list; > + > +struct bl_disk_path *bl_get_path(const char *filepath, > + struct bl_disk_path *paths) > +{ > + struct bl_disk_path *tmp = paths; > + while (tmp) { > + if (!strcmp(tmp->full_path, filepath)) > + break; > + tmp = tmp->next; > + } > + return tmp; > +} > + > +/* Check whether valid_path is a substring(partition) of path */ > +int bl_is_partition(struct bl_disk_path *valid_path, struct bl_disk_path *path) > +{ > + if (!strncmp(valid_path->full_path, path->full_path, > + strlen(valid_path->full_path))) > + return 1; > + > + return 
0; > +} > + > +/* > + * For multipath devices, devices state could be PASSIVE/ACTIVE/PSEUDO, > + * where PSEUDO > ACTIVE > PASSIVE. Device with highest state is used to > + * create pseudo device. So if state is higher, the device path needs to > + * be updated. > + * If device-mapper multipath support is a must, pseudo devices should > + * exist for each multipath device. If not, active device path will be > + * chosen for device creation. > + * Treat partition as invalid path. > + */ > +int bl_update_path(struct bl_disk_path *path, enum bl_path_state_e state, > + struct bl_disk *disk) > +{ > + struct bl_disk_path *valid_path = disk->valid_path; > + > + if (valid_path) { > + if (valid_path->state >= state) { > + if (bl_is_partition(valid_path, path)) > + return 0; > + } > + } can there be an else case? > + return 1; > +} > + > +void bl_release_disk(void) > +{ > + struct bl_disk *disk = visible_disk_list, *tmp; > + struct bl_disk_path *path = NULL; > + > + while (disk) { > + path = disk->paths; > + while (path) { > + disk->paths = path->next; > + free(path->full_path); > + free(path); > + path = disk->paths; > + } > + if (disk->serial) can it be NULL? > + free(disk->serial); > + tmp = disk->next; you could use visible_disk_list for tmp, no? 
> + free(disk); > + disk = tmp; > + } > + > + visible_disk_list = NULL; > +} > + > +void bl_add_disk(char *filepath) > +{ > + struct bl_disk *disk = NULL; > + struct bl_disk *tmp = visible_disk_list; > + int fd = 0; > + struct stat sb; > + off_t size = 0; > + struct bl_serial *serial = NULL; > + enum bl_path_state_e ap_state = BL_PATH_STATE_PASSIVE; > + struct bl_disk_path *diskpath = NULL, *path = NULL; > + dev_t dev; > + > + fd = open(filepath, O_RDONLY | O_LARGEFILE); > + if (fd < 0) > + return; > + > + if (fstat(fd, &sb)) { > + close(fd); > + return; > + } > + > + if (!sb.st_size) > + ioctl(fd, BLKGETSIZE, &size); > + else > + size = sb.st_size; > + > + if (!size) { > + close(fd); > + return; > + } > + > + dev = sb.st_rdev; > + > + serial = bldev_read_serial(fd, filepath); > + while (tmp) { > + /*Already scanned or a partition? > + *XXX: if released each time, maybe not need to compare please add space after "/*" (can use kernel scripts/checkpatch.pl) > + */ > + if ((serial->len == tmp->serial->len) && > + (memcmp(serial->data, tmp->serial->data, serial->len) == > + 0)) { > + diskpath = bl_get_path(filepath, tmp->paths); > + break; > + } > + tmp = tmp->next; > + } > + > + if (tmp && diskpath) { why not call tmp with a more meaningful name? > + close(fd); > + return; > + } > + > + bldev_read_ap_state(fd, &ap_state); > + close(fd); > + > + /* > + * Not sure how to identify a pseudo device created by > + * device-mapper, so leave /dev/mapper for now. > + */ > + if (strncmp(filepath, "/dev/mapper", 11) == 0) > + ap_state = BL_PATH_STATE_PSEUDO; > + > + /*add path */ > + path = malloc(sizeof(struct bl_disk_path)); > + if (!path) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + goto out_err; > + } > + path->next = NULL; > + path->state = ap_state; > + path->full_path = strdup(filepath); > + if (!path->full_path) > + goto out_err; > + > + if (!tmp) { /*add disk */ by here, I managed to forget what tmp is all about :) please give it a useful name... 
> + disk = malloc(sizeof(struct bl_disk)); > + if (!disk) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + goto out_err; > + } > + disk->next = visible_disk_list; > + disk->dev = dev; > + disk->size = size; > + disk->serial = serial; > + disk->valid_path = path; > + disk->paths = path; > + visible_disk_list = disk; > + } else { > + path->next = tmp->paths; > + tmp->paths = path; > + /*check whether we need to update disk info */ > + if (bl_update_path(path, path->state, tmp)) { > + tmp->dev = dev; > + tmp->size = size; > + tmp->valid_path = path; > + } > + } > + return; > + > + out_err: > + if (path) { > + if (path->full_path) > + free(path->full_path); again, these should never be NULL, no? > + free(path); > + } > + if (disk) { > + if (disk->serial) > + free(disk->serial); ditto > + free(disk); or just define a bl_free that checks for NULL before calling free... > + } > + return; > +} > + > +void bl_devicescan(const char *filename, struct scan_root_list *root) > +{ > + /*scan all disks */ > + char filepath[PATH_MAX]; > + struct scan_device_list *device; > + > + if (!strcmp(filename, ".") || !strcmp(filename, "..")) > + return; > + > + memset(filepath, 0, PATH_MAX); > + if (strlen(filename) < (PATH_MAX - strlen(root->name) - 2)) > + sprintf(filepath, "%s/%s", root->name, filename); > + else > + return; print error for name too long? > + if (root->all_disk) > + goto valid; > + > + device = root->disk; > + while (device) { > + /* If device->name is a subset of filename, this disk should be > + * valid for scanning. > + * For example, device->name is "sd", filename is "sda". > + */ > + if (device->name > + && !memcmp(filename, device->name, strlen(device->name))) > + goto valid; > + device = device->next; > + } > + > + return; > + > + valid: > + /* > + * sg device is not a real device, but a device created according > + * to each scsi device. It won't be used for pseudo device creation. > + * I moved it here, so that sg devices will not be scanned. 
> + */ > + if (!strncmp(filename, "/dev/sg", 7)) I'm confused... Is /dev part of filename or root->name? > + return; > + bl_add_disk(filepath); > + return; > +} > + > +/* > + * Delete disks with multi-paths and no pseudo device path. > + * > + * If only passive device or more than one active devices available, > + * I consider it as error since multipath of device-mapper should have worked > + * and pseudo device should have been created. > + */ > +void bl_del_invalid_disk(void) > +{ > + struct bl_disk *disk = visible_disk_list, *pre; > + struct bl_disk_path *path = NULL; > + > + pre = disk; > + while (disk) { > + if ((disk->valid_path->state == BL_PATH_STATE_PASSIVE) || > + ((disk->valid_path->state == BL_PATH_STATE_ACTIVE) && > + (disk->paths->next))) { > + path = disk->paths; > + while (path) { > + disk->paths = path->next; > + free(path->full_path); > + free(path); > + path = disk->paths; > + } You could refactor the code a bit for these kind of loops... > + if (disk->serial) > + free(disk->serial); > + if (pre == visible_disk_list) { > + visible_disk_list = disk->next; > + free(disk); > + disk = visible_disk_list; > + } else { > + pre->next = disk->next; > + free(disk); > + disk = pre->next; > + } btw, if pre would be a ** you could just always set *pre to disk->next, right? > + } else { > + pre = disk; > + disk = disk->next; > + } > + } > + return; > +} > + > +int bl_discover_devices(void) > +{ > + DIR *dir; > + struct dirent *dp; > + struct scan_root_list *root = scan_root_list_head; > + /*release previous list */ > + bl_release_disk(); > + /*scan all disks */ > + while (root) { > + dir = opendir(root->name); > + if (dir == NULL) { > + root = root->next; > + continue; > + } > + > + while ((dp = readdir(dir)) != NULL) > + bl_devicescan(dp->d_name, root); > + > + root = root->next; > + closedir(dir); > + } > + > +#ifdef DEL_INVALID_DISKS > + bl_del_invalid_disk(); > +#endif This is dead code. 
Can you please keep it in your own git repository or enable it if it is any good? :-) > + > + return 0; > +} > + > +/* process kernel request > + * return 0: request processed, and no more request waiting; > + * return 1: request processed, and more requests waiting; > + * return < 0: error > + */ > +int bl_disk_inquiry_process(int fd) > +{ > + int ret = 0; > + struct pipefs_hdr *head = NULL, *tmp; > + char *buf = NULL; > + uint32_t major, minor; > + uint16_t buflen; > + unsigned int len = 0; > + > + head = calloc(1, sizeof(struct pipefs_hdr)); > + if (!head) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + return -ENOMEM; > + } > + > + /*read request */ > + if (atomicio(read, fd, head, sizeof(*head)) != sizeof(*head)) { > + /* Note that an error in this or the next read is pretty > + * catastrophic, as there is no good way to resync into > + * the pipe's stream. > + */ > + BL_LOG_ERR("Read pipefs head error!\n"); > + ret = -EIO; > + goto out; > + } > + > + buflen = head->totallen - sizeof(*head); > + buf = malloc(buflen); > + if (!buf) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + ret = -ENOMEM; > + goto out; > + } > + > + if (atomicio(read, fd, buf, buflen) != buflen) { > + BL_LOG_ERR("Read pipefs content error!\n"); > + ret = -EIO; > + goto out; > + } > + > + head->status = BL_DEVICE_REQUEST_PROC; > + switch (head->type) { > + case BL_DEVICE_MOUNT: > + if (!process_deviceinfo(buf, buflen, &major, &minor)) { > + head->status = BL_DEVICE_REQUEST_ERR; > + goto out; > + } > + tmp = realloc(head, sizeof(major) + sizeof(minor) + > + sizeof(struct pipefs_hdr)); > + if (!tmp) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + ret = -ENOMEM; > + goto out; > + } > + head = tmp; > + memcpy((void *)head + sizeof(struct pipefs_hdr), > + &major, sizeof(major)); > + memcpy((void *)head + sizeof(struct pipefs_hdr) + sizeof(major), > + &minor, sizeof(minor)); > + len = sizeof(major) + sizeof(minor); > + break; > + case BL_DEVICE_UMOUNT: > + if 
(!dm_device_remove_all((uint64_t *) buf)) > + head->status = BL_DEVICE_REQUEST_ERR; > + bl_discover_devices(); > + break; > + default: > + head->status = BL_DEVICE_REQUEST_ERR; > + } > + > + head->totallen = sizeof(struct pipefs_hdr) + len; > + /* write to pipefs */ > + if (atomicio((void *)write, fd, head, head->totallen) this just calls for atomic_read, atomic_write because of the type cast. (or not "atomic", as it's not... I'd call it readn()/writen()...) > + != head->totallen) { > + BL_LOG_ERR("Write pipefs error!\n"); > + ret = -EIO; > + } > + > + out: > + if (buf) > + free(buf); > + if (head) > + free(head); > + return ret; > +} > + > +/*TODO: set bl_process_stop to 1 in command*/ > +unsigned int bl_process_stop; volatile maybe? > + > +int bl_run_disk_inquiry_process(int fd) > +{ > + fd_set rset; > + struct timeval tv; > + int ret; > + > + bl_process_stop = 0; > + > + for (;;) { > + if (bl_process_stop) > + return 1; > + FD_ZERO(&rset); > + FD_SET(fd, &rset); > + ret = 0; > + tv.tv_sec = BL_DEVICE_DISCOVERY_INTERVAL; > + switch (select(fd + 1, &rset, NULL, NULL, &tv)) { > + case -1: > + if (errno == EINTR) > + continue; > + else { > + ret = -errno; > + goto out; > + } > + case 0: > + goto out; > + default: > + if (FD_ISSET(fd, &rset)) > + ret = bl_disk_inquiry_process(fd); > + } > + } > + out: > + return ret; > +} > + > +/* Daemon */ > +int main(void) > +{ > + int fd, ret = 1; > + struct stat statbuf; > + char pidbuf[64]; > + > + if (!stat(PID_FILE, &statbuf)) { > + fprintf(stderr, "Pid file already existed\n"); > + return -1; > + } > + > + if (daemon(0, 0) != 0) { > + fprintf(stderr, "Daemonize failed\n"); > + return -1; > + } > + > + openlog("pnfs-block", LOG_PID, 0); ditto using the binary name Benny > + fd = open(PID_FILE, O_WRONLY | O_CREAT, 0644); > + if (fd < 0) { > + BL_LOG_ERR("Create pid file failed\n"); > + return -1; > + } > + > + if (lockf(fd, F_TLOCK, 0) < 0) { > + BL_LOG_ERR("Lock pid file failed\n"); > + close(fd); > + return -1; > + } > + 
ftruncate(fd, 0); > + sprintf(pidbuf, "%d\n", getpid()); > + write(fd, pidbuf, strlen(pidbuf)); > + > + /*open pipe file */ > + fd = open(BL_PIPE_FILE, O_RDWR); > + if (fd < 0) { > + BL_LOG_ERR("open pipe file error\n"); > + return -1; > + } > + > + ret = bl_cfg_init(); > + if (ret < 0) { > + if (ret == -ENOENT) > + BL_LOG_WARNING("Config file not exist, use default\n"); > + else { > + BL_LOG_ERR("Open/read Block pNFS config file error\n"); > + return -1; > + } > + } > + > + while (1) { > + /*discover device when needed */ > + bl_discover_devices(); > + > + ret = bl_run_disk_inquiry_process(fd); > + if (ret < 0) { > + /* what should we do with process error? */ > + BL_LOG_ERR("inquiry process return %d\n", ret); > + } > + } > + close(fd); > + return ret; > +} > diff --git a/utils/blkmapd/device-discovery.h b/utils/blkmapd/device-discovery.h > new file mode 100644 > index 0000000..9f87ebe > --- /dev/null > +++ b/utils/blkmapd/device-discovery.h > @@ -0,0 +1,162 @@ > +/* > + * bl-device-discovery.h > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#ifndef BL_DEVICE_DISCOVERY_H > +#define BL_DEVICE_DISCOVERY_H > + > +#define BL_DEVICE_DISCOVERY_INTERVAL 60 > + > +#include <stdint.h> > +#include <syslog.h> > + > +enum blk_vol_type { > + BLOCK_VOLUME_SIMPLE = 0, /* maps to a single LU */ > + BLOCK_VOLUME_SLICE = 1, /* slice of another volume */ > + BLOCK_VOLUME_CONCAT = 2, /* concatenation of multiple volumes */ > + BLOCK_VOLUME_STRIPE = 3, /* striped across multiple volumes */ > + BLOCK_VOLUME_PSEUDO = 4, > +}; > + > +/* All disk offset/lengths are stored in 512-byte sectors */ > +struct bl_volume { > + uint32_t bv_type; > + off_t bv_size; > + struct bl_volume **bv_vols; > + int bv_vol_n; > + union { > + dev_t bv_dev; /*for BLOCK_VOLUME_SIMPLE(PSEUDO) */ > + off_t bv_stripe_unit; /*for BLOCK_VOLUME_STRIPE(CONCAT) */ > + off_t bv_offset; /*for BLOCK_VOLUME_SLICE */ > + } param; > +}; > + > +struct bl_sig_comp { > + int64_t bs_offset; /* In bytes */ > + uint32_t bs_length; /* In bytes */ > + char *bs_string; > +}; > + > +/* Maximum number of signatures components in a simple volume */ > +# define BLOCK_MAX_SIG_COMP 16 > + > +struct bl_sig { > + int si_num_comps; > + struct bl_sig_comp si_comps[BLOCK_MAX_SIG_COMP]; > +}; > + > +/* > + * Multipath support: ACTIVE or PSEUDO device is valid, > + * PASSIVE is a standby for ACTIVE. 
> + */ > +enum bl_path_state_e { > + BL_PATH_STATE_PASSIVE = 1, > + BL_PATH_STATE_ACTIVE = 2, > + BL_PATH_STATE_PSEUDO = 3, > +}; > + > +struct bl_serial { > + int len; > + char *data; > +}; > + > +struct bl_disk_path { > + struct bl_disk_path *next; > + char *full_path; > + enum bl_path_state_e state; > +}; > + > +struct bl_disk { > + struct bl_disk *next; > + struct bl_serial *serial; > + dev_t dev; > + off_t size; > + struct bl_disk_path *valid_path; > + struct bl_disk_path *paths; > +}; > + > +struct bl_dev_id { > + unsigned char type; > + unsigned char ids; > + unsigned char reserve; > + unsigned char len; > + char data[0]; > +}; > + > +struct pipefs_hdr { > + uint32_t msgid; > + uint8_t type; > + uint8_t flags; > + uint16_t totallen; /* length of entire message, including hdr */ > + uint32_t status; > +}; > + > +#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ > +#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices */ > +#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ > +#define BL_DEVICE_REQUEST_PROC 0x1 /* User process succeeds */ > +#define BL_DEVICE_REQUEST_ERR 0x2 /* User process fails */ > + > +uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes); > + > +#define BLK_READBUF(p, e, nbytes) do { \ > + p = blk_overflow(p, e, nbytes); \ > + if (!p) {\ > + goto out_err;\ > + } \ > +} while (0) > + > +#define READ32(x) (x) = ntohl(*p++) > + > +#define READ64(x) do { \ > + (x) = (uint64_t)ntohl(*p++) << 32; \ > + (x) |= ntohl(*p++); \ > +} while (0) > + > +#define READ_SECTOR(x) do { \ > + READ64(tmp); \ > + if (tmp & 0x1ff) { \ > + goto out_err; \ > + } \ > + (x) = tmp >> 9; \ > +} while (0) > + > +extern struct bl_disk *visible_disk_list; > +uint64_t dm_device_create(struct bl_volume *vols, int num_vols); > +int dm_device_remove_all(uint64_t *dev); > +uint64_t process_deviceinfo(const char *dev_addr_buf, > + unsigned int dev_addr_len, > + uint32_t *major, uint32_t *minor); > + > +extern ssize_t atomicio(ssize_t(*f) (int, void *, 
size_t), > + int fd, void *_s, size_t n); > +extern struct bl_serial *bldev_read_serial(int fd, const char *filename); > +extern void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out); > +extern int bl_discover_devices(void); > + > +#define BL_LOG_WARNING(fmt...) syslog(LOG_WARNING, fmt) > +#define BL_LOG_ERR(fmt...) syslog(LOG_ERR, fmt) > +#define BL_LOG_DEBUG(fmt...) syslog(LOG_DEBUG, fmt) > +#endif > diff --git a/utils/blkmapd/device-inq.c b/utils/blkmapd/device-inq.c > new file mode 100644 > index 0000000..ff38fd6 > --- /dev/null > +++ b/utils/blkmapd/device-inq.c > @@ -0,0 +1,235 @@ > +/* > + * device-inq.c: inquire SCSI device information. > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * This program refers to "SCSI Primary Commands - 3 (SPC-3) > + * at http://www.t10.org and sg_inq.c in sg3_utils-1.26 for > + * Linux OS SCSI subsystem, by D. Gilbert. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#include <stdlib.h> > +#include <stdio.h> > +#include <string.h> > +#include <dirent.h> > +#include <ctype.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/ioctl.h> > +#include <sys/mount.h> > +#include <sys/select.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <libgen.h> > +#include <errno.h> > +#include <scsi/scsi.h> > +#include <scsi/scsi_ioctl.h> > +#include <scsi/sg.h> > +#include "device-discovery.h" > + > +#define DEF_ALLOC_LEN 255 > +#define MX_ALLOC_LEN (0xc000 + 0x80) > + > +struct bl_serial *bl_create_scsi_string(int len, const char *bytes) > +{ > + struct bl_serial *s; > + s = malloc(sizeof(*s) + len); > + if (s) { > + s->data = (char *)&s[1]; > + s->len = len; > + memcpy(s->data, bytes, len); > + } > + return s; > +} > + > +void bl_free_scsi_string(struct bl_serial *str) > +{ > + if (str) > + free(str); > +} > + > +#define sg_io_ok(io_hdr) \ > + ((((io_hdr).status & 0x7e) == 0) && \ > + ((io_hdr).host_status == 0) && \ > + (((io_hdr).driver_status & 0x0f) == 0)) > + > +static int sg_timeout = 1 * 1000; > + > +static int bldev_inquire_page(int fd, int page, char *buffer, int len) > +{ > + unsigned char cmd[] = { INQUIRY, 0, 0, 0, 0, 0 }; > + unsigned char sense_b[28]; > + struct sg_io_hdr io_hdr; > + if (page >= 0) { > + cmd[1] = 1; > + cmd[2] = page; > + } > + cmd[3] = (unsigned char)((len >> 8) & 0xff); > + cmd[4] = (unsigned char)(len & 0xff); > + > + memset(&io_hdr, 0, sizeof(struct 
sg_io_hdr)); > + io_hdr.interface_id = 'S'; > + io_hdr.cmd_len = sizeof(cmd); > + io_hdr.mx_sb_len = sizeof(sense_b); > + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; > + io_hdr.dxfer_len = len; > + io_hdr.dxferp = buffer; > + io_hdr.cmdp = cmd; > + io_hdr.sbp = sense_b; > + io_hdr.timeout = sg_timeout; > + if (ioctl(fd, SG_IO, &io_hdr) < 0) > + return -1; > + > + if (sg_io_ok(io_hdr)) > + return 0; > + return -1; > +} > + > +int bldev_inquire_pages(int fd, int page, char **buffer) > +{ > + int status = 0; > + char *tmp; > + int len; > + > + *buffer = calloc(DEF_ALLOC_LEN, sizeof(char)); > + if (!*buffer) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + return -ENOMEM; > + } > + > + status = bldev_inquire_page(fd, page, *buffer, DEF_ALLOC_LEN); > + if (status) > + goto out; > + > + status = -1; > + if ((*(*buffer + 1) & 0xff) != page) > + goto out; > + > + len = (*(*buffer + 2) << 8) + *(*buffer + 3) + 4; > + if (len > MX_ALLOC_LEN) { > + BL_LOG_ERR("SCSI response length too long: %d\n", len); > + goto out; > + } > + if (len > DEF_ALLOC_LEN) { > + tmp = realloc(*buffer, len); > + if (!tmp) { > + BL_LOG_ERR("%s: Out of memory!\n", __func__); > + status = -ENOMEM; > + goto out; > + } > + *buffer = tmp; > + status = bldev_inquire_page(fd, page, *buffer, len); > + if (status) > + goto out; > + } > + status = 0; > + out: > + return status; > +} > + > +/* For EMC multipath devices, use VPD page (0xc0) to get status. 
> + * For other devices, return ACTIVE for now > + */ > +void bldev_read_ap_state(int fd, enum bl_path_state_e *ap_state_out) > +{ > + int status = 0; > + char *buffer; > + > + *ap_state_out = BL_PATH_STATE_ACTIVE; > + > + status = bldev_inquire_pages(fd, 0xc0, &buffer); > + if (status) > + goto out; > + > + if (buffer[4] < 0x02) > + *ap_state_out = BL_PATH_STATE_PASSIVE; > + out: > + if (buffer) > + free(buffer); > + return; > +} > + > +struct bl_serial *bldev_read_serial(int fd, const char *filename) > +{ > + struct bl_serial *serial_out = NULL; > + int status = 0, pos, len; > + char *buffer; > + struct bl_dev_id *dev_root, *dev_id; > + unsigned int current_id = 0; > + > + status = bldev_inquire_pages(fd, 0x83, &buffer); > + if (status) > + goto out; > + > + dev_root = (struct bl_dev_id *)buffer; > + > + pos = 0; > + current_id = 0; > + len = dev_root->len; > + while (pos < (len - sizeof(struct bl_dev_id) + sizeof(unsigned char))) { > + dev_id = (struct bl_dev_id *)&(dev_root->data[pos]); > + if ((dev_id->ids & 0xf) < current_id) > + continue; > + switch (dev_id->ids & 0xf) { > + /* We process SCSI ID with four ID cases: 0, 1, 2 and 3. > + * When more than one ID is available, priority is > + * 3>2>1>0. > + */ > + case 2: /* EUI-64 based */ > + if ((dev_id->len != 8) && (dev_id->len != 12) && > + (dev_id->len != 16)) { > + BL_LOG_ERR("EUI-64 only decodes 8, " > + "12 and 16\n"); > + break; > + } > + case 3: /* NAA */ > + /* TODO: NAA validity judgement too complicated, > + * so just ignore it here. 
> + */ > + if ((dev_id->type & 0xf) != 1) { > + BL_LOG_ERR("Binary code_set expected\n"); > + break; > + } > + case 0: /* vendor specific */ > + case 1: /* T10 vendor identification */ > + current_id = dev_id->ids & 0xf; > + if (serial_out) > + bl_free_scsi_string(serial_out); > + serial_out = bl_create_scsi_string(dev_id->len, > + dev_id->data); > + break; > + default: > + break; > + } > + if (current_id == 3) > + break; > + pos += (dev_id->len + sizeof(struct bl_dev_id) - > + sizeof(unsigned char)); > + } > + out: > + if (!serial_out) > + serial_out = bl_create_scsi_string(strlen(filename), filename); > + if (buffer) > + free(buffer); > + return serial_out; > +} > diff --git a/utils/blkmapd/device-process.c b/utils/blkmapd/device-process.c > new file mode 100644 > index 0000000..6252552 > --- /dev/null > +++ b/utils/blkmapd/device-process.c > @@ -0,0 +1,391 @@ > +/* > + * device-process.c: detailed processing of device information sent > + * from kernel. > + * > + * Copyright (c) 2006 The Regents of the University of Michigan. > + * All rights reserved. > + * > + * Andy Adamson <andros@citi.umich.edu> > + * Fred Isaman <iisaman@umich.edu> > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * > + * Used codes in linux/fs/nfs/blocklayout/blocklayoutdev.c. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. 
> + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#define _LARGEFILE64_SOURCE > +#include <libdevmapper.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/user.h> > +#include <fcntl.h> > +#include <errno.h> > +#include <arpa/inet.h> > +#include <linux/kdev_t.h> > +#include "device-discovery.h" > + > +uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes) > +{ > + uint32_t *q = p + ((nbytes + 3) >> 2); > + if (q > end || q < p) > + return NULL; > + return p; > +} > + > +static int decode_blk_signature(uint32_t **pp, uint32_t *end, > + struct bl_sig *sig) > +{ > + int i, tmp; > + uint32_t *p = *pp; > + > + BLK_READBUF(p, end, 4); > + READ32(sig->si_num_comps); > + if (sig->si_num_comps == 0) { > + BL_LOG_ERR("0 components in sig\n"); > + goto out_err; > + } > + if (sig->si_num_comps >= BLOCK_MAX_SIG_COMP) { > + BL_LOG_ERR("number of sig comps %i >= BLOCK_MAX_SIG_COMP\n", > + sig->si_num_comps); > + goto out_err; > + } > + for (i = 0; i < sig->si_num_comps; i++) { > + BLK_READBUF(p, end, 12); > + READ64(sig->si_comps[i].bs_offset); > + READ32(tmp); > + sig->si_comps[i].bs_length = tmp; > + BLK_READBUF(p, end, tmp); > + /* Note we 
rely here on fact that sig is used immediately > + * for mapping, then thrown away. > + */ > + sig->si_comps[i].bs_string = (char *)p; > + BL_LOG_ERR("%s: si_comps[%d]: bs_length %d, bs_string %s\n", > + __func__, i, sig->si_comps[i].bs_length, > + sig->si_comps[i].bs_string); > + p += ((tmp + 3) >> 2); > + } > + *pp = p; > + return 0; > + out_err: > + return -EIO; > +} > + > +/* Read signature from device > + * return 0: read successfully > + * return -1: error > + */ > +int read_cmp_blk_sig(const char *dev_name, struct bl_sig_comp *comp, > + int64_t bs_offset) > +{ > + int fd, ret = -1; > + char *sig = NULL; > + > + BL_LOG_ERR("%s: dev_name %s\n", __func__, dev_name); > + fd = open(dev_name, O_RDONLY | O_LARGEFILE); > + if (fd < 0) { > + BL_LOG_ERR("%s could not be opened for read\n", dev_name); > + goto error; > + } > + > + sig = (char *)malloc(comp->bs_length); > + if (!sig) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto error; > + } > + > + if (lseek64(fd, bs_offset, SEEK_SET) == -1) { > + BL_LOG_ERR("File %s lseek error\n", dev_name); > + goto error; > + } > + > + if (atomicio(read, fd, sig, comp->bs_length) != comp->bs_length) { > + BL_LOG_ERR("File %s read error\n", dev_name); > + goto error; > + } > + > + BL_LOG_ERR > + ("%s: sig: %s, bs_string: %s, bs_length: %d, bs_offset: %lld\n", > + __func__, sig, comp->bs_string, comp->bs_length, bs_offset); > + ret = memcmp(sig, comp->bs_string, comp->bs_length); > + > + error: > + if (sig) > + free(sig); > + if (fd >= 0) > + close(fd); > + return ret; > +} > + > +/* > + * All signatures in sig must be found on disk for verification. > + * Returns True if sig matches, False otherwise. 
> + */ > +static int verify_sig(struct bl_disk *disk, struct bl_sig *sig) > +{ > + struct bl_sig_comp *comp; > + int i, ret; > + int64_t bs_offset; > + > + for (i = 0; i < sig->si_num_comps; i++) { > + comp = &sig->si_comps[i]; > + bs_offset = comp->bs_offset; > + if (bs_offset < 0) > + bs_offset += (((int64_t) disk->size) << 9); > + BL_LOG_ERR("%s: bs_offset: %lld\n", __func__, bs_offset); > + ret = read_cmp_blk_sig(disk->valid_path->full_path, > + comp, bs_offset); > + if (ret) > + return 0; > + } > + return 1; > +} > + > +/* > + * map_sig_to_device() > + * Given a signature, walk the list of visible disks searching for > + * a match. Returns True if mapping was done, False otherwise. > + * > + * While we're at it, fill in the vol->bv_size. > + */ > +static int map_sig_to_device(struct bl_sig *sig, struct bl_volume *vol) > +{ > + int mapped = 0; > + struct bl_disk *disk = visible_disk_list; > + char *filepath = 0; > + struct bl_disk *lolDisk = disk; > + while (lolDisk) { > + BL_LOG_ERR("%s: visible_disk_list: %s\n", __func__, > + lolDisk->valid_path->full_path); > + lolDisk = lolDisk->next; > + } > + > + /*scan disk list to find out match device */ > + while (disk) { > + /* FIXME: should we use better algorithm for disk scan? */ > + mapped = verify_sig(disk, sig); > + if (mapped) { > + vol->param.bv_dev = disk->dev; > + filepath = disk->valid_path->full_path; > + vol->bv_size = disk->size; > + break; > + } > + disk = disk->next; > + } > + return mapped; > +} > + > +/* We are given an array of XDR encoded array indices, each of which should > + * refer to a previously decoded device. Translate into a list of pointers > + * to the appropriate pnfs_blk_volume's. 
> + */ > +static int set_vol_array(uint32_t **pp, uint32_t *end, > + struct bl_volume *vols, int working) > +{ > + int i, index; > + uint32_t *p = *pp; > + struct bl_volume **array = vols[working].bv_vols; > + for (i = 0; i < vols[working].bv_vol_n; i++) { > + BLK_READBUF(p, end, 4); > + READ32(index); > + if ((index < 0) || (index >= working)) { > + BL_LOG_ERR("set_vol_array: Id %i out of range\n", > + index); > + goto out_err; > + } > + array[i] = &vols[index]; > + } > + *pp = p; > + return 0; > + out_err: > + return -EIO; > +} > + > +static uint64_t sum_subvolume_sizes(struct bl_volume *vol) > +{ > + int i; > + uint64_t sum = 0; > + for (i = 0; i < vol->bv_vol_n; i++) > + sum += vol->bv_vols[i]->bv_size; > + return sum; > +} > + > +static int decode_blk_volume(uint32_t **pp, uint32_t *end, > + struct bl_volume *vols, int i, int *array_cnt) > +{ > + int status = 0, j; > + struct bl_sig sig; > + uint32_t *p = *pp; > + struct bl_volume *vol = &vols[i]; > + uint64_t tmp, tmp_size; > + div_t d; > + > + BLK_READBUF(p, end, 4); > + READ32(vol->bv_type); > + switch (vol->bv_type) { > + case BLOCK_VOLUME_SIMPLE: > + *array_cnt = 0; > + status = decode_blk_signature(&p, end, &sig); > + if (status) > + return status; > + status = map_sig_to_device(&sig, vol); > + if (!status) { > + BL_LOG_ERR("Could not find disk for device\n"); > + return -ENXIO; > + } > + status = 0; > + break; > + case BLOCK_VOLUME_SLICE: > + BLK_READBUF(p, end, 16); > + READ_SECTOR(vol->param.bv_offset); > + READ_SECTOR(vol->bv_size); > + *array_cnt = vol->bv_vol_n = 1; > + status = set_vol_array(&p, end, vols, i); > + break; > + case BLOCK_VOLUME_STRIPE: > + BLK_READBUF(p, end, 8); > + READ_SECTOR(vol->param.bv_stripe_unit); > + off_t chunksize = vol->param.bv_stripe_unit; > + if ((chunksize == 0) || > + ((chunksize & (chunksize - 1)) != 0) || > + (chunksize < (PAGE_SIZE >> 9))) > + return -EIO; > + BLK_READBUF(p, end, 4); > + READ32(vol->bv_vol_n); > + if (!vol->bv_vol_n) > + return -EIO; > + 
*array_cnt = vol->bv_vol_n; > + status = set_vol_array(&p, end, vols, i); > + if (status) > + return status; > + for (j = 1; j < vol->bv_vol_n; j++) { > + if (vol->bv_vols[j]->bv_size != > + vol->bv_vols[0]->bv_size) { > + BL_LOG_ERR("varying subvol size\n"); > + return -EIO; > + } > + } > + /* Make sure total size only includes addressable areas */ > + tmp_size = vol->bv_vols[0]->bv_size; > + d = div(tmp_size, (uint32_t) vol->param.bv_stripe_unit); > + tmp_size = d.quot; > + vol->bv_size = tmp_size * vol->param.bv_stripe_unit; > + break; > + case BLOCK_VOLUME_CONCAT: > + BLK_READBUF(p, end, 4); > + READ32(vol->bv_vol_n); > + if (!vol->bv_vol_n) > + return -EIO; > + *array_cnt = vol->bv_vol_n; > + status = set_vol_array(&p, end, vols, i); > + if (status) > + return status; > + vol->bv_size = sum_subvolume_sizes(vol); > + break; > + default: > + BL_LOG_ERR("Unknown volume type %i\n", vol->bv_type); > + out_err: > + return -EIO; > + } > + *pp = p; > + return status; > +} > + > +uint64_t process_deviceinfo(const char *dev_addr_buf, > + unsigned int dev_addr_len, > + uint32_t *major, uint32_t *minor) > +{ > + int num_vols, i, status, count; > + uint32_t *p, *end; > + struct bl_volume *vols = NULL, **arrays = NULL, **arrays_ptr = NULL; > + uint64_t dev = 0; > + int tried = 0; > + > + restart: > + p = (uint32_t *) dev_addr_buf; > + end = (uint32_t *) ((char *)p + dev_addr_len); > + /* Decode block volume */ > + BLK_READBUF(p, end, 4); > + READ32(num_vols); > + if (num_vols <= 0) { > + BL_LOG_WARNING("Error: number of vols: %d\n", num_vols); > + goto out_err; > + } > + > + vols = (struct bl_volume *)malloc(num_vols * sizeof(struct bl_volume)); > + if (!vols) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto out_err; > + } > + > + /* Each volume in vols array needs its own array. Save time by > + * allocating them all in one large hunk. 
Because each volume > + * array can only reference previous volumes, and because once > + * a concat or stripe references a volume, it may never be > + * referenced again, the volume arrays are guaranteed to fit > + * in the surprisingly small space allocated. > + */ > + arrays = > + (struct bl_volume **)malloc(num_vols * 2 * > + sizeof(struct bl_volume *)); > + if (!arrays) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto out_err; > + } > + > + arrays_ptr = arrays; > + > + for (i = 0; i < num_vols; i++) { > + vols[i].bv_vols = arrays_ptr; > + status = decode_blk_volume(&p, end, vols, i, &count); > + if (status == -ENXIO && (tried <= 5)) { > + sleep(1); > + BL_LOG_DEBUG("%s: discover again!\n", __func__); > + bl_discover_devices(); > + tried++; > + free(vols); > + free(arrays); > + goto restart; > + } > + if (status) > + goto out_err; > + arrays_ptr += count; > + } > + > + if (p != end) { > + BL_LOG_ERR("p is not equal to end!\n"); > + goto out_err; > + } > + > + dev = dm_device_create(vols, num_vols); > + *major = MAJOR(dev); > + *minor = MINOR(dev); > + out_err: > + if (vols) > + free(vols); > + if (arrays) > + free(arrays); > + return dev; > +} > diff --git a/utils/blkmapd/dm-device.c b/utils/blkmapd/dm-device.c > new file mode 100644 > index 0000000..f08df7b > --- /dev/null > +++ b/utils/blkmapd/dm-device.c > @@ -0,0 +1,509 @@ > +/* > + * dm-device.c: create or remove device via device mapper API. > + * > + * Copyright (c) 2010 EMC Corporation, Haiying Tang <Tang_Haiying@emc.com> > + * All rights reserved. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. 
Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR > + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES > + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. > + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, > + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT > + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, > + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY > + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT > + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF > + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. > + */ > +#include <libdevmapper.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <errno.h> > +#include <linux/kdev_t.h> > +#include "device-discovery.h" > + > +#define DM_DEV_NAME_LEN 256 > + > +#ifndef DM_MAX_TYPE_NAME > +#define DM_MAX_TYPE_NAME 16 > +#endif > + > +#define DM_PARAMS_LEN 512 /*XXX: is this enough for target? 
*/ > +#define DM_DIR "/dev/mapper" > +#define DM_DIR_LEN12 > +#define TYPE_HAS_DEV(type) ((type == BLOCK_VOLUME_SIMPLE) || \ > + (type == BLOCK_VOLUME_PSEUDO)) > + > +struct bl_dm_table { > + uint64_t offset; > + uint64_t size; > + char target_type[DM_MAX_TYPE_NAME]; > + char params[DM_PARAMS_LEN]; > + struct bl_dm_table *next; > +}; > + > +struct bl_dm_tree { > + uint64_t dev; > + struct dm_tree *tree; > + struct bl_dm_tree *next; > +}; > + > +static inline struct bl_dm_table *bl_dm_table_alloc(void) > +{ > + return (struct bl_dm_table *)calloc(1, sizeof(struct bl_dm_table)); > +} > + > +void bl_dm_table_free(struct bl_dm_table *bl_table_head) > +{ > + struct bl_dm_table *p = bl_table_head; > + while (bl_table_head) { > + p = bl_table_head->next; > + free(bl_table_head); > + bl_table_head = p; > + } > +} > + > +void add_to_bl_dm_table(struct bl_dm_table **bl_table_head, > + struct bl_dm_table *table) > +{ > + struct bl_dm_table *pre; > + if (!*bl_table_head) { > + *bl_table_head = table; > + return; > + } > + pre = *bl_table_head; > + while (pre->next) > + pre = pre->next; > + pre->next = table; > + return; > +} > + > +struct bl_dm_tree *bl_tree_head; > + > +struct bl_dm_tree *find_bl_dm_tree(uint64_t dev) > +{ > + struct bl_dm_tree *p = bl_tree_head; > + while (p) { > + if (p->dev == dev) > + return p; > + p = p->next; > + } > + return NULL; > +} > + > +void del_from_bl_dm_tree(uint64_t dev) > +{ > + struct bl_dm_tree *pre = bl_tree_head; > + struct bl_dm_tree *p; > + > + p = pre; > + while (p) { > + if (p->dev == dev) { > + pre->next = p->next; > + if (p == bl_tree_head) > + bl_tree_head = bl_tree_head->next; > + free(p); > + break; > + } > + pre = p; > + p = pre->next; > + } > +} > + > +void add_to_bl_dm_tree(struct bl_dm_tree *tree) > +{ > + struct bl_dm_tree *pre; > + if (!bl_tree_head) { > + bl_tree_head = tree; > + return; > + } > + pre = bl_tree_head; > + while (pre->next) > + pre = pre->next; > + pre->next = tree; > + return; > +} > + > +/* Create device 
via device mapper > + * return 0 when creation failed > + * return dev no for created device > + */ > +uint64_t dm_single_device_create(const char *dev_name, struct bl_dm_table * p) > +{ > + struct dm_task *dmt; > + struct dm_info dminfo; > + int ret = 0; > + > + dmt = dm_task_create(DM_DEVICE_CREATE); > + if (!dmt) { > + BL_LOG_ERR("Create dm_task for %s failed\n", dev_name); > + return 0; > + } > + ret = dm_task_set_name(dmt, dev_name); > + if (!ret) > + goto err_out; > + > + while (p) { > + ret = dm_task_add_target(dmt, p->offset, p->size, > + p->target_type, p->params); > + if (!ret) > + goto err_out; > + p = p->next; > + } > + > + ret = dm_task_run(dmt) && > + dm_task_get_info(dmt, &dminfo) && dminfo.exists; > + > + if (!ret) > + goto err_out; > + > + dm_task_update_nodes(); > + > + err_out: > + dm_task_destroy(dmt); > + > + if (!ret) { > + BL_LOG_ERR("Create device %s failed\n", dev_name); > + return 0; > + } > + return MKDEV(dminfo.major, dminfo.minor); > +} > + > +int dm_device_remove_byname(const char *dev_name) > +{ > + struct dm_task *dmt; > + int ret = 0; > + > + dmt = dm_task_create(DM_DEVICE_REMOVE); > + if (!dmt) > + return -ENODEV; > + > + ret = dm_task_set_name(dmt, dev_name) && dm_task_run(dmt); > + > + dm_task_update_nodes(); > + > + if (dmt) > + dm_task_destroy(dmt); > + > + return ret; > +} > + > +int dm_device_remove(uint64_t dev) > +{ > + struct dm_task *dmt; > + struct dm_names *dmnames; > + char *names = NULL; > + int ret = -1; > + > + /* Look for dev_name via dev, if dev_name could be transferred here, > + we could jump to DM_DEVICE_REMOVE directly */ > + dmt = dm_task_create(DM_DEVICE_LIST); > + if (!dmt) { > + BL_LOG_ERR("dm_task creation failed\n"); > + return -ENODEV; > + } > + > + ret = dm_task_run(dmt); > + if (!ret) { > + BL_LOG_ERR("dm_task_run failed\n"); > + goto error; > + } > + > + dmnames = dm_task_get_names(dmt); > + if (!dmnames || !dmnames->dev) { > + BL_LOG_ERR("dm_task_get_names failed\n"); > + goto error; > + } > + > + 
do { > + if (dmnames->dev == dev) { > + names = dmnames->name; > + break; > + } > + dmnames = (void *)dmnames + dmnames->next; > + } while (dmnames); > + > + if (!names) { > + BL_LOG_ERR("Could not find device\n"); > + goto error; > + } > + > + dm_task_update_nodes(); > + > + error: > + dm_task_destroy(dmt); > + > + /*Start to remove device */ > + if (names) > + ret = dm_device_remove_byname(names); > + return ret; > +} > + > +static unsigned long dev_count; > + > +void dm_devicelist_remove(unsigned long start, unsigned long end) > +{ > + char dev_name[DM_DEV_NAME_LEN]; > + unsigned long count; > + > + if ((start >= dev_count) || (end <= 1) || (start >= end - 1)) > + return; > + > + for (count = end - 1; count > start; count--) { > + sprintf(dev_name, "pnfs_vol_%lu", count - 1); > + dm_device_remove_byname(dev_name); > + } > + > + return; > +} > + > +void bl_dm_remove_tree(uint64_t dev) > +{ > + struct bl_dm_tree *p; > + > + p = find_bl_dm_tree(dev); > + if (!p) > + return; > + > + dm_tree_free(p->tree); > + del_from_bl_dm_tree(dev); > +} > + > +void bl_dm_create_tree(uint64_t dev) > +{ > + struct dm_tree *tree; > + struct bl_dm_tree *bl_tree; > + > + bl_tree = find_bl_dm_tree(dev); > + if (bl_tree) > + return; /*XXX: error? 
*/ > + > + tree = dm_tree_create(); > + if (!tree) > + return; > + > + if (!dm_tree_add_dev(tree, MAJOR(dev), MINOR(dev))) { > + dm_tree_free(tree); > + return; > + } > + > + bl_tree = malloc(sizeof(struct bl_dm_tree)); > + if (!bl_tree) { > + dm_tree_free(tree); > + return; > + } > + > + bl_tree->dev = dev; > + bl_tree->tree = tree; > + bl_tree->next = NULL; > + add_to_bl_dm_tree(bl_tree); > + > + return; > +} > + > +uint64_t dm_device_nametodev(char *dev_name) > +{ > + struct dm_task *dmt; > + int ret = 0; > + struct dm_info dminfo; > + > + dmt = dm_task_create(DM_DEVICE_INFO); > + if (!dmt) > + return -ENODEV; > + > + ret = dm_task_set_name(dmt, dev_name) && > + dm_task_run(dmt) && dm_task_get_info(dmt, &dminfo); > + > + if (dmt) > + dm_task_destroy(dmt); > + > + if (!ret) > + return 0; > + > + return MKDEV(dminfo.major, dminfo.minor); > +} > + > +int dm_device_remove_all(uint64_t *dev) > +{ > + struct bl_dm_tree *p; > + struct dm_tree_node *node; > + const char *uuid; > + int ret = 0; > + uint32_t major, minor; > + uint64_t bl_dev; > + > + memcpy(&major, dev, sizeof(uint32_t)); > + memcpy(&minor, (void *)dev + sizeof(uint32_t), sizeof(uint32_t)); > + bl_dev = MKDEV(major, minor); > + p = find_bl_dm_tree(bl_dev); > + if (!p) > + return ret; > + > + node = dm_tree_find_node(p->tree, MAJOR(bl_dev), MINOR(bl_dev)); > + if (!node) > + return ret; > + > + uuid = dm_tree_node_get_uuid(node); > + if (!uuid) > + return ret; > + > + dm_device_remove(bl_dev); > + ret = dm_tree_deactivate_children(node, uuid, strlen(uuid)); > + dm_task_update_nodes(); > + bl_dm_remove_tree(bl_dev); > + return ret; > +} > + > +/* TODO: check the value for DM_DEV_NAME_LEN, DM_TYPE_LEN, DM_PARAMS_LEN */ > +uint64_t dm_device_create(struct bl_volume *vols, int num_vols) > +{ > + uint64_t size, dev = 0; > + unsigned long count = dev_count; > + int number = 0, i, pos; > + struct bl_volume *node; > + char *tmp; > + struct bl_dm_table *table = NULL; > + struct bl_dm_table *bl_table_head = NULL; > 
+ unsigned int len; > + char *dev_name = NULL; > + /* Create pseudo device here */ > + while (number < num_vols) { > + node = &vols[number]; > + switch (node->bv_type) { > + case BLOCK_VOLUME_SIMPLE: > + /* Do not need to create device here */ > + dev = node->param.bv_dev; > + goto continued; > + case BLOCK_VOLUME_SLICE: > + table = bl_dm_table_alloc(); > + if (!table) > + goto out; > + table->offset = 0; > + table->size = node->bv_size; > + strcpy(table->target_type, "linear"); > + if (!TYPE_HAS_DEV(node->bv_vols[0]->bv_type)) { > + free(table); > + goto out; > + } > + dev = node->bv_vols[0]->param.bv_dev; > + tmp = table->params; > + if (!dm_format_dev(tmp, DM_PARAMS_LEN, > + MAJOR(dev), MINOR(dev))) { > + free(table); > + goto out; > + } > + tmp += strlen(tmp); > + sprintf(tmp, " %lu", node->param.bv_offset); > + add_to_bl_dm_table(&bl_table_head, table); > + break; > + case BLOCK_VOLUME_STRIPE: > + table = bl_dm_table_alloc(); > + if (!table) > + goto out; > + table->offset = 0; > + table->size = node->bv_size; > + strcpy(table->target_type, "striped"); > + sprintf(table->params, "%d %lu %n", node->bv_vol_n, > + node->param.bv_stripe_unit, &pos); > + /* Repeatedly copy subdev to params */ > + tmp = table->params + pos; > + len = DM_PARAMS_LEN - pos; > + for (i = 0; i < node->bv_vol_n; i++) { > + if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) { > + free(table); > + goto out; > + } > + dev = node->bv_vols[i]->param.bv_dev; > + if (!dm_format_dev(tmp, len, MAJOR(dev), > + MINOR(dev))) { > + free(table); > + goto out; > + } > + pos = strlen(tmp); > + tmp += pos; > + len -= pos; > + sprintf(tmp, " %d ", 0); > + tmp += 3; > + len -= 3; > + } > + add_to_bl_dm_table(&bl_table_head, table); > + break; > + case BLOCK_VOLUME_CONCAT: > + size = 0; > + for (i = 0; i < node->bv_vol_n; i++) { > + table = bl_dm_table_alloc(); > + if (!table) > + goto out; > + table->offset = size; > + table->size = node->bv_vols[i]->bv_size; > + if (!TYPE_HAS_DEV(node->bv_vols[i]->bv_type)) { > 
+ free(table); > + goto out; > + } > + strcpy(table->target_type, "linear"); > + tmp = table->params; > + dev = node->bv_vols[i]->param.bv_dev; > + if (!dm_format_dev(tmp, DM_PARAMS_LEN, > + MAJOR(dev), MINOR(dev))) { > + free(table); > + goto out; > + } > + tmp += strlen(tmp); > + sprintf(tmp, " %d", 0); > + size += table->size; > + add_to_bl_dm_table(&bl_table_head, table); > + } > + break; > + default: > + /* Delete previous temporary devices */ > + dm_devicelist_remove(count, dev_count); > + goto out; > + } /*end of swtich */ > + /* Create dev_name here. Name of device is pnfs_vol_XXX */ > + if (dev_name) > + free(dev_name); > + dev_name = (char *)calloc(DM_DEV_NAME_LEN, sizeof(char)); > + if (!dev_name) { > + BL_LOG_ERR("%s: Out of memory\n", __func__); > + goto out; > + } > + sprintf(dev_name, "pnfs_vol_%lu", dev_count++); > + > + dev = dm_single_device_create(dev_name, bl_table_head); > + if (!dev) { > + /* Delete previous temporary devices */ > + dm_devicelist_remove(count, dev_count); > + goto out; > + } > + node->param.bv_dev = dev; > + /*TODO: extend use with PSEUDO later */ > + node->bv_type = BLOCK_VOLUME_PSEUDO; > + continued: > + number++; > + if (bl_table_head) > + bl_dm_table_free(bl_table_head); > + bl_table_head = NULL; > + } > + out: > + if (bl_table_head) > + bl_dm_table_free(bl_table_head); > + bl_table_head = NULL; > + if (dev) > + bl_dm_create_tree(dev); > + if (dev_name) > + free(dev_name); > + return dev; > +} > diff --git a/utils/blkmapd/etc/initd/initd.redhat b/utils/blkmapd/etc/initd/initd.redhat > new file mode 100644 > index 0000000..a52250c > --- /dev/null > +++ b/utils/blkmapd/etc/initd/initd.redhat > @@ -0,0 +1,76 @@ > +#!/bin/sh > +# > +# description: Starts and stops the iSCSI initiator > +# > +# processname: pnfsi-block > +# pidfile: /var/run/pnfs-block.pid > +# config: /etc/pnfs-block.conf > + > +# Source function library. > +if [ -f /etc/init.d/functions ] ; then > + . 
/etc/init.d/functions > +elif [ -f /etc/rc.d/init.d/functions ] ; then > + . /etc/rc.d/init.d/functions > +else > + exit 0 > +fi > + > +PATH=/sbin:/bin:/usr/sbin:/usr/bin > + > +RETVAL=0 > + > +start() > +{ > + echo -n $"Starting pNFS block-layout device discovery service: " > + modprobe -q blocklayoutdriver > + daemon /usr/sbin/bl-device > + RETVAL=$? > + if [ $RETVAL -eq 0 ]; then > + touch /var/lock/subsys/pnfs-block > + fi > + echo > + return $RETVAL > +} > + > +stop() > +{ > + echo -n $"Stopping pNFS block-layout device discovery service: " > + killproc bl-device 2> /dev/null > + rm -f /var/run/pnfs-block.pid > + RETVAL=$? > + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/pnfs-block > + if [ $RETVAL -eq 0 ]; then > + echo_success > + else > + echo_failure > + fi > + echo > + return $RETVAL > +} > + > +restart() > +{ > + stop > + start > +} > + > +case "$1" in > + start) > + start > + ;; > + stop) > + stop > + ;; > + restart) > + stop > + start > + ;; > + status) > + status pnfs-block > + ;; > + *) > + echo $"Usage: $0 {start|stop|restart|status}" > + exit 1 > +esac > + > +exit $RETVAL > diff --git a/utils/blkmapd/etc/pnfs-block.conf b/utils/blkmapd/etc/pnfs-block.conf > new file mode 100644 > index 0000000..da70d94 > --- /dev/null > +++ b/utils/blkmapd/etc/pnfs-block.conf > @@ -0,0 +1,10 @@ > +# This is an example config file > + > +# Look at all /dev/sd* devices > +# /dev/sd or /dev/sd* > +/dev/sd* > + > +# Look at all /dev/mapper/* devices > +# /dev/mapper/* or > +# /dev/mapper/ > +/dev/mapper/* ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-08-12 13:44 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-11 19:42 [PATCH] Add complex block layout discovery and mapping daemon Jim Rees
[not found] ` <20100811194253.GA11453-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org>
2010-08-12 13:42 ` Benny Halevy
2010-08-12 13:44 ` Benny Halevy
-- strict thread matches above, loose matches on Subject: below --
2010-07-21 22:31 Jim Rees
[not found] ` <20100721223119.GA6618-8f4Pc2RrbJmHXe+LvDLADg@public.gmane.org>
2010-07-22 19:35 ` Benny Halevy
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).