From mboxrd@z Thu Jan 1 00:00:00 1970 From: lhh@sourceware.org Date: 4 Dec 2007 20:40:55 -0000 Subject: [Cluster-devel] cluster/cman/qdisk disk.c disk.h disk_util.c m ... Message-ID: <20071204204055.1241.qmail@sourceware.org> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit CVSROOT: /cvs/cluster Module name: cluster Branch: RHEL4 Changes by: lhh at sourceware.org 2007-12-04 20:40:55 Modified files: cman/qdisk : disk.c disk.h disk_util.c main.c mkqdisk.c proc.c Log message: Make qdiskd work with sector sizes other than 512 bytes. Import patch from Fabio M. Di Nitto to make qdiskd use (node_count - 1) for votes if there's none specified in cluster.conf Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/mkqdisk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4 http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/proc.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.1&r2=1.1.2.2 --- cluster/cman/qdisk/disk.c 2007/10/29 20:38:12 1.1.2.4 +++ cluster/cman/qdisk/disk.c 2007/12/04 20:40:54 1.1.2.5 @@ -43,8 +43,9 @@ #include #include #include +#include -static int diskRawRead(int fd, char *buf, int len); +static int diskRawRead(target_info_t *disk, char *buf, int len); uint32_t clu_crc32(const char *data, size_t count); @@ -211,49 +212,58 @@ * Returns - (the file descriptor), a value >= 0 on success. 
*/ int -qdisk_open(char *name) +qdisk_open(char *name, target_info_t *disk) { - int fd; - int retval; + int ret; + unsigned long ssz; /* * Open for synchronous writes to insure all writes go directly * to disk. */ - fd = open(name, O_RDWR | O_SYNC | O_DIRECT); - if (fd < 0) { - return fd; - } + disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT); + if (disk->d_fd < 0) + return disk->d_fd; + + disk->d_blksz = 512; + ret = ioctl(disk->d_fd, BLKSSZGET, &ssz); + if (ret < 0) + perror("qdisk_open: ioctl(BLKSSZGET)"); + else + /* Sorry, no sector sizes >4GB please */ + disk->d_blksz = (uint32_t)ssz; - /* Check to verify that the partition is large enough.*/ - retval = lseek(fd, END_OF_DISK, SEEK_SET); + disk->d_pagesz = sysconf(_SC_PAGESIZE); - if (retval < 0) { + /* Check to verify that the partition is large enough.*/ + ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET); + if (ret < 0) { perror("open_partition: seek"); return -1; } - if (retval < END_OF_DISK) { + if (ret < END_OF_DISK(disk->d_blksz)) { fprintf(stderr, "Partition %s too small\n", name); errno = EINVAL; return -1; } /* Set close-on-exec bit */ - retval = fcntl(fd, F_GETFD, 0); - if (retval < 0) { - close(fd); + ret = fcntl(disk->d_fd, F_GETFD, 0); + if (ret < 0) { + perror("open_partition: fcntl(F_GETFD)"); + close(disk->d_fd); return -1; } - retval |= FD_CLOEXEC; - if (fcntl(fd, F_SETFD, retval) < 0) { - perror("open_partition: fcntl"); - close(fd); + ret |= FD_CLOEXEC; + if (fcntl(disk->d_fd, F_SETFD, ret) < 0) { + perror("open_partition: fcntl(F_SETFD)"); + close(disk->d_fd); return -1; } - return fd; + return 0; } @@ -263,17 +273,17 @@ * Returns - value from close syscall. 
*/ int -qdisk_close(int *fd) +qdisk_close(target_info_t *disk) { int retval; - if (!fd || *fd < 0) { + if (!disk || disk->d_fd < 0) { errno = EINVAL; return -1; } - retval = close(*fd); - *fd = -1; + retval = close(disk->d_fd); + disk->d_fd = -1; return retval; } @@ -288,7 +298,7 @@ qdisk_validate(char *name) { struct stat stat_st, *stat_ptr; - int fd; + target_info_t disk; stat_ptr = &stat_st; if (stat(name, stat_ptr) < 0) { @@ -310,26 +320,25 @@ /* * Verify read/write permission. */ - fd = qdisk_open(name); - if (fd < 0) { + if (qdisk_open(name, &disk) < 0) { fprintf(stderr, "%s: open of %s for RDWR failed: %s\n", __FUNCTION__, name, strerror(errno)); return -1; } - qdisk_close(&fd); + qdisk_close(&disk); return 0; } static int -diskRawReadShadow(int fd, off_t readOffset, char *buf, int len) +diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len) { int ret; shared_header_t *hdrp; char *data; int datalen; - ret = lseek(fd, readOffset, SEEK_SET); + ret = lseek(disk->d_fd, readOffset, SEEK_SET); if (ret != readOffset) { #if 0 fprintf(stderr, @@ -340,7 +349,7 @@ return -1; } - ret = diskRawRead(fd, buf, len); + ret = diskRawRead(disk, buf, len); if (ret != len) { #if 0 fprintf(stderr, "diskRawReadShadow: aligned read " @@ -375,7 +384,7 @@ * Here we check for alignment and do a bounceio if necessary. */ static int -diskRawRead(int fd, char *buf, int len) +diskRawRead(target_info_t *disk, char *buf, int len) { char *alignedBuf; int readret; @@ -383,21 +392,24 @@ int readlen; int bounceNeeded = 1; - if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) && - ((len % 512) == 0)) { + + /* was 3ff, which is (512<<1-1) */ + if ((((unsigned long) buf & + (unsigned long) ((disk->d_blksz << 1) -1)) == 0) && + ((len % (disk->d_blksz)) == 0)) { bounceNeeded = 0; } if (bounceNeeded == 0) { /* Already aligned and even multiple of 512, no bounceio * required. 
*/ - return (read(fd, buf, len)); + return (read(disk->d_fd, buf, len)); } - if (len > 512) { + if (len > disk->d_blksz) { fprintf(stderr, "diskRawRead: not setup for reads larger than %d.\n", - 512); + (int)disk->d_blksz); return (-1); } /* @@ -406,8 +418,8 @@ * XXX - if the on-disk offsets don't provide enough room we're cooked! */ extraLength = 0; - if (len % 512) { - extraLength = 512 - (len % 512); + if (len % disk->d_blksz) { + extraLength = disk->d_blksz - (len % disk->d_blksz); } readlen = len; @@ -415,18 +427,18 @@ readlen += extraLength; } - readret = posix_memalign((void **)&alignedBuf, 512, 512); + readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz); if (readret < 0) { return -1; } - readret = read(fd, alignedBuf, readlen); + readret = read(disk->d_fd, alignedBuf, readlen); if (readret > 0) { if (readret > len) { - bcopy(alignedBuf, buf, len); + memcpy(alignedBuf, buf, len); readret = len; } else { - bcopy(alignedBuf, buf, readret); + memcpy(alignedBuf, buf, readret); } } @@ -445,7 +457,7 @@ * Here we check for alignment and do a bounceio if necessary. */ static int -diskRawWrite(int fd, char *buf, int len) +diskRawWrite(target_info_t *disk, char *buf, int len) { char *alignedBuf; int ret; @@ -453,31 +465,33 @@ int writelen; int bounceNeeded = 1; - if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) && - ((len % 512) == 0)) { + /* was 3ff, which is (512<<1-1) */ + if ((((unsigned long) buf & + (unsigned long) ((disk->d_blksz << 1) -1)) == 0) && + ((len % (disk->d_blksz)) == 0)) { bounceNeeded = 0; } + if (bounceNeeded == 0) { /* Already aligned and even multiple of 512, no bounceio * required. 
*/ - return (write(fd, buf, len)); + return (write(disk->d_fd, buf, len)); } - if (len > 512) { + if (len > disk->d_blksz) { fprintf(stderr, - "diskRawWrite: not setup for larger than %d.\n", - 512); + "diskRawRead: not setup for reads larger than %d.\n", + (int)disk->d_blksz); return (-1); } - /* * All IOs must be of size which is a multiple of 512. Here we * just add in enough extra to accommodate. * XXX - if the on-disk offsets don't provide enough room we're cooked! */ extraLength = 0; - if (len % 512) { - extraLength = 512 - (len % 512); + if (len % disk->d_blksz) { + extraLength = disk->d_blksz - (len % disk->d_blksz); } writelen = len; @@ -485,13 +499,20 @@ writelen += extraLength; } - ret = posix_memalign((void **)&alignedBuf, 512,512); + ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz); if (ret < 0) { + return -1; + } + + if (len > disk->d_blksz) { + fprintf(stderr, + "diskRawWrite: not setup for larger than %d.\n", + (int)disk->d_blksz); return (-1); } - bcopy(buf, alignedBuf, len); - ret = write(fd, alignedBuf, writelen); + memcpy(buf, alignedBuf, len); + ret = write(disk->d_fd, alignedBuf, writelen); if (ret > len) { ret = len; } @@ -507,7 +528,7 @@ static int -diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len) +diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len) { off_t retval_seek; ssize_t retval_write; @@ -519,7 +540,7 @@ return (-1); } - retval_seek = lseek(fd, writeOffset, SEEK_SET); + retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET); if (retval_seek != writeOffset) { fprintf(stderr, "diskRawWriteShadow: can't seek to offset %d\n", @@ -527,7 +548,7 @@ return (-1); } - retval_write = diskRawWrite(fd, buf, len); + retval_write = diskRawWrite(disk, buf, len); if (retval_write != len) { if (retval_write == -1) { fprintf(stderr, "%s: %s\n", __FUNCTION__, @@ -544,7 +565,7 @@ int -qdisk_read(int fd, __off64_t offset, void *buf, int count) +qdisk_read(target_info_t *disk, 
__off64_t offset, void *buf, int count) { shared_header_t *hdrp; char *data; @@ -556,15 +577,15 @@ * Raw blocks are 512 byte aligned. */ total = count + sizeof(shared_header_t); - if (total < 512) - total = 512; + if (total < disk->d_blksz) + total = disk->d_blksz; /* Round it up */ - if (total % 512) - total = total + (512 * !!(total % 512)) - (total % 512); + if (total % disk->d_blksz) + total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz); hdrp = NULL; - rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total); + rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz); if (rv < 0) return -1; @@ -573,7 +594,7 @@ data = (char *)hdrp + sizeof(shared_header_t); - rv = diskRawReadShadow(fd, offset, (char *)hdrp, total); + rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz); if (rv == -1) { return -1; @@ -594,12 +615,12 @@ int -qdisk_write(int fd, __off64_t offset, const void *buf, int count) +qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count) { size_t maxsize; shared_header_t *hdrp; char *data; - size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE); + size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE); maxsize = psz - (sizeof(shared_header_t)); if (count >= (maxsize + sizeof(shared_header_t))) { @@ -611,7 +632,6 @@ /* * Calculate the total length of the buffer, including the header. - * Raw blocks are 512 byte aligned. */ total = count + sizeof(shared_header_t); if (total < psz) @@ -622,7 +642,7 @@ total = total + (psz * !!(total % psz)) - (total % psz); hdrp = NULL; - rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total); + rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total); if (rv < 0) { perror("posix_memalign"); return -1; @@ -645,7 +665,7 @@ * about locking here. 
*/ if (total == psz) - rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz); + rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz); if (rv == -1) perror("diskRawWriteShadow"); @@ -658,11 +678,11 @@ static int -header_init(int fd, char *label) +header_init(target_info_t *disk, char *label) { quorum_header_t qh; - if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) { + if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) { swab_quorum_header_t(&qh); if (qh.qh_magic == HEADER_MAGIC_OLD) { printf("Warning: Red Hat Cluster Manager 1.2.x " @@ -681,14 +701,18 @@ /* Copy in the cluster/label name */ snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label); + qh.qh_version = VERSION_MAGIC_V2; if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) { perror("time"); return -1; } qh.qh_magic = HEADER_MAGIC_NUMBER; + qh.qh_blksz = disk->d_blksz; + qh.qh_pad = 0; + swab_quorum_header_t(&qh); - if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) { + if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) { return -1; } @@ -699,24 +723,24 @@ int qdisk_init(char *partname, char *label) { - int fd; + target_info_t disk; status_block_t ps, wps; - int nid; + int nid, ret; time_t t; - fd = qdisk_validate(partname); - if (fd < 0) { + ret = qdisk_validate(partname); + if (ret < 0) { perror("qdisk_verify"); return -1; } - fd = qdisk_open(partname); - if (fd < 0) { + ret = qdisk_open(partname, &disk); + if (ret < 0) { perror("qdisk_open"); return -1; } - if (header_init(fd, label) < 0) { + if (header_init(&disk, label) < 0) { return -1; } @@ -744,14 +768,14 @@ wps = ps; swab_status_block_t(&wps); - if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) { + if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) { printf("Error writing node ID block %d\n", nid); - qdisk_close(&fd); + qdisk_close(&disk); return -1; } } - qdisk_close(&fd); + qdisk_close(&disk); return 0; } --- 
cluster/cman/qdisk/disk.h 2007/02/21 20:19:43 1.1.2.6 +++ cluster/cman/qdisk/disk.h 2007/12/04 20:40:54 1.1.2.7 @@ -72,7 +72,8 @@ RF_DEBUG = 0x4, RF_PARANOID = 0x8, RF_ALLOW_KILL = 0x10, - RF_UPTIME = 0x20 + RF_UPTIME = 0x20, + RF_CMAN_LABEL = 0x40 } run_flag_t; @@ -86,6 +87,9 @@ #define STATE_MAGIC_NUMBER 0x47bacef8 /* Status block */ #define SHARED_HEADER_MAGIC 0x00DEBB1E /* Per-block headeer */ +/* Version magic. */ +#define VERSION_MAGIC_V2 0x389fabc4 + typedef struct __attribute__ ((packed)) { uint32_t ps_magic; @@ -152,16 +156,21 @@ */ typedef struct __attribute__ ((packed)) { uint32_t qh_magic; - uint32_t qh_align; // 64-bit-ism: alignment fixer. + uint32_t qh_version; // uint64_t qh_timestamp; // time of last update char qh_updatehost[128];// Hostname who put this here... - char qh_cluster[128]; // Cluster name + char qh_cluster[120]; // Cluster name; CMAN only + // supports 16 chars. + uint32_t qh_blksz; // Known block size @ creation + uint32_t qh_pad; } quorum_header_t; #define swab_quorum_header_t(ptr) \ {\ swab32((ptr)->qh_magic); \ - swab32((ptr)->qh_align); \ + swab32((ptr)->qh_version); \ + swab32((ptr)->qh_blksz); \ + swab32((ptr)->qh_pad); \ swab64((ptr)->qh_timestamp); \ } @@ -196,31 +205,35 @@ /* Offsets from RHCM 1.2.x */ #define OFFSET_HEADER 0 -#define HEADER_SIZE 4096 /* Page size for now */ +#define HEADER_SIZE(ssz) (ssz<4096?4096:ssz) -#define OFFSET_FIRST_STATUS_BLOCK (OFFSET_HEADER + HEADER_SIZE) -#define SPACE_PER_STATUS_BLOCK 4096 /* Page size for now */ +#define OFFSET_FIRST_STATUS_BLOCK(ssz) (OFFSET_HEADER + HEADER_SIZE(ssz)) +#define SPACE_PER_STATUS_BLOCK(ssz) (ssz<4096?4096:ssz) #define STATUS_BLOCK_COUNT MAX_NODES_DISK -#define SPACE_PER_MESSAGE_BLOCK (4096) -#define MESSAGE_BLOCK_COUNT MAX_NODES_DISK - -#define END_OF_DISK (OFFSET_FIRST_STATUS_BLOCK + \ +#define END_OF_DISK(ssz) (OFFSET_FIRST_STATUS_BLOCK(ssz) + \ (MAX_NODES_DISK + 1) * \ - SPACE_PER_STATUS_BLOCK) \ + SPACE_PER_STATUS_BLOCK(ssz)) \ + +typedef struct { + int 
d_fd; + int _pad_; + size_t d_blksz; + size_t d_pagesz; +} target_info_t; /* From disk.c */ -int qdisk_open(char *name); -int qdisk_close(int *fd); +int qdisk_open(char *name, target_info_t *disk); +int qdisk_close(target_info_t *disk); int qdisk_init(char *name, char *clustername); int qdisk_validate(char *name); -int qdisk_read(int fd, __off64_t ofs, void *buf, int len); -int qdisk_write(int fd, __off64_t ofs, const void *buf, int len); +int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len); +int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len); -#define qdisk_nodeid_offset(nodeid) \ - (OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1))) +#define qdisk_nodeid_offset(nodeid, ssz) \ + (OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1))) /* From disk_utils.c */ #define HISTORY_LENGTH 60 @@ -231,11 +244,12 @@ uint16_t pad0; } disk_msg_t; + typedef struct { uint64_t qc_incarnation; struct timeval qc_average; struct timeval qc_last[HISTORY_LENGTH]; - int qc_fd; + target_info_t qc_disk; int qc_my_id; int qc_writes; int qc_interval; @@ -256,6 +270,7 @@ char *qc_device; char *qc_label; char *qc_status_file; + char *qc_cman_label; } qd_ctx; typedef struct { @@ -272,14 +287,15 @@ int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state, disk_msg_t *msg, memb_mask_t mask, memb_mask_t master); -int qd_read_print_status(int fd, int nid); +int qd_read_print_status(target_info_t *disk, int nid); int qd_init(qd_ctx *ctx, cman_handle_t ch, int me); void qd_destroy(qd_ctx *ctx); /* proc.c */ int find_partitions(const char *partfile, const char *label, char *devname, size_t devlen, int print); -int check_device(char *device, char *label, quorum_header_t *qh); +int check_device(char *device, char *label, int *ssz, quorum_header_t *qh, + int flags); #endif --- cluster/cman/qdisk/disk_util.c 2007/01/26 14:34:26 1.1.2.3 +++ cluster/cman/qdisk/disk_util.c 2007/12/04 20:40:54 1.1.2.4 @@ -201,8 
+201,9 @@ if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0) utime_ok = 0; swab_status_block_t(&ps); - if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps, - sizeof(ps)) < 0) { + if (qdisk_write(&ctx->qc_disk, + qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz), + &ps, sizeof(ps)) < 0) { printf("Error writing node ID block %d\n", nid); return -1; } @@ -223,12 +224,12 @@ int -qd_print_status(status_block_t *ps) +qd_print_status(target_info_t *disk, status_block_t *ps) { int x; printf("Data @ offset %d:\n", - (int)qdisk_nodeid_offset(ps->ps_nodeid)); + (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz)); printf("status_block_t {\n"); printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic); printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid); @@ -261,11 +262,11 @@ int -qd_read_print_status(int fd, int nid) +qd_read_print_status(target_info_t *disk, int nid) { status_block_t ps; - if (fd < 0) { + if (!disk || disk->d_fd < 0) { errno = EINVAL; return -1; } @@ -275,13 +276,13 @@ return -1; } - if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps, + if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps, sizeof(ps)) < 0) { printf("Error reading node ID block %d\n", nid); return -1; } swab_status_block_t(&ps); - qd_print_status(&ps); + qd_print_status(disk, &ps); return 0; } @@ -339,6 +340,5 @@ free(ctx->qc_device); ctx->qc_device = NULL; } - close(ctx->qc_fd); - ctx->qc_fd = -1; + qdisk_close(&ctx->qc_disk); } --- cluster/cman/qdisk/main.c 2007/03/20 19:36:14 1.1.2.8 +++ cluster/cman/qdisk/main.c 2007/12/04 20:40:54 1.1.2.9 @@ -147,7 +147,8 @@ sb = &ni[x].ni_status; - if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1), + if (qdisk_read(&ctx->qc_disk, + qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz), sb, sizeof(*sb)) < 0) { clulog(LOG_WARNING,"Error reading node ID block %d\n", x+1); @@ -452,6 +453,7 @@ quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh) { int x = 0, score, maxscore, score_req; + char buf[64]; clulog(LOG_INFO, "Quorum 
Daemon Initializing\n"); @@ -462,12 +464,28 @@ if (qdisk_validate(ctx->qc_device) < 0) return -1; - ctx->qc_fd = qdisk_open(ctx->qc_device); - if (ctx->qc_fd < 0) { + if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) { clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device, strerror(errno)); return -1; } + + if (strlen(ctx->qc_device) > 15 && !(ctx->qc_flags & RF_CMAN_LABEL)) { + if (ctx->qc_label && strlen(ctx->qc_label) <= 15) { + ctx->qc_cman_label = strdup(ctx->qc_label); + } else { + snprintf(buf, sizeof(buf), "QDisk[%d]", + strlen(ctx->qc_device)); + ctx->qc_cman_label = strdup(buf); + } + + ctx->qc_flags |= RF_CMAN_LABEL; + clulog(LOG_DEBUG, "Device too long! Setting CMAN label to: %s\n", + ctx->qc_cman_label); + } + + clulog(LOG_DEBUG, "I/O Size: %d Page Size: %d\n", + ctx->qc_disk.d_blksz, ctx->qc_disk.d_pagesz); if (h && maxh) { start_score_thread(ctx, h, maxh); @@ -1209,14 +1227,30 @@ } if (ctx->qc_master_wait <= ctx->qc_tko_up) ctx->qc_master_wait = ctx->qc_tko_up + 1; - + /* Get votes */ + + /* check if votes is set in cluster.conf */ snprintf(query, sizeof(query), "/cluster/quorumd/@votes"); if (ccs_get(ccsfd, query, &val) == 0) { ctx->qc_votes = atoi(val); free(val); if (ctx->qc_votes < 0) ctx->qc_votes = 0; + } else { /* if votes is not set, default to node_num - 1 */ + int nodes = 0, error; + for (;;) { + error = ccs_get_list(ccsfd, "/cluster/clusternodes/child::*", &val); + if (error || !val) + break; + + nodes++; + } + nodes--; + if (nodes < 0) + nodes = 0; + + ctx->qc_votes = nodes; } /* Get device */ @@ -1285,6 +1319,15 @@ ctx->qc_flags &= ~RF_REBOOT; free(val); } + + /* Get cman_label */ + snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label"); + if (ccs_get(ccsfd, query, &val) == 0) { + if (strlen(val) > 0 && strlen(val) <= 15) { + ctx->qc_flags |= RF_CMAN_LABEL; + ctx->qc_cman_label = val; + } + } /* * Get flag to see if we're supposed to kill cman if qdisk is not @@ -1347,8 +1390,9 @@ *cfh = configure_heuristics(ccsfd, h, 
maxh); clulog(LOG_DEBUG, - "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes\n", - *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes); + "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes," + " flags=%08x\n", + *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes, ctx->qc_flags); ccs_disconnect(ccsfd); @@ -1391,6 +1435,7 @@ char debug = 0, foreground = 0; char device[128]; pid_t pid; + quorum_header_t qh; if (check_process_running(argv[0], &pid) && pid !=getpid()) { printf("QDisk services already running\n"); @@ -1493,13 +1538,24 @@ clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n", ctx.qc_device, ctx.qc_label); } else if (ctx.qc_device) { - if (check_device(ctx.qc_device, NULL, NULL) != 0) { + if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) { clulog(LOG_CRIT, "Specified partition %s does not have a " "qdisk label\n", ctx.qc_device); check_stop_cman(&ctx); return -1; } + + if (qh.qh_version == VERSION_MAGIC_V2 && + qh.qh_blksz != rv) { + clulog(LOG_CRIT, + "Specified device %s does match kernel's " + "reported sector size (%d != %d)\n", + ctx.qc_device, + ctx.qc_disk.d_blksz, rv); + check_stop_cman(&ctx); + return -1; + } } if (!foreground && !forked) { @@ -1518,7 +1574,11 @@ if (!_running) return 0; - cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes); + cman_register_quorum_device(ctx.qc_ch, + (ctx.qc_flags&RF_CMAN_LABEL)? + ctx.qc_cman_label: + ctx.qc_device, + ctx.qc_votes); /* XXX this always returns -1 / EBUSY even when it works?!!! 
--- cluster/cman/qdisk/mkqdisk.c 2006/11/21 14:50:01 1.1.2.3 +++ cluster/cman/qdisk/mkqdisk.c 2007/12/04 20:40:54 1.1.2.4 @@ -39,7 +39,7 @@ char *newdev = NULL, *newlabel = NULL; int rv; - printf("mkqdisk v0.5.1\n"); + printf("mkqdisk v0.5.2\n"); while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) { switch (rv) { --- cluster/cman/qdisk/proc.c 2006/06/23 16:01:02 1.1.2.1 +++ cluster/cman/qdisk/proc.c 2007/12/04 20:40:54 1.1.2.2 @@ -32,27 +32,33 @@ int -check_device(char *device, char *label, quorum_header_t *qh) +check_device(char *device, char *label, int *ssz, quorum_header_t *qh, + int flags) { - int fd = -1, ret = -1; + int ret = -1; quorum_header_t qh_local; + target_info_t disk; if (!qh) qh = &qh_local; - fd = qdisk_validate(device); - if (fd < 0) { + ret = qdisk_validate(device); + if (ret < 0) { perror("qdisk_verify"); return -1; } - fd = qdisk_open(device); - if (fd < 0) { + ret = qdisk_open(device, &disk); + if (ret < 0) { perror("qdisk_open"); return -1; } - if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) { + if (ssz) + *ssz = disk.d_blksz; + + ret = -1; + if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) { swab_quorum_header_t(qh); if (qh->qh_magic == HEADER_MAGIC_NUMBER) { if (!label || !strcmp(qh->qh_cluster, label)) { @@ -61,7 +67,14 @@ } } - qdisk_close(&fd); + /* only flag now is 'strict device check'; i.e., + "block size recorded must match kernel's reported size" */ + if (flags && qh->qh_version == VERSION_MAGIC_V2 && + disk.d_blksz != qh->qh_blksz) { + ret = -1; + } + + qdisk_close(&disk); return ret; } @@ -78,6 +91,7 @@ char device[128]; char realdev[256]; quorum_header_t qh; + int ssz; fp = fopen(partfile, "r"); if (!fp) @@ -96,16 +110,30 @@ if (strlen(device)) { snprintf(realdev, sizeof(realdev), "/dev/%s", device); - if (check_device(realdev, (char *)label, &qh) != 0) + + /* If we're not "just printing", then + reject devices which don't match + the recorded sector size */ + if
(check_device(realdev, (char *)label, &ssz, + &qh, !print) != 0) continue; if (print) { printf("%s:\n", realdev); - printf("\tMagic: %08x\n", qh.qh_magic); - printf("\tLabel: %s\n", qh.qh_cluster); - printf("\tCreated: %s", + printf("\tMagic: %08x\n", qh.qh_magic); + printf("\tLabel: %s\n", qh.qh_cluster); + printf("\tCreated: %s", ctime((time_t *)&qh.qh_timestamp)); - printf("\tHost: %s\n\n", qh.qh_updatehost); + printf("\tHost: %s\n", qh.qh_updatehost); + printf("\tKernel Sector Size: %d\n", ssz); + if (qh.qh_version == VERSION_MAGIC_V2) { + printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz); + if (qh.qh_blksz != ssz) { + printf("WARNING: Sector size mismatch: Header: %d Kernel: %d\n", + (int)qh.qh_blksz, ssz); + } + } else + printf("\n"); } if (devname && devlen) {