From: lhh@sourceware.org <lhh@sourceware.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] cluster/cman/qdisk disk.c disk.h disk_util.c m ...
Date: 4 Dec 2007 20:40:55 -0000 [thread overview]
Message-ID: <20071204204055.1241.qmail@sourceware.org> (raw)
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2007-12-04 20:40:55
Modified files:
cman/qdisk : disk.c disk.h disk_util.c main.c mkqdisk.c
proc.c
Log message:
Make qdiskd work with sector sizes other than 512 bytes. Import patch from Fabio M. Di Nitto to make qdiskd use (node_count - 1) for votes if none is specified in cluster.conf.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/disk_util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/mkqdisk.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/proc.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.1&r2=1.1.2.2
--- cluster/cman/qdisk/disk.c 2007/10/29 20:38:12 1.1.2.4
+++ cluster/cman/qdisk/disk.c 2007/12/04 20:40:54 1.1.2.5
@@ -43,8 +43,9 @@
#include <platform.h>
#include <unistd.h>
#include <time.h>
+#include <linux/fs.h>
-static int diskRawRead(int fd, char *buf, int len);
+static int diskRawRead(target_info_t *disk, char *buf, int len);
uint32_t clu_crc32(const char *data, size_t count);
@@ -211,49 +212,58 @@
* Returns - (the file descriptor), a value >= 0 on success.
*/
int
-qdisk_open(char *name)
+qdisk_open(char *name, target_info_t *disk)
{
- int fd;
- int retval;
+ int ret;
+ unsigned long ssz;
/*
* Open for synchronous writes to insure all writes go directly
* to disk.
*/
- fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
- if (fd < 0) {
- return fd;
- }
+ disk->d_fd = open(name, O_RDWR | O_SYNC | O_DIRECT);
+ if (disk->d_fd < 0)
+ return disk->d_fd;
+
+ disk->d_blksz = 512;
+ ret = ioctl(disk->d_fd, BLKSSZGET, &ssz);
+ if (ret < 0)
+ perror("qdisk_open: ioctl(BLKSSZGET)");
+ else
+ /* Sorry, no sector sizes >4GB please */
+ disk->d_blksz = (uint32_t)ssz;
- /* Check to verify that the partition is large enough.*/
- retval = lseek(fd, END_OF_DISK, SEEK_SET);
+ disk->d_pagesz = sysconf(_SC_PAGESIZE);
- if (retval < 0) {
+ /* Check to verify that the partition is large enough.*/
+ ret = lseek(disk->d_fd, END_OF_DISK(disk->d_blksz), SEEK_SET);
+ if (ret < 0) {
perror("open_partition: seek");
return -1;
}
- if (retval < END_OF_DISK) {
+ if (ret < END_OF_DISK(disk->d_blksz)) {
fprintf(stderr, "Partition %s too small\n", name);
errno = EINVAL;
return -1;
}
/* Set close-on-exec bit */
- retval = fcntl(fd, F_GETFD, 0);
- if (retval < 0) {
- close(fd);
+ ret = fcntl(disk->d_fd, F_GETFD, 0);
+ if (ret < 0) {
+ perror("open_partition: fcntl(F_GETFD)");
+ close(disk->d_fd);
return -1;
}
- retval |= FD_CLOEXEC;
- if (fcntl(fd, F_SETFD, retval) < 0) {
- perror("open_partition: fcntl");
- close(fd);
+ ret |= FD_CLOEXEC;
+ if (fcntl(disk->d_fd, F_SETFD, ret) < 0) {
+ perror("open_partition: fcntl(F_SETFD)");
+ close(disk->d_fd);
return -1;
}
- return fd;
+ return 0;
}
@@ -263,17 +273,17 @@
* Returns - value from close syscall.
*/
int
-qdisk_close(int *fd)
+qdisk_close(target_info_t *disk)
{
int retval;
- if (!fd || *fd < 0) {
+ if (!disk || disk->d_fd < 0) {
errno = EINVAL;
return -1;
}
- retval = close(*fd);
- *fd = -1;
+ retval = close(disk->d_fd);
+ disk->d_fd = -1;
return retval;
}
@@ -288,7 +298,7 @@
qdisk_validate(char *name)
{
struct stat stat_st, *stat_ptr;
- int fd;
+ target_info_t disk;
stat_ptr = &stat_st;
if (stat(name, stat_ptr) < 0) {
@@ -310,26 +320,25 @@
/*
* Verify read/write permission.
*/
- fd = qdisk_open(name);
- if (fd < 0) {
+ if (qdisk_open(name, &disk) < 0) {
fprintf(stderr, "%s: open of %s for RDWR failed: %s\n",
__FUNCTION__, name, strerror(errno));
return -1;
}
- qdisk_close(&fd);
+ qdisk_close(&disk);
return 0;
}
static int
-diskRawReadShadow(int fd, off_t readOffset, char *buf, int len)
+diskRawReadShadow(target_info_t *disk, off_t readOffset, char *buf, int len)
{
int ret;
shared_header_t *hdrp;
char *data;
int datalen;
- ret = lseek(fd, readOffset, SEEK_SET);
+ ret = lseek(disk->d_fd, readOffset, SEEK_SET);
if (ret != readOffset) {
#if 0
fprintf(stderr,
@@ -340,7 +349,7 @@
return -1;
}
- ret = diskRawRead(fd, buf, len);
+ ret = diskRawRead(disk, buf, len);
if (ret != len) {
#if 0
fprintf(stderr, "diskRawReadShadow: aligned read "
@@ -375,7 +384,7 @@
* Here we check for alignment and do a bounceio if necessary.
*/
static int
-diskRawRead(int fd, char *buf, int len)
+diskRawRead(target_info_t *disk, char *buf, int len)
{
char *alignedBuf;
int readret;
@@ -383,21 +392,24 @@
int readlen;
int bounceNeeded = 1;
- if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
- ((len % 512) == 0)) {
+
+ /* was 0x3ff, which is ((512 << 1) - 1) */
+ if ((((unsigned long) buf &
+ (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+ ((len % (disk->d_blksz)) == 0)) {
bounceNeeded = 0;
}
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (read(fd, buf, len));
+ return (read(disk->d_fd, buf, len));
}
- if (len > 512) {
+ if (len > disk->d_blksz) {
fprintf(stderr,
"diskRawRead: not setup for reads larger than %d.\n",
- 512);
+ (int)disk->d_blksz);
return (-1);
}
/*
@@ -406,8 +418,8 @@
* XXX - if the on-disk offsets don't provide enough room we're cooked!
*/
extraLength = 0;
- if (len % 512) {
- extraLength = 512 - (len % 512);
+ if (len % disk->d_blksz) {
+ extraLength = disk->d_blksz - (len % disk->d_blksz);
}
readlen = len;
@@ -415,18 +427,18 @@
readlen += extraLength;
}
- readret = posix_memalign((void **)&alignedBuf, 512, 512);
+ readret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
if (readret < 0) {
return -1;
}
- readret = read(fd, alignedBuf, readlen);
+ readret = read(disk->d_fd, alignedBuf, readlen);
if (readret > 0) {
if (readret > len) {
- bcopy(alignedBuf, buf, len);
+ memcpy(alignedBuf, buf, len);
readret = len;
} else {
- bcopy(alignedBuf, buf, readret);
+ memcpy(alignedBuf, buf, readret);
}
}
@@ -445,7 +457,7 @@
* Here we check for alignment and do a bounceio if necessary.
*/
static int
-diskRawWrite(int fd, char *buf, int len)
+diskRawWrite(target_info_t *disk, char *buf, int len)
{
char *alignedBuf;
int ret;
@@ -453,31 +465,33 @@
int writelen;
int bounceNeeded = 1;
- if ((((unsigned long) buf & (unsigned long) 0x3ff) == 0) &&
- ((len % 512) == 0)) {
+ /* was 0x3ff, which is ((512 << 1) - 1) */
+ if ((((unsigned long) buf &
+ (unsigned long) ((disk->d_blksz << 1) -1)) == 0) &&
+ ((len % (disk->d_blksz)) == 0)) {
bounceNeeded = 0;
}
+
if (bounceNeeded == 0) {
/* Already aligned and even multiple of 512, no bounceio
* required. */
- return (write(fd, buf, len));
+ return (write(disk->d_fd, buf, len));
}
- if (len > 512) {
+ if (len > disk->d_blksz) {
fprintf(stderr,
- "diskRawWrite: not setup for larger than %d.\n",
- 512);
+ "diskRawRead: not setup for reads larger than %d.\n",
+ (int)disk->d_blksz);
return (-1);
}
-
/*
* All IOs must be of size which is a multiple of 512. Here we
* just add in enough extra to accommodate.
* XXX - if the on-disk offsets don't provide enough room we're cooked!
*/
extraLength = 0;
- if (len % 512) {
- extraLength = 512 - (len % 512);
+ if (len % disk->d_blksz) {
+ extraLength = disk->d_blksz - (len % disk->d_blksz);
}
writelen = len;
@@ -485,13 +499,20 @@
writelen += extraLength;
}
- ret = posix_memalign((void **)&alignedBuf, 512,512);
+ ret = posix_memalign((void **)&alignedBuf, disk->d_pagesz, disk->d_blksz);
if (ret < 0) {
+ return -1;
+ }
+
+ if (len > disk->d_blksz) {
+ fprintf(stderr,
+ "diskRawWrite: not setup for larger than %d.\n",
+ (int)disk->d_blksz);
return (-1);
}
- bcopy(buf, alignedBuf, len);
- ret = write(fd, alignedBuf, writelen);
+ memcpy(buf, alignedBuf, len);
+ ret = write(disk->d_fd, alignedBuf, writelen);
if (ret > len) {
ret = len;
}
@@ -507,7 +528,7 @@
static int
-diskRawWriteShadow(int fd, __off64_t writeOffset, char *buf, int len)
+diskRawWriteShadow(target_info_t *disk, __off64_t writeOffset, char *buf, int len)
{
off_t retval_seek;
ssize_t retval_write;
@@ -519,7 +540,7 @@
return (-1);
}
- retval_seek = lseek(fd, writeOffset, SEEK_SET);
+ retval_seek = lseek(disk->d_fd, writeOffset, SEEK_SET);
if (retval_seek != writeOffset) {
fprintf(stderr,
"diskRawWriteShadow: can't seek to offset %d\n",
@@ -527,7 +548,7 @@
return (-1);
}
- retval_write = diskRawWrite(fd, buf, len);
+ retval_write = diskRawWrite(disk, buf, len);
if (retval_write != len) {
if (retval_write == -1) {
fprintf(stderr, "%s: %s\n", __FUNCTION__,
@@ -544,7 +565,7 @@
int
-qdisk_read(int fd, __off64_t offset, void *buf, int count)
+qdisk_read(target_info_t *disk, __off64_t offset, void *buf, int count)
{
shared_header_t *hdrp;
char *data;
@@ -556,15 +577,15 @@
* Raw blocks are 512 byte aligned.
*/
total = count + sizeof(shared_header_t);
- if (total < 512)
- total = 512;
+ if (total < disk->d_blksz)
+ total = disk->d_blksz;
/* Round it up */
- if (total % 512)
- total = total + (512 * !!(total % 512)) - (total % 512);
+ if (total % disk->d_blksz)
+ total = total + (disk->d_blksz * !!(total % disk->d_blksz)) - (total % disk->d_blksz);
hdrp = NULL;
- rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+ rv = posix_memalign((void **)&hdrp, disk->d_pagesz, disk->d_blksz);
if (rv < 0)
return -1;
@@ -573,7 +594,7 @@
data = (char *)hdrp + sizeof(shared_header_t);
- rv = diskRawReadShadow(fd, offset, (char *)hdrp, total);
+ rv = diskRawReadShadow(disk, offset, (char *)hdrp, disk->d_blksz);
if (rv == -1) {
return -1;
@@ -594,12 +615,12 @@
int
-qdisk_write(int fd, __off64_t offset, const void *buf, int count)
+qdisk_write(target_info_t *disk, __off64_t offset, const void *buf, int count)
{
size_t maxsize;
shared_header_t *hdrp;
char *data;
- size_t total = 0, rv = -1, psz = 512; //sysconf(_SC_PAGESIZE);
+ size_t total = 0, rv = -1, psz = disk->d_blksz; //sysconf(_SC_PAGESIZE);
maxsize = psz - (sizeof(shared_header_t));
if (count >= (maxsize + sizeof(shared_header_t))) {
@@ -611,7 +632,6 @@
/*
* Calculate the total length of the buffer, including the header.
- * Raw blocks are 512 byte aligned.
*/
total = count + sizeof(shared_header_t);
if (total < psz)
@@ -622,7 +642,7 @@
total = total + (psz * !!(total % psz)) - (total % psz);
hdrp = NULL;
- rv = posix_memalign((void **)&hdrp, sysconf(_SC_PAGESIZE), total);
+ rv = posix_memalign((void **)&hdrp, disk->d_pagesz, total);
if (rv < 0) {
perror("posix_memalign");
return -1;
@@ -645,7 +665,7 @@
* about locking here.
*/
if (total == psz)
- rv = diskRawWriteShadow(fd, offset, (char *)hdrp, psz);
+ rv = diskRawWriteShadow(disk, offset, (char *)hdrp, psz);
if (rv == -1)
perror("diskRawWriteShadow");
@@ -658,11 +678,11 @@
static int
-header_init(int fd, char *label)
+header_init(target_info_t *disk, char *label)
{
quorum_header_t qh;
- if (qdisk_read(fd, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
+ if (qdisk_read(disk, OFFSET_HEADER, &qh, sizeof(qh)) == sizeof(qh)) {
swab_quorum_header_t(&qh);
if (qh.qh_magic == HEADER_MAGIC_OLD) {
printf("Warning: Red Hat Cluster Manager 1.2.x "
@@ -681,14 +701,18 @@
/* Copy in the cluster/label name */
snprintf(qh.qh_cluster, sizeof(qh.qh_cluster)-1, "%s", label);
+ qh.qh_version = VERSION_MAGIC_V2;
if ((qh.qh_timestamp = (uint64_t)time(NULL)) <= 0) {
perror("time");
return -1;
}
qh.qh_magic = HEADER_MAGIC_NUMBER;
+ qh.qh_blksz = disk->d_blksz;
+ qh.qh_pad = 0;
+
swab_quorum_header_t(&qh);
- if (qdisk_write(fd, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
+ if (qdisk_write(disk, OFFSET_HEADER, &qh, sizeof(qh)) != sizeof(qh)) {
return -1;
}
@@ -699,24 +723,24 @@
int
qdisk_init(char *partname, char *label)
{
- int fd;
+ target_info_t disk;
status_block_t ps, wps;
- int nid;
+ int nid, ret;
time_t t;
- fd = qdisk_validate(partname);
- if (fd < 0) {
+ ret = qdisk_validate(partname);
+ if (ret < 0) {
perror("qdisk_verify");
return -1;
}
- fd = qdisk_open(partname);
- if (fd < 0) {
+ ret = qdisk_open(partname, &disk);
+ if (ret < 0) {
perror("qdisk_open");
return -1;
}
- if (header_init(fd, label) < 0) {
+ if (header_init(&disk, label) < 0) {
return -1;
}
@@ -744,14 +768,14 @@
wps = ps;
swab_status_block_t(&wps);
- if (qdisk_write(fd, qdisk_nodeid_offset(nid), &wps, sizeof(wps)) < 0) {
+ if (qdisk_write(&disk, qdisk_nodeid_offset(nid, disk.d_blksz), &wps, sizeof(wps)) < 0) {
printf("Error writing node ID block %d\n", nid);
- qdisk_close(&fd);
+ qdisk_close(&disk);
return -1;
}
}
- qdisk_close(&fd);
+ qdisk_close(&disk);
return 0;
}
--- cluster/cman/qdisk/disk.h 2007/02/21 20:19:43 1.1.2.6
+++ cluster/cman/qdisk/disk.h 2007/12/04 20:40:54 1.1.2.7
@@ -72,7 +72,8 @@
RF_DEBUG = 0x4,
RF_PARANOID = 0x8,
RF_ALLOW_KILL = 0x10,
- RF_UPTIME = 0x20
+ RF_UPTIME = 0x20,
+ RF_CMAN_LABEL = 0x40
} run_flag_t;
@@ -86,6 +87,9 @@
#define STATE_MAGIC_NUMBER 0x47bacef8 /* Status block */
#define SHARED_HEADER_MAGIC 0x00DEBB1E /* Per-block headeer */
+/* Version magic. */
+#define VERSION_MAGIC_V2 0x389fabc4
+
typedef struct __attribute__ ((packed)) {
uint32_t ps_magic;
@@ -152,16 +156,21 @@
*/
typedef struct __attribute__ ((packed)) {
uint32_t qh_magic;
- uint32_t qh_align; // 64-bit-ism: alignment fixer.
+ uint32_t qh_version; //
uint64_t qh_timestamp; // time of last update
char qh_updatehost[128];// Hostname who put this here...
- char qh_cluster[128]; // Cluster name
+ char qh_cluster[120]; // Cluster name; CMAN only
+ // supports 16 chars.
+ uint32_t qh_blksz; // Known block size @ creation
+ uint32_t qh_pad;
} quorum_header_t;
#define swab_quorum_header_t(ptr) \
{\
swab32((ptr)->qh_magic); \
- swab32((ptr)->qh_align); \
+ swab32((ptr)->qh_version); \
+ swab32((ptr)->qh_blksz); \
+ swab32((ptr)->qh_pad); \
swab64((ptr)->qh_timestamp); \
}
@@ -196,31 +205,35 @@
/* Offsets from RHCM 1.2.x */
#define OFFSET_HEADER 0
-#define HEADER_SIZE 4096 /* Page size for now */
+#define HEADER_SIZE(ssz) (ssz<4096?4096:ssz)
-#define OFFSET_FIRST_STATUS_BLOCK (OFFSET_HEADER + HEADER_SIZE)
-#define SPACE_PER_STATUS_BLOCK 4096 /* Page size for now */
+#define OFFSET_FIRST_STATUS_BLOCK(ssz) (OFFSET_HEADER + HEADER_SIZE(ssz))
+#define SPACE_PER_STATUS_BLOCK(ssz) (ssz<4096?4096:ssz)
#define STATUS_BLOCK_COUNT MAX_NODES_DISK
-#define SPACE_PER_MESSAGE_BLOCK (4096)
-#define MESSAGE_BLOCK_COUNT MAX_NODES_DISK
-
-#define END_OF_DISK (OFFSET_FIRST_STATUS_BLOCK + \
+#define END_OF_DISK(ssz) (OFFSET_FIRST_STATUS_BLOCK(ssz) + \
(MAX_NODES_DISK + 1) * \
- SPACE_PER_STATUS_BLOCK) \
+ SPACE_PER_STATUS_BLOCK(ssz)) \
+
+typedef struct {
+ int d_fd;
+ int _pad_;
+ size_t d_blksz;
+ size_t d_pagesz;
+} target_info_t;
/* From disk.c */
-int qdisk_open(char *name);
-int qdisk_close(int *fd);
+int qdisk_open(char *name, target_info_t *disk);
+int qdisk_close(target_info_t *disk);
int qdisk_init(char *name, char *clustername);
int qdisk_validate(char *name);
-int qdisk_read(int fd, __off64_t ofs, void *buf, int len);
-int qdisk_write(int fd, __off64_t ofs, const void *buf, int len);
+int qdisk_read(target_info_t *disk, __off64_t ofs, void *buf, int len);
+int qdisk_write(target_info_t *disk, __off64_t ofs, const void *buf, int len);
-#define qdisk_nodeid_offset(nodeid) \
- (OFFSET_FIRST_STATUS_BLOCK + (SPACE_PER_STATUS_BLOCK * (nodeid - 1)))
+#define qdisk_nodeid_offset(nodeid, ssz) \
+ (OFFSET_FIRST_STATUS_BLOCK(ssz) + (SPACE_PER_STATUS_BLOCK(ssz) * (nodeid - 1)))
/* From disk_utils.c */
#define HISTORY_LENGTH 60
@@ -231,11 +244,12 @@
uint16_t pad0;
} disk_msg_t;
+
typedef struct {
uint64_t qc_incarnation;
struct timeval qc_average;
struct timeval qc_last[HISTORY_LENGTH];
- int qc_fd;
+ target_info_t qc_disk;
int qc_my_id;
int qc_writes;
int qc_interval;
@@ -256,6 +270,7 @@
char *qc_device;
char *qc_label;
char *qc_status_file;
+ char *qc_cman_label;
} qd_ctx;
typedef struct {
@@ -272,14 +287,15 @@
int qd_write_status(qd_ctx *ctx, int nid, disk_node_state_t state,
disk_msg_t *msg, memb_mask_t mask, memb_mask_t master);
-int qd_read_print_status(int fd, int nid);
+int qd_read_print_status(target_info_t *disk, int nid);
int qd_init(qd_ctx *ctx, cman_handle_t ch, int me);
void qd_destroy(qd_ctx *ctx);
/* proc.c */
int find_partitions(const char *partfile, const char *label,
char *devname, size_t devlen, int print);
-int check_device(char *device, char *label, quorum_header_t *qh);
+int check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+ int flags);
#endif
--- cluster/cman/qdisk/disk_util.c 2007/01/26 14:34:26 1.1.2.3
+++ cluster/cman/qdisk/disk_util.c 2007/12/04 20:40:54 1.1.2.4
@@ -201,8 +201,9 @@
if (get_time(&start, ctx->qc_flags&RF_UPTIME) < 0)
utime_ok = 0;
swab_status_block_t(&ps);
- if (qdisk_write(ctx->qc_fd, qdisk_nodeid_offset(nid), &ps,
- sizeof(ps)) < 0) {
+ if (qdisk_write(&ctx->qc_disk,
+ qdisk_nodeid_offset(nid, ctx->qc_disk.d_blksz),
+ &ps, sizeof(ps)) < 0) {
printf("Error writing node ID block %d\n", nid);
return -1;
}
@@ -223,12 +224,12 @@
int
-qd_print_status(status_block_t *ps)
+qd_print_status(target_info_t *disk, status_block_t *ps)
{
int x;
printf("Data @ offset %d:\n",
- (int)qdisk_nodeid_offset(ps->ps_nodeid));
+ (int)qdisk_nodeid_offset(ps->ps_nodeid, disk->d_blksz));
printf("status_block_t {\n");
printf("\t.ps_magic = %08x;\n", (int)ps->ps_magic);
printf("\t.ps_nodeid = %d;\n", (int)ps->ps_nodeid);
@@ -261,11 +262,11 @@
int
-qd_read_print_status(int fd, int nid)
+qd_read_print_status(target_info_t *disk, int nid)
{
status_block_t ps;
- if (fd < 0) {
+ if (!disk || disk->d_fd < 0) {
errno = EINVAL;
return -1;
}
@@ -275,13 +276,13 @@
return -1;
}
- if (qdisk_read(fd, qdisk_nodeid_offset(nid), &ps,
+ if (qdisk_read(disk, qdisk_nodeid_offset(nid, disk->d_blksz), &ps,
sizeof(ps)) < 0) {
printf("Error reading node ID block %d\n", nid);
return -1;
}
swab_status_block_t(&ps);
- qd_print_status(&ps);
+ qd_print_status(disk, &ps);
return 0;
}
@@ -339,6 +340,5 @@
free(ctx->qc_device);
ctx->qc_device = NULL;
}
- close(ctx->qc_fd);
- ctx->qc_fd = -1;
+ qdisk_close(&ctx->qc_disk);
}
--- cluster/cman/qdisk/main.c 2007/03/20 19:36:14 1.1.2.8
+++ cluster/cman/qdisk/main.c 2007/12/04 20:40:54 1.1.2.9
@@ -147,7 +147,8 @@
sb = &ni[x].ni_status;
- if (qdisk_read(ctx->qc_fd, qdisk_nodeid_offset(x+1),
+ if (qdisk_read(&ctx->qc_disk,
+ qdisk_nodeid_offset(x+1, ctx->qc_disk.d_blksz),
sb, sizeof(*sb)) < 0) {
clulog(LOG_WARNING,"Error reading node ID block %d\n",
x+1);
@@ -452,6 +453,7 @@
quorum_init(qd_ctx *ctx, node_info_t *ni, int max, struct h_data *h, int maxh)
{
int x = 0, score, maxscore, score_req;
+ char buf[64];
clulog(LOG_INFO, "Quorum Daemon Initializing\n");
@@ -462,12 +464,28 @@
if (qdisk_validate(ctx->qc_device) < 0)
return -1;
- ctx->qc_fd = qdisk_open(ctx->qc_device);
- if (ctx->qc_fd < 0) {
+ if (qdisk_open(ctx->qc_device, &ctx->qc_disk) < 0) {
clulog(LOG_CRIT, "Failed to open %s: %s\n", ctx->qc_device,
strerror(errno));
return -1;
}
+
+ if (strlen(ctx->qc_device) > 15 && !(ctx->qc_flags & RF_CMAN_LABEL)) {
+ if (ctx->qc_label && strlen(ctx->qc_label) <= 15) {
+ ctx->qc_cman_label = strdup(ctx->qc_label);
+ } else {
+ snprintf(buf, sizeof(buf), "QDisk[%d]",
+ strlen(ctx->qc_device));
+ ctx->qc_cman_label = strdup(buf);
+ }
+
+ ctx->qc_flags |= RF_CMAN_LABEL;
+ clulog(LOG_DEBUG, "Device too long! Setting CMAN label to: %s\n",
+ ctx->qc_cman_label);
+ }
+
+ clulog(LOG_DEBUG, "I/O Size: %d Page Size: %d\n",
+ ctx->qc_disk.d_blksz, ctx->qc_disk.d_pagesz);
if (h && maxh) {
start_score_thread(ctx, h, maxh);
@@ -1209,14 +1227,30 @@
}
if (ctx->qc_master_wait <= ctx->qc_tko_up)
ctx->qc_master_wait = ctx->qc_tko_up + 1;
-
+
/* Get votes */
+
+ /* check if votes is set in cluster.conf */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_votes = atoi(val);
free(val);
if (ctx->qc_votes < 0)
ctx->qc_votes = 0;
+ } else { /* if votes is not set, default to node_num - 1 */
+ int nodes = 0, error;
+ for (;;) {
+ error = ccs_get_list(ccsfd, "/cluster/clusternodes/child::*", &val);
+ if (error || !val)
+ break;
+
+ nodes++;
+ }
+ nodes--;
+ if (nodes < 0)
+ nodes = 0;
+
+ ctx->qc_votes = nodes;
}
/* Get device */
@@ -1285,6 +1319,15 @@
ctx->qc_flags &= ~RF_REBOOT;
free(val);
}
+
+ /* Get cman_label */
+ snprintf(query, sizeof(query), "/cluster/quorumd/@cman_label");
+ if (ccs_get(ccsfd, query, &val) == 0) {
+ if (strlen(val) > 0 && strlen(val) <= 15) {
+ ctx->qc_flags |= RF_CMAN_LABEL;
+ ctx->qc_cman_label = val;
+ }
+ }
/*
* Get flag to see if we're supposed to kill cman if qdisk is not
@@ -1347,8 +1390,9 @@
*cfh = configure_heuristics(ccsfd, h, maxh);
clulog(LOG_DEBUG,
- "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes\n",
- *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes);
+ "Quorum Daemon: %d heuristics, %d interval, %d tko, %d votes,"
+ " flags=%08x\n",
+ *cfh, ctx->qc_interval, ctx->qc_tko, ctx->qc_votes, ctx->qc_flags);
ccs_disconnect(ccsfd);
@@ -1391,6 +1435,7 @@
char debug = 0, foreground = 0;
char device[128];
pid_t pid;
+ quorum_header_t qh;
if (check_process_running(argv[0], &pid) && pid !=getpid()) {
printf("QDisk services already running\n");
@@ -1493,13 +1538,24 @@
clulog(LOG_INFO, "Quorum Partition: %s Label: %s\n",
ctx.qc_device, ctx.qc_label);
} else if (ctx.qc_device) {
- if (check_device(ctx.qc_device, NULL, NULL) != 0) {
+ if (check_device(ctx.qc_device, NULL, &rv, &qh, 0) != 0) {
clulog(LOG_CRIT,
"Specified partition %s does not have a "
"qdisk label\n", ctx.qc_device);
check_stop_cman(&ctx);
return -1;
}
+
+ if (qh.qh_version == VERSION_MAGIC_V2 &&
+ qh.qh_blksz != rv) {
+ clulog(LOG_CRIT,
+ "Specified device %s does match kernel's "
+ "reported sector size (%d != %d)\n",
+ ctx.qc_device,
+ ctx.qc_disk.d_blksz, rv);
+ check_stop_cman(&ctx);
+ return -1;
+ }
}
if (!foreground && !forked) {
@@ -1518,7 +1574,11 @@
if (!_running)
return 0;
- cman_register_quorum_device(ctx.qc_ch, ctx.qc_device, ctx.qc_votes);
+ cman_register_quorum_device(ctx.qc_ch,
+ (ctx.qc_flags&RF_CMAN_LABEL)?
+ ctx.qc_cman_label:
+ ctx.qc_device,
+ ctx.qc_votes);
/*
XXX this always returns -1 / EBUSY even when it works?!!!
--- cluster/cman/qdisk/mkqdisk.c 2006/11/21 14:50:01 1.1.2.3
+++ cluster/cman/qdisk/mkqdisk.c 2007/12/04 20:40:54 1.1.2.4
@@ -39,7 +39,7 @@
char *newdev = NULL, *newlabel = NULL;
int rv;
- printf("mkqdisk v0.5.1\n");
+ printf("mkqdisk v0.5.2\n");
while ((rv = getopt(argc, argv, "Lf:c:l:h")) != EOF) {
switch (rv) {
--- cluster/cman/qdisk/proc.c 2006/06/23 16:01:02 1.1.2.1
+++ cluster/cman/qdisk/proc.c 2007/12/04 20:40:54 1.1.2.2
@@ -32,27 +32,33 @@
int
-check_device(char *device, char *label, quorum_header_t *qh)
+check_device(char *device, char *label, int *ssz, quorum_header_t *qh,
+ int flags)
{
- int fd = -1, ret = -1;
+ int ret = -1;
quorum_header_t qh_local;
+ target_info_t disk;
if (!qh)
qh = &qh_local;
- fd = qdisk_validate(device);
- if (fd < 0) {
+ ret = qdisk_validate(device);
+ if (ret < 0) {
perror("qdisk_verify");
return -1;
}
- fd = qdisk_open(device);
- if (fd < 0) {
+ ret = qdisk_open(device, &disk);
+ if (ret < 0) {
perror("qdisk_open");
return -1;
}
- if (qdisk_read(fd, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
+ if (ssz)
+ *ssz = disk.d_blksz;
+
+ ret = -1;
+ if (qdisk_read(&disk, OFFSET_HEADER, qh, sizeof(*qh)) == sizeof(*qh)) {
swab_quorum_header_t(qh);
if (qh->qh_magic == HEADER_MAGIC_NUMBER) {
if (!label || !strcmp(qh->qh_cluster, label)) {
@@ -61,7 +67,14 @@
}
}
- qdisk_close(&fd);
+ /* only flag now is 'strict device check'; i.e.,
+ "block size recorded must match kernel's reported size" */
+ if (flags && qh->qh_version == VERSION_MAGIC_V2 &&
+ disk.d_blksz != qh->qh_blksz) {
+ ret = -1;
+ }
+
+ qdisk_close(&disk);
return ret;
}
@@ -78,6 +91,7 @@
char device[128];
char realdev[256];
quorum_header_t qh;
+ int ssz;
fp = fopen(partfile, "r");
if (!fp)
@@ -96,16 +110,30 @@
if (strlen(device)) {
snprintf(realdev, sizeof(realdev),
"/dev/%s", device);
- if (check_device(realdev, (char *)label, &qh) != 0)
+
+ /* If we're not "just printing", then
+ reject devices which don't match
+ the recorded sector size */
+ if (check_device(realdev, (char *)label, &ssz,
+ &qh, !print) != 0)
continue;
if (print) {
printf("%s:\n", realdev);
- printf("\tMagic: %08x\n", qh.qh_magic);
- printf("\tLabel: %s\n", qh.qh_cluster);
- printf("\tCreated: %s",
+ printf("\tMagic: %08x\n", qh.qh_magic);
+ printf("\tLabel: %s\n", qh.qh_cluster);
+ printf("\tCreated: %s",
ctime((time_t *)&qh.qh_timestamp));
- printf("\tHost: %s\n\n", qh.qh_updatehost);
+ printf("\tHost: %s\n", qh.qh_updatehost);
+ printf("\tKernel Sector Size: %d\n", ssz);
+ if (qh.qh_version == VERSION_MAGIC_V2) {
+ printf("\tRecorded Sector Size: %d\n\n", (int)qh.qh_blksz);
+ if (qh.qh_blksz != ssz) {
+ printf("WARNING: Sector size mismatch: Header: %d Kernel: %d\n",
+ (int)qh.qh_blksz, ssz);
+ }
+ } else
+ printf("\n");
}
if (devname && devlen) {
next reply other threads:[~2007-12-04 20:40 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-04 20:40 lhh [this message]
-- strict thread matches above, loose matches on Subject: below --
2007-12-04 20:24 [Cluster-devel] cluster/cman/qdisk disk.c disk.h disk_util.c m lhh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071204204055.1241.qmail@sourceware.org \
--to=lhh@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.