* [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-13 7:51 [PATCH 0/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command and add " Yang Feng
@ 2017-07-13 7:51 ` Yang Feng
0 siblings, 0 replies; 13+ messages in thread
From: Yang Feng @ 2017-07-13 7:51 UTC (permalink / raw)
To: dm-devel
Cc: zouming.zouming, chengjike.cheng, guanjunxiong, philip.yang,
shenhong09, hege09
Add args min_avg_latency of logarithmic scale, for prioritizers/path_latency.c.
Min average latency is not constant 1us, and is set by user. Certainly, max average
latency value is still 100s. It make support better for more scenes, because it can
deal better with the normal standard deviation of path latency. For example, when the
standard deviation value is 200us and the average latency of the normal paths is 1ms,
args base_num can be set to 5 and args min_avg_latency can be set to 2ms, so the paths
will be grouped in priority groups with path latency <=2ms, (2ms, 10ms], (10ms, 50ms],
etc.
Signed-off-by: Yang Feng <philip.yang@huawei.com>
---
libmultipath/prioritizers/path_latency.c | 61 ++++++++++++++++++++++----------
multipath/multipath.conf.5 | 14 +++++---
2 files changed, 52 insertions(+), 23 deletions(-)
diff --git a/libmultipath/prioritizers/path_latency.c b/libmultipath/prioritizers/path_latency.c
index 21209ff..c8fe318 100644
--- a/libmultipath/prioritizers/path_latency.c
+++ b/libmultipath/prioritizers/path_latency.c
@@ -76,7 +76,7 @@ static int do_readsector0(struct path *pp, unsigned int timeout)
return 1;
}
-int check_args_valid(int io_num, int base_num)
+int check_args_valid(int io_num, int base_num, int min_avg_latency)
{
if ((io_num < MIN_IO_NUM) || (io_num > MAX_IO_NUM))
{
@@ -90,24 +90,33 @@ int check_args_valid(int io_num, int base_num)
return 0;
}
+ if ((min_avg_latency < MIN_AVG_LATENCY) || (min_avg_latency > MAX_AVG_LATENCY))
+ {
+ pp_pl_log(0, "args min_avg_latency is outside the valid range");
+ return 0;
+ }
+
return 1;
}
-/* In multipath.conf, args form: io_num|base_num. For example,
-* args is "20|10", this function can get io_num value 20, and
- base_num value 10.
+/* In multipath.conf, args form: io_num|base_num|min_avg_latency.
+* For example, args is "20|10|1", this function can get io_num
+* value 20, base_num value 10, min_avg_latency value 1 (us).
*/
-static int get_ionum_and_basenum(char *args,
- int *ionum,
- int *basenum)
+static int get_prio_args(char *args,
+ int *ionum,
+ int *basenum,
+ int *minavglatency)
{
char source[MAX_CHAR_SIZE];
char vertica = '|';
- char *endstrbefore = NULL;
- char *endstrafter = NULL;
+ char *endstr1 = NULL;
+ char *endstr2 = NULL;
+ char *endstr3 = NULL;
unsigned int size = strlen(args);
- if ((args == NULL) || (ionum == NULL) || (basenum == NULL))
+ if ((args == NULL) || (ionum == NULL)
+ || (basenum == NULL) || (minavglatency == NULL))
{
pp_pl_log(0, "args string is NULL");
return 0;
@@ -127,21 +136,34 @@ static int get_ionum_and_basenum(char *args,
return 0;
}
- *ionum = (int)strtoul(source, &endstrbefore, 10);
- if (endstrbefore[0] != vertica)
+ *ionum = (int)strtoul(source, &endstr1, 10);
+ if (endstr1[0] != vertica)
+ {
+ pp_pl_log(0, "invalid prio_args format: %s", source);
+ return 0;
+ }
+
+ if (!isdigit(endstr1[1]))
+ {
+ pp_pl_log(0, "invalid prio_args format: %s", source);
+ return 0;
+ }
+
+ *basenum = (long long)strtol(&endstr1[1], &endstr2, 10);
+ if (endstr2[0] != vertica)
{
pp_pl_log(0, "invalid prio_args format: %s", source);
return 0;
}
- if (!isdigit(endstrbefore[1]))
+ if (!isdigit(endstr2[1]))
{
pp_pl_log(0, "invalid prio_args format: %s", source);
return 0;
}
- *basenum = (long long)strtol(&endstrbefore[1], &endstrafter, 10);
- if (check_args_valid(*ionum, *basenum) == 0)
+ *minavglatency = (long long)strtol(&endstr2[1], &endstr3, 10);
+ if (check_args_valid(*ionum, *basenum, *minavglatency) == 0)
{
return 0;
}
@@ -204,6 +226,7 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
int index = 0;
int io_num;
int base_num;
+ int min_avg_latency;
long long avglatency;
long long latency_interval;
long long standard_deviation;
@@ -214,7 +237,7 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
if (pp->fd < 0)
return -1;
- if (get_ionum_and_basenum(args, &io_num, &base_num) == 0)
+ if (get_prio_args(args, &io_num, &base_num, &min_avg_latency) == 0)
{
pp_pl_log(0, "%s: get path_latency args fail", pp->dev);
return DEFAULT_PRIORITY;
@@ -255,13 +278,13 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
set can change latency_interval value corresponding to avglatency and is not constant.
Warn the user if latency_interval is smaller than (2 * standard_deviation), or equal */
standard_deviation = calc_standard_deviation(path_latency, index, avglatency);
- latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
- if ((latency_interval!= 0)
+ latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, min_avg_latency, base_num);
+ if ((latency_interval != 0)
&& (latency_interval <= (2 * standard_deviation)))
pp_pl_log(3, "%s: latency interval (%lld) according to average latency (%lld us) is smaller than "
"2 * standard deviation (%lld us), or equal, args base_num (%d) needs to be set bigger value",
pp->dev, latency_interval, avglatency, standard_deviation, base_num);
- rc = calcPrio(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
+ rc = calcPrio(avglatency, MAX_AVG_LATENCY, min_avg_latency, base_num);
return rc;
}
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index 915cc50..e3fb461 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -341,7 +341,7 @@ these values can be looked up through sysfs or by running \fImultipathd show pat
.TP 12
.I path_latency
Needs a value of the form
-\fI"<io_num>|<base_num>"\fR
+\fI"<io_num>|<base_num>|<min_avg_latency>"\fR
.RS
.TP 8
.I io_num
@@ -350,9 +350,15 @@ Valid Values: Integer, [2, 200].
.TP
.I base_num
The base number value of logarithmic scale, used to partition different priority ranks. Valid Values: Integer,
-[2, 10]. And Max average latency value is 100s, min average latency value is 1us.
-For example: If base_num=10, the paths will be grouped in priority groups with path latency <=1us, (1us, 10us],
-(10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
+[2, 10]. And Max average latency value is constant 100s.
+For example: If base_num=10 and min_avg_latency=1, the paths will be grouped in priority groups with path latency <=1us,
+(1us, 10us], (10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
+.TP
+.I min_avg_latency
+The min average latency value of logarithmic scale, used to partition different priority ranks. Valid Values:
+Integer, [1, 10000000] (us). And Max average latency value is constant 100s.
+For example: If base_num=10 and min_avg_latency=1000, the paths will be grouped in priority groups with path latency <=1ms,
+(1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
.RE
.TP 12
.I alua
--
2.6.4.windows.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 0/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command and add args min_avg_latency for path_latency.
@ 2017-07-20 3:36 Yang Feng
2017-07-20 3:36 ` [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command Yang Feng
2017-07-20 3:36 ` [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency Yang Feng
0 siblings, 2 replies; 13+ messages in thread
From: Yang Feng @ 2017-07-20 3:36 UTC (permalink / raw)
To: dm-devel, mwilck, xose.vazquez
Cc: philip.yang, zouming.zouming, hege09, guanjunxiong, shenhong09
1. The SCSI-to-NVMe translations have been removed in the patch "nvme:
Remove SCSI translations" in the linux-nvme, so the native NVMe Ioctl
command should be supported in the multipath-tools.
In the prioritizers/path_latency.c, modify the func do_readsector0():
send a native NVMe Read Ioctl command to the nvme device, and send a SG
Read Ioctl command to the scsi device.
In the tur checker, add support for the native NVMe Keep Alive Ioctl
command to the nvme device.
2. Add args min_avg_latency of logarithmic scale, for prioritizers/
path_latency.c. Min average latency is not constant 1us, and can be set
by user. Certainly, max average latency value is still constant 100s.
It make support better for more scenes, because it can deal better with
the normal standard deviation of path latency.
For example, when the standard deviation value is 200us and the average
latency of the normal paths is 1ms, args base_num can be set to 5 and args
min_avg_latency can be set to 2ms, so the paths will be grouped in priority
groups with path latency <=2ms, (2ms, 10ms], (10ms, 50ms], etc.
Changes from v1:
* Fix according to Xose's reviews, don't rename the tur checker.
Yang Feng (1):
multipath-tools/libmultipath: Support for the native NVMe Ioctl command
and add args min_avg_latency for path_latency.
libmultipath/checkers.c | 7 ++
libmultipath/checkers.h | 4 +
libmultipath/checkers/Makefile | 4 +-
libmultipath/checkers/emc_clariion.c | 4 +-
libmultipath/checkers/libsg.c | 94 ----------------------
libmultipath/checkers/libsg.h | 9 ---
libmultipath/checkers/readsector0.c | 4 +-
libmultipath/checkers/tur.c | 64 ++++++++++-----
libmultipath/discovery.c | 1 +
libmultipath/libnvme.c | 130 +++++++++++++++++++++++++++++++
libmultipath/libnvme.h | 10 +++
libmultipath/libsg.c | 113 +++++++++++++++++++++++++++
libmultipath/libsg.h | 13 ++++
libmultipath/prioritizers/Makefile | 2 +-
libmultipath/prioritizers/path_latency.c | 95 ++++++++++++++--------
multipath/multipath.conf.5 | 16 ++--
16 files changed, 406 insertions(+), 164 deletions(-)
delete mode 100644 libmultipath/checkers/libsg.c
delete mode 100644 libmultipath/checkers/libsg.h
create mode 100644 libmultipath/libnvme.c
create mode 100644 libmultipath/libnvme.h
create mode 100644 libmultipath/libsg.c
create mode 100644 libmultipath/libsg.h
--
2.6.4.windows.1
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command.
2017-07-20 3:36 [PATCH 0/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command and add args min_avg_latency for path_latency Yang Feng
@ 2017-07-20 3:36 ` Yang Feng
2017-07-20 17:50 ` Martin Wilck
2017-07-20 3:36 ` [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency Yang Feng
1 sibling, 1 reply; 13+ messages in thread
From: Yang Feng @ 2017-07-20 3:36 UTC (permalink / raw)
To: dm-devel, mwilck, xose.vazquez
Cc: philip.yang, zouming.zouming, hege09, guanjunxiong, shenhong09
1. The SCSI-to-NVMe translations have been removed in the patch "nvme:
Remove SCSI translations" in the linux-nvme, so the native NVMe Ioctl
command should be supported in the multipath-tools.
2. In the prioritizers/path_latency.c, modify the func do_readsector0():
send a native NVMe Read Ioctl command to the nvme device, and send a SG
Read Ioctl command to the scsi device.
3. In the tur checker, add support for the native NVMe Keep Alive Ioctl
command to the nvme device.
Signed-off-by: Yang Feng <philip.yang@huawei.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
---
libmultipath/checkers.c | 7 ++
libmultipath/checkers.h | 4 +
libmultipath/checkers/Makefile | 4 +-
libmultipath/checkers/emc_clariion.c | 4 +-
libmultipath/checkers/libsg.c | 94 ----------------------
libmultipath/checkers/libsg.h | 9 ---
libmultipath/checkers/readsector0.c | 4 +-
libmultipath/checkers/tur.c | 64 ++++++++++-----
libmultipath/discovery.c | 1 +
libmultipath/libnvme.c | 130 +++++++++++++++++++++++++++++++
libmultipath/libnvme.h | 10 +++
libmultipath/libsg.c | 113 +++++++++++++++++++++++++++
libmultipath/libsg.h | 13 ++++
libmultipath/prioritizers/Makefile | 2 +-
libmultipath/prioritizers/path_latency.c | 34 +++++---
multipath/multipath.conf.5 | 2 +-
16 files changed, 354 insertions(+), 141 deletions(-)
delete mode 100644 libmultipath/checkers/libsg.c
delete mode 100644 libmultipath/checkers/libsg.h
create mode 100644 libmultipath/libnvme.c
create mode 100644 libmultipath/libnvme.h
create mode 100644 libmultipath/libsg.c
create mode 100644 libmultipath/libsg.h
diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index 05e024f..00fbd6e 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -162,6 +162,13 @@ void checker_set_fd (struct checker * c, int fd)
c->fd = fd;
}
+void checker_set_dev(struct checker *c, char *dev)
+{
+ if (!c)
+ return;
+ strncpy(c->dev, dev, strlen(dev)+1);
+}
+
void checker_set_sync (struct checker * c)
{
if (!c)
diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
index 1d225de..f924624 100644
--- a/libmultipath/checkers.h
+++ b/libmultipath/checkers.h
@@ -97,6 +97,8 @@ enum path_check_state {
#define CHECKER_DEV_LEN 256
#define LIB_CHECKER_NAMELEN 256
+#define FILE_NAME_SIZE 256
+
struct checker {
struct list_head node;
void *handle;
@@ -107,6 +109,7 @@ struct checker {
int disable;
char name[CHECKER_NAME_LEN];
char message[CHECKER_MSG_LEN]; /* comm with callers */
+ char dev[FILE_NAME_SIZE];
void * context; /* store for persistent data */
void ** mpcontext; /* store for persistent data shared
multipath-wide. Use MALLOC if
@@ -132,6 +135,7 @@ void checker_reset (struct checker *);
void checker_set_sync (struct checker *);
void checker_set_async (struct checker *);
void checker_set_fd (struct checker *, int);
+void checker_set_dev(struct checker *c, char *dev);
void checker_enable (struct checker *);
void checker_disable (struct checker *);
void checker_repair (struct checker *);
diff --git a/libmultipath/checkers/Makefile b/libmultipath/checkers/Makefile
index bce6b8b..a9be6b6 100644
--- a/libmultipath/checkers/Makefile
+++ b/libmultipath/checkers/Makefile
@@ -24,10 +24,10 @@ all: $(LIBS)
libcheckrbd.so: rbd.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lrados -ludev
-libcheckdirectio.so: libsg.o directio.o
+libcheckdirectio.so: ../libsg.o ../libnvme.o directio.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -laio
-libcheck%.so: libsg.o %.o
+libcheck%.so: ../libsg.o ../libnvme.o %.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^
install:
diff --git a/libmultipath/checkers/emc_clariion.c b/libmultipath/checkers/emc_clariion.c
index 9c1ffed..12c1e3e 100644
--- a/libmultipath/checkers/emc_clariion.c
+++ b/libmultipath/checkers/emc_clariion.c
@@ -12,7 +12,7 @@
#include <errno.h>
#include "../libmultipath/sg_include.h"
-#include "libsg.h"
+#include "../libmultipath/libsg.h"
#include "checkers.h"
#include "debug.h"
#include "memory.h"
@@ -21,6 +21,8 @@
#define INQUIRY_CMDLEN 6
#define HEAVY_CHECK_COUNT 10
+#define SENSE_BUFF_LEN 32
+
/*
* Mechanism to track CLARiiON inactive snapshot LUs.
* This is done so that we can fail passive paths
diff --git a/libmultipath/checkers/libsg.c b/libmultipath/checkers/libsg.c
deleted file mode 100644
index 958ea92..0000000
--- a/libmultipath/checkers/libsg.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Christophe Varoqui
- */
-#include <string.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <sys/stat.h>
-
-#include "checkers.h"
-#include "libsg.h"
-#include "../libmultipath/sg_include.h"
-
-int
-sg_read (int sg_fd, unsigned char * buff, int buff_len,
- unsigned char * sense, int sense_len, unsigned int timeout)
-{
- /* defaults */
- int blocks;
- long long start_block = 0;
- int bs = 512;
- int cdbsz = 10;
-
- unsigned char rdCmd[cdbsz];
- unsigned char *sbb = sense;
- struct sg_io_hdr io_hdr;
- int res;
- int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88};
- int sz_ind;
- struct stat filestatus;
- int retry_count = 3;
-
- if (fstat(sg_fd, &filestatus) != 0)
- return PATH_DOWN;
- bs = (filestatus.st_blksize > 4096)? 4096: filestatus.st_blksize;
- blocks = buff_len / bs;
- memset(rdCmd, 0, cdbsz);
- sz_ind = 1;
- rdCmd[0] = rd_opcode[sz_ind];
- rdCmd[2] = (unsigned char)((start_block >> 24) & 0xff);
- rdCmd[3] = (unsigned char)((start_block >> 16) & 0xff);
- rdCmd[4] = (unsigned char)((start_block >> 8) & 0xff);
- rdCmd[5] = (unsigned char)(start_block & 0xff);
- rdCmd[7] = (unsigned char)((blocks >> 8) & 0xff);
- rdCmd[8] = (unsigned char)(blocks & 0xff);
-
- memset(&io_hdr, 0, sizeof(struct sg_io_hdr));
- io_hdr.interface_id = 'S';
- io_hdr.cmd_len = cdbsz;
- io_hdr.cmdp = rdCmd;
- io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
- io_hdr.dxfer_len = bs * blocks;
- io_hdr.dxferp = buff;
- io_hdr.mx_sb_len = sense_len;
- io_hdr.sbp = sense;
- io_hdr.timeout = timeout * 1000;
- io_hdr.pack_id = (int)start_block;
-
-retry:
- memset(sense, 0, sense_len);
- while (((res = ioctl(sg_fd, SG_IO, &io_hdr)) < 0) && (EINTR == errno));
-
- if (res < 0) {
- if (ENOMEM == errno) {
- return PATH_UP;
- }
- return PATH_DOWN;
- }
-
- if ((0 == io_hdr.status) &&
- (0 == io_hdr.host_status) &&
- (0 == io_hdr.driver_status)) {
- return PATH_UP;
- } else {
- int key = 0;
-
- if (io_hdr.sb_len_wr > 3) {
- if (sbb[0] == 0x72 || sbb[0] == 0x73)
- key = sbb[1] & 0x0f;
- else if (io_hdr.sb_len_wr > 13 &&
- ((sbb[0] & 0x7f) == 0x70 ||
- (sbb[0] & 0x7f) == 0x71))
- key = sbb[2] & 0x0f;
- }
-
- /*
- * Retry if UNIT_ATTENTION check condition.
- */
- if (key == 0x6) {
- if (--retry_count)
- goto retry;
- }
- return PATH_DOWN;
- }
-}
diff --git a/libmultipath/checkers/libsg.h b/libmultipath/checkers/libsg.h
deleted file mode 100644
index 3994f45..0000000
--- a/libmultipath/checkers/libsg.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _LIBSG_H
-#define _LIBSG_H
-
-#define SENSE_BUFF_LEN 32
-
-int sg_read (int sg_fd, unsigned char * buff, int buff_len,
- unsigned char * sense, int sense_len, unsigned int timeout);
-
-#endif /* _LIBSG_H */
diff --git a/libmultipath/checkers/readsector0.c b/libmultipath/checkers/readsector0.c
index 8fccb46..e485810 100644
--- a/libmultipath/checkers/readsector0.c
+++ b/libmultipath/checkers/readsector0.c
@@ -4,11 +4,13 @@
#include <stdio.h>
#include "checkers.h"
-#include "libsg.h"
+#include "../libmultipath/libsg.h"
#define MSG_READSECTOR0_UP "readsector0 checker reports path is up"
#define MSG_READSECTOR0_DOWN "readsector0 checker reports path is down"
+#define SENSE_BUFF_LEN 32
+
struct readsector0_checker_context {
void * dummy;
};
diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index b4a5cb2..a0382fa 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -1,5 +1,8 @@
/*
- * Some code borrowed from sg-utils.
+ * Some code borrowed from sg-utils and
+ * NVM-Express command line utility,
+ * including using of a TUR command and
+ * a Keep Alive command.
*
* Copyright (c) 2004 Christophe Varoqui
*/
@@ -22,10 +25,10 @@
#include "../libmultipath/sg_include.h"
#include "../libmultipath/util.h"
#include "../libmultipath/time-util.h"
-#include "../libmultipath/util.h"
+#include "../libmultipath/libsg.h"
+#include "../libmultipath/libnvme.h"
-#define TUR_CMD_LEN 6
-#define HEAVY_CHECK_COUNT 10
+#define SENSE_BUFF_LEN 32
#define MSG_TUR_UP "tur checker reports path is up"
#define MSG_TUR_DOWN "tur checker reports path is down"
@@ -39,6 +42,7 @@ struct tur_checker_context {
int state;
int running;
int fd;
+ char dev[FILE_NAME_SIZE];
unsigned int timeout;
time_t time;
pthread_t thread;
@@ -75,6 +79,7 @@ int libcheck_init (struct checker * c)
ct->state = PATH_UNCHECKED;
ct->fd = -1;
ct->holders = 1;
+ memset(ct->dev, 0, sizeof(ct->dev));
pthread_cond_init_mono(&ct->active);
pthread_mutexattr_init(&attr);
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
@@ -133,22 +138,12 @@ tur_check(int fd, unsigned int timeout,
void (*copy_message)(void *, const char *), void *cb_arg)
{
struct sg_io_hdr io_hdr;
- unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
- unsigned char sense_buffer[32];
+ unsigned char sense_buffer[SENSE_BUFF_LEN];
int retry_tur = 5;
retry:
- memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
- memset(&sense_buffer, 0, 32);
- io_hdr.interface_id = 'S';
- io_hdr.cmd_len = sizeof (turCmdBlk);
- io_hdr.mx_sb_len = sizeof (sense_buffer);
- io_hdr.dxfer_direction = SG_DXFER_NONE;
- io_hdr.cmdp = turCmdBlk;
- io_hdr.sbp = sense_buffer;
- io_hdr.timeout = timeout * 1000;
- io_hdr.pack_id = 0;
- if (ioctl(fd, SG_IO, &io_hdr) < 0) {
+ if (sg_tur(fd, &io_hdr, sense_buffer,
+ sizeof(sense_buffer), timeout) < 0) {
TUR_MSG(MSG_TUR_DOWN);
return PATH_DOWN;
}
@@ -213,6 +208,35 @@ retry:
return PATH_UP;
}
+static int
+keep_alive_check(int fd, unsigned int timeout,
+ void (*copy_message)(void *, const char *), void *cb_arg)
+{
+ int err;
+
+ err = nvme_keep_alive(fd, timeout);
+ if (err == 0) {
+ TUR_MSG(MSG_TUR_UP);
+ return PATH_UP;
+ }
+
+ TUR_MSG(MSG_TUR_DOWN);
+ return PATH_DOWN;
+}
+
+static int
+ping_check(int fd, char *dev, unsigned int timeout,
+ void (*copy_message)(void *, const char *), void *cb_arg)
+{
+ if (!strncmp(dev, "nvme", 4))
+ {
+ return keep_alive_check(fd, timeout, copy_message, cb_arg);
+ }
+ else
+ {
+ return tur_check(fd, timeout, copy_message, cb_arg);
+ }
+}
#define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct)
#define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
@@ -266,8 +290,7 @@ static void *tur_thread(void *ctx)
ct->state = PATH_PENDING;
ct->message[0] = '\0';
pthread_mutex_unlock(&ct->lock);
-
- state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct->message);
+ state = ping_check(ct->fd, ct->dev, ct->timeout, copy_msg_to_tcc, ct->message);
pthread_testcancel();
/* TUR checker done */
@@ -390,6 +413,7 @@ int libcheck_check(struct checker * c)
/* Start new TUR checker */
ct->state = PATH_UNCHECKED;
ct->fd = c->fd;
+ strncpy(ct->dev, c->dev, strlen(c->dev)+1);
ct->timeout = c->timeout;
pthread_spin_lock(&ct->hldr_lock);
ct->holders++;
@@ -406,7 +430,7 @@ int libcheck_check(struct checker * c)
ct->thread = 0;
condlog(3, "%s: failed to start tur thread, using"
" sync mode", tur_devt(devt, sizeof(devt), ct));
- return tur_check(c->fd, c->timeout,
+ return ping_check(c->fd, c->dev, c->timeout,
copy_msg_to_checker, c);
}
tur_timeout(&tsp);
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 663c8ea..bae5d24 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -1539,6 +1539,7 @@ get_state (struct path * pp, struct config *conf, int daemon)
return PATH_UNCHECKED;
}
checker_set_fd(c, pp->fd);
+ checker_set_dev(c, pp->dev);
if (checker_init(c, pp->mpp?&pp->mpp->mpcontext:NULL)) {
memset(c, 0x0, sizeof(struct checker));
condlog(3, "%s: checker init failed", pp->dev);
diff --git a/libmultipath/libnvme.c b/libmultipath/libnvme.c
new file mode 100644
index 0000000..97c9125
--- /dev/null
+++ b/libmultipath/libnvme.c
@@ -0,0 +1,130 @@
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved.
+ *
+ * libnvme.c
+ *
+ * Some code borrowed from NVM-Express command line utility.
+ *
+ * Author(s): Yang Feng <philip.yang@huawei.com>
+ *
+ * This file is released under the GPL version 2, or any later version.
+ *
+ */
+#include <linux/types.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+
+struct nvme_user_io {
+ __u8 opcode;
+ __u8 flags;
+ __u16 control;
+ __u16 nblocks;
+ __u16 rsvd;
+ __u64 metadata;
+ __u64 addr;
+ __u64 slba;
+ __u32 dsmgmt;
+ __u32 reftag;
+ __u16 apptag;
+ __u16 appmask;
+};
+
+struct nvme_admin_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1;
+ __u32 nsid;
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata;
+ __u64 addr;
+ __u32 metadata_len;
+ __u32 data_len;
+ __u32 cdw10;
+ __u32 cdw11;
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 result;
+};
+
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
+
+static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+ void *metadata)
+{
+ struct nvme_user_io io = {
+ .opcode = opcode,
+ .flags = 0,
+ .control = control,
+ .nblocks = nblocks,
+ .rsvd = 0,
+ .metadata = (__u64)(uintptr_t) metadata,
+ .addr = (__u64)(uintptr_t) data,
+ .slba = slba,
+ .dsmgmt = dsmgmt,
+ .reftag = reftag,
+ .appmask = apptag,
+ .apptag = appmask,
+ };
+
+ return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
+}
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+ __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
+{
+ return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
+ reftag, apptag, appmask, data, metadata);
+}
+
+static int nvme_submit_passthru(int fd, int ioctl_cmd, struct nvme_admin_cmd *cmd)
+{
+ return ioctl(fd, ioctl_cmd, cmd);
+}
+
+int nvme_passthru(int fd, int ioctl_cmd, __u8 opcode, __u8 flags, __u16 rsvd,
+ __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
+ __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
+ __u32 data_len, void *data, __u32 metadata_len,
+ void *metadata, __u32 timeout_ms, __u32 *result)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = opcode,
+ .flags = flags,
+ .rsvd1 = rsvd,
+ .nsid = nsid,
+ .cdw2 = cdw2,
+ .cdw3 = cdw3,
+ .metadata = (__u64)(uintptr_t) metadata,
+ .addr = (__u64)(uintptr_t) data,
+ .metadata_len = metadata_len,
+ .data_len = data_len,
+ .cdw10 = cdw10,
+ .cdw11 = cdw11,
+ .cdw12 = cdw12,
+ .cdw13 = cdw13,
+ .cdw14 = cdw14,
+ .cdw15 = cdw15,
+ .timeout_ms = timeout_ms,
+ .result = 0,
+ };
+ int err;
+
+ err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+int nvme_keep_alive(int fd, __u32 timeout_ms)
+{
+ __u32 result;
+
+ return nvme_passthru(fd, NVME_IOCTL_ADMIN_CMD, 0x18, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,0 , timeout_ms, &result);
+}
diff --git a/libmultipath/libnvme.h b/libmultipath/libnvme.h
new file mode 100644
index 0000000..a2b5460
--- /dev/null
+++ b/libmultipath/libnvme.h
@@ -0,0 +1,10 @@
+#ifndef _LIBNVME_H
+#define _LIBNVME_H
+
+#include <linux/types.h>
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+ __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata);
+int nvme_keep_alive(int fd, __u32 timeout_ms);
+
+#endif /* _LIBNVME_H */
diff --git a/libmultipath/libsg.c b/libmultipath/libsg.c
new file mode 100644
index 0000000..900103e
--- /dev/null
+++ b/libmultipath/libsg.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ */
+#include <string.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include "checkers.h"
+#include "libsg.h"
+
+int
+sg_read (int sg_fd, unsigned char * buff, int buff_len,
+ unsigned char * sense, int sense_len, unsigned int timeout)
+{
+ /* defaults */
+ int blocks;
+ long long start_block = 0;
+ int bs = 512;
+ int cdbsz = 10;
+
+ unsigned char rdCmd[cdbsz];
+ unsigned char *sbb = sense;
+ struct sg_io_hdr io_hdr;
+ int res;
+ int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88};
+ int sz_ind;
+ struct stat filestatus;
+ int retry_count = 3;
+
+ if (fstat(sg_fd, &filestatus) != 0)
+ return PATH_DOWN;
+ bs = (filestatus.st_blksize > 4096)? 4096: filestatus.st_blksize;
+ blocks = buff_len / bs;
+ memset(rdCmd, 0, cdbsz);
+ sz_ind = 1;
+ rdCmd[0] = rd_opcode[sz_ind];
+ rdCmd[2] = (unsigned char)((start_block >> 24) & 0xff);
+ rdCmd[3] = (unsigned char)((start_block >> 16) & 0xff);
+ rdCmd[4] = (unsigned char)((start_block >> 8) & 0xff);
+ rdCmd[5] = (unsigned char)(start_block & 0xff);
+ rdCmd[7] = (unsigned char)((blocks >> 8) & 0xff);
+ rdCmd[8] = (unsigned char)(blocks & 0xff);
+
+ memset(&io_hdr, 0, sizeof(struct sg_io_hdr));
+ io_hdr.interface_id = 'S';
+ io_hdr.cmd_len = cdbsz;
+ io_hdr.cmdp = rdCmd;
+ io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+ io_hdr.dxfer_len = bs * blocks;
+ io_hdr.dxferp = buff;
+ io_hdr.mx_sb_len = sense_len;
+ io_hdr.sbp = sense;
+ io_hdr.timeout = timeout * 1000;
+ io_hdr.pack_id = (int)start_block;
+
+retry:
+ memset(sense, 0, sense_len);
+ while (((res = ioctl(sg_fd, SG_IO, &io_hdr)) < 0) && (EINTR == errno));
+
+ if (res < 0) {
+ if (ENOMEM == errno) {
+ return PATH_UP;
+ }
+ return PATH_DOWN;
+ }
+
+ if ((0 == io_hdr.status) &&
+ (0 == io_hdr.host_status) &&
+ (0 == io_hdr.driver_status)) {
+ return PATH_UP;
+ } else {
+ int key = 0;
+
+ if (io_hdr.sb_len_wr > 3) {
+ if (sbb[0] == 0x72 || sbb[0] == 0x73)
+ key = sbb[1] & 0x0f;
+ else if (io_hdr.sb_len_wr > 13 &&
+ ((sbb[0] & 0x7f) == 0x70 ||
+ (sbb[0] & 0x7f) == 0x71))
+ key = sbb[2] & 0x0f;
+ }
+
+ /*
+ * Retry if UNIT_ATTENTION check condition.
+ */
+ if (key == 0x6) {
+ if (--retry_count)
+ goto retry;
+ }
+ return PATH_DOWN;
+ }
+}
+
+int
+sg_tur(int fd, struct sg_io_hdr *io_hdr, unsigned char *sense,
+ int sense_len, unsigned int timeout)
+{
+ unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 };
+
+ memset(io_hdr, 0, sizeof(struct sg_io_hdr));
+ memset(sense, 0, sense_len);
+ io_hdr->interface_id = 'S';
+ io_hdr->cmd_len = sizeof(turCmdBlk);
+ io_hdr->mx_sb_len = sense_len;
+ io_hdr->dxfer_direction = SG_DXFER_NONE;
+ io_hdr->cmdp = turCmdBlk;
+ io_hdr->sbp = sense;
+ io_hdr->timeout = timeout * 1000;
+ io_hdr->pack_id = 0;
+
+ return ioctl(fd, SG_IO, io_hdr);
+}
diff --git a/libmultipath/libsg.h b/libmultipath/libsg.h
new file mode 100644
index 0000000..70049a2
--- /dev/null
+++ b/libmultipath/libsg.h
@@ -0,0 +1,13 @@
+#ifndef _LIBSG_H
+#define _LIBSG_H
+
+#include "sg_include.h"
+
+#define TUR_CMD_LEN 6
+
+int sg_read (int sg_fd, unsigned char * buff, int buff_len,
+ unsigned char * sense, int sense_len, unsigned int timeout);
+int sg_tur(int fd, struct sg_io_hdr *io_hdr, unsigned char *sense,
+ int sense_len, unsigned int timeout);
+
+#endif /* _LIBSG_H */
diff --git a/libmultipath/prioritizers/Makefile b/libmultipath/prioritizers/Makefile
index 0c71e63..0c5c69b 100644
--- a/libmultipath/prioritizers/Makefile
+++ b/libmultipath/prioritizers/Makefile
@@ -26,7 +26,7 @@ all: $(LIBS)
libprioalua.so: alua.o alua_rtpg.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^
-libpriopath_latency.so: path_latency.o ../checkers/libsg.o
+libpriopath_latency.so: path_latency.o ../libsg.o ../libnvme.o
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lm
libprio%.so: %.o
diff --git a/libmultipath/prioritizers/path_latency.c b/libmultipath/prioritizers/path_latency.c
index 34b734b..21209ff 100644
--- a/libmultipath/prioritizers/path_latency.c
+++ b/libmultipath/prioritizers/path_latency.c
@@ -23,11 +23,11 @@
#include <math.h>
#include <ctype.h>
#include <time.h>
-
#include "debug.h"
#include "prio.h"
#include "structs.h"
-#include "../checkers/libsg.h"
+#include "libsg.h"
+#include "libnvme.h"
#define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency prio: " fmt, ##args)
@@ -44,6 +44,8 @@
#define MAX_CHAR_SIZE 30
+#define SENSE_BUFF_LEN 32
+
#define USEC_PER_SEC 1000000LL
#define NSEC_PER_USEC 1000LL
@@ -51,19 +53,27 @@ static long long path_latency[MAX_IO_NUM];
static inline long long timeval_to_us(const struct timespec *tv)
{
- return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / NSEC_PER_USEC);
+ return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec / NSEC_PER_USEC);
}
-static int do_readsector0(int fd, unsigned int timeout)
+static int do_readsector0(struct path *pp, unsigned int timeout)
{
- unsigned char buf[4096];
- unsigned char sbuf[SENSE_BUFF_LEN];
- int ret;
+ unsigned char buf[4096];
+ unsigned char mbuf[512];
+ unsigned char sbuf[SENSE_BUFF_LEN];
- ret = sg_read(fd, &buf[0], 4096, &sbuf[0],
- SENSE_BUFF_LEN, timeout);
+ if (!strncmp(pp->dev, "nvme", 4))
+ {
+ if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) != 0)
+ return 0;
+ }
+ else
+ {
+ if (sg_read(pp->fd, &buf[0], 4096, &sbuf[0], SENSE_BUFF_LEN, timeout) == 2)
+ return 0;
+ }
- return ret;
+ return 1;
}
int check_args_valid(int io_num, int base_num)
@@ -218,9 +228,9 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
(void)clock_gettime(CLOCK_MONOTONIC, &tv);
before = timeval_to_us(&tv);
- if (do_readsector0(pp->fd, timeout) == 2)
+ if (do_readsector0(pp, timeout) == 0)
{
- pp_pl_log(0, "%s: path down", pp->dev);
+ pp_pl_log(0, "%s: send read sector0 command fail", pp->dev);
return -1;
}
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index 0049cba..a8d4797 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -419,7 +419,7 @@ are:
deprecated, please use \fItur\fR instead.
.TP
.I tur
-Issue a \fITEST UNIT READY\fR command to the device.
+Issue a \fITEST UNIT READY\fR command or a \fIKEEP ALIVE\fR command to the device.
.TP
.I emc_clariion
(Hardware-dependent)
--
2.6.4.windows.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-20 3:36 [PATCH 0/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command and add args min_avg_latency for path_latency Yang Feng
2017-07-20 3:36 ` [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command Yang Feng
@ 2017-07-20 3:36 ` Yang Feng
2017-07-20 18:07 ` Martin Wilck
1 sibling, 1 reply; 13+ messages in thread
From: Yang Feng @ 2017-07-20 3:36 UTC (permalink / raw)
To: dm-devel, mwilck, xose.vazquez
Cc: philip.yang, zouming.zouming, hege09, guanjunxiong, shenhong09
Add args min_avg_latency of logarithmic scale, for prioritizers/path_latency.c.
Min average latency is not constant 1us, and is set by user. Certainly, max average
latency value is still 100s. It make support better for more scenes, because it can
deal better with the normal standard deviation of path latency. For example, when the
standard deviation value is 200us and the average latency of the normal paths is 1ms,
args base_num can be set to 5 and args min_avg_latency can be set to 2ms, so the paths
will be grouped in priority groups with path latency <=2ms, (2ms, 10ms], (10ms, 50ms],
etc.
Signed-off-by: Yang Feng <philip.yang@huawei.com>
Reviewed-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
---
libmultipath/prioritizers/path_latency.c | 61 ++++++++++++++++++++++----------
multipath/multipath.conf.5 | 14 +++++---
2 files changed, 52 insertions(+), 23 deletions(-)
diff --git a/libmultipath/prioritizers/path_latency.c b/libmultipath/prioritizers/path_latency.c
index 34b734b..a71faff 100644
--- a/libmultipath/prioritizers/path_latency.c
+++ b/libmultipath/prioritizers/path_latency.c
@@ -66,7 +66,7 @@ static int do_readsector0(int fd, unsigned int timeout)
return ret;
}
-int check_args_valid(int io_num, int base_num)
+int check_args_valid(int io_num, int base_num, int min_avg_latency)
{
if ((io_num < MIN_IO_NUM) || (io_num > MAX_IO_NUM))
{
@@ -80,24 +80,33 @@ int check_args_valid(int io_num, int base_num)
return 0;
}
+ if ((min_avg_latency < MIN_AVG_LATENCY) || (min_avg_latency > MAX_AVG_LATENCY))
+ {
+ pp_pl_log(0, "args min_avg_latency is outside the valid range");
+ return 0;
+ }
+
return 1;
}
-/* In multipath.conf, args form: io_num|base_num. For example,
-* args is "20|10", this function can get io_num value 20, and
- base_num value 10.
+/* In multipath.conf, args form: io_num|base_num|min_avg_latency.
+* For example, args is "20|10|1", this function can get io_num
+* value 20, base_num value 10, min_avg_latency value 1 (us).
*/
-static int get_ionum_and_basenum(char *args,
- int *ionum,
- int *basenum)
+static int get_prio_args(char *args,
+ int *ionum,
+ int *basenum,
+ int *minavglatency)
{
char source[MAX_CHAR_SIZE];
char vertica = '|';
- char *endstrbefore = NULL;
- char *endstrafter = NULL;
+ char *endstr1 = NULL;
+ char *endstr2 = NULL;
+ char *endstr3 = NULL;
unsigned int size = strlen(args);
- if ((args == NULL) || (ionum == NULL) || (basenum == NULL))
+ if ((args == NULL) || (ionum == NULL)
+ || (basenum == NULL) || (minavglatency == NULL))
{
pp_pl_log(0, "args string is NULL");
return 0;
@@ -117,21 +126,34 @@ static int get_ionum_and_basenum(char *args,
return 0;
}
- *ionum = (int)strtoul(source, &endstrbefore, 10);
- if (endstrbefore[0] != vertica)
+ *ionum = (int)strtoul(source, &endstr1, 10);
+ if (endstr1[0] != vertica)
+ {
+ pp_pl_log(0, "invalid prio_args format: %s", source);
+ return 0;
+ }
+
+ if (!isdigit(endstr1[1]))
+ {
+ pp_pl_log(0, "invalid prio_args format: %s", source);
+ return 0;
+ }
+
+ *basenum = (long long)strtol(&endstr1[1], &endstr2, 10);
+ if (endstr2[0] != vertica)
{
pp_pl_log(0, "invalid prio_args format: %s", source);
return 0;
}
- if (!isdigit(endstrbefore[1]))
+ if (!isdigit(endstr2[1]))
{
pp_pl_log(0, "invalid prio_args format: %s", source);
return 0;
}
- *basenum = (long long)strtol(&endstrbefore[1], &endstrafter, 10);
- if (check_args_valid(*ionum, *basenum) == 0)
+ *minavglatency = (long long)strtol(&endstr2[1], &endstr3, 10);
+ if (check_args_valid(*ionum, *basenum, *minavglatency) == 0)
{
return 0;
}
@@ -194,6 +216,7 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
int index = 0;
int io_num;
int base_num;
+ int min_avg_latency;
long long avglatency;
long long latency_interval;
long long standard_deviation;
@@ -204,7 +227,7 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
if (pp->fd < 0)
return -1;
- if (get_ionum_and_basenum(args, &io_num, &base_num) == 0)
+ if (get_prio_args(args, &io_num, &base_num, &min_avg_latency) == 0)
{
pp_pl_log(0, "%s: get path_latency args fail", pp->dev);
return DEFAULT_PRIORITY;
@@ -245,13 +268,13 @@ int getprio (struct path *pp, char *args, unsigned int timeout)
set can change latency_interval value corresponding to avglatency and is not constant.
Warn the user if latency_interval is smaller than (2 * standard_deviation), or equal */
standard_deviation = calc_standard_deviation(path_latency, index, avglatency);
- latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
- if ((latency_interval!= 0)
+ latency_interval = calc_latency_interval(avglatency, MAX_AVG_LATENCY, min_avg_latency, base_num);
+ if ((latency_interval != 0)
&& (latency_interval <= (2 * standard_deviation)))
pp_pl_log(3, "%s: latency interval (%lld) according to average latency (%lld us) is smaller than "
"2 * standard deviation (%lld us), or equal, args base_num (%d) needs to be set bigger value",
pp->dev, latency_interval, avglatency, standard_deviation, base_num);
- rc = calcPrio(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
+ rc = calcPrio(avglatency, MAX_AVG_LATENCY, min_avg_latency, base_num);
return rc;
}
diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
index 0049cba..e68e681 100644
--- a/multipath/multipath.conf.5
+++ b/multipath/multipath.conf.5
@@ -341,7 +341,7 @@ these values can be looked up through sysfs or by running \fImultipathd show pat
.TP 12
.I path_latency
Needs a value of the form
-\fI"<io_num>|<base_num>"\fR
+\fI"<io_num>|<base_num>|<min_avg_latency>"\fR
.RS
.TP 8
.I io_num
@@ -350,9 +350,15 @@ Valid Values: Integer, [2, 200].
.TP
.I base_num
The base number value of logarithmic scale, used to partition different priority ranks. Valid Values: Integer,
-[2, 10]. And Max average latency value is 100s, min average latency value is 1us.
-For example: If base_num=10, the paths will be grouped in priority groups with path latency <=1us, (1us, 10us],
-(10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
+[2, 10]. And Max average latency value is constant 100s.
+For example: If base_num=10 and min_avg_latency=1, the paths will be grouped in priority groups with path latency <=1us,
+(1us, 10us], (10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
+.TP
+.I min_avg_latency
+The min average latency value of logarithmic scale, used to partition different priority ranks. Valid Values:
+Integer, [1, 10000000] (us). And Max average latency value is constant 100s.
+For example: If base_num=10 and min_avg_latency=1000, the paths will be grouped in priority groups with path latency <=1ms,
+(1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
.RE
.TP 12
.I alua
--
2.6.4.windows.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command.
2017-07-20 3:36 ` [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command Yang Feng
@ 2017-07-20 17:50 ` Martin Wilck
2017-07-21 3:30 ` Yang Feng
0 siblings, 1 reply; 13+ messages in thread
From: Martin Wilck @ 2017-07-20 17:50 UTC (permalink / raw)
To: Yang Feng, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Yang,
On Thu, 2017-07-20 at 11:36 +0800, Yang Feng wrote:
> 1. The SCSI-to-NVMe translations have been removed in the patch
> "nvme:
> Remove SCSI translations" in the linux-nvme, so the native NVMe Ioctl
> command should be supported in the multipath-tools.
> 2. In the prioritizers/path_latency.c, modify the func
> do_readsector0():
> send a native NVMe Read Ioctl command to the nvme device, and send a
> SG
> Read Ioctl command to the scsi device.
As noted previously, I would prefer to use a directio command here. I
see no advantage of using sg or nvme ioctls over directio.
> 3. In the tur checker, add support for the native NVMe Keep Alive
> Ioctl
> command to the nvme device.
No, please don't. I'm still with Xose here. There's no need to use the
same checker for NVMe and SCSI devices. This is what we have the
hwtable for. Keep the tur checker as it was before, create a keepalive
checker for NVMe, and use hwtable entries to match them appropriately
to devices.
See below for some details.
NAK from my side for the patch in this form.
Martin
>
> Signed-off-by: Yang Feng <philip.yang@huawei.com>
> Reviewed-by: Martin Wilck <mwilck@suse.com>
> Reviewed-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
> ---
> libmultipath/checkers.c | 7 ++
> libmultipath/checkers.h | 4 +
> libmultipath/checkers/Makefile | 4 +-
> libmultipath/checkers/emc_clariion.c | 4 +-
> libmultipath/checkers/libsg.c | 94 -------------------
IIRC you did this in your previous patch already. Can you give a good
reason for moving this code? It makes your patch unnecessarily big and
messes up git history. If it's REALLY necessary, separate it out,
please.
> ---
> libmultipath/checkers/libsg.h | 9 ---
> libmultipath/checkers/readsector0.c | 4 +-
> libmultipath/checkers/tur.c | 64 ++++++++++-----
> libmultipath/discovery.c | 1 +
> libmultipath/libnvme.c | 130
> +++++++++++++++++++++++++++++++
> libmultipath/libnvme.h | 10 +++
> libmultipath/libsg.c | 113
> +++++++++++++++++++++++++++
> libmultipath/libsg.h | 13 ++++
> libmultipath/prioritizers/Makefile | 2 +-
> libmultipath/prioritizers/path_latency.c | 34 +++++---
> multipath/multipath.conf.5 | 2 +-
> 16 files changed, 354 insertions(+), 141 deletions(-)
> delete mode 100644 libmultipath/checkers/libsg.c
> delete mode 100644 libmultipath/checkers/libsg.h
> create mode 100644 libmultipath/libnvme.c
> create mode 100644 libmultipath/libnvme.h
> create mode 100644 libmultipath/libsg.c
> create mode 100644 libmultipath/libsg.h
>
> diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> index 05e024f..00fbd6e 100644
> --- a/libmultipath/checkers.c
> +++ b/libmultipath/checkers.c
> @@ -162,6 +162,13 @@ void checker_set_fd (struct checker * c, int fd)
> c->fd = fd;
> }
>
> +void checker_set_dev(struct checker *c, char *dev)
> +{
> + if (!c)
> + return;
> + strncpy(c->dev, dev, strlen(dev)+1);
> +}
> +
> void checker_set_sync (struct checker * c)
> {
> if (!c)
> diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
> index 1d225de..f924624 100644
> --- a/libmultipath/checkers.h
> +++ b/libmultipath/checkers.h
> @@ -97,6 +97,8 @@ enum path_check_state {
> #define CHECKER_DEV_LEN 256
> #define LIB_CHECKER_NAMELEN 256
>
> +#define FILE_NAME_SIZE 256
> +
> struct checker {
> struct list_head node;
> void *handle;
> @@ -107,6 +109,7 @@ struct checker {
> int disable;
> char name[CHECKER_NAME_LEN];
> char message[CHECKER_MSG_LEN]; /* comm with callers */
> + char dev[FILE_NAME_SIZE];
FILE_NAME_SIZE more memory usage only to distinguish SCSI from NVMe.
Please, no.
> void * context; /* store for persistent
> data */
> void ** mpcontext; /* store for persistent
> data shared
> multipath-wide. Use
> MALLOC if
> @@ -132,6 +135,7 @@ void checker_reset (struct checker *);
> void checker_set_sync (struct checker *);
> void checker_set_async (struct checker *);
> void checker_set_fd (struct checker *, int);
> +void checker_set_dev(struct checker *c, char *dev);
> void checker_enable (struct checker *);
> void checker_disable (struct checker *);
> void checker_repair (struct checker *);
> diff --git a/libmultipath/checkers/Makefile
> b/libmultipath/checkers/Makefile
> index bce6b8b..a9be6b6 100644
> --- a/libmultipath/checkers/Makefile
> +++ b/libmultipath/checkers/Makefile
> @@ -24,10 +24,10 @@ all: $(LIBS)
> libcheckrbd.so: rbd.o
> $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lrados -ludev
>
> -libcheckdirectio.so: libsg.o directio.o
> +libcheckdirectio.so: ../libsg.o ../libnvme.o directio.o
> $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -laio
>
> -libcheck%.so: libsg.o %.o
> +libcheck%.so: ../libsg.o ../libnvme.o %.o
> $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^
>
> install:
> diff --git a/libmultipath/checkers/emc_clariion.c
> b/libmultipath/checkers/emc_clariion.c
> index 9c1ffed..12c1e3e 100644
> --- a/libmultipath/checkers/emc_clariion.c
> +++ b/libmultipath/checkers/emc_clariion.c
> @@ -12,7 +12,7 @@
> #include <errno.h>
>
> #include "../libmultipath/sg_include.h"
> -#include "libsg.h"
> +#include "../libmultipath/libsg.h"
> #include "checkers.h"
> #include "debug.h"
> #include "memory.h"
> @@ -21,6 +21,8 @@
> #define INQUIRY_CMDLEN 6
> #define HEAVY_CHECK_COUNT 10
>
> +#define SENSE_BUFF_LEN 32
> +
> /*
> * Mechanism to track CLARiiON inactive snapshot LUs.
> * This is done so that we can fail passive paths
> diff --git a/libmultipath/checkers/libsg.c
> b/libmultipath/checkers/libsg.c
> deleted file mode 100644
See above
> diff --git a/libmultipath/checkers/libsg.h
> b/libmultipath/checkers/libsg.h
> deleted file mode 100644
> index 3994f45..0000000
> --- a/libmultipath/checkers/libsg.h
> +++ /dev/null
> @@ -1,9 +0,0 @@
> -#ifndef _LIBSG_H
> -#define _LIBSG_H
> -
> -#define SENSE_BUFF_LEN 32
> -
> -int sg_read (int sg_fd, unsigned char * buff, int buff_len,
> - unsigned char * sense, int sense_len, unsigned int
> timeout);
> -
> -#endif /* _LIBSG_H */
> diff --git a/libmultipath/checkers/readsector0.c
> b/libmultipath/checkers/readsector0.c
> index 8fccb46..e485810 100644
> --- a/libmultipath/checkers/readsector0.c
> +++ b/libmultipath/checkers/readsector0.c
> @@ -4,11 +4,13 @@
> #include <stdio.h>
>
> #include "checkers.h"
> -#include "libsg.h"
> +#include "../libmultipath/libsg.h"
>
> #define MSG_READSECTOR0_UP "readsector0 checker reports path
> is up"
> #define MSG_READSECTOR0_DOWN "readsector0 checker reports
> path is down"
>
> +#define SENSE_BUFF_LEN 32
> +
> struct readsector0_checker_context {
> void * dummy;
> };
> diff --git a/libmultipath/checkers/tur.c
> b/libmultipath/checkers/tur.c
> index b4a5cb2..a0382fa 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -1,5 +1,8 @@
> /*
> - * Some code borrowed from sg-utils.
> + * Some code borrowed from sg-utils and
> + * NVM-Express command line utility,
> + * including using of a TUR command and
> + * a Keep Alive command.
> *
> * Copyright (c) 2004 Christophe Varoqui
> */
> @@ -22,10 +25,10 @@
> #include "../libmultipath/sg_include.h"
> #include "../libmultipath/util.h"
> #include "../libmultipath/time-util.h"
> -#include "../libmultipath/util.h"
> +#include "../libmultipath/libsg.h"
> +#include "../libmultipath/libnvme.h"
>
> -#define TUR_CMD_LEN 6
> -#define HEAVY_CHECK_COUNT 10
> +#define SENSE_BUFF_LEN 32
>
> #define MSG_TUR_UP "tur checker reports path is up"
> #define MSG_TUR_DOWN "tur checker reports path is down"
> @@ -39,6 +42,7 @@ struct tur_checker_context {
> int state;
> int running;
> int fd;
> + char dev[FILE_NAME_SIZE];
see above
> unsigned int timeout;
> time_t time;
> pthread_t thread;
> @@ -75,6 +79,7 @@ int libcheck_init (struct checker * c)
> ct->state = PATH_UNCHECKED;
> ct->fd = -1;
> ct->holders = 1;
> + memset(ct->dev, 0, sizeof(ct->dev));
> pthread_cond_init_mono(&ct->active);
> pthread_mutexattr_init(&attr);
> pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
> @@ -133,22 +138,12 @@ tur_check(int fd, unsigned int timeout,
> void (*copy_message)(void *, const char *), void *cb_arg)
> {
> struct sg_io_hdr io_hdr;
> - unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0
> };
> - unsigned char sense_buffer[32];
> + unsigned char sense_buffer[SENSE_BUFF_LEN];
> int retry_tur = 5;
>
> retry:
> - memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
> - memset(&sense_buffer, 0, 32);
> - io_hdr.interface_id = 'S';
> - io_hdr.cmd_len = sizeof (turCmdBlk);
> - io_hdr.mx_sb_len = sizeof (sense_buffer);
> - io_hdr.dxfer_direction = SG_DXFER_NONE;
> - io_hdr.cmdp = turCmdBlk;
> - io_hdr.sbp = sense_buffer;
> - io_hdr.timeout = timeout * 1000;
> - io_hdr.pack_id = 0;
> - if (ioctl(fd, SG_IO, &io_hdr) < 0) {
> + if (sg_tur(fd, &io_hdr, sense_buffer,
> + sizeof(sense_buffer), timeout) < 0) {
> TUR_MSG(MSG_TUR_DOWN);
> return PATH_DOWN;
> }
> @@ -213,6 +208,35 @@ retry:
> return PATH_UP;
> }
>
> +static int
> +keep_alive_check(int fd, unsigned int timeout,
> + void (*copy_message)(void *, const char *), void *cb_arg)
> +{
> + int err;
> +
> + err = nvme_keep_alive(fd, timeout);
> + if (err == 0) {
> + TUR_MSG(MSG_TUR_UP);
> + return PATH_UP;
> + }
> +
> + TUR_MSG(MSG_TUR_DOWN);
> + return PATH_DOWN;
> +}
> +
> +static int
> +ping_check(int fd, char *dev, unsigned int timeout,
> + void (*copy_message)(void *, const char *), void *cb_arg)
> +{
> + if (!strncmp(dev, "nvme", 4))
> + {
> + return keep_alive_check(fd, timeout, copy_message, cb_arg);
> + }
> + else
> + {
> + return tur_check(fd, timeout, copy_message, cb_arg);
> + }
> +}
> #define tur_thread_cleanup_push(ct)
> pthread_cleanup_push(cleanup_func, ct)
> #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
>
> @@ -266,8 +290,7 @@ static void *tur_thread(void *ctx)
> ct->state = PATH_PENDING;
> ct->message[0] = '\0';
> pthread_mutex_unlock(&ct->lock);
> -
> - state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct-
> >message);
> + state = ping_check(ct->fd, ct->dev, ct->timeout,
> copy_msg_to_tcc, ct->message);
> pthread_testcancel();
>
> /* TUR checker done */
> @@ -390,6 +413,7 @@ int libcheck_check(struct checker * c)
> /* Start new TUR checker */
> ct->state = PATH_UNCHECKED;
> ct->fd = c->fd;
> + strncpy(ct->dev, c->dev, strlen(c->dev)+1);
You seem to have some whitespace issues.
> ct->timeout = c->timeout;
> pthread_spin_lock(&ct->hldr_lock);
> ct->holders++;
> @@ -406,7 +430,7 @@ int libcheck_check(struct checker * c)
> ct->thread = 0;
> condlog(3, "%s: failed to start tur thread,
> using"
> " sync mode", tur_devt(devt,
> sizeof(devt), ct));
> - return tur_check(c->fd, c->timeout,
> + return ping_check(c->fd, c->dev, c->timeout,
> copy_msg_to_checker, c);
> }
> tur_timeout(&tsp);
> diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
> index 663c8ea..bae5d24 100644
> --- a/libmultipath/discovery.c
> +++ b/libmultipath/discovery.c
> @@ -1539,6 +1539,7 @@ get_state (struct path * pp, struct config
> *conf, int daemon)
> return PATH_UNCHECKED;
> }
> checker_set_fd(c, pp->fd);
> + checker_set_dev(c, pp->dev);
> if (checker_init(c, pp->mpp?&pp->mpp-
> >mpcontext:NULL)) {
> memset(c, 0x0, sizeof(struct checker));
> condlog(3, "%s: checker init failed", pp-
> >dev);
> diff --git a/libmultipath/libnvme.c b/libmultipath/libnvme.c
> new file mode 100644
> index 0000000..97c9125
> --- /dev/null
> +++ b/libmultipath/libnvme.c
> @@ -0,0 +1,130 @@
> +/*
> + * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved.
> + *
> + * libnvme.c
> + *
> + * Some code borrowed from NVM-Express command line utility.
> + *
> + * Author(s): Yang Feng <philip.yang@huawei.com>
> + *
> + * This file is released under the GPL version 2, or any later
> version.
> + *
> + */
> +#include <linux/types.h>
> +#include <sys/ioctl.h>
> +#include <stdint.h>
> +
> +struct nvme_user_io {
> + __u8 opcode;
> + __u8 flags;
> + __u16 control;
> + __u16 nblocks;
> + __u16 rsvd;
> + __u64 metadata;
> + __u64 addr;
> + __u64 slba;
> + __u32 dsmgmt;
> + __u32 reftag;
> + __u16 apptag;
> + __u16 appmask;
> +};
Please simply include linux/nvme_ioctl.h. Makefiles should check if
that file exists and compile the nvme stuff conditionally.
> +
> +struct nvme_admin_cmd {
> + __u8 opcode;
> + __u8 flags;
> + __u16 rsvd1;
> + __u32 nsid;
> + __u32 cdw2;
> + __u32 cdw3;
> + __u64 metadata;
> + __u64 addr;
> + __u32 metadata_len;
> + __u32 data_len;
> + __u32 cdw10;
> + __u32 cdw11;
> + __u32 cdw12;
> + __u32 cdw13;
> + __u32 cdw14;
> + __u32 cdw15;
> + __u32 timeout_ms;
> + __u32 result;
> +};
> +
> +#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct
> nvme_admin_cmd)
> +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
> +
> +static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks,
> __u16 control,
> + __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask,
> void *data,
> + void *metadata)
> +{
> + struct nvme_user_io io = {
> + .opcode = opcode,
> + .flags = 0,
> + .control = control,
> + .nblocks = nblocks,
> + .rsvd = 0,
> + .metadata = (__u64)(uintptr_t) metadata,
> + .addr = (__u64)(uintptr_t) data,
> + .slba = slba,
> + .dsmgmt = dsmgmt,
> + .reftag = reftag,
> + .appmask = apptag,
> + .apptag = appmask,
> + };
> +
> + return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
> +}
> +
> +int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
> __u32 dsmgmt,
> + __u32 reftag, __u16 apptag, __u16 appmask, void *data,
> void *metadata)
> +{
> + return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
> + reftag, apptag, appmask, data, metadata);
> +}
This API is bloated. Out of these 10(!) function arguments, 6 are zero
in the only call you have.
> +
> +static int nvme_submit_passthru(int fd, int ioctl_cmd, struct
> nvme_admin_cmd *cmd)
> +{
> + return ioctl(fd, ioctl_cmd, cmd);
> +}
> +
> +int nvme_passthru(int fd, int ioctl_cmd, __u8 opcode, __u8 flags,
> __u16 rsvd,
> + __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10,
> __u32 cdw11,
> + __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32
> cdw15,
> + __u32 data_len, void *data, __u32 metadata_len,
> + void *metadata, __u32 timeout_ms, __u32 *result)
This is even worse. This function takes 20(!!!) parameters, and 15 of
them are 0 in the call. Do you see any other use case of
nvme_admin_cmd() in multipath-tools that would justify the extra
parameters?
> +{
> + struct nvme_admin_cmd cmd = {
> + .opcode = opcode,
> + .flags = flags,
> + .rsvd1 = rsvd,
> + .nsid = nsid,
> + .cdw2 = cdw2,
> + .cdw3 = cdw3,
> + .metadata = (__u64)(uintptr_t) metadata,
> + .addr = (__u64)(uintptr_t) data,
> + .metadata_len = metadata_len,
> + .data_len = data_len,
> + .cdw10 = cdw10,
> + .cdw11 = cdw11,
> + .cdw12 = cdw12,
> + .cdw13 = cdw13,
> + .cdw14 = cdw14,
> + .cdw15 = cdw15,
> + .timeout_ms = timeout_ms,
> + .result = 0,
> + };
> + int err;
> +
> + err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
> + if (!err && result)
> + *result = cmd.result;
> + return err;
> +}
> +
> +int nvme_keep_alive(int fd, __u32 timeout_ms)
> +{
> + __u32 result;
> +
> + return nvme_passthru(fd, NVME_IOCTL_ADMIN_CMD, 0x18, 0, 0, 0, 0,
> 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0,0 , timeout_ms,
> &result);
> +}
Honestly, why do you define the API at all? It would be more readable
to call nvme_submit_passthru() directly.
> diff --git a/libmultipath/libnvme.h b/libmultipath/libnvme.h
> new file mode 100644
> index 0000000..a2b5460
> --- /dev/null
> +++ b/libmultipath/libnvme.h
> @@ -0,0 +1,10 @@
> +#ifndef _LIBNVME_H
> +#define _LIBNVME_H
> +
> +#include <linux/types.h>
> +
> +int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
> __u32 dsmgmt,
> + __u32 reftag, __u16 apptag, __u16 appmask, void *data,
> void *metadata);
> +int nvme_keep_alive(int fd, __u32 timeout_ms);
> +
> +#endif /* _LIBNVME_H */
> diff --git a/libmultipath/libsg.c b/libmultipath/libsg.c
> new file mode 100644
see above
diff --git a/libmultipath/prioritizers/path_latency.c
> b/libmultipath/prioritizers/path_latency.c
> index 34b734b..21209ff 100644
> --- a/libmultipath/prioritizers/path_latency.c
> +++ b/libmultipath/prioritizers/path_latency.c
> @@ -23,11 +23,11 @@
> #include <math.h>
> #include <ctype.h>
> #include <time.h>
> -
> #include "debug.h"
> #include "prio.h"
> #include "structs.h"
> -#include "../checkers/libsg.h"
> +#include "libsg.h"
> +#include "libnvme.h"
>
> #define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency
> prio: " fmt, ##args)
>
> @@ -44,6 +44,8 @@
>
> #define MAX_CHAR_SIZE 30
>
> +#define SENSE_BUFF_LEN 32
> +
> #define USEC_PER_SEC 1000000LL
> #define NSEC_PER_USEC 1000LL
>
> @@ -51,19 +53,27 @@ static long long path_latency[MAX_IO_NUM];
>
> static inline long long timeval_to_us(const struct timespec *tv)
> {
> - return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv-
> >tv_nsec / NSEC_PER_USEC);
> + return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec /
> NSEC_PER_USEC);
> }
>
> -static int do_readsector0(int fd, unsigned int timeout)
> +static int do_readsector0(struct path *pp, unsigned int timeout)
> {
> - unsigned char buf[4096];
> - unsigned char sbuf[SENSE_BUFF_LEN];
> - int ret;
> + unsigned char buf[4096];
> + unsigned char mbuf[512];
> + unsigned char sbuf[SENSE_BUFF_LEN];
>
> - ret = sg_read(fd, &buf[0], 4096, &sbuf[0],
> - SENSE_BUFF_LEN, timeout);
> + if (!strncmp(pp->dev, "nvme", 4))
> + {
> + if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) != 0)
> + return 0;
> + }
> + else
> + {
> + if (sg_read(pp->fd, &buf[0], 4096, &sbuf[0], SENSE_BUFF_LEN,
> timeout) == 2)
> + return 0;
> + }
>
> - return ret;
> + return 1;
> }
>
> int check_args_valid(int io_num, int base_num)
> @@ -218,9 +228,9 @@ int getprio (struct path *pp, char *args,
> unsigned int timeout)
> (void)clock_gettime(CLOCK_MONOTONIC, &tv);
> before = timeval_to_us(&tv);
>
> - if (do_readsector0(pp->fd, timeout) == 2)
> + if (do_readsector0(pp, timeout) == 0)
> {
> - pp_pl_log(0, "%s: path down", pp->dev);
> + pp_pl_log(0, "%s: send read sector0 command fail", pp-
> >dev);
> return -1;
> }
>
> diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
> index 0049cba..a8d4797 100644
> --- a/multipath/multipath.conf.5
> +++ b/multipath/multipath.conf.5
> @@ -419,7 +419,7 @@ are:
> deprecated, please use \fItur\fR instead.
> .TP
> .I tur
> -Issue a \fITEST UNIT READY\fR command to the device.
> +Issue a \fITEST UNIT READY\fR command or a \fIKEEP ALIVE\fR command
> to the device.
> .TP
> .I emc_clariion
> (Hardware-dependent)
--
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-20 3:36 ` [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency Yang Feng
@ 2017-07-20 18:07 ` Martin Wilck
2017-07-21 3:38 ` Yang Feng
0 siblings, 1 reply; 13+ messages in thread
From: Martin Wilck @ 2017-07-20 18:07 UTC (permalink / raw)
To: Yang Feng, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Yang,
On Thu, 2017-07-20 at 11:36 +0800, Yang Feng wrote:
> Add args min_avg_latency of logarithmic scale, for
> prioritizers/path_latency.c.
> Min average latency is not constant 1us, and is set by user.
> Certainly, max average
> latency value is still 100s. It make support better for more scenes,
> because it can
> deal better with the normal standard deviation of path latency. For
> example, when the
> standard deviation value is 200us and the average latency of the
> normal paths is 1ms,
> args base_num can be set to 5 and args min_avg_latency can be set to
> 2ms, so the paths
> will be grouped in priority groups with path latency <=2ms, (2ms,
> 10ms], (10ms, 50ms],
> etc.
Nack. You need this because you are using a wrong calculation for
standard deviation. If your scale is logarithmic, you need to calculate
the standard deviation on a log scale, too. The scientific term is
"geometric standard deviation".
https://en.wikipedia.org/wiki/Geometric_standard_deviation
Suppose you really have 3 classes of devices with us, ms, and seconds
average latency, respectively. The latency of the devices in the
"seconds" class will also vary on the order of seconds (e.g. 3.5-5s).
That's obviously impossible for the us and ms class devices. Rather,
it's reasonable to assume that the us devices will have us (or sub-us)
standard deviation and the ms devices have ms (or sub-ms) standard
deviation, or in other words, the uncertainty is roughly in the same
order of magnitude as the average.
This is exactly how the geometric standard deviation behaves.
I think I actually remarked this on your previous patch, but the patch
has been applied without that having been fixed. It would be good if
you could fix it now.
>
> Signed-off-by: Yang Feng <philip.yang@huawei.com>
> Reviewed-by: Martin Wilck <mwilck@suse.com>
Please don't add my Reviewed-by: tag for patches I haven't reviewed, or
have negatively reviewed. Reviewed-by: is supposed to indicate
approval.
Wrt your prio_args parser, could you perhaps support a syntax that is
similar to other prioritizers, e.g. "base_num=5 io_num=10"?
Regards
Martin
> Reviewed-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
> ---
> libmultipath/prioritizers/path_latency.c | 61
> ++++++++++++++++++++++----------
> multipath/multipath.conf.5 | 14 +++++---
> 2 files changed, 52 insertions(+), 23 deletions(-)
>
> diff --git a/libmultipath/prioritizers/path_latency.c
> b/libmultipath/prioritizers/path_latency.c
> index 34b734b..a71faff 100644
> --- a/libmultipath/prioritizers/path_latency.c
> +++ b/libmultipath/prioritizers/path_latency.c
> @@ -66,7 +66,7 @@ static int do_readsector0(int fd, unsigned int
> timeout)
> return ret;
> }
>
> -int check_args_valid(int io_num, int base_num)
> +int check_args_valid(int io_num, int base_num, int min_avg_latency)
> {
> if ((io_num < MIN_IO_NUM) || (io_num > MAX_IO_NUM))
> {
> @@ -80,24 +80,33 @@ int check_args_valid(int io_num, int base_num)
> return 0;
> }
>
> + if ((min_avg_latency < MIN_AVG_LATENCY) || (min_avg_latency >
> MAX_AVG_LATENCY))
> + {
> + pp_pl_log(0, "args min_avg_latency is outside the valid
> range");
> + return 0;
> + }
> +
> return 1;
> }
>
> -/* In multipath.conf, args form: io_num|base_num. For example,
> -* args is "20|10", this function can get io_num value 20, and
> - base_num value 10.
> +/* In multipath.conf, args form: io_num|base_num|min_avg_latency.
> +* For example, args is "20|10|1", this function can get io_num
> +* value 20, base_num value 10, min_avg_latency value 1 (us).
> */
> -static int get_ionum_and_basenum(char *args,
> - int *ionum,
> - int *basenum)
> +static int get_prio_args(char *args,
> + int *ionum,
> + int *basenum,
> + int *minavglatency)
> {
> char source[MAX_CHAR_SIZE];
> char vertica = '|';
> - char *endstrbefore = NULL;
> - char *endstrafter = NULL;
> + char *endstr1 = NULL;
> + char *endstr2 = NULL;
> + char *endstr3 = NULL;
> unsigned int size = strlen(args);
>
> - if ((args == NULL) || (ionum == NULL) || (basenum == NULL))
> + if ((args == NULL) || (ionum == NULL)
> + || (basenum == NULL) || (minavglatency == NULL))
> {
> pp_pl_log(0, "args string is NULL");
> return 0;
> @@ -117,21 +126,34 @@ static int get_ionum_and_basenum(char *args,
> return 0;
> }
>
> - *ionum = (int)strtoul(source, &endstrbefore, 10);
> - if (endstrbefore[0] != vertica)
> + *ionum = (int)strtoul(source, &endstr1, 10);
> + if (endstr1[0] != vertica)
> + {
> + pp_pl_log(0, "invalid prio_args format: %s", source);
> + return 0;
> + }
> +
> + if (!isdigit(endstr1[1]))
> + {
> + pp_pl_log(0, "invalid prio_args format: %s", source);
> + return 0;
> + }
> +
> + *basenum = (long long)strtol(&endstr1[1], &endstr2, 10);
> + if (endstr2[0] != vertica)
> {
> pp_pl_log(0, "invalid prio_args format: %s", source);
> return 0;
> }
>
> - if (!isdigit(endstrbefore[1]))
> + if (!isdigit(endstr2[1]))
> {
> pp_pl_log(0, "invalid prio_args format: %s", source);
> return 0;
> }
>
> - *basenum = (long long)strtol(&endstrbefore[1], &endstrafter,
> 10);
> - if (check_args_valid(*ionum, *basenum) == 0)
> + *minavglatency = (long long)strtol(&endstr2[1], &endstr3, 10);
> + if (check_args_valid(*ionum, *basenum, *minavglatency) == 0)
> {
> return 0;
> }
> @@ -194,6 +216,7 @@ int getprio (struct path *pp, char *args,
> unsigned int timeout)
> int index = 0;
> int io_num;
> int base_num;
> + int min_avg_latency;
> long long avglatency;
> long long latency_interval;
> long long standard_deviation;
> @@ -204,7 +227,7 @@ int getprio (struct path *pp, char *args,
> unsigned int timeout)
> if (pp->fd < 0)
> return -1;
>
> - if (get_ionum_and_basenum(args, &io_num, &base_num) == 0)
> + if (get_prio_args(args, &io_num, &base_num, &min_avg_latency) ==
> 0)
> {
> pp_pl_log(0, "%s: get path_latency args fail", pp->dev);
> return DEFAULT_PRIORITY;
> @@ -245,13 +268,13 @@ int getprio (struct path *pp, char *args,
> unsigned int timeout)
> set can change latency_interval value corresponding to
> avglatency and is not constant.
> Warn the user if latency_interval is smaller than (2 *
> standard_deviation), or equal */
> standard_deviation = calc_standard_deviation(path_latency,
> index, avglatency);
> - latency_interval = calc_latency_interval(avglatency,
> MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
> - if ((latency_interval!= 0)
> + latency_interval = calc_latency_interval(avglatency,
> MAX_AVG_LATENCY, min_avg_latency, base_num);
> + if ((latency_interval != 0)
> && (latency_interval <= (2 * standard_deviation)))
> pp_pl_log(3, "%s: latency interval (%lld) according to
> average latency (%lld us) is smaller than "
> "2 * standard deviation (%lld us), or equal, args
> base_num (%d) needs to be set bigger value",
> pp->dev, latency_interval, avglatency,
> standard_deviation, base_num);
>
> - rc = calcPrio(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY,
> base_num);
> + rc = calcPrio(avglatency, MAX_AVG_LATENCY, min_avg_latency,
> base_num);
> return rc;
> }
> diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
> index 0049cba..e68e681 100644
> --- a/multipath/multipath.conf.5
> +++ b/multipath/multipath.conf.5
> @@ -341,7 +341,7 @@ these values can be looked up through sysfs or by
> running \fImultipathd show pat
> .TP 12
> .I path_latency
> Needs a value of the form
> -\fI"<io_num>|<base_num>"\fR
> +\fI"<io_num>|<base_num>|<min_avg_latency>"\fR
> .RS
> .TP 8
> .I io_num
> @@ -350,9 +350,15 @@ Valid Values: Integer, [2, 200].
> .TP
> .I base_num
> The base number value of logarithmic scale, used to partition
> different priority ranks. Valid Values: Integer,
> -[2, 10]. And Max average latency value is 100s, min average latency
> value is 1us.
> -For example: If base_num=10, the paths will be grouped in priority
> groups with path latency <=1us, (1us, 10us],
> -(10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms, 100ms], (100ms,
> 1s], (1s, 10s], (10s, 100s], >100s.
> +[2, 10]. And Max average latency value is constant 100s.
> +For example: If base_num=10 and min_avg_latency=1, the paths will be
> grouped in priority groups with path latency <=1us,
> +(1us, 10us], (10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms,
> 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
> +.TP
> +.I min_avg_latency
> +The min average latency value of logarithmic scale, used to
> partition different priority ranks. Valid Values:
> +Integer, [1, 10000000] (us). And Max average latency value is
> constant 100s.
> +For example: If base_num=10 and min_avg_latency=1000, the paths will
> be grouped in priority groups with path latency <=1ms,
> +(1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s],
> >100s.
> .RE
> .TP 12
> .I alua
--
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command.
2017-07-20 17:50 ` Martin Wilck
@ 2017-07-21 3:30 ` Yang Feng
2017-07-21 10:23 ` Martin Wilck
0 siblings, 1 reply; 13+ messages in thread
From: Yang Feng @ 2017-07-21 3:30 UTC (permalink / raw)
To: Martin Wilck, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Martin,
Thanks a lot for your reviews.
Please find my replys as follows.
And the up-to-date patch will be sent later.
Regards,
-Yang
On 2017/7/21 1:50, Martin Wilck wrote:
> Dear Yang,
>
> On Thu, 2017-07-20 at 11:36 +0800, Yang Feng wrote:
>> 1. The SCSI-to-NVMe translations have been removed in the patch
>> "nvme:
>> Remove SCSI translations" in the linux-nvme, so the native NVMe Ioctl
>> command should be supported in the multipath-tools.
>
>> 2. In the prioritizers/path_latency.c, modify the func
>> do_readsector0():
>> send a native NVMe Read Ioctl command to the nvme device, and send a
>> SG
>> Read Ioctl command to the scsi device.
>
> As noted previously, I would prefer to use a directio command here. I
> see no advantage of using sg or nvme ioctls over directio.
>
Thanks, insteadly, directio will be used.
>> 3. In the tur checker, add support for the native NVMe Keep Alive
>> Ioctl
>> command to the nvme device.
>
> No, please don't. I'm still with Xose here. There's no need to use the
> same checker for NVMe and SCSI devices. This is what we have the
> hwtable for. Keep the tur checker as it was before, create a keepalive
> checker for NVMe, and use hwtable entries to match them appropriately
> to devices.
>
OK, it will be fixed.
> See below for some details.
> NAK from my side for the patch in this form.
>
> Martin
>
>>
>> Signed-off-by: Yang Feng <philip.yang@huawei.com>
>> Reviewed-by: Martin Wilck <mwilck@suse.com>
>> Reviewed-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
>> ---
>> libmultipath/checkers.c | 7 ++
>> libmultipath/checkers.h | 4 +
>> libmultipath/checkers/Makefile | 4 +-
>> libmultipath/checkers/emc_clariion.c | 4 +-
>> libmultipath/checkers/libsg.c | 94 -------------------
>
> IIRC you did this in your previous patch already. Can you give a good
> reason for moving this code? It makes your patch unnecessarily big and
> messes up git history. If it's REALLY necessary, separate it out,
> please.
>
OK, the libsg.c/libsg.h will not be moved.
>
>> ---
>> libmultipath/checkers/libsg.h | 9 ---
>> libmultipath/checkers/readsector0.c | 4 +-
>> libmultipath/checkers/tur.c | 64 ++++++++++-----
>> libmultipath/discovery.c | 1 +
>> libmultipath/libnvme.c | 130
>> +++++++++++++++++++++++++++++++
>> libmultipath/libnvme.h | 10 +++
>> libmultipath/libsg.c | 113
>> +++++++++++++++++++++++++++
>> libmultipath/libsg.h | 13 ++++
>> libmultipath/prioritizers/Makefile | 2 +-
>> libmultipath/prioritizers/path_latency.c | 34 +++++---
>> multipath/multipath.conf.5 | 2 +-
>> 16 files changed, 354 insertions(+), 141 deletions(-)
>> delete mode 100644 libmultipath/checkers/libsg.c
>> delete mode 100644 libmultipath/checkers/libsg.h
>> create mode 100644 libmultipath/libnvme.c
>> create mode 100644 libmultipath/libnvme.h
>> create mode 100644 libmultipath/libsg.c
>> create mode 100644 libmultipath/libsg.h
>>
>> diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
>> index 05e024f..00fbd6e 100644
>> --- a/libmultipath/checkers.c
>> +++ b/libmultipath/checkers.c
>> @@ -162,6 +162,13 @@ void checker_set_fd (struct checker * c, int fd)
>> c->fd = fd;
>> }
>>
>> +void checker_set_dev(struct checker *c, char *dev)
>> +{
>> + if (!c)
>> + return;
>> + strncpy(c->dev, dev, strlen(dev)+1);
>> +}
>> +
>> void checker_set_sync (struct checker * c)
>> {
>> if (!c)
>> diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
>> index 1d225de..f924624 100644
>> --- a/libmultipath/checkers.h
>> +++ b/libmultipath/checkers.h
>> @@ -97,6 +97,8 @@ enum path_check_state {
>> #define CHECKER_DEV_LEN 256
>> #define LIB_CHECKER_NAMELEN 256
>>
>> +#define FILE_NAME_SIZE 256
>> +
>> struct checker {
>> struct list_head node;
>> void *handle;
>> @@ -107,6 +109,7 @@ struct checker {
>> int disable;
>> char name[CHECKER_NAME_LEN];
>> char message[CHECKER_MSG_LEN]; /* comm with callers */
>> + char dev[FILE_NAME_SIZE];
>
> FILE_NAME_SIZE more memory usage only to distinguish SCSI from NVMe.
> Please, no.
>
OK, it will be fixed.
>
>> void * context; /* store for persistent
>> data */
>> void ** mpcontext; /* store for persistent
>> data shared
>> multipath-wide. Use
>> MALLOC if
>> @@ -132,6 +135,7 @@ void checker_reset (struct checker *);
>> void checker_set_sync (struct checker *);
>> void checker_set_async (struct checker *);
>> void checker_set_fd (struct checker *, int);
>> +void checker_set_dev(struct checker *c, char *dev);
>> void checker_enable (struct checker *);
>> void checker_disable (struct checker *);
>> void checker_repair (struct checker *);
>> diff --git a/libmultipath/checkers/Makefile
>> b/libmultipath/checkers/Makefile
>> index bce6b8b..a9be6b6 100644
>> --- a/libmultipath/checkers/Makefile
>> +++ b/libmultipath/checkers/Makefile
>> @@ -24,10 +24,10 @@ all: $(LIBS)
>> libcheckrbd.so: rbd.o
>> $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lrados -ludev
>>
>> -libcheckdirectio.so: libsg.o directio.o
>> +libcheckdirectio.so: ../libsg.o ../libnvme.o directio.o
>> $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -laio
>>
>> -libcheck%.so: libsg.o %.o
>> +libcheck%.so: ../libsg.o ../libnvme.o %.o
>> $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^
>>
>> install:
>> diff --git a/libmultipath/checkers/emc_clariion.c
>> b/libmultipath/checkers/emc_clariion.c
>> index 9c1ffed..12c1e3e 100644
>> --- a/libmultipath/checkers/emc_clariion.c
>> +++ b/libmultipath/checkers/emc_clariion.c
>> @@ -12,7 +12,7 @@
>> #include <errno.h>
>>
>> #include "../libmultipath/sg_include.h"
>> -#include "libsg.h"
>> +#include "../libmultipath/libsg.h"
>> #include "checkers.h"
>> #include "debug.h"
>> #include "memory.h"
>> @@ -21,6 +21,8 @@
>> #define INQUIRY_CMDLEN 6
>> #define HEAVY_CHECK_COUNT 10
>>
>> +#define SENSE_BUFF_LEN 32
>> +
>> /*
>> * Mechanism to track CLARiiON inactive snapshot LUs.
>> * This is done so that we can fail passive paths
>> diff --git a/libmultipath/checkers/libsg.c
>> b/libmultipath/checkers/libsg.c
>> deleted file mode 100644
>
> See above
>
>> diff --git a/libmultipath/checkers/libsg.h
>> b/libmultipath/checkers/libsg.h
>> deleted file mode 100644
>> index 3994f45..0000000
>> --- a/libmultipath/checkers/libsg.h
>> +++ /dev/null
>> @@ -1,9 +0,0 @@
>> -#ifndef _LIBSG_H
>> -#define _LIBSG_H
>> -
>> -#define SENSE_BUFF_LEN 32
>> -
>> -int sg_read (int sg_fd, unsigned char * buff, int buff_len,
>> - unsigned char * sense, int sense_len, unsigned int
>> timeout);
>> -
>> -#endif /* _LIBSG_H */
>> diff --git a/libmultipath/checkers/readsector0.c
>> b/libmultipath/checkers/readsector0.c
>> index 8fccb46..e485810 100644
>> --- a/libmultipath/checkers/readsector0.c
>> +++ b/libmultipath/checkers/readsector0.c
>> @@ -4,11 +4,13 @@
>> #include <stdio.h>
>>
>> #include "checkers.h"
>> -#include "libsg.h"
>> +#include "../libmultipath/libsg.h"
>>
>> #define MSG_READSECTOR0_UP "readsector0 checker reports path
>> is up"
>> #define MSG_READSECTOR0_DOWN "readsector0 checker reports
>> path is down"
>>
>> +#define SENSE_BUFF_LEN 32
>> +
>> struct readsector0_checker_context {
>> void * dummy;
>> };
>> diff --git a/libmultipath/checkers/tur.c
>> b/libmultipath/checkers/tur.c
>> index b4a5cb2..a0382fa 100644
>> --- a/libmultipath/checkers/tur.c
>> +++ b/libmultipath/checkers/tur.c
>> @@ -1,5 +1,8 @@
>> /*
>> - * Some code borrowed from sg-utils.
>> + * Some code borrowed from sg-utils and
>> + * NVM-Express command line utility,
>> + * including using of a TUR command and
>> + * a Keep Alive command.
>> *
>> * Copyright (c) 2004 Christophe Varoqui
>> */
>> @@ -22,10 +25,10 @@
>> #include "../libmultipath/sg_include.h"
>> #include "../libmultipath/util.h"
>> #include "../libmultipath/time-util.h"
>> -#include "../libmultipath/util.h"
>> +#include "../libmultipath/libsg.h"
>> +#include "../libmultipath/libnvme.h"
>>
>> -#define TUR_CMD_LEN 6
>> -#define HEAVY_CHECK_COUNT 10
>> +#define SENSE_BUFF_LEN 32
>>
>> #define MSG_TUR_UP "tur checker reports path is up"
>> #define MSG_TUR_DOWN "tur checker reports path is down"
>> @@ -39,6 +42,7 @@ struct tur_checker_context {
>> int state;
>> int running;
>> int fd;
>> + char dev[FILE_NAME_SIZE];
>
> see above
>
>> unsigned int timeout;
>> time_t time;
>> pthread_t thread;
>> @@ -75,6 +79,7 @@ int libcheck_init (struct checker * c)
>> ct->state = PATH_UNCHECKED;
>> ct->fd = -1;
>> ct->holders = 1;
>> + memset(ct->dev, 0, sizeof(ct->dev));
>> pthread_cond_init_mono(&ct->active);
>> pthread_mutexattr_init(&attr);
>> pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
>> @@ -133,22 +138,12 @@ tur_check(int fd, unsigned int timeout,
>> void (*copy_message)(void *, const char *), void *cb_arg)
>> {
>> struct sg_io_hdr io_hdr;
>> - unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0
>> };
>> - unsigned char sense_buffer[32];
>> + unsigned char sense_buffer[SENSE_BUFF_LEN];
>> int retry_tur = 5;
>>
>> retry:
>> - memset(&io_hdr, 0, sizeof (struct sg_io_hdr));
>> - memset(&sense_buffer, 0, 32);
>> - io_hdr.interface_id = 'S';
>> - io_hdr.cmd_len = sizeof (turCmdBlk);
>> - io_hdr.mx_sb_len = sizeof (sense_buffer);
>> - io_hdr.dxfer_direction = SG_DXFER_NONE;
>> - io_hdr.cmdp = turCmdBlk;
>> - io_hdr.sbp = sense_buffer;
>> - io_hdr.timeout = timeout * 1000;
>> - io_hdr.pack_id = 0;
>> - if (ioctl(fd, SG_IO, &io_hdr) < 0) {
>> + if (sg_tur(fd, &io_hdr, sense_buffer,
>> + sizeof(sense_buffer), timeout) < 0) {
>> TUR_MSG(MSG_TUR_DOWN);
>> return PATH_DOWN;
>> }
>> @@ -213,6 +208,35 @@ retry:
>> return PATH_UP;
>> }
>>
>> +static int
>> +keep_alive_check(int fd, unsigned int timeout,
>> + void (*copy_message)(void *, const char *), void *cb_arg)
>> +{
>> + int err;
>> +
>> + err = nvme_keep_alive(fd, timeout);
>> + if (err == 0) {
>> + TUR_MSG(MSG_TUR_UP);
>> + return PATH_UP;
>> + }
>> +
>> + TUR_MSG(MSG_TUR_DOWN);
>> + return PATH_DOWN;
>> +}
>> +
>> +static int
>> +ping_check(int fd, char *dev, unsigned int timeout,
>> + void (*copy_message)(void *, const char *), void *cb_arg)
>> +{
>> + if (!strncmp(dev, "nvme", 4))
>> + {
>> + return keep_alive_check(fd, timeout, copy_message, cb_arg);
>> + }
>> + else
>> + {
>> + return tur_check(fd, timeout, copy_message, cb_arg);
>> + }
>> +}
>> #define tur_thread_cleanup_push(ct)
>> pthread_cleanup_push(cleanup_func, ct)
>> #define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1)
>>
>> @@ -266,8 +290,7 @@ static void *tur_thread(void *ctx)
>> ct->state = PATH_PENDING;
>> ct->message[0] = '\0';
>> pthread_mutex_unlock(&ct->lock);
>> -
>> - state = tur_check(ct->fd, ct->timeout, copy_msg_to_tcc, ct-
>>> message);
>> + state = ping_check(ct->fd, ct->dev, ct->timeout,
>> copy_msg_to_tcc, ct->message);
>> pthread_testcancel();
>>
>> /* TUR checker done */
>> @@ -390,6 +413,7 @@ int libcheck_check(struct checker * c)
>> /* Start new TUR checker */
>> ct->state = PATH_UNCHECKED;
>> ct->fd = c->fd;
>> + strncpy(ct->dev, c->dev, strlen(c->dev)+1);
>
> You seem to have some whitespace issues.
Thanks, it will be fixed.
>
>> ct->timeout = c->timeout;
>> pthread_spin_lock(&ct->hldr_lock);
>> ct->holders++;
>> @@ -406,7 +430,7 @@ int libcheck_check(struct checker * c)
>> ct->thread = 0;
>> condlog(3, "%s: failed to start tur thread,
>> using"
>> " sync mode", tur_devt(devt,
>> sizeof(devt), ct));
>> - return tur_check(c->fd, c->timeout,
>> + return ping_check(c->fd, c->dev, c->timeout,
>> copy_msg_to_checker, c);
>> }
>> tur_timeout(&tsp);
>> diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
>> index 663c8ea..bae5d24 100644
>> --- a/libmultipath/discovery.c
>> +++ b/libmultipath/discovery.c
>> @@ -1539,6 +1539,7 @@ get_state (struct path * pp, struct config
>> *conf, int daemon)
>> return PATH_UNCHECKED;
>> }
>> checker_set_fd(c, pp->fd);
>> + checker_set_dev(c, pp->dev);
>> if (checker_init(c, pp->mpp?&pp->mpp-
>>> mpcontext:NULL)) {
>> memset(c, 0x0, sizeof(struct checker));
>> condlog(3, "%s: checker init failed", pp-
>>> dev);
>> diff --git a/libmultipath/libnvme.c b/libmultipath/libnvme.c
>> new file mode 100644
>> index 0000000..97c9125
>> --- /dev/null
>> +++ b/libmultipath/libnvme.c
>> @@ -0,0 +1,130 @@
>> +/*
>> + * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved.
>> + *
>> + * libnvme.c
>> + *
>> + * Some code borrowed from NVM-Express command line utility.
>> + *
>> + * Author(s): Yang Feng <philip.yang@huawei.com>
>> + *
>> + * This file is released under the GPL version 2, or any later
>> version.
>> + *
>> + */
>> +#include <linux/types.h>
>> +#include <sys/ioctl.h>
>> +#include <stdint.h>
>> +
>> +struct nvme_user_io {
>> + __u8 opcode;
>> + __u8 flags;
>> + __u16 control;
>> + __u16 nblocks;
>> + __u16 rsvd;
>> + __u64 metadata;
>> + __u64 addr;
>> + __u64 slba;
>> + __u32 dsmgmt;
>> + __u32 reftag;
>> + __u16 apptag;
>> + __u16 appmask;
>> +};
>
> Please simply include linux/nvme_ioctl.h. Makefiles should check if
> that file exists and compile the nvme stuff conditionally.
>
OK, it will be added.
>> +
>> +struct nvme_admin_cmd {
>> + __u8 opcode;
>> + __u8 flags;
>> + __u16 rsvd1;
>> + __u32 nsid;
>> + __u32 cdw2;
>> + __u32 cdw3;
>> + __u64 metadata;
>> + __u64 addr;
>> + __u32 metadata_len;
>> + __u32 data_len;
>> + __u32 cdw10;
>> + __u32 cdw11;
>> + __u32 cdw12;
>> + __u32 cdw13;
>> + __u32 cdw14;
>> + __u32 cdw15;
>> + __u32 timeout_ms;
>> + __u32 result;
>> +};
>> +
>> +#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct
>> nvme_admin_cmd)
>> +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
>> +
>> +static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks,
>> __u16 control,
>> + __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask,
>> void *data,
>> + void *metadata)
>> +{
>> + struct nvme_user_io io = {
>> + .opcode = opcode,
>> + .flags = 0,
>> + .control = control,
>> + .nblocks = nblocks,
>> + .rsvd = 0,
>> + .metadata = (__u64)(uintptr_t) metadata,
>> + .addr = (__u64)(uintptr_t) data,
>> + .slba = slba,
>> + .dsmgmt = dsmgmt,
>> + .reftag = reftag,
>> + .appmask = apptag,
>> + .apptag = appmask,
>> + };
>> +
>> + return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
>> +}
>> +
>> +int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
>> __u32 dsmgmt,
>> + __u32 reftag, __u16 apptag, __u16 appmask, void *data,
>> void *metadata)
>> +{
>> + return nvme_io(fd, 0x2, slba, nblocks, control, dsmgmt,
>> + reftag, apptag, appmask, data, metadata);
>> +}
>
> This API is bloated. Out of these 10(!) function arguments, 6 are zero
> in the only call you have.
>
Thanks, this will be optimized.
>> +
>> +static int nvme_submit_passthru(int fd, int ioctl_cmd, struct
>> nvme_admin_cmd *cmd)
>> +{
>> + return ioctl(fd, ioctl_cmd, cmd);
>> +}
>> +
>> +int nvme_passthru(int fd, int ioctl_cmd, __u8 opcode, __u8 flags,
>> __u16 rsvd,
>> + __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10,
>> __u32 cdw11,
>> + __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32
>> cdw15,
>> + __u32 data_len, void *data, __u32 metadata_len,
>> + void *metadata, __u32 timeout_ms, __u32 *result)
>
> This is even worse. This function takes 20(!!!) parameters, and 15 of
> them are 0 in the call. Do you see any other use case of
> nvme_admin_cmd() in multipath-tools that would justify the extra
> parameters?
>
Thanks, this will be optimized.
>> +{
>> + struct nvme_admin_cmd cmd = {
>> + .opcode = opcode,
>> + .flags = flags,
>> + .rsvd1 = rsvd,
>> + .nsid = nsid,
>> + .cdw2 = cdw2,
>> + .cdw3 = cdw3,
>> + .metadata = (__u64)(uintptr_t) metadata,
>> + .addr = (__u64)(uintptr_t) data,
>> + .metadata_len = metadata_len,
>> + .data_len = data_len,
>> + .cdw10 = cdw10,
>> + .cdw11 = cdw11,
>> + .cdw12 = cdw12,
>> + .cdw13 = cdw13,
>> + .cdw14 = cdw14,
>> + .cdw15 = cdw15,
>> + .timeout_ms = timeout_ms,
>> + .result = 0,
>> + };
>> + int err;
>> +
>> + err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
>> + if (!err && result)
>> + *result = cmd.result;
>> + return err;
>> +}
>> +
>> +int nvme_keep_alive(int fd, __u32 timeout_ms)
>> +{
>> + __u32 result;
>> +
>> + return nvme_passthru(fd, NVME_IOCTL_ADMIN_CMD, 0x18, 0, 0, 0, 0,
>> 0, 0, 0,
>> + 0, 0, 0, 0, 0, 0, 0,0 , timeout_ms,
>> &result);
>> +}
>
> Honestly, why do you define the API at all? It would be more readable
> to call nvme_submit_passthru() directly.
>
Thanks, the func will be renamed.
>> diff --git a/libmultipath/libnvme.h b/libmultipath/libnvme.h
>> new file mode 100644
>> index 0000000..a2b5460
>> --- /dev/null
>> +++ b/libmultipath/libnvme.h
>> @@ -0,0 +1,10 @@
>> +#ifndef _LIBNVME_H
>> +#define _LIBNVME_H
>> +
>> +#include <linux/types.h>
>> +
>> +int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
>> __u32 dsmgmt,
>> + __u32 reftag, __u16 apptag, __u16 appmask, void *data,
>> void *metadata);
>> +int nvme_keep_alive(int fd, __u32 timeout_ms);
>> +
>> +#endif /* _LIBNVME_H */
>> diff --git a/libmultipath/libsg.c b/libmultipath/libsg.c
>> new file mode 100644
>
> see above
>
> diff --git a/libmultipath/prioritizers/path_latency.c
>> b/libmultipath/prioritizers/path_latency.c
>> index 34b734b..21209ff 100644
>> --- a/libmultipath/prioritizers/path_latency.c
>> +++ b/libmultipath/prioritizers/path_latency.c
>> @@ -23,11 +23,11 @@
>> #include <math.h>
>> #include <ctype.h>
>> #include <time.h>
>> -
>> #include "debug.h"
>> #include "prio.h"
>> #include "structs.h"
>> -#include "../checkers/libsg.h"
>> +#include "libsg.h"
>> +#include "libnvme.h"
>>
>> #define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency
>> prio: " fmt, ##args)
>>
>> @@ -44,6 +44,8 @@
>>
>> #define MAX_CHAR_SIZE 30
>>
>> +#define SENSE_BUFF_LEN 32
>> +
>> #define USEC_PER_SEC 1000000LL
>> #define NSEC_PER_USEC 1000LL
>>
>> @@ -51,19 +53,27 @@ static long long path_latency[MAX_IO_NUM];
>>
>> static inline long long timeval_to_us(const struct timespec *tv)
>> {
>> - return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv-
>>> tv_nsec / NSEC_PER_USEC);
>> + return ((long long) tv->tv_sec * USEC_PER_SEC) + (tv->tv_nsec /
>> NSEC_PER_USEC);
>> }
>>
>> -static int do_readsector0(int fd, unsigned int timeout)
>> +static int do_readsector0(struct path *pp, unsigned int timeout)
>> {
>> - unsigned char buf[4096];
>> - unsigned char sbuf[SENSE_BUFF_LEN];
>> - int ret;
>> + unsigned char buf[4096];
>> + unsigned char mbuf[512];
>> + unsigned char sbuf[SENSE_BUFF_LEN];
>>
>> - ret = sg_read(fd, &buf[0], 4096, &sbuf[0],
>> - SENSE_BUFF_LEN, timeout);
>> + if (!strncmp(pp->dev, "nvme", 4))
>> + {
>> + if (nvme_read(pp->fd, 0, 1, 0, 0, 0, 0, 0, buf, mbuf) != 0)
>> + return 0;
>> + }
>> + else
>> + {
>> + if (sg_read(pp->fd, &buf[0], 4096, &sbuf[0], SENSE_BUFF_LEN,
>> timeout) == 2)
>> + return 0;
>> + }
>>
>> - return ret;
>> + return 1;
>> }
>>
>> int check_args_valid(int io_num, int base_num)
>> @@ -218,9 +228,9 @@ int getprio (struct path *pp, char *args,
>> unsigned int timeout)
>> (void)clock_gettime(CLOCK_MONOTONIC, &tv);
>> before = timeval_to_us(&tv);
>>
>> - if (do_readsector0(pp->fd, timeout) == 2)
>> + if (do_readsector0(pp, timeout) == 0)
>> {
>> - pp_pl_log(0, "%s: path down", pp->dev);
>> + pp_pl_log(0, "%s: send read sector0 command fail", pp-
>>> dev);
>> return -1;
>> }
>>
>> diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
>> index 0049cba..a8d4797 100644
>> --- a/multipath/multipath.conf.5
>> +++ b/multipath/multipath.conf.5
>> @@ -419,7 +419,7 @@ are:
>> deprecated, please use \fItur\fR instead.
>> .TP
>> .I tur
>> -Issue a \fITEST UNIT READY\fR command to the device.
>> +Issue a \fITEST UNIT READY\fR command or a \fIKEEP ALIVE\fR command
>> to the device.
>> .TP
>> .I emc_clariion
>> (Hardware-dependent)
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-20 18:07 ` Martin Wilck
@ 2017-07-21 3:38 ` Yang Feng
2017-07-21 10:27 ` Martin Wilck
` (2 more replies)
0 siblings, 3 replies; 13+ messages in thread
From: Yang Feng @ 2017-07-21 3:38 UTC (permalink / raw)
To: Martin Wilck, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Martin,
Thank you very much for your guidance and reviews.
Please find my replys as follows.
The up-to-date patch will be sent later.
Regards,
-Yang
On 2017/7/21 2:07, Martin Wilck wrote:
> Dear Yang,
>
> On Thu, 2017-07-20 at 11:36 +0800, Yang Feng wrote:
>> Add args min_avg_latency of logarithmic scale, for
>> prioritizers/path_latency.c.
>> Min average latency is not constant 1us, and is set by user.
>> Certainly, max average
>> latency value is still 100s. It make support better for more scenes,
>> because it can
>> deal better with the normal standard deviation of path latency. For
>> example, when the
>> standard deviation value is 200us and the average latency of the
>> normal paths is 1ms,
>> args base_num can be set to 5 and args min_avg_latency can be set to
>> 2ms, so the paths
>> will be grouped in priority groups with path latency <=2ms, (2ms,
>> 10ms], (10ms, 50ms],
>> etc.
>
> Nack. You need this because you are using a wrong calculation for
> standard deviation. If your scale is logarithmic, you need to calculate
> the standard deviation on a log scale, too. The scientific term is
> "geometric standard deviation".
>
> https://en.wikipedia.org/wiki/Geometric_standard_deviation
>
> Suppose you really have 3 classes of devices with us, ms, and seconds
> average latency, respectively. The latency of the devices in the
> "seconds" class will also vary on the order of seconds (e.g. 3.5-5s).
> That's obviously impossible for the us and ms class devices. Rather,
> it's reasonable to assume that the us devices will have us (or sub-us)
> standard deviation and the ms devices have ms (or sub-ms) standard
> deviation, or in other words, the uncertainty is roughly in the same
> order of magnitude as the average.
>> This is exactly how the geometric standard deviation behaves.
>
> I think I actually remarked this on your previous patch, but the patch
> has been applied without that having been fixed. It would be good if
> you could fix it now.
>
Sorry, there is a mistake. I will fix it later.
>>
>> Signed-off-by: Yang Feng <philip.yang@huawei.com>
>> Reviewed-by: Martin Wilck <mwilck@suse.com>
>
> Please don't add my Reviewed-by: tag for patches I haven't reviewed, or
> have negatively reviewed. Reviewed-by: is supposed to indicate
> approval.
>
Sorry, it will fixed.
> Wrt your prio_args parser, could you perhaps support a syntax that is
> similar to other prioritizers, e.g. "base_num=5 io_num=10"?
>
Sorry, I don't find this similar syntax. Could you give a example of the
other prioritizer.
> Regards
> Martin
>
>
>> Reviewed-by: Xose Vazquez Perez <xose.vazquez@gmail.com>
>> ---
>> libmultipath/prioritizers/path_latency.c | 61
>> ++++++++++++++++++++++----------
>> multipath/multipath.conf.5 | 14 +++++---
>> 2 files changed, 52 insertions(+), 23 deletions(-)
>>
>> diff --git a/libmultipath/prioritizers/path_latency.c
>> b/libmultipath/prioritizers/path_latency.c
>> index 34b734b..a71faff 100644
>> --- a/libmultipath/prioritizers/path_latency.c
>> +++ b/libmultipath/prioritizers/path_latency.c
>> @@ -66,7 +66,7 @@ static int do_readsector0(int fd, unsigned int
>> timeout)
>> return ret;
>> }
>>
>> -int check_args_valid(int io_num, int base_num)
>> +int check_args_valid(int io_num, int base_num, int min_avg_latency)
>> {
>> if ((io_num < MIN_IO_NUM) || (io_num > MAX_IO_NUM))
>> {
>> @@ -80,24 +80,33 @@ int check_args_valid(int io_num, int base_num)
>> return 0;
>> }
>>
>> + if ((min_avg_latency < MIN_AVG_LATENCY) || (min_avg_latency >
>> MAX_AVG_LATENCY))
>> + {
>> + pp_pl_log(0, "args min_avg_latency is outside the valid
>> range");
>> + return 0;
>> + }
>> +
>> return 1;
>> }
>>
>> -/* In multipath.conf, args form: io_num|base_num. For example,
>> -* args is "20|10", this function can get io_num value 20, and
>> - base_num value 10.
>> +/* In multipath.conf, args form: io_num|base_num|min_avg_latency.
>> +* For example, args is "20|10|1", this function can get io_num
>> +* value 20, base_num value 10, min_avg_latency value 1 (us).
>> */
>> -static int get_ionum_and_basenum(char *args,
>> - int *ionum,
>> - int *basenum)
>> +static int get_prio_args(char *args,
>> + int *ionum,
>> + int *basenum,
>> + int *minavglatency)
>> {
>> char source[MAX_CHAR_SIZE];
>> char vertica = '|';
>> - char *endstrbefore = NULL;
>> - char *endstrafter = NULL;
>> + char *endstr1 = NULL;
>> + char *endstr2 = NULL;
>> + char *endstr3 = NULL;
>> unsigned int size = strlen(args);
>>
>> - if ((args == NULL) || (ionum == NULL) || (basenum == NULL))
>> + if ((args == NULL) || (ionum == NULL)
>> + || (basenum == NULL) || (minavglatency == NULL))
>> {
>> pp_pl_log(0, "args string is NULL");
>> return 0;
>> @@ -117,21 +126,34 @@ static int get_ionum_and_basenum(char *args,
>> return 0;
>> }
>>
>> - *ionum = (int)strtoul(source, &endstrbefore, 10);
>> - if (endstrbefore[0] != vertica)
>> + *ionum = (int)strtoul(source, &endstr1, 10);
>> + if (endstr1[0] != vertica)
>> + {
>> + pp_pl_log(0, "invalid prio_args format: %s", source);
>> + return 0;
>> + }
>> +
>> + if (!isdigit(endstr1[1]))
>> + {
>> + pp_pl_log(0, "invalid prio_args format: %s", source);
>> + return 0;
>> + }
>> +
>> + *basenum = (long long)strtol(&endstr1[1], &endstr2, 10);
>> + if (endstr2[0] != vertica)
>> {
>> pp_pl_log(0, "invalid prio_args format: %s", source);
>> return 0;
>> }
>>
>> - if (!isdigit(endstrbefore[1]))
>> + if (!isdigit(endstr2[1]))
>> {
>> pp_pl_log(0, "invalid prio_args format: %s", source);
>> return 0;
>> }
>>
>> - *basenum = (long long)strtol(&endstrbefore[1], &endstrafter,
>> 10);
>> - if (check_args_valid(*ionum, *basenum) == 0)
>> + *minavglatency = (long long)strtol(&endstr2[1], &endstr3, 10);
>> + if (check_args_valid(*ionum, *basenum, *minavglatency) == 0)
>> {
>> return 0;
>> }
>> @@ -194,6 +216,7 @@ int getprio (struct path *pp, char *args,
>> unsigned int timeout)
>> int index = 0;
>> int io_num;
>> int base_num;
>> + int min_avg_latency;
>> long long avglatency;
>> long long latency_interval;
>> long long standard_deviation;
>> @@ -204,7 +227,7 @@ int getprio (struct path *pp, char *args,
>> unsigned int timeout)
>> if (pp->fd < 0)
>> return -1;
>>
>> - if (get_ionum_and_basenum(args, &io_num, &base_num) == 0)
>> + if (get_prio_args(args, &io_num, &base_num, &min_avg_latency) ==
>> 0)
>> {
>> pp_pl_log(0, "%s: get path_latency args fail", pp->dev);
>> return DEFAULT_PRIORITY;
>> @@ -245,13 +268,13 @@ int getprio (struct path *pp, char *args,
>> unsigned int timeout)
>> set can change latency_interval value corresponding to
>> avglatency and is not constant.
>> Warn the user if latency_interval is smaller than (2 *
>> standard_deviation), or equal */
>> standard_deviation = calc_standard_deviation(path_latency,
>> index, avglatency);
>> - latency_interval = calc_latency_interval(avglatency,
>> MAX_AVG_LATENCY, MIN_AVG_LATENCY, base_num);
>> - if ((latency_interval!= 0)
>> + latency_interval = calc_latency_interval(avglatency,
>> MAX_AVG_LATENCY, min_avg_latency, base_num);
>> + if ((latency_interval != 0)
>> && (latency_interval <= (2 * standard_deviation)))
>> pp_pl_log(3, "%s: latency interval (%lld) according to
>> average latency (%lld us) is smaller than "
>> "2 * standard deviation (%lld us), or equal, args
>> base_num (%d) needs to be set bigger value",
>> pp->dev, latency_interval, avglatency,
>> standard_deviation, base_num);
>>
>> - rc = calcPrio(avglatency, MAX_AVG_LATENCY, MIN_AVG_LATENCY,
>> base_num);
>> + rc = calcPrio(avglatency, MAX_AVG_LATENCY, min_avg_latency,
>> base_num);
>> return rc;
>> }
>> diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5
>> index 0049cba..e68e681 100644
>> --- a/multipath/multipath.conf.5
>> +++ b/multipath/multipath.conf.5
>> @@ -341,7 +341,7 @@ these values can be looked up through sysfs or by
>> running \fImultipathd show pat
>> .TP 12
>> .I path_latency
>> Needs a value of the form
>> -\fI"<io_num>|<base_num>"\fR
>> +\fI"<io_num>|<base_num>|<min_avg_latency>"\fR
>> .RS
>> .TP 8
>> .I io_num
>> @@ -350,9 +350,15 @@ Valid Values: Integer, [2, 200].
>> .TP
>> .I base_num
>> The base number value of logarithmic scale, used to partition
>> different priority ranks. Valid Values: Integer,
>> -[2, 10]. And Max average latency value is 100s, min average latency
>> value is 1us.
>> -For example: If base_num=10, the paths will be grouped in priority
>> groups with path latency <=1us, (1us, 10us],
>> -(10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms, 100ms], (100ms,
>> 1s], (1s, 10s], (10s, 100s], >100s.
>> +[2, 10]. And Max average latency value is constant 100s.
>> +For example: If base_num=10 and min_avg_latency=1, the paths will be
>> grouped in priority groups with path latency <=1us,
>> +(1us, 10us], (10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms,
>> 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s.
>> +.TP
>> +.I min_avg_latency
>> +The min average latency value of logarithmic scale, used to
>> partition different priority ranks. Valid Values:
>> +Integer, [1, 10000000] (us). And Max average latency value is
>> constant 100s.
>> +For example: If base_num=10 and min_avg_latency=1000, the paths will
>> be grouped in priority groups with path latency <=1ms,
>> +(1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s],
>>> 100s.
>> .RE
>> .TP 12
>> .I alua
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command.
2017-07-21 3:30 ` Yang Feng
@ 2017-07-21 10:23 ` Martin Wilck
0 siblings, 0 replies; 13+ messages in thread
From: Martin Wilck @ 2017-07-21 10:23 UTC (permalink / raw)
To: Yang Feng, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
On Fri, 2017-07-21 at 11:30 +0800, Yang Feng wrote:
> Dear Martin,
>
> Thanks a lot for your reviews.
> Please find my replys as follows.
> And the up-to-date patch will be sent later.
Thank you. Note that I'll be out of office for the next two weeks.
Martin
--
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-21 3:38 ` Yang Feng
@ 2017-07-21 10:27 ` Martin Wilck
2017-07-21 12:22 ` Martin Wilck
2017-07-21 12:26 ` Martin Wilck
2 siblings, 0 replies; 13+ messages in thread
From: Martin Wilck @ 2017-07-21 10:27 UTC (permalink / raw)
To: Yang Feng, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Yang,
On Fri, 2017-07-21 at 11:38 +0800, Yang Feng wrote:
>
> Sorry, I don't find this similar syntax. Could you give a example of
> the
> other prioritizer.
Please look at the prio_args section in multipath.conf. "weighted" uses
just space separated args. "iet" uses "parm=value" syntax. That's
actually most intuitive IMO. No current prio algorithm uses anything
but space to separate options.
Regards
Martin
--
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-21 3:38 ` Yang Feng
2017-07-21 10:27 ` Martin Wilck
@ 2017-07-21 12:22 ` Martin Wilck
2017-07-21 12:26 ` Martin Wilck
2 siblings, 0 replies; 13+ messages in thread
From: Martin Wilck @ 2017-07-21 12:22 UTC (permalink / raw)
To: Yang Feng, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Yang,
On Fri, 2017-07-21 at 11:38 +0800, Yang Feng wrote:
>
> Sorry, I don't find this similar syntax. Could you give a example of
> the
> other prioritizer.
Please look at the prio_args section in multipath.conf. "weighted" uses
just space separated args. "iet" uses "parm=value" syntax. That's
actually most intuitive IMO. No current prio algorithm uses anything
but space to separate options.
Regards
Martin
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-21 3:38 ` Yang Feng
2017-07-21 10:27 ` Martin Wilck
2017-07-21 12:22 ` Martin Wilck
@ 2017-07-21 12:26 ` Martin Wilck
2017-07-24 1:45 ` Yang Feng
2 siblings, 1 reply; 13+ messages in thread
From: Martin Wilck @ 2017-07-21 12:26 UTC (permalink / raw)
To: Yang Feng, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Yang,
On Fri, 2017-07-21 at 11:38 +0800, Yang Feng wrote:
>
> Sorry, I don't find this similar syntax. Could you give a example of
> the
> other prioritizer.
Please look at the prio_args section in multipath.conf. "weighted" uses
just space separated args. "iet" uses "parm=value" syntax. That's
actually most intuitive IMO. No current prio algorithm uses anything
but space to separate options.
Regards
Martin
--
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency.
2017-07-21 12:26 ` Martin Wilck
@ 2017-07-24 1:45 ` Yang Feng
0 siblings, 0 replies; 13+ messages in thread
From: Yang Feng @ 2017-07-24 1:45 UTC (permalink / raw)
To: Martin Wilck, dm-devel, xose.vazquez
Cc: zouming.zouming, hege09, guanjunxiong, shenhong09
Dear Martin,
Thanks, I got it.
Regards,
-Yang
On 2017/7/21 20:26, Martin Wilck wrote:
> Dear Yang,
>
> On Fri, 2017-07-21 at 11:38 +0800, Yang Feng wrote:
>>
>> Sorry, I don't find this similar syntax. Could you give a example of
>> the
>> other prioritizer.
>
> Please look at the prio_args section in multipath.conf. "weighted" uses
> just space separated args. "iet" uses "parm=value" syntax. That's
> actually most intuitive IMO. No current prio algorithm uses anything
> but space to separate options.
>
> Regards
> Martin
>
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2017-07-24 1:45 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-07-20 3:36 [PATCH 0/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command and add args min_avg_latency for path_latency Yang Feng
2017-07-20 3:36 ` [PATCH 1/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command Yang Feng
2017-07-20 17:50 ` Martin Wilck
2017-07-21 3:30 ` Yang Feng
2017-07-21 10:23 ` Martin Wilck
2017-07-20 3:36 ` [PATCH 2/2] multipath-tools/libmultipath: Add args min_avg_latency for path_latency Yang Feng
2017-07-20 18:07 ` Martin Wilck
2017-07-21 3:38 ` Yang Feng
2017-07-21 10:27 ` Martin Wilck
2017-07-21 12:22 ` Martin Wilck
2017-07-21 12:26 ` Martin Wilck
2017-07-24 1:45 ` Yang Feng
-- strict thread matches above, loose matches on Subject: below --
2017-07-13 7:51 [PATCH 0/2] multipath-tools/libmultipath: Support for the native NVMe Ioctl command and add " Yang Feng
2017-07-13 7:51 ` [PATCH 2/2] multipath-tools/libmultipath: Add " Yang Feng
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.