From: Bart Van Assche <bvanassche-HInyCGIudOg@public.gmane.org>
To: Jinpu Wang <jinpu.wang-EIkl63zCoXaH+58JC4qpiA@public.gmane.org>
Cc: "linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org"
<linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: Re: RFC: FMR support in SRP
Date: Fri, 19 Sep 2014 16:04:59 +0200 [thread overview]
Message-ID: <541C380B.1000803@acm.org> (raw)
In-Reply-To: <CAMGffEkN4OmJVi6UV7PtRYq5q3XZfJVTEaJFqvypx0W8Aq8cRA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
[-- Attachment #1: Type: text/plain, Size: 1669 bytes --]
On 09/19/14 15:11, Jinpu Wang wrote:
> During go through SRP FMR support, I found ib_srp pre-alloc
> fmr_list/map_page in each request,
> fmr_list are alloced as many as target->cmd_sg_cnt
>
> I add some debug message when run fio test with different settings.
> Result show, state.ndesc and state.nmdesc is 1, state.npages is 0,
> after srp_map_sg,
> my question is : do we really need as many cmd_sg_cnt fmr_list, or I
> miss something, ndesc and nmdesc could be bigger than 1?
Hello Jack,
The limitations for FMR / FR memory registration are more restrictive
than those imposed by the SCSI core on S/G-list layout. A few examples:
* Memory registered via FMR must be aligned on an FMR page boundary.
* Memory registered via a single FMR / FR registration must be a
contiguous virtual memory region.
* The maximum size for a memory region registered via FMR or FR
(dev->mr_max_size) can be below the maximum size of an S/G-list that can
be passed by the SCSI core.
Hence the need for multiple memory descriptors. In case you are
wondering how I tested memory registration involving multiple memory
descriptors: I wrote a test program that causes the SCSI core to send an
S/G-list to the SRP initiator that consists of 128 elements with four
bytes of data. None of these elements are aligned on a page boundary and
no two S/G-list elements are contiguous in virtual memory. This test
program causes the SRP initiator to allocate 128 memory descriptors.
Please note that I/O requests submitted by the attached test program
will only be accepted by the SRP initiator if the cmd_sg_entries kernel
module parameter has been set to a value >= 128.
Bart.
[-- Attachment #2: discontiguous-io.cpp --]
[-- Type: text/x-c++src, Size: 7443 bytes --]
#include <cassert>
#include <cstring> // memset()
#include <fcntl.h> // O_RDONLY
#include <iomanip>
#include <iostream>
#include <linux/fs.h> // BLKSSZGET
#include <scsi/sg.h> // sg_io_hdr_t
#include <sys/ioctl.h>
#include <unistd.h> // open()
#include <vector>
class file_descriptor {
public:
file_descriptor(int fd = -1)
: m_fd(fd)
{ }
~file_descriptor()
{ if (m_fd >= 0) close(m_fd); }
operator int() const
{ return m_fd; }
private:
file_descriptor(const file_descriptor &);
file_descriptor &operator=(const file_descriptor &);
int m_fd;
};
class iovec_t {
public:
iovec_t()
{ }
~iovec_t()
{ }
size_t size() const
{ return m_v.size(); }
const sg_iovec_t& operator[](const int i) const
{ return m_v[i]; }
sg_iovec_t& operator[](const int i)
{ return m_v[i]; }
void append(void *addr, size_t len) {
m_v.resize(m_v.size() + 1);
decltype(m_v)::iterator p = m_v.end() - 1;
p->iov_base = addr;
p->iov_len = len;
}
const void *address() const {
return &*m_v.begin();
}
size_t data_len() const {
size_t len = 0;
for (decltype(m_v)::const_iterator p = m_v.begin(); p != m_v.end(); ++p)
len += p->iov_len;
return len;
}
void trunc(size_t len) {
size_t s = 0;
for (decltype(m_v)::iterator p = m_v.begin(); p != m_v.end(); ++p) {
s += p->iov_len;
if (s >= len) {
p->iov_len -= s - len;
assert(p->iov_len > 0 || (p->iov_len == 0 && len == 0));
m_v.resize(p - m_v.begin() + 1);
break;
}
}
}
std::ostream& write(std::ostream& os) const {
for (decltype(m_v)::const_iterator p = m_v.begin(); p != m_v.end(); ++p)
os.write((const char *)p->iov_base, p->iov_len);
return os;
}
private:
iovec_t(const iovec_t &);
iovec_t &operator=(const iovec_t &);
std::vector<sg_iovec_t> m_v;
};
static unsigned block_size;
static void dumphex(std::ostream &os, const void *a, size_t len)
{
for (int i = 0; i < len; i += 16) {
os << std::hex << std::setfill('0') << std::setw(16)
<< (uintptr_t)a + i << ':';
for (int j = i; j < i + 16 && j < len; j++) {
if (j % 4 == 0)
os << ' ';
os << std::hex << std::setfill('0') << std::setw(2)
<< (unsigned)((uint8_t*)a)[j];
}
os << " ";
for (int j = i; j < i + 16 && j < len; j++) {
unsigned char c = ((uint8_t*)a)[j];
os << (c >= ' ' && c < 128 ? (char)c : '.');
}
os << '\n';
}
}
enum {
MAX_READ_WRITE_6_LBA = 0x1fffff,
MAX_READ_WRITE_6_LENGTH = 0xff,
};
static ssize_t sg_read(const file_descriptor &fd, uint32_t lba, const iovec_t &v)
{
if (lba > MAX_READ_WRITE_6_LBA)
return -1;
if (v.data_len() == 0 || (v.data_len() % block_size) != 0)
return -1;
if (v.data_len() / block_size > MAX_READ_WRITE_6_LENGTH)
return -1;
int sg_version;
if (ioctl(fd, SG_GET_VERSION_NUM, &sg_version) < 0 || sg_version < 30000)
return -1;
uint8_t read6[6] = { 0x08, (uint8_t)(lba >> 16), (uint8_t)(lba >> 8),
(uint8_t)(lba), (uint8_t)(v.data_len() / block_size),
0 };
unsigned char sense_buffer[32];
sg_io_hdr_t h = { .interface_id = 'S' };
h.cmdp = read6;
h.cmd_len = sizeof(read6);
h.dxfer_direction = SG_DXFER_FROM_DEV;
h.iovec_count = v.size();
h.dxfer_len = v.data_len();
h.dxferp = const_cast<void*>(v.address());
h.sbp = sense_buffer;
h.mx_sb_len = sizeof(sense_buffer);
h.timeout = 1000; /* 1000 millisecs == 1 second */
if (ioctl(fd, SG_IO, &h) < 0) {
std::cerr << "READ(6) ioctl failed with errno " << errno << '\n';
return -1;
}
if (h.status) {
std::cerr << "READ(6) failed with SCSI status " << (unsigned)h.status
<< "\n";
if (h.status == 2) {
std::cerr << "Sense buffer:\n";
dumphex(std::cerr, sense_buffer, h.sb_len_wr);
}
return -1;
}
return v.data_len() - h.resid;
}
static ssize_t sg_write(const file_descriptor &fd, uint32_t lba, const iovec_t &v)
{
if (lba > MAX_READ_WRITE_6_LBA)
return -1;
if (v.data_len() == 0 || (v.data_len() % block_size) != 0)
return -1;
if (v.data_len() / block_size > MAX_READ_WRITE_6_LENGTH)
return -1;
int sg_version;
if (ioctl(fd, SG_GET_VERSION_NUM, &sg_version) < 0 || sg_version < 30000)
return -1;
uint8_t write6[6] = { 0x0a, (uint8_t)(lba >> 16), (uint8_t)(lba >> 8),
(uint8_t)(lba), (uint8_t)(v.data_len() / block_size),
0 };
unsigned char sense_buffer[32];
sg_io_hdr_t h = { .interface_id = 'S' };
h.cmdp = write6;
h.cmd_len = sizeof(write6);
h.dxfer_direction = SG_DXFER_TO_DEV;
h.iovec_count = v.size();
h.dxfer_len = v.data_len();
h.dxferp = const_cast<void*>(v.address());
h.sbp = sense_buffer;
h.mx_sb_len = sizeof(sense_buffer);
h.timeout = 1000; /* 1000 millisecs == 1 second */
if (ioctl(fd, SG_IO, &h) < 0) {
std::cerr << "WRITE(6) ioctl failed with errno " << errno << '\n';
return -1;
}
if (h.status) {
std::cerr << "WRITE(6) failed with SCSI status " << (unsigned)h.status
<< "\n";
if (h.status == 2) {
std::cerr << "Sense buffer:\n";
dumphex(std::cerr, sense_buffer, h.sb_len_wr);
}
return -1;
}
return v.data_len() - h.resid;
}
static void usage()
{
std::cout <<
"Usage: [-h] [-l <length_in_bytes>] [-o <lba_in_bytes>] [-s] [-w] <dev>\n";
}
int main(int argc, char **argv)
{
bool scattered = false, write = false;
uint32_t offs = 0;
const char *dev;
int c;
std::vector<uint8_t> buf;
size_t len = 512;
while ((c = getopt(argc, argv, "hl:o:sw")) != EOF) {
switch (c) {
case 'l': len = strtoul(optarg, NULL, 0); break;
case 'o': offs = strtoul(optarg, NULL, 0); break;
case 's': scattered = true; break;
case 'w': write = true; break;
default: usage(); goto out;
}
}
if (argc - optind < 1) {
std::cerr << "Too few arguments.\n";
goto out;
}
dev = argv[optind];
buf.resize(len);
{
file_descriptor fd(open(dev, O_RDONLY));
if (fd < 0) {
std::cerr << "Failed to open " << dev << "\n";
goto out;
}
if (ioctl(fd, BLKSSZGET, &block_size) < 0) {
std::cerr << "Failed to query block size of " << dev << "\n";
goto out;
}
if (offs % block_size) {
std::cerr << "LBA is not a multiple of the block size.\n";
goto out;
}
iovec_t iov;
if (scattered) {
buf.resize(buf.size() * 2);
unsigned char *p = &*buf.begin();
for (int i = 0; i < len / 4; i++)
iov.append(p + 4 + i * 8, std::min(4ul, len - i * 4));
} else {
iov.append(&*buf.begin(), buf.size());
}
if (write) {
for (int i = 0; i < iov.size(); i++) {
sg_iovec_t& e = iov[i];
size_t prevgcount = std::cin.gcount();
if (!std::cin.read((char *)e.iov_base, e.iov_len)) {
e.iov_len = std::cin.gcount() - prevgcount;
break;
}
}
ssize_t len = sg_write(fd, offs / block_size, iov);
if (len >= 0)
std::cout << "Wrote " << len << "/" << iov.data_len()
<< " bytes of data.\n";
} else {
ssize_t len = sg_read(fd, offs / block_size, iov);
if (len >= 0) {
std::cerr << "Read " << len << " bytes of data:\n";
iov.trunc(len);
iov.write(std::cout);
}
}
}
out:
return 0;
}
// Local variables:
// c-basic-offset: 4
// c-label-minimum-indentation: 0
// compile-command: "g++ -g -o discontiguous-io -Wall -Wextra -Werror -Wno-sign-compare -Wno-missing-field-initializers --std=c++11 discontiguous-io.cpp"
// End:
next prev parent reply other threads:[~2014-09-19 14:04 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-09-19 13:11 RFC: FMR support in SRP Jinpu Wang
[not found] ` <CAMGffEkN4OmJVi6UV7PtRYq5q3XZfJVTEaJFqvypx0W8Aq8cRA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-19 14:04 ` Bart Van Assche [this message]
[not found] ` <541C380B.1000803-HInyCGIudOg@public.gmane.org>
2014-09-19 15:08 ` Jinpu Wang
[not found] ` <CAMGffEmjzsYsOTnaPWPr4D7TDCbwcoxM8Lr2kDNWCnBOpP5k-A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-19 15:14 ` Bart Van Assche
[not found] ` <541C485B.60302-HInyCGIudOg@public.gmane.org>
2014-09-19 15:35 ` Jinpu Wang
[not found] ` <CAMGffE=AobYJe_r=0He-U=bH6WLSsKi_Bx-S0YeH_68gE96o7A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-19 16:04 ` Jinpu Wang
[not found] ` <CAMGffE=BG6JS7C3ZZ_pwUV3AyHDtG8i49RR2PmXZo+ZT18M5XA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-20 6:24 ` Bart Van Assche
[not found] ` <541D1DBA.7010004-HInyCGIudOg@public.gmane.org>
2014-09-20 8:41 ` Jack Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=541C380B.1000803@acm.org \
--to=bvanassche-hinycgiudog@public.gmane.org \
--cc=jinpu.wang-EIkl63zCoXaH+58JC4qpiA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.