From: Douglas Gilbert <dgilbert@interlog.com>
To: linux-scsi@vger.kernel.org
Cc: martin.petersen@oracle.com, jejb@linux.vnet.ibm.com, hare@suse.de
Subject: [PATCH v8 36/38] sg: rework mmap support
Date: Sun, 1 Mar 2020 22:32:47 -0500 [thread overview]
Message-ID: <20200302033249.4515-37-dgilbert@interlog.com> (raw)
In-Reply-To: <20200302033249.4515-1-dgilbert@interlog.com>
Linux has an issue with mmap-ing multiple pages allocated by
alloc_pages() [with order > 0]. So when mmap(2) is called, if the
reserve request's scatter gather (sgat) list is either:
- not big enough, or
- made up of elements of order > 0 (i.e. size > PAGE_SIZE)
then throw away the reserve request's sgat list and rebuild it so
that it meets those requirements.
Clean up related code and stop doing mmap+indirect_io.
This new mmap implementation is marginally more flexible than (but
still compatible with) the production driver. Previously, if the
user wanted a larger mmap(2) 'length' than the reserve request's
size, then they needed to use ioctl(SG_SET_RESERVED_SIZE) to set
the new size first. Now, mmap(2) called on an sg device node will
adjust to the size given by 'length' [mmap's second parameter].
Tweak some SG_LOG() levels to control the amount of debug
output.
*** Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
drivers/scsi/sg.c | 150 +++++++++++++++++++++++++++-------------------
1 file changed, 87 insertions(+), 63 deletions(-)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 7f3c0052c135..0d835ae88c35 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -279,7 +279,8 @@ static void sg_dfs_exit(void);
static int sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp,
int dxfer_dir);
static void sg_finish_scsi_blk_rq(struct sg_request *srp);
-static int sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen);
+static int sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen,
+ bool for_mmap);
static int sg_v3_submit(struct file *filp, struct sg_fd *sfp,
struct sg_io_hdr *hp, bool sync,
struct sg_request **o_srp);
@@ -2234,13 +2235,39 @@ sg_fasync(int fd, struct file *filp, int mode)
return fasync_helper(fd, filp, mode, &sfp->async_qp);
}
+static void
+sg_vma_open(struct vm_area_struct *vma)
+{
+ struct sg_fd *sfp = vma->vm_private_data;
+
+ if (unlikely(!sfp)) {
+ pr_warn("%s: sfp null\n", __func__);
+ return;
+ }
+ kref_get(&sfp->f_ref);
+}
+
+static void
+sg_vma_close(struct vm_area_struct *vma)
+{
+ struct sg_fd *sfp = vma->vm_private_data;
+
+ if (unlikely(!sfp)) {
+ pr_warn("%s: sfp null\n", __func__);
+ return;
+ }
+ kref_put(&sfp->f_ref, sg_remove_sfp); /* get in: sg_vma_open() */
+}
+
/* Note: the error return: VM_FAULT_SIGBUS causes a "bus error" */
static vm_fault_t
sg_vma_fault(struct vm_fault *vmf)
{
- int k, length;
- unsigned long offset, len, sa, iflags;
+ int k, n, length;
+ int res = VM_FAULT_SIGBUS;
+ unsigned long offset;
struct vm_area_struct *vma = vmf->vma;
+ struct page *page;
struct sg_scatter_hold *rsv_schp;
struct sg_request *srp;
struct sg_device *sdp;
@@ -2266,7 +2293,7 @@ sg_vma_fault(struct vm_fault *vmf)
SG_LOG(1, sfp, "%s: srp%s\n", __func__, nbp);
goto out_err;
}
- xa_lock_irqsave(&sfp->srp_arr, iflags);
+ mutex_lock(&sfp->f_mutex);
rsv_schp = &srp->sgat_h;
offset = vmf->pgoff << PAGE_SHIFT;
if (offset >= (unsigned int)rsv_schp->buflen) {
@@ -2274,44 +2301,37 @@ sg_vma_fault(struct vm_fault *vmf)
offset);
goto out_err_unlock;
}
- sa = vma->vm_start;
- SG_LOG(3, sfp, "%s: vm_start=0x%lx, off=%lu\n", __func__, sa, offset);
+ SG_LOG(5, sfp, "%s: vm_start=0x%lx, off=%lu\n", __func__,
+ vma->vm_start, offset);
length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
- for (k = 0; k < rsv_schp->num_sgat && sa < vma->vm_end; ++k) {
- len = vma->vm_end - sa;
- len = min_t(int, len, (int)length);
- if (offset < len) {
- struct page *page;
- struct page *pp;
-
- pp = rsv_schp->pages[k];
- xa_unlock_irqrestore(&sfp->srp_arr, iflags);
- page = nth_page(pp, offset >> PAGE_SHIFT);
- get_page(page); /* increment page count */
- vmf->page = page;
- return 0; /* success */
- }
- sa += len;
- offset -= len;
- }
+ k = (int)offset / length;
+ n = ((int)offset % length) >> PAGE_SHIFT;
+ page = nth_page(rsv_schp->pages[k], n);
+ get_page(page);
+ vmf->page = page;
+ res = 0;
out_err_unlock:
- xa_unlock_irqrestore(&sfp->srp_arr, iflags);
+ mutex_unlock(&sfp->f_mutex);
out_err:
- return VM_FAULT_SIGBUS;
+ return res;
}
static const struct vm_operations_struct sg_mmap_vm_ops = {
.fault = sg_vma_fault,
+ .open = sg_vma_open,
+ .close = sg_vma_close,
};
-/* Entry point for mmap(2) system call */
+/*
+ * Entry point for mmap(2) system call. For mmap(2) to work, request's
+ * scatter gather list needs to be order 0 which it is unlikely to be
+ * by default. mmap(2) cannot be called more than once per fd.
+ */
static int
sg_mmap(struct file *filp, struct vm_area_struct *vma)
{
- int k, length;
- int ret = 0;
- unsigned long req_sz, len, sa, iflags;
- struct sg_scatter_hold *rsv_schp;
+ int res = 0;
+ unsigned long req_sz;
struct sg_fd *sfp;
struct sg_request *srp;
@@ -2322,40 +2342,44 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
pr_warn("sg: %s: sfp is NULL\n", __func__);
return -ENXIO;
}
+ mutex_lock(&sfp->f_mutex);
req_sz = vma->vm_end - vma->vm_start;
SG_LOG(3, sfp, "%s: vm_start=%pK, len=%d\n", __func__,
(void *)vma->vm_start, (int)req_sz);
- if (vma->vm_pgoff)
- return -EINVAL; /* only an offset of 0 accepted */
+ if (vma->vm_pgoff) {
+ res = -EINVAL; /* only an offset of 0 accepted */
+ goto fini;
+ }
/* Check reserve request is inactive and has large enough buffer */
- mutex_lock(&sfp->f_mutex);
srp = sfp->rsv_srp;
- xa_lock_irqsave(&sfp->srp_arr, iflags);
if (SG_RS_ACTIVE(srp)) {
- ret = -EBUSY;
- goto out;
+ res = -EBUSY;
+ goto fini;
}
- rsv_schp = &srp->sgat_h;
- if (req_sz > (unsigned long)rsv_schp->buflen) {
- ret = -ENOMEM;
- goto out;
+ if (req_sz > SG_WRITE_COUNT_LIMIT) { /* sanity check */
+ res = -ENOMEM;
+ goto fini;
}
- sa = vma->vm_start;
- length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
- for (k = 0; k < rsv_schp->num_sgat && sa < vma->vm_end; ++k) {
- len = vma->vm_end - sa;
- len = min_t(unsigned long, len, (unsigned long)length);
- sa += len;
+ if (test_and_set_bit(SG_FFD_MMAP_CALLED, sfp->ffd_bm)) {
+ SG_LOG(1, sfp, "%s: multiple invocations on this fd\n",
+ __func__);
+ res = -EADDRINUSE;
+ goto fini;
+ }
+ if (srp->sgat_h.page_order > 0 ||
+ req_sz > (unsigned long)srp->sgat_h.buflen) {
+ sg_remove_sgat(srp);
+ res = sg_mk_sgat(srp, sfp, req_sz, true);
+ if (res)
+ goto fini;
}
-
- set_bit(SG_FFD_MMAP_CALLED, sfp->ffd_bm);
vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_private_data = sfp;
vma->vm_ops = &sg_mmap_vm_ops;
-out:
- xa_unlock_irqrestore(&sfp->srp_arr, iflags);
+ sg_vma_open(vma);
+fini:
mutex_unlock(&sfp->f_mutex);
- return ret;
+ return res;
}
/*
@@ -2949,7 +2973,7 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
}
scsi_rp->cmd_len = cwrp->cmd_len;
srp->cmd_opcode = scsi_rp->cmd[0];
- us_xfer = !(srp->rq_flags & SG_FLAG_NO_DXFER);
+ us_xfer = !(srp->rq_flags & (SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO));
assign_bit(SG_FRQ_NO_US_XFER, srp->frq_bm, !us_xfer);
reserved = (sfp->rsv_srp == srp);
rq->end_io_data = srp;
@@ -2978,7 +3002,7 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
} else if (req_schp->buflen == 0) {
int up_sz = max_t(int, dxfer_len, sfp->sgat_elem_sz);
- res = sg_mk_sgat(srp, sfp, up_sz);
+ res = sg_mk_sgat(srp, sfp, up_sz, false);
}
if (unlikely(res))
goto fini;
@@ -3086,14 +3110,14 @@ sg_finish_scsi_blk_rq(struct sg_request *srp)
}
static int
-sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen)
+sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen,
+ bool for_mmap)
{
- int j, k, rem_sz, order, align_sz;
+ int j, k, rem_sz, order, align_sz, elem_sz;
__maybe_unused int o_order;
int m_size = minlen;
int rup_sz = 0;
int mx_sgat_elems = sfp->parentdp->max_sgat_elems;
- u32 elem_sz;
const size_t struct_page_sz = sizeof(struct page *);
gfp_t mask_ap = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN | __GFP_ZERO;
gfp_t mask_kz = GFP_ATOMIC | __GFP_NOWARN;
@@ -3106,14 +3130,14 @@ sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen)
++m_size; /* don't remember why */
/* round request up to next highest SG_DEF_SECTOR_SZ byte boundary */
align_sz = ALIGN(m_size, SG_DEF_SECTOR_SZ);
-
schp->pages = kcalloc(mx_sgat_elems, struct_page_sz, mask_kz);
SG_LOG(4, sfp, "%s: minlen=%d [sz=%zu, 0x%pK ++]\n", __func__, minlen,
mx_sgat_elems * struct_page_sz, schp->pages);
if (unlikely(!schp->pages))
return -ENOMEM;
- elem_sz = sfp->sgat_elem_sz; /* power of 2 and >= PAGE_SIZE */
+ /* elem_sz must be power of 2 and >= PAGE_SIZE */
+ elem_sz = for_mmap ? PAGE_SIZE : sfp->sgat_elem_sz;
if (sdp && unlikely(sdp->device->host->unchecked_isa_dma))
mask_ap |= GFP_DMA;
/* PAGE_SIZE == (1 << PAGE_SHIFT) == (2 ** PAGE_SHIFT) */
@@ -3127,7 +3151,7 @@ sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen)
schp->pages[k] = alloc_pages(mask_ap, order);
if (!schp->pages[k])
goto err_out;
- SG_LOG(5, sfp, "%s: k=%d, rup_sz=%d [0x%pK ++]\n", __func__, k,
+ SG_LOG(6, sfp, "%s: k=%d, rup_sz=%d [0x%pK ++]\n", __func__, k,
rup_sz, schp->pages[k]);
}
schp->page_order = order;
@@ -3161,7 +3185,7 @@ sg_remove_sgat_helper(struct sg_fd *sfp, struct sg_scatter_hold *schp)
return;
for (k = 0; k < schp->num_sgat; ++k) {
p = schp->pages[k];
- SG_LOG(5, sfp, "%s: pg[%d]=0x%pK --\n", __func__, k, p);
+ SG_LOG(6, sfp, "%s: pg[%d]=0x%pK --\n", __func__, k, p);
if (unlikely(!p))
continue;
__free_pages(p, schp->page_order);
@@ -3291,7 +3315,7 @@ sg_find_srp_by_id(struct sg_fd *sfp, int pack_id)
}
return NULL;
good:
- SG_LOG(6, sfp, "%s: %s%d found [srp=0x%pK]\n", __func__, "pack_id=",
+ SG_LOG(5, sfp, "%s: %s%d found [srp=0x%pK]\n", __func__, "pack_id=",
pack_id, srp);
return srp;
}
@@ -3330,7 +3354,7 @@ sg_mk_srp_sgat(struct sg_fd *sfp, bool first, int db_len)
if (IS_ERR(n_srp))
return n_srp;
if (db_len > 0) {
- res = sg_mk_sgat(n_srp, sfp, db_len);
+ res = sg_mk_sgat(n_srp, sfp, db_len, false);
if (unlikely(res)) {
kfree(n_srp);
return ERR_PTR(res);
@@ -3363,7 +3387,7 @@ sg_build_reserve(struct sg_fd *sfp, int buflen)
buflen = PAGE_SIZE;
go_out = true;
}
- res = sg_mk_sgat(srp, sfp, buflen);
+ res = sg_mk_sgat(srp, sfp, buflen, false);
if (res == 0) {
SG_LOG(4, sfp, "%s: final buflen=%d, srp=0x%pK ++\n",
__func__, buflen, srp);
--
2.25.1
next prev parent reply other threads:[~2020-03-02 3:33 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-02 3:32 [PATCH v8 00/38] sg: add v4 interface Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 01/38] sg: move functions around Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 02/38] sg: remove typedefs, type+formatting cleanup Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 03/38] sg: sg_log and is_enabled Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 04/38] sg: rework sg_poll(), minor changes Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 05/38] sg: bitops in sg_device Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 06/38] sg: make open count an atomic Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 07/38] sg: move header to uapi section Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 08/38] sg: speed sg_poll and sg_get_num_waiting Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 09/38] sg: sg_allow_if_err_recovery and renames Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 10/38] sg: improve naming Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 11/38] sg: change rwlock to spinlock Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 12/38] sg: ioctl handling Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 13/38] sg: split sg_read Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 14/38] sg: sg_common_write add structure for arguments Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 15/38] sg: rework sg_vma_fault Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 16/38] sg: rework sg_mmap Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 17/38] sg: replace sg_allow_access Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 18/38] sg: rework scatter gather handling Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 19/38] sg: introduce request state machine Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 20/38] sg: sg_find_srp_by_id Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 21/38] sg: sg_fill_request_element Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 22/38] sg: printk change %p to %pK Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 23/38] sg: xarray for fds in device Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 24/38] sg: xarray for reqs in fd Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 25/38] sg: replace rq array with lists Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 26/38] sg: sense buffer rework Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 27/38] sg: add sg v4 interface support Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 28/38] sg: rework debug info Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 29/38] sg: add 8 byte SCSI LUN to sg_scsi_id Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 30/38] sg: expand sg_comm_wr_t Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 31/38] sg: add sg_iosubmit_v3 and sg_ioreceive_v3 ioctls Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 32/38] sg: add some __must_hold macros Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 33/38] sg: move procfs objects to avoid forward decls Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 34/38] sg: protect multiple receivers Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 35/38] sg: first debugfs support Douglas Gilbert
2020-03-02 3:32 ` Douglas Gilbert [this message]
2020-03-02 3:32 ` [PATCH v8 37/38] sg: warn v3 write system call users Douglas Gilbert
2020-03-02 3:32 ` [PATCH v8 38/38] sg: bump version to 4.0.08 Douglas Gilbert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200302033249.4515-37-dgilbert@interlog.com \
--to=dgilbert@interlog.com \
--cc=hare@suse.de \
--cc=jejb@linux.vnet.ibm.com \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.