public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
From: Douglas Gilbert <dgilbert@interlog.com>
To: linux-scsi@vger.kernel.org
Cc: martin.petersen@oracle.com, jejb@linux.vnet.ibm.com, hare@suse.de
Subject: [PATCH v18 48/83] sg: sgat_elem_sz and sum_fd_dlens
Date: Tue, 27 Apr 2021 17:56:58 -0400	[thread overview]
Message-ID: <20210427215733.417746-50-dgilbert@interlog.com> (raw)
In-Reply-To: <20210427215733.417746-1-dgilbert@interlog.com>

Wire up some more capabilities of ioctl(SG_SET_GET_EXTENDED). One
is the size of each internal scatter gather list element. This
defaults to 2^15 and was fixed in previous versions of this
driver. If the user provides a value, it must be a power of
2 (bytes) and no less than PAGE_SIZE.

sum_fd_dlens provides user control over a mechanism designed to
stop the starvation of the host machine's memory. Since requests
per file descriptor are no longer limited to 16, thousands could
be queued up by a badly designed program. If each one requests
a large buffer (say 128 KB each for READs) then without this
mechanism, the OOM killer may be called on to save the machine.
The driver counts the cumulative size of data buffers
outstanding held by each file descriptor. Once that figure
exceeds a default size of 32 MB, further submissions on that
file descriptor are failed with E2BIG.

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
 drivers/scsi/sg.c      | 68 ++++++++++++++++++++++++++++++++++++++----
 include/uapi/scsi/sg.h |  1 +
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 17a733d621c7..b141b0113f96 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -94,7 +94,11 @@ enum sg_rq_state {	/* N.B. sg_rq_state_arr assumes SG_RS_AWAIT_RCV==2 */
 	SG_RS_BUSY,		/* temporary state should rarely be seen */
 };
 
+/* If sum_of(dlen) of a fd exceeds this, write() will yield E2BIG */
+#define SG_TOT_FD_THRESHOLD (32 * 1024 * 1024)
+
 #define SG_TIME_UNIT_MS 0	/* milliseconds */
+/* #define SG_TIME_UNIT_NS 1	   nanoseconds */
 #define SG_DEF_TIME_UNIT SG_TIME_UNIT_MS
 #define SG_DEFAULT_TIMEOUT mult_frac(SG_DEFAULT_TIMEOUT_USER, HZ, USER_HZ)
 #define SG_FD_Q_AT_HEAD 0
@@ -238,6 +242,8 @@ struct sg_fd {		/* holds the state of a file descriptor */
 	atomic_t submitted;	/* number inflight or awaiting receive */
 	atomic_t waiting;	/* number of requests awaiting receive */
 	atomic_t inactives;	/* number of inactive requests */
+	atomic_t sum_fd_dlens;	/* when tot_fd_thresh>0 this is sum_of(dlen) */
+	int tot_fd_thresh;	/* E2BIG if sum_of(dlen) > this, 0: ignore */
 	int sgat_elem_sz;	/* initialized to scatter_elem_sz */
 	int mmap_sz;		/* byte size of previous mmap() call */
 	unsigned long ffd_bm[1];	/* see SG_FFD_* defines above */
@@ -2144,8 +2150,8 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
 {
 	int result = 0;
 	int ret = 0;
-	int n, s_wr_mask, s_rd_mask;
-	u32 or_masks;
+	int n, j, s_wr_mask, s_rd_mask;
+	u32 uv, or_masks;
 	struct sg_device *sdp = sfp->parentdp;
 	struct sg_extended_info *seip;
 	struct sg_extended_info sei;
@@ -2162,6 +2168,19 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
 	}
 	SG_LOG(3, sfp, "%s: wr_mask=0x%x rd_mask=0x%x\n", __func__, s_wr_mask,
 	       s_rd_mask);
+	/* tot_fd_thresh (u32), [rbw] [limit for sum of active cmd dlen_s] */
+	if (or_masks & SG_SEIM_TOT_FD_THRESH) {
+		u32 hold = sfp->tot_fd_thresh;
+
+		if (s_wr_mask & SG_SEIM_TOT_FD_THRESH) {
+			uv = seip->tot_fd_thresh;
+			if (uv > 0 && uv < PAGE_SIZE)
+				uv = PAGE_SIZE;
+			sfp->tot_fd_thresh = uv;
+		}
+		if (s_rd_mask & SG_SEIM_TOT_FD_THRESH)
+			seip->tot_fd_thresh = hold;
+	}
 	/* check all boolean flags for either wr or rd mask set in or_mask */
 	if (or_masks & SG_SEIM_CTL_FLAGS)
 		sg_extended_bool_flags(sfp, seip);
@@ -2176,6 +2195,7 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
 	}
 	if ((s_rd_mask & SG_SEIM_READ_VAL) && (s_wr_mask & SG_SEIM_READ_VAL))
 		sg_extended_read_value(sfp, seip);
+	/* call blk_poll() on this fd's HIPRI requests [raw] */
 	if (or_masks & SG_SEIM_BLK_POLL) {
 		n = 0;
 		if (s_wr_mask & SG_SEIM_BLK_POLL) {
@@ -2188,7 +2208,24 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
 			}
 		}
 		if (s_rd_mask & SG_SEIM_BLK_POLL)
-			seip->num = n;
+			seip->num = n;		/* number completed by LLD */
+	}
+	/* override scatter gather element size [rbw] (def: SG_SCATTER_SZ) */
+	if (or_masks & SG_SEIM_SGAT_ELEM_SZ) {
+		n = sfp->sgat_elem_sz;
+		if (s_wr_mask & SG_SEIM_SGAT_ELEM_SZ) {
+			j = (int)seip->sgat_elem_sz;
+			if (!is_power_of_2(j) || j < (int)PAGE_SIZE) {
+				SG_LOG(1, sfp, "%s: %s not power of 2, %s\n",
+				       __func__, "sgat element size",
+				       "or less than PAGE_SIZE");
+				ret = -EINVAL;
+			} else {
+				sfp->sgat_elem_sz = j;
+			}
+		}
+		if (s_rd_mask & SG_SEIM_SGAT_ELEM_SZ)
+			seip->sgat_elem_sz = n; /* prior value if rw */
 	}
 	/* reserved_sz [raw], since may be reduced by other limits */
 	if (s_wr_mask & SG_SEIM_RESERVED_SIZE) {
@@ -3586,6 +3623,8 @@ sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen)
 	schp->page_order = order;
 	schp->num_sgat = k;
 	schp->buflen = align_sz;
+	if (sfp->tot_fd_thresh > 0)
+		atomic_add(align_sz, &sfp->sum_fd_dlens);
 	return 0;
 err_out:
 	k = pgp - schp->pages;
@@ -3634,6 +3673,14 @@ sg_remove_sgat(struct sg_request *srp)
 		" [rsv]" : ""));
 	sg_remove_sgat_helper(sfp, schp);
 
+	if (sfp->tot_fd_thresh > 0) {
+		/* this is a subtraction, error if it goes negative */
+		if (atomic_add_negative(-schp->buflen, &sfp->sum_fd_dlens)) {
+			SG_LOG(2, sfp, "%s: logic error: this dlen > %s\n",
+			       __func__, "sum_fd_dlens");
+			atomic_set(&sfp->sum_fd_dlens, 0);
+		}
+	}
 	memset(schp, 0, sizeof(*schp));         /* zeros buflen and dlen */
 }
 
@@ -3886,6 +3933,7 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
 	bool second = false;
 	bool has_inactive = false;
 	int l_used_idx;
+	u32 sum_dlen;
 	unsigned long idx, s_idx, end_idx, iflags;
 	struct sg_fd *fp = cwrp->sfp;
 	struct sg_request *r_srp = NULL;	/* request to return */
@@ -3977,6 +4025,13 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, int dxfr_len)
 			SG_LOG(6, fp, "%s: trying 2nd req but cmd_q=false\n",
 			       __func__);
 			goto fini;
+		} else if (fp->tot_fd_thresh > 0) {
+			sum_dlen = atomic_read(&fp->sum_fd_dlens) + dxfr_len;
+			if (sum_dlen > (u32)fp->tot_fd_thresh) {
+				r_srp = ERR_PTR(-E2BIG);
+				SG_LOG(2, fp, "%s: sum_of_dlen(%u) > %s\n",
+				       __func__, sum_dlen, "tot_fd_thresh");
+			}
 		}
 		r_srp = sg_mk_srp_sgat(fp, act_empty, dxfr_len);
 		if (IS_ERR(r_srp)) {
@@ -4071,6 +4126,8 @@ sg_add_sfp(struct sg_device *sdp)
 	__assign_bit(SG_FFD_KEEP_ORPHAN, sfp->ffd_bm, SG_DEF_KEEP_ORPHAN);
 	__assign_bit(SG_FFD_TIME_IN_NS, sfp->ffd_bm, SG_DEF_TIME_UNIT);
 	__assign_bit(SG_FFD_Q_AT_TAIL, sfp->ffd_bm, SG_DEFAULT_Q_AT);
+	sfp->tot_fd_thresh = SG_TOT_FD_THRESHOLD;
+	atomic_set(&sfp->sum_fd_dlens, 0);
 	/*
 	 * SG_SCATTER_SZ initializes scatter_elem_sz but different value may
 	 * be given as driver/module parameter (e.g. 'scatter_elem_sz=8192').
@@ -4547,8 +4604,9 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx)
 		       (int)test_bit(SG_FFD_KEEP_ORPHAN, fp->ffd_bm),
 		       fp->ffd_bm[0]);
 	n += scnprintf(obp + n, len - n,
-		       "   mmap_sz=%d low_used_idx=%d low_await_idx=%d\n",
-		       fp->mmap_sz, READ_ONCE(fp->low_used_idx), READ_ONCE(fp->low_await_idx));
+		       "   mmap_sz=%d low_used_idx=%d low_await_idx=%d sum_fd_dlens=%u\n",
+		       fp->mmap_sz, READ_ONCE(fp->low_used_idx), READ_ONCE(fp->low_await_idx),
+		       atomic_read(&fp->sum_fd_dlens));
 	n += scnprintf(obp + n, len - n,
 		       "   submitted=%d waiting=%d inactives=%d   open thr_id=%d\n",
 		       atomic_read(&fp->submitted),
diff --git a/include/uapi/scsi/sg.h b/include/uapi/scsi/sg.h
index 74f177583fce..532f0f0a56be 100644
--- a/include/uapi/scsi/sg.h
+++ b/include/uapi/scsi/sg.h
@@ -169,6 +169,7 @@ typedef struct sg_req_info {	/* used by SG_GET_REQUEST_TABLE ioctl() */
 #define SG_SEIM_CTL_FLAGS	0x1	/* ctl_flags_mask bits in ctl_flags */
 #define SG_SEIM_READ_VAL	0x2	/* write SG_SEIRV_*, read back value */
 #define SG_SEIM_RESERVED_SIZE	0x4	/* reserved_sz of reserve request */
+#define SG_SEIM_TOT_FD_THRESH	0x8	/* tot_fd_thresh of data buffers */
 #define SG_SEIM_MINOR_INDEX	0x10	/* sg device minor index number */
 #define SG_SEIM_SGAT_ELEM_SZ	0x80	/* sgat element size (>= PAGE_SIZE) */
 #define SG_SEIM_BLK_POLL	0x100	/* call blk_poll, uses 'num' field */
-- 
2.25.1


  parent reply	other threads:[~2021-04-27 21:59 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-27 21:56 [PATCH v18 00/83] sg: add v4 interface, request sharing Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 00/45] sg: add v4 interface Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 01/83] sg: move functions around Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 02/83] sg: remove typedefs, type+formatting cleanup Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 03/83] sg: sg_log and is_enabled Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 04/83] sg: rework sg_poll(), minor changes Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 05/83] sg: bitops in sg_device Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 06/83] sg: make open count an atomic Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 07/83] sg: move header to uapi section Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 08/83] sg: speed sg_poll and sg_get_num_waiting Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 09/83] sg: sg_allow_if_err_recovery and renames Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 10/83] sg: improve naming Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 11/83] sg: change rwlock to spinlock Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 12/83] sg: ioctl handling Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 13/83] sg: split sg_read Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 14/83] sg: sg_common_write add structure for arguments Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 15/83] sg: rework sg_vma_fault Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 16/83] sg: rework sg_mmap Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 17/83] sg: replace sg_allow_access Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 18/83] sg: rework scatter gather handling Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 19/83] sg: introduce request state machine Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 20/83] sg: sg_find_srp_by_id Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 21/83] sg: sg_fill_request_element Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 22/83] sg: printk change %p to %pK Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 23/83] sg: xarray for fds in device Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 24/83] sg: xarray for reqs in fd Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 25/83] sg: replace rq array with xarray Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 26/83] sg: sense buffer rework Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 27/83] sg: add sg v4 interface support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 28/83] sg: rework debug info Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 29/83] sg: add 8 byte SCSI LUN to sg_scsi_id Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 30/83] sg: expand sg_comm_wr_t Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 31/83] sg: add sg_iosubmit_v3 and sg_ioreceive_v3 ioctls Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 32/83] sg: add some __must_hold macros Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 33/83] sg: move procfs objects to avoid forward decls Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 34/83] sg: protect multiple receivers Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 35/83] sg: first debugfs support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 36/83] sg: rework mmap support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 37/83] sg: defang allow_dio Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 38/83] sg: warn v3 write system call users Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 39/83] sg: add mmap_sz tracking Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 40/83] sg: remove rcv_done request state Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 41/83] sg: track lowest inactive and await indexes Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 42/83] sg: remove unit attention check for device changed Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 43/83] sg: no_dxfer: move to/from kernel buffers Douglas Gilbert
2021-04-28  7:07   ` Hannes Reinecke
2021-04-27 21:56 ` [PATCH v18 44/83] sg: add blk_poll support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 45/83] sg: bump version to 4.0.12 Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 46/83] sg: add sg_ioabort ioctl Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 47/83] sg: add sg_set_get_extended ioctl Douglas Gilbert
2021-04-27 21:56 ` Douglas Gilbert [this message]
2021-04-27 21:56 ` [PATCH v18 49/83] sg: tag and more_async Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 50/83] sg: add fd sharing , change, unshare Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 51/83] sg: add shared requests Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 52/83] sg: add multiple request support Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 53/83] sg: rename some mrq variables Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 54/83] sg: unlikely likely Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 55/83] sg: mrq abort Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 56/83] sg: reduce atomic operations Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 57/83] sg: add excl_wait flag Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 58/83] sg: tweak sg_find_sfp_by_fd() Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 59/83] sg: add snap_dev flag and snapped in debugfs Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 60/83] sg: compress usercontext to uc Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 61/83] sg: optionally output sg_request.frq_bm flags Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 62/83] sg: work on sg_mrq_sanity() Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 63/83] sg: shared variable blocking Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 64/83] sg: device timestamp Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 65/83] sg: condition met is not an error Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 66/83] sg: split sg_setup_req Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 67/83] sg: finish after read-side request Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 68/83] sg: keep share and dout offset flags Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 69/83] sg: add dlen to sg_comm_wr_t Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 70/83] sg: make use of struct sg_mrq_hold Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 71/83] sg: add mmap IO option for mrq metadata Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 72/83] sg: add eventfd support Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 73/83] sg: table of error number explanations Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 74/83] sg: add ordered write flag Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 75/83] sg: expand source line length to 100 characters Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 76/83] sg: add no_attach_msg parameter Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 77/83] sg: add SGV4_FLAG_REC_ORDER Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 78/83] sg: max to read for mrq sg_ioreceive Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 79/83] sg: mrq: if uniform svb then re-use bio_s Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 80/83] sg: expand bvec usage; " Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 81/83] sg: blk_poll/hipri work for mrq Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 82/83] sg: pollable and non-pollable requests Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 83/83] sg: bump version to 4.0.47 Douglas Gilbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210427215733.417746-50-dgilbert@interlog.com \
    --to=dgilbert@interlog.com \
    --cc=hare@suse.de \
    --cc=jejb@linux.vnet.ibm.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox