From: Douglas Gilbert <dgilbert@interlog.com>
To: linux-scsi@vger.kernel.org
Cc: martin.petersen@oracle.com, jejb@linux.vnet.ibm.com, hare@suse.de
Subject: [PATCH v18 63/83] sg: shared variable blocking
Date: Tue, 27 Apr 2021 17:57:13 -0400 [thread overview]
Message-ID: <20210427215733.417746-65-dgilbert@interlog.com> (raw)
In-Reply-To: <20210427215733.417746-1-dgilbert@interlog.com>
Increase the number of reserve requests per file descriptor
from 1 to SG_MAX_RSV_REQS. This is used to implement a new
type of variable blocking multiple requests that processes
request shares. This is done in a partially asynchronous
fashion.
For example up to SG_MAX_RSV_REQS read-side requests are
submitted. Then the responses for these read-side requests
are processed (which may include interruptible waits). After
this the matching write-side requests are issued and their
responses are processed.
The multiple request array presented for shared variable
blocking should be a sequence of read-side/write-side
requests. The only other commands that are accepted are
those that move no (user) data. TEST UNIT READY and
SYNCHRONIZE CACHE are examples of acceptable non-data commands.
Rename sg_remove_sgat() to the more accurate sg_remove_srp().
Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
drivers/scsi/sg.c | 1949 +++++++++++++++++++++++++++-------------
include/uapi/scsi/sg.h | 1 +
2 files changed, 1328 insertions(+), 622 deletions(-)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index e43bb1673adc..c401047cae70 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -120,6 +120,8 @@ enum sg_shr_var {
#define SG_DEFAULT_Q_AT SG_FD_Q_AT_HEAD /* for backward compatibility */
#define SG_FL_MMAP_DIRECT (SG_FLAG_MMAP_IO | SG_FLAG_DIRECT_IO)
+#define SG_MAX_RSV_REQS 8
+
/* Only take lower 4 bits of driver byte, all host byte and sense byte */
#define SG_ML_RESULT_MSK 0x0fff00ff /* mid-level's 32 bit result value */
@@ -140,6 +142,7 @@ enum sg_shr_var {
#define SG_FRQ_COUNT_ACTIVE 8 /* sfp->submitted + waiting active */
#define SG_FRQ_ISSUED 9 /* blk_execute_rq_nowait() finished */
#define SG_FRQ_POLL_SLEPT 10 /* stop re-entry of hybrid_sleep() */
+#define SG_FRQ_RESERVED 11 /* marks a reserved request */
/* Bit positions (flags) for sg_fd::ffd_bm bitmask follow */
#define SG_FFD_FORCE_PACKID 0 /* receive only given pack_id/tag */
@@ -155,6 +158,8 @@ enum sg_shr_var {
#define SG_FFD_MORE_ASYNC 10 /* yield EBUSY more often */
#define SG_FFD_MRQ_ABORT 11 /* SG_IOABORT + FLAG_MULTIPLE_REQS */
#define SG_FFD_EXCL_WAITQ 12 /* append _exclusive to wait_event */
+#define SG_FFD_SVB_ACTIVE 13 /* shared variable blocking active */
+#define SG_FFD_RESHARE 14 /* reshare limits to single rsv req */
/* Bit positions (flags) for sg_device::fdev_bm bitmask follow */
#define SG_FDEV_EXCLUDE 0 /* have fd open with O_EXCL */
@@ -261,6 +266,7 @@ struct sg_request { /* active SCSI command or inactive request */
u8 *sense_bp; /* mempool alloc-ed sense buffer, as needed */
struct sg_fd *parentfp; /* pointer to owning fd, even when on fl */
struct request *rqq; /* released in sg_rq_end_io(), bio kept */
+ struct sg_request *sh_srp; /* read-side's write srp (or vice versa) */
struct bio *bio; /* kept until this req -->SG_RQ_INACTIVE */
struct execute_work ew_orph; /* harvest orphan request */
};
@@ -286,11 +292,10 @@ struct sg_fd { /* holds the state of a file descriptor */
u8 next_cmd_len; /* 0: automatic, >0: use on next write() */
unsigned long ffd_bm[1]; /* see SG_FFD_* defines above */
struct file *filp; /* my identity when sharing */
- struct sg_request *rsv_srp;/* one reserve request per fd */
- struct sg_request *ws_srp; /* when rsv SG_SHR_RS_RQ, ptr to write-side */
struct sg_fd __rcu *share_sfp;/* fd share cross-references, else NULL */
struct fasync_struct *async_qp; /* used by asynchronous notification */
struct xarray srp_arr; /* xarray of sg_request object pointers */
+ struct sg_request *rsv_arr[SG_MAX_RSV_REQS];
struct kref f_ref;
struct execute_work ew_fd; /* harvest all fd resources and lists */
};
@@ -314,6 +319,7 @@ struct sg_device { /* holds the state of each scsi generic device */
struct sg_comm_wr_t { /* arguments to sg_common_write() */
int timeout;
int cmd_len;
+ int rsv_idx; /* wanted rsv_arr index, def: -1 (anyone) */
unsigned long frq_bm[1]; /* see SG_FRQ_* defines above */
union { /* selector is frq_bm.SG_FRQ_IS_V4I */
struct sg_io_hdr *h3p;
@@ -324,6 +330,20 @@ struct sg_comm_wr_t { /* arguments to sg_common_write() */
const u8 *cmdp;
};
+struct sg_mrq_hold { /* for passing context between mrq functions */
+ bool blocking;
+ bool chk_abort;
+ bool immed;
+ bool stop_if;
+ int id_of_mrq;
+ int s_res; /* secondary error: some-good-then-error */
+ u32 cdb_mxlen; /* cdb length in cdb_ap, actual be may less */
+ u32 tot_reqs; /* total number of requests and cdb_s */
+ struct sg_comm_wr_t *cwrp;
+ u8 *cdb_ap; /* array of commands */
+ struct sg_io_v4 *a_hds; /* array of request to execute */
+};
+
/* tasklet or soft irq callback */
static void sg_rq_end_io(struct request *rq, blk_status_t status);
/* Declarations of other static functions used before they are defined */
@@ -345,7 +365,7 @@ static int sg_receive_v4(struct sg_fd *sfp, struct sg_request *srp,
void __user *p, struct sg_io_v4 *h4p);
static int sg_read_append(struct sg_request *srp, void __user *outp,
int num_xfer);
-static void sg_remove_sgat(struct sg_request *srp);
+static void sg_remove_srp(struct sg_request *srp);
static struct sg_fd *sg_add_sfp(struct sg_device *sdp, struct file *filp);
static void sg_remove_sfp(struct kref *);
static void sg_remove_sfp_share(struct sg_fd *sfp, bool is_rd_side);
@@ -592,8 +612,7 @@ sg_open(struct inode *inode, struct file *filp)
res = 0;
sg_put:
- kref_put(&sdp->d_ref, sg_device_destroy);
- /* if success, sdp->d_ref is incremented twice, decremented once */
+ kref_put(&sdp->d_ref, sg_device_destroy); /* get: sg_get_dev() */
return res;
out_undo:
@@ -653,7 +672,7 @@ sg_release(struct inode *inode, struct file *filp)
if (unlikely(xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_FREE))) {
SG_LOG(1, sfp, "%s: sfp already erased!!!\n", __func__);
- return 0; /* yell out but can't fail */
+ return 0; /* yell out but can't fail */
}
mutex_lock(&sdp->open_rel_lock);
@@ -667,8 +686,7 @@ sg_release(struct inode *inode, struct file *filp)
sg_fd_is_shared(sfp))
sg_remove_sfp_share(sfp, xa_get_mark(&sdp->sfp_arr, sfp->idx,
SG_XA_FD_RS_SHARE));
- kref_put(&sfp->f_ref, sg_remove_sfp);
-
+ kref_put(&sfp->f_ref, sg_remove_sfp); /* init=1: sg_add_sfp() */
/*
* Possibly many open()s waiting on exclude clearing, start many;
* only open(O_EXCL)'s wait when open_cnt<2 and only start one.
@@ -831,6 +849,7 @@ sg_write(struct file *filp, const char __user *p, size_t count, loff_t *ppos)
WRITE_ONCE(cwr.frq_bm[0], 0);
cwr.timeout = sfp->timeout;
cwr.cmd_len = cmd_size;
+ cwr.rsv_idx = -1;
cwr.sfp = sfp;
cwr.u_cmdp = p;
cwr.cmdp = NULL;
@@ -841,11 +860,15 @@ sg_write(struct file *filp, const char __user *p, size_t count, loff_t *ppos)
static inline int
sg_chk_mmap(struct sg_fd *sfp, int rq_flags, int len)
{
+ struct sg_request *rsv_srp = sfp->rsv_arr[0];
+
if (unlikely(sfp->mmap_sz == 0))
return -EBADFD;
if (unlikely(atomic_read(&sfp->submitted) > 0))
return -EBUSY; /* already active requests on fd */
- if (unlikely(len > sfp->rsv_srp->sgat_h.buflen))
+ if (IS_ERR_OR_NULL(rsv_srp))
+ return -EPROTO; /* first element always a reserve req */
+ if (unlikely(len > rsv_srp->sgatp->buflen))
return -ENOMEM; /* MMAP_IO size must fit in reserve */
if (unlikely(len > sfp->mmap_sz))
return -ENOMEM; /* MMAP_IO size can't exceed mmap() size */
@@ -900,6 +923,7 @@ sg_submit_v3(struct sg_fd *sfp, struct sg_io_hdr *hp, bool sync,
cwr.h3p = hp;
cwr.timeout = min_t(unsigned long, ul_timeout, INT_MAX);
cwr.cmd_len = hp->cmd_len;
+ cwr.rsv_idx = -1;
cwr.sfp = sfp;
cwr.u_cmdp = hp->cmdp;
cwr.cmdp = NULL;
@@ -946,27 +970,33 @@ sg_mrq_arr_flush(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds, u32 tot_reqs,
static int
sg_mrq_1complet(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds,
- struct sg_fd *w_sfp, int tot_reqs, struct sg_request *srp)
+ struct sg_fd *do_on_sfp, int tot_reqs, struct sg_request *srp)
{
int s_res, indx;
struct sg_io_v4 *hp;
- SG_LOG(3, w_sfp, "%s: start, tot_reqs=%d\n", __func__, tot_reqs);
if (unlikely(!srp))
return -EPROTO;
indx = srp->s_hdr4.mrq_ind;
+ if (unlikely(srp->parentfp != do_on_sfp)) {
+ SG_LOG(1, do_on_sfp, "%s: mrq_ind=%d, sfp out-of-sync\n",
+ __func__, indx);
+ return -EPROTO;
+ }
+ SG_LOG(3, do_on_sfp, "%s: mrq_ind=%d, pack_id=%d\n", __func__, indx,
+ srp->pack_id);
if (unlikely(indx < 0 || indx >= tot_reqs))
return -EPROTO;
hp = a_hds + indx;
- s_res = sg_receive_v4(w_sfp, srp, NULL, hp);
+ s_res = sg_receive_v4(do_on_sfp, srp, NULL, hp);
if (unlikely(s_res == -EFAULT))
return s_res;
hp->info |= SG_INFO_MRQ_FINI;
- if (w_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
+ if (do_on_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
s_res = sg_mrq_arr_flush(cop, a_hds, tot_reqs, s_res);
if (unlikely(s_res)) /* can only be -EFAULT */
return s_res;
- kill_fasync(&w_sfp->async_qp, SIGPOLL, POLL_IN);
+ kill_fasync(&do_on_sfp->async_qp, SIGPOLL, POLL_IN);
}
return 0;
}
@@ -992,36 +1022,47 @@ sg_mrq_complets(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds,
struct sg_fd *sfp, struct sg_fd *sec_sfp, int tot_reqs,
int mreqs, int sec_reqs)
{
- int res;
- int sum_inflight = mreqs + sec_reqs; /* may be < tot_reqs */
+ int res = 0;
+ int rres;
struct sg_request *srp;
SG_LOG(3, sfp, "%s: mreqs=%d, sec_reqs=%d\n", __func__, mreqs,
sec_reqs);
- for ( ; sum_inflight > 0; --sum_inflight, ++cop->info) {
- srp = NULL;
- if (mreqs > 0 && sg_mrq_get_ready_srp(sfp, &srp)) {
+ while (mreqs + sec_reqs > 0) {
+ while (mreqs > 0 && sg_mrq_get_ready_srp(sfp, &srp)) {
if (IS_ERR(srp)) { /* -ENODATA: no mrqs here */
- mreqs = 0;
- } else {
- --mreqs;
- res = sg_mrq_1complet(cop, a_hds, sfp,
- tot_reqs, srp);
- if (unlikely(res))
- return res;
+ if (PTR_ERR(srp) == -ENODATA)
+ break;
+ res = PTR_ERR(srp);
+ break;
}
- } else if (sec_reqs > 0 &&
- sg_mrq_get_ready_srp(sec_sfp, &srp)) {
+ --mreqs;
+ res = sg_mrq_1complet(cop, a_hds, sfp, tot_reqs, srp);
+ if (unlikely(res))
+ return res;
+ ++cop->info;
+ if (cop->din_xfer_len > 0)
+ --cop->din_resid;
+ }
+ while (sec_reqs > 0 && sg_mrq_get_ready_srp(sec_sfp, &srp)) {
if (IS_ERR(srp)) {
- sec_reqs = 0;
- } else {
- --sec_reqs;
- res = sg_mrq_1complet(cop, a_hds, sec_sfp,
- tot_reqs, srp);
- if (unlikely(res))
- return res;
+ if (PTR_ERR(srp) == -ENODATA)
+ break;
+ res = PTR_ERR(srp);
+ break;
}
- } else if (mreqs > 0) {
+ --sec_reqs;
+ rres = sg_mrq_1complet(cop, a_hds, sec_sfp, tot_reqs,
+ srp);
+ if (unlikely(rres))
+ return rres;
+ ++cop->info;
+ if (cop->din_xfer_len > 0)
+ --cop->din_resid;
+ }
+ if (res)
+ break;
+ if (mreqs > 0) {
res = sg_wait_mrq_event(sfp, &srp);
if (unlikely(res))
return res; /* signal --> -ERESTARTSYS */
@@ -1033,8 +1074,12 @@ sg_mrq_complets(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds,
tot_reqs, srp);
if (unlikely(res))
return res;
+ ++cop->info;
+ if (cop->din_xfer_len > 0)
+ --cop->din_resid;
}
- } else if (sec_reqs > 0) {
+ }
+ if (sec_reqs > 0) {
res = sg_wait_mrq_event(sec_sfp, &srp);
if (unlikely(res))
return res; /* signal --> -ERESTARTSYS */
@@ -1046,14 +1091,13 @@ sg_mrq_complets(struct sg_io_v4 *cop, struct sg_io_v4 *a_hds,
tot_reqs, srp);
if (unlikely(res))
return res;
+ ++cop->info;
+ if (cop->din_xfer_len > 0)
+ --cop->din_resid;
}
- } else { /* expect one of the above conditions to be true */
- return -EPROTO;
}
- if (cop->din_xfer_len > 0)
- --cop->din_resid;
- }
- return 0;
+ } /* end of outer while loop (while requests still inflight) */
+ return res;
}
static int
@@ -1101,7 +1145,6 @@ sg_mrq_sanity(struct sg_device *sdp, struct sg_io_v4 *cop,
rip, k, "no IMMED with COMPLETE_B4");
return -ERANGE;
}
- /* N.B. SGV4_FLAG_SIG_ON_OTHER is allowed */
}
if (sg_fd_is_shared(sfp)) {
if (!share_on_oth && share)
@@ -1135,6 +1178,422 @@ sg_mrq_sanity(struct sg_device *sdp, struct sg_io_v4 *cop,
return 0;
}
+static bool
+sg_mrq_svb_chk(struct sg_io_v4 *a_hds, u32 tot_reqs)
+{
+ bool expect_rd;
+ int k;
+ u32 flags;
+ struct sg_io_v4 *hp;
+
+ /* expect read-write pairs, all with SGV4_FLAG_NO_DXFER set */
+ for (k = 0, hp = a_hds, expect_rd = true; k < tot_reqs; ++k, ++hp) {
+ flags = hp->flags;
+ if (flags & (SGV4_FLAG_COMPLETE_B4))
+ return false;
+ if (expect_rd) {
+ if (hp->dout_xfer_len > 0)
+ return false;
+ if (hp->din_xfer_len > 0) {
+ if (!(flags & SGV4_FLAG_SHARE))
+ return false;
+ if (flags & SGV4_FLAG_DO_ON_OTHER)
+ return false;
+ expect_rd = false;
+ }
+ /* allowing commands with no dxfer */
+ } else { /* checking write side */
+ if (hp->dout_xfer_len > 0) {
+ if (~flags &
+ (SGV4_FLAG_NO_DXFER | SGV4_FLAG_SHARE |
+ SGV4_FLAG_DO_ON_OTHER))
+ return false;
+ expect_rd = true;
+ }
+ if (hp->din_xfer_len > 0)
+ return false;
+ }
+ }
+ if (!expect_rd)
+ return false;
+ return true;
+}
+
+static struct sg_request *
+sg_mrq_submit(struct sg_fd *rq_sfp, struct sg_mrq_hold *mhp, int pos_hdr,
+ int rsv_idx)
+{
+ unsigned long ul_timeout;
+ struct sg_comm_wr_t r_cwr;
+ struct sg_comm_wr_t *r_cwrp = &r_cwr;
+ struct sg_io_v4 *hp = mhp->a_hds + pos_hdr;
+
+ if (mhp->cdb_ap) { /* already have array of cdbs */
+ r_cwrp->cmdp = mhp->cdb_ap + (pos_hdr * mhp->cdb_mxlen);
+ r_cwrp->u_cmdp = NULL;
+ } else { /* fetch each cdb from user space */
+ r_cwrp->cmdp = NULL;
+ r_cwrp->u_cmdp = cuptr64(hp->request);
+ }
+ r_cwrp->cmd_len = hp->request_len;
+ r_cwrp->rsv_idx = rsv_idx;
+ ul_timeout = msecs_to_jiffies(hp->timeout);
+ r_cwrp->frq_bm[0] = 0;
+ __assign_bit(SG_FRQ_SYNC_INVOC, r_cwrp->frq_bm,
+ (int)mhp->blocking);
+ __set_bit(SG_FRQ_IS_V4I, r_cwrp->frq_bm);
+ r_cwrp->h4p = hp;
+ r_cwrp->timeout = min_t(unsigned long, ul_timeout, INT_MAX);
+ r_cwrp->sfp = rq_sfp;
+ return sg_common_write(r_cwrp);
+}
+
+/*
+ * Processes most mrq requests apart from those from "shared variable
+ * blocking" (svb) method which is processed in sg_process_svb_mrq().
+ */
+static int
+sg_process_most_mrq(struct sg_fd *fp, struct sg_fd *o_sfp,
+ struct sg_mrq_hold *mhp)
+{
+ int flags, j;
+ int num_subm = 0;
+ int num_cmpl = 0;
+ int res = 0;
+ int other_fp_sent = 0;
+ int this_fp_sent = 0;
+ const int shr_complet_b4 = SGV4_FLAG_SHARE | SGV4_FLAG_COMPLETE_B4;
+ struct sg_fd *rq_sfp;
+ struct sg_io_v4 *cop = mhp->cwrp->h4p;
+ struct sg_io_v4 *hp; /* ptr to request object in a_hds */
+ struct sg_request *srp;
+
+ SG_LOG(3, fp, "%s: id_of_mrq=%d, tot_reqs=%d, enter\n", __func__,
+ mhp->id_of_mrq, mhp->tot_reqs);
+ /* Dispatch (submit) requests and optionally wait for response */
+ for (hp = mhp->a_hds, j = 0; num_subm < mhp->tot_reqs; ++hp, ++j) {
+ if (mhp->chk_abort && test_and_clear_bit(SG_FFD_MRQ_ABORT,
+ fp->ffd_bm)) {
+ SG_LOG(1, fp, "%s: id_of_mrq=%d aborting at ind=%d\n",
+ __func__, mhp->id_of_mrq, num_subm);
+ break; /* N.B. rest not submitted */
+ }
+ flags = hp->flags;
+ rq_sfp = (flags & SGV4_FLAG_DO_ON_OTHER) ? o_sfp : fp;
+ srp = sg_mrq_submit(rq_sfp, mhp, j, -1);
+ if (IS_ERR(srp)) {
+ mhp->s_res = PTR_ERR(srp);
+ break;
+ }
+ srp->s_hdr4.mrq_ind = num_subm++;
+ if (mhp->chk_abort)
+ atomic_set(&srp->s_hdr4.pack_id_of_mrq,
+ mhp->id_of_mrq);
+ if (mhp->immed ||
+ (!(mhp->blocking || (flags & shr_complet_b4)))) {
+ if (fp == rq_sfp)
+ ++this_fp_sent;
+ else
+ ++other_fp_sent;
+ continue; /* defer completion until all submitted */
+ }
+ mhp->s_res = sg_wait_event_srp(rq_sfp, NULL, hp, srp);
+ if (unlikely(mhp->s_res)) {
+ if (mhp->s_res == -ERESTARTSYS)
+ return mhp->s_res;
+ break;
+ }
+ ++num_cmpl;
+ hp->info |= SG_INFO_MRQ_FINI;
+ if (mhp->stop_if && (hp->driver_status ||
+ hp->transport_status ||
+ hp->device_status)) {
+ SG_LOG(2, fp, "%s: %s=0x%x/0x%x/0x%x] cause exit\n",
+ __func__, "STOP_IF and status [drv/tran/scsi",
+ hp->driver_status, hp->transport_status,
+ hp->device_status);
+ break; /* cop->driver_status <-- 0 in this case */
+ }
+ if (rq_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
+ res = sg_mrq_arr_flush(cop, mhp->a_hds, mhp->tot_reqs,
+ mhp->s_res);
+ if (unlikely(res))
+ break;
+ kill_fasync(&rq_sfp->async_qp, SIGPOLL, POLL_IN);
+ }
+ } /* end of dispatch request and optionally wait response loop */
+ cop->dout_resid = mhp->tot_reqs - num_subm;
+ cop->info = mhp->immed ? num_subm : num_cmpl;
+ if (cop->din_xfer_len > 0) {
+ cop->din_resid = mhp->tot_reqs - num_cmpl;
+ cop->spare_out = -mhp->s_res;
+ }
+
+ if (mhp->immed)
+ return res;
+ if (likely(res == 0 && (this_fp_sent + other_fp_sent) > 0)) {
+ mhp->s_res = sg_mrq_complets(cop, mhp->a_hds, fp, o_sfp,
+ mhp->tot_reqs, this_fp_sent,
+ other_fp_sent);
+ if (unlikely(mhp->s_res == -EFAULT ||
+ mhp->s_res == -ERESTARTSYS))
+ res = mhp->s_res; /* this may leave orphans */
+ }
+ if (mhp->id_of_mrq) /* can no longer do a mrq abort */
+ atomic_set(&fp->mrq_id_abort, 0);
+ return res;
+}
+
+static int
+sg_find_srp_idx(struct sg_fd *sfp, const struct sg_request *srp)
+{
+ int k;
+ struct sg_request **rapp = sfp->rsv_arr;
+
+ for (k = 0; k < SG_MAX_RSV_REQS; ++k, ++rapp) {
+ if (*rapp == srp)
+ return k;
+ }
+ return -1;
+}
+
+/*
+ * Processes shared variable blocking. First inner loop submits a chunk of
+ * requests (some read-side, some non-data) but defers any write-side requests. The
+ * second inner loop processes the completions from the first inner loop, plus
+ * for any completed read-side request it submits the paired write-side request. The
+ * second inner loop also waits for the completions of those write-side requests.
+ * The outer loop then moves onto the next chunk, working its way through
+ * the multiple requests. The user sees a blocking command, but the chunks
+ * are run in parallel apart from read-write ordering requirement.
+ * N.B. Only one svb mrq permitted per file descriptor at a time.
+ */
+static int
+sg_process_svb_mrq(struct sg_fd *fp, struct sg_fd *o_sfp,
+ struct sg_mrq_hold *mhp)
+{
+ bool aborted = false;
+ bool chk_oth_first;
+ int k, j, i, m, rcv_before, idx, ws_pos, sent;
+ int this_fp_sent, other_fp_sent;
+ int num_subm = 0;
+ int num_cmpl = 0;
+ int res = 0;
+ struct sg_fd *rq_sfp;
+ struct sg_io_v4 *cop = mhp->cwrp->h4p;
+ struct sg_io_v4 *hp; /* ptr to request object in a_hds */
+ struct sg_request *srp;
+ struct sg_request *rs_srp;
+ struct sg_io_v4 *a_hds = mhp->a_hds;
+ int ws_pos_a[SG_MAX_RSV_REQS]; /* write-side hdr pos within a_hds */
+ struct sg_request *rs_srp_a[SG_MAX_RSV_REQS];
+
+ SG_LOG(3, fp, "%s: id_of_mrq=%d, tot_reqs=%d, enter\n", __func__,
+ mhp->id_of_mrq, mhp->tot_reqs);
+
+ /* work through mrq array, SG_MAX_RSV_REQS read-side requests at a time */
+ for (hp = a_hds, j = 0; j < mhp->tot_reqs; ) {
+ this_fp_sent = 0;
+ other_fp_sent = 0;
+ chk_oth_first = false;
+ for (k = 0; k < SG_MAX_RSV_REQS && j < mhp->tot_reqs;
+ ++hp, ++j) {
+ if (mhp->chk_abort &&
+ test_and_clear_bit(SG_FFD_MRQ_ABORT, fp->ffd_bm)) {
+ SG_LOG(1, fp,
+ "%s: id_of_mrq=%d aborting at pos=%d\n",
+ __func__, mhp->id_of_mrq, num_subm);
+ aborted = true;
+ /*
+ * after mrq abort detected, complete those
+ * already submitted, but don't submit any more
+ */
+ }
+ if (aborted)
+ break;
+ if (hp->flags & SGV4_FLAG_DO_ON_OTHER) {
+ if (hp->dout_xfer_len > 0) {
+ /* need to await read-side completion */
+ ws_pos_a[k] = j;
+ ++k;
+ continue; /* deferred to next loop */
+ }
+ chk_oth_first = true;
+ SG_LOG(6, o_sfp,
+ "%s: subm-nodat p_id=%d on write-side\n",
+ __func__, (int)hp->request_extra);
+ rq_sfp = o_sfp;
+ } else {
+ SG_LOG(6, fp, "%s: submit p_id=%d on read-side\n",
+ __func__, (int)hp->request_extra);
+ rq_sfp = fp;
+ }
+ srp = sg_mrq_submit(rq_sfp, mhp, j, -1);
+ if (IS_ERR(srp)) {
+ mhp->s_res = PTR_ERR(srp);
+ res = mhp->s_res; /* don't loop again */
+ SG_LOG(1, rq_sfp, "%s: mrq_submit()->%d\n",
+ __func__, res);
+ break;
+ }
+ num_subm++;
+ if (hp->din_xfer_len > 0)
+ rs_srp_a[k] = srp;
+ srp->s_hdr4.mrq_ind = j;
+ if (mhp->chk_abort)
+ atomic_set(&srp->s_hdr4.pack_id_of_mrq,
+ mhp->id_of_mrq);
+ if (fp == rq_sfp)
+ ++this_fp_sent;
+ else
+ ++other_fp_sent;
+ }
+ sent = this_fp_sent + other_fp_sent;
+ if (sent <= 0)
+ break;
+ /*
+ * We have just submitted a fixed number read-side reqs and any
+ * others (that don't move data). Now we pick up their
+ * responses. Any responses that were read-side requests have
+ * their paired write-side submitted. Finally we wait for those
+ * paired write-side to complete.
+ */
+ rcv_before = cop->info;
+ for (i = 0; i < sent; ++i) { /* now process responses */
+ if (other_fp_sent > 0 &&
+ sg_mrq_get_ready_srp(o_sfp, &srp)) {
+other_found:
+ if (IS_ERR(srp)) {
+ res = PTR_ERR(srp);
+ break;
+ }
+ --other_fp_sent;
+ res = sg_mrq_1complet(cop, a_hds, o_sfp,
+ mhp->tot_reqs, srp);
+ if (unlikely(res))
+ return res;
+ ++cop->info;
+ if (cop->din_xfer_len > 0)
+ --cop->din_resid;
+ continue; /* do available submits first */
+ }
+ if (this_fp_sent > 0 &&
+ sg_mrq_get_ready_srp(fp, &srp)) {
+this_found:
+ if (IS_ERR(srp)) {
+ res = PTR_ERR(srp);
+ break;
+ }
+ --this_fp_sent;
+ res = sg_mrq_1complet(cop, a_hds, fp,
+ mhp->tot_reqs, srp);
+ if (unlikely(res))
+ return res;
+ ++cop->info;
+ if (cop->din_xfer_len > 0)
+ --cop->din_resid;
+ if (srp->s_hdr4.dir != SG_DXFER_FROM_DEV)
+ continue;
+ /* read-side req completed, submit its write-side */
+ rs_srp = srp;
+ for (m = 0; m < k; ++m) {
+ if (rs_srp == rs_srp_a[m])
+ break;
+ }
+ if (m >= k) {
+ SG_LOG(1, rs_srp->parentfp,
+ "%s: m >= %d, pack_id=%d\n",
+ __func__, k, rs_srp->pack_id);
+ res = -EPROTO;
+ break;
+ }
+ ws_pos = ws_pos_a[m];
+ idx = sg_find_srp_idx(fp, rs_srp);
+ if (idx < 0) {
+ SG_LOG(1, rs_srp->parentfp,
+ "%s: idx < 0\n", __func__);
+ res = -EPROTO;
+ break;
+ }
+ SG_LOG(6, o_sfp,
+ "%s: submit ws_pos=%d, rs_idx=%d\n",
+ __func__, ws_pos, idx);
+ srp = sg_mrq_submit(o_sfp, mhp, ws_pos, idx);
+ if (IS_ERR(srp)) {
+ mhp->s_res = PTR_ERR(srp);
+ res = mhp->s_res;
+ SG_LOG(1, o_sfp,
+ "%s: mrq_submit(oth)->%d\n",
+ __func__, res);
+ break;
+ }
+ ++num_subm;
+ ++other_fp_sent;
+ ++sent;
+ srp->s_hdr4.mrq_ind = ws_pos;
+ if (mhp->chk_abort)
+ atomic_set(&srp->s_hdr4.pack_id_of_mrq,
+ mhp->id_of_mrq);
+ continue; /* do available submits first */
+ }
+ /* waits maybe interrupted by signals (-ERESTARTSYS) */
+ if (chk_oth_first)
+ goto oth_first;
+this_second:
+ if (this_fp_sent > 0) {
+ res = sg_wait_mrq_event(fp, &srp);
+ if (unlikely(res))
+ return res;
+ goto this_found;
+ }
+ if (chk_oth_first)
+ continue;
+oth_first:
+ if (other_fp_sent > 0) {
+ res = sg_wait_mrq_event(o_sfp, &srp);
+ if (unlikely(res))
+ return res;
+ goto other_found;
+ }
+ if (chk_oth_first)
+ goto this_second;
+ } /* end of response/write_side_submit/write_side_response loop */
+ if (unlikely(mhp->s_res == -EFAULT ||
+ mhp->s_res == -ERESTARTSYS))
+ res = mhp->s_res; /* this may leave orphans */
+ num_cmpl += (cop->info - rcv_before);
+ if (res)
+ break;
+ if (aborted)
+ break;
+ } /* end of outer for loop */
+
+ cop->dout_resid = mhp->tot_reqs - num_subm;
+ if (cop->din_xfer_len > 0) {
+ cop->din_resid = mhp->tot_reqs - num_cmpl;
+ cop->spare_out = -mhp->s_res;
+ }
+ if (mhp->id_of_mrq) /* can no longer do a mrq abort */
+ atomic_set(&fp->mrq_id_abort, 0);
+ return res;
+}
+
+#if IS_ENABLED(SG_LOG_ACTIVE)
+static const char *
+sg_mrq_name(bool blocking, u32 flags)
+{
+ if (!(flags & SGV4_FLAG_MULTIPLE_REQS))
+ return "_not_ multiple requests control object";
+ if (blocking)
+ return "ordered blocking";
+ if (flags & SGV4_FLAG_IMMED)
+ return "submit or full non-blocking";
+ if (flags & SGV4_FLAG_SHARE)
+ return "shared variable blocking";
+ return "variable blocking";
+}
+#endif
+
/*
* Implements the multiple request functionality. When 'blocking' is true
* invocation was via ioctl(SG_IO), otherwise it was via ioctl(SG_IOSUBMIT).
@@ -1145,47 +1604,51 @@ sg_mrq_sanity(struct sg_device *sdp, struct sg_io_v4 *cop,
static int
sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking)
{
- bool chk_abort = false;
- bool set_this, set_other, immed, stop_if, f_non_block;
+ bool f_non_block, share_on_oth;
int res = 0;
- int s_res = 0; /* for secondary error: some-good-then-error, case */
- int other_fp_sent = 0;
- int this_fp_sent = 0;
- int num_subm = 0;
- int num_cmpl = 0;
- const int shr_complet_b4 = SGV4_FLAG_SHARE | SGV4_FLAG_COMPLETE_B4;
- int id_of_mrq, existing_id;
- u32 n, flags, cdb_mxlen;
- unsigned long ul_timeout;
+ int existing_id;
+ u32 cdb_mxlen;
struct sg_io_v4 *cop = cwrp->h4p; /* controlling object */
u32 blen = cop->dout_xfer_len;
u32 cdb_alen = cop->request_len;
u32 tot_reqs = blen / SZ_SG_IO_V4;
u8 *cdb_ap = NULL;
- struct sg_io_v4 *hp; /* ptr to request object in a_hds */
struct sg_io_v4 *a_hds; /* array of request objects */
struct sg_fd *fp = cwrp->sfp;
struct sg_fd *o_sfp = sg_fd_share_ptr(fp);
- struct sg_fd *rq_sfp;
- struct sg_request *srp;
struct sg_device *sdp = fp->parentdp;
+ struct sg_mrq_hold mh;
+ struct sg_mrq_hold *mhp = &mh;
+#if IS_ENABLED(SG_LOG_ACTIVE)
+ const char *mrq_name;
+#endif
+ mhp->cwrp = cwrp;
+ mhp->blocking = blocking;
+#if IS_ENABLED(SG_LOG_ACTIVE)
+ mrq_name = sg_mrq_name(blocking, cop->flags);
+#endif
f_non_block = !!(fp->filp->f_flags & O_NONBLOCK);
- immed = !!(cop->flags & SGV4_FLAG_IMMED);
- stop_if = !!(cop->flags & SGV4_FLAG_STOP_IF);
- id_of_mrq = (int)cop->request_extra;
- if (id_of_mrq) {
- existing_id = atomic_cmpxchg(&fp->mrq_id_abort, 0, id_of_mrq);
- if (existing_id && existing_id != id_of_mrq) {
+ mhp->immed = !!(cop->flags & SGV4_FLAG_IMMED);
+ mhp->stop_if = !!(cop->flags & SGV4_FLAG_STOP_IF);
+ mhp->id_of_mrq = (int)cop->request_extra;
+ mhp->tot_reqs = tot_reqs;
+ mhp->s_res = 0;
+ if (mhp->id_of_mrq) {
+ existing_id = atomic_cmpxchg(&fp->mrq_id_abort, 0,
+ mhp->id_of_mrq);
+ if (existing_id && existing_id != mhp->id_of_mrq) {
SG_LOG(1, fp, "%s: existing id=%d id_of_mrq=%d\n",
- __func__, existing_id, id_of_mrq);
+ __func__, existing_id, mhp->id_of_mrq);
return -EDOM;
}
clear_bit(SG_FFD_MRQ_ABORT, fp->ffd_bm);
- chk_abort = true;
+ mhp->chk_abort = true;
+ } else {
+ mhp->chk_abort = false;
}
if (blocking) { /* came from ioctl(SG_IO) */
- if (unlikely(immed)) {
+ if (unlikely(mhp->immed)) {
SG_LOG(1, fp, "%s: ioctl(SG_IO) %s contradicts\n",
__func__, "with SGV4_FLAG_IMMED");
return -ERANGE;
@@ -1196,11 +1659,10 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking)
f_non_block = false;
}
}
- if (!immed && f_non_block)
- immed = true;
+ if (!mhp->immed && f_non_block)
+ mhp->immed = true;
SG_LOG(3, fp, "%s: %s, tot_reqs=%u, id_of_mrq=%d\n", __func__,
- (immed ? "IMMED" : (blocking ? "ordered blocking" :
- "variable blocking")), tot_reqs, id_of_mrq);
+ mrq_name, tot_reqs, mhp->id_of_mrq);
sg_sgv4_out_zero(cop);
if (unlikely(tot_reqs > U16_MAX)) {
@@ -1208,7 +1670,7 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking)
} else if (unlikely(blen > SG_MAX_MULTI_REQ_SZ ||
cdb_alen > SG_MAX_MULTI_REQ_SZ)) {
return -E2BIG;
- } else if (unlikely(immed && stop_if)) {
+ } else if (unlikely(mhp->immed && mhp->stop_if)) {
return -ERANGE;
} else if (unlikely(tot_reqs == 0)) {
return 0;
@@ -1224,16 +1686,14 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking)
cdb_mxlen = 0;
}
- if (SG_IS_DETACHING(sdp))
- return -ENODEV;
- else if (unlikely(o_sfp && SG_IS_DETACHING((o_sfp->parentdp))))
+ if (SG_IS_DETACHING(sdp) || (o_sfp && SG_IS_DETACHING(o_sfp->parentdp)))
return -ENODEV;
a_hds = kcalloc(tot_reqs, SZ_SG_IO_V4, GFP_KERNEL | __GFP_NOWARN);
if (unlikely(!a_hds))
return -ENOMEM;
- n = tot_reqs * SZ_SG_IO_V4;
- if (copy_from_user(a_hds, cuptr64(cop->dout_xferp), n)) {
+ if (copy_from_user(a_hds, cuptr64(cop->dout_xferp),
+ tot_reqs * SZ_SG_IO_V4)) {
res = -EFAULT;
goto fini;
}
@@ -1249,114 +1709,45 @@ sg_do_multi_req(struct sg_comm_wr_t *cwrp, bool blocking)
}
}
/* do sanity checks on all requests before starting */
- res = sg_mrq_sanity(sdp, cop, a_hds, cdb_ap, fp, immed, tot_reqs,
- NULL);
+ res = sg_mrq_sanity(sdp, cop, a_hds, cdb_ap, fp, mhp->immed,
+ tot_reqs, &share_on_oth);
if (unlikely(res))
goto fini;
- set_this = false;
- set_other = false;
- /* Dispatch (submit) requests and optionally wait for response */
- for (hp = a_hds; num_subm < tot_reqs; ++hp) {
- if (chk_abort && test_and_clear_bit(SG_FFD_MRQ_ABORT,
- fp->ffd_bm)) {
- SG_LOG(1, fp, "%s: id_of_mrq=%d aborting at ind=%d\n",
- __func__, id_of_mrq, num_subm);
- break; /* N.B. rest not submitted */
- }
- flags = hp->flags;
- if (flags & SGV4_FLAG_DO_ON_OTHER) {
- rq_sfp = o_sfp;
- if (!set_other) {
- set_other = true;
- if (test_bit(SG_FFD_NO_CMD_Q, rq_sfp->ffd_bm))
- clear_bit(SG_FFD_NO_CMD_Q,
- rq_sfp->ffd_bm);
- }
- } else {
- rq_sfp = fp;
- if (!set_this) {
- set_this = true;
- if (test_bit(SG_FFD_NO_CMD_Q, rq_sfp->ffd_bm))
- clear_bit(SG_FFD_NO_CMD_Q,
- rq_sfp->ffd_bm);
- }
- }
- if (cdb_ap) { /* already have array of cdbs */
- cwrp->cmdp = cdb_ap + (num_subm * cdb_mxlen);
- cwrp->u_cmdp = NULL;
- } else { /* fetch each cdb from user space */
- cwrp->cmdp = NULL;
- cwrp->u_cmdp = cuptr64(hp->request);
- }
- cwrp->cmd_len = hp->request_len;
- ul_timeout = msecs_to_jiffies(hp->timeout);
- cwrp->frq_bm[0] = 0;
- __assign_bit(SG_FRQ_SYNC_INVOC, cwrp->frq_bm, (int)blocking);
- __set_bit(SG_FRQ_IS_V4I, cwrp->frq_bm);
- cwrp->h4p = hp;
- cwrp->timeout = min_t(unsigned long, ul_timeout, INT_MAX);
- cwrp->sfp = rq_sfp;
- srp = sg_common_write(cwrp);
- if (IS_ERR(srp)) {
- s_res = PTR_ERR(srp);
- break;
- }
- srp->s_hdr4.mrq_ind = num_subm++;
- if (chk_abort)
- atomic_set(&srp->s_hdr4.pack_id_of_mrq, id_of_mrq);
- if (immed || (!(blocking || (flags & shr_complet_b4)))) {
- if (fp == rq_sfp)
- ++this_fp_sent;
- else
- ++other_fp_sent;
- continue; /* defer completion until all submitted */
- }
- s_res = sg_wait_event_srp(rq_sfp, NULL, hp, srp);
- if (unlikely(s_res)) {
- if (s_res == -ERESTARTSYS) {
- res = s_res;
- goto fini;
- }
- break;
- }
- ++num_cmpl;
- hp->info |= SG_INFO_MRQ_FINI;
- if (stop_if && (hp->driver_status || hp->transport_status ||
- hp->device_status)) {
- SG_LOG(2, fp, "%s: %s=0x%x/0x%x/0x%x] cause exit\n",
- __func__, "STOP_IF and status [drv/tran/scsi",
- hp->driver_status, hp->transport_status,
- hp->device_status);
- break; /* cop->driver_status <-- 0 in this case */
- }
- if (rq_sfp->async_qp && (hp->flags & SGV4_FLAG_SIGNAL)) {
- res = sg_mrq_arr_flush(cop, a_hds, tot_reqs, s_res);
- if (unlikely(res))
- break;
- kill_fasync(&rq_sfp->async_qp, SIGPOLL, POLL_IN);
- }
- } /* end of dispatch request and optionally wait response loop */
- cop->dout_resid = tot_reqs - num_subm;
- cop->info = num_cmpl; /* number received */
- if (cop->din_xfer_len > 0) {
- cop->din_resid = tot_reqs - num_cmpl;
- cop->spare_out = -s_res;
- }
- if (immed)
- goto fini;
+ /* override cmd queuing setting to allow */
+ clear_bit(SG_FFD_NO_CMD_Q, fp->ffd_bm);
+ if (o_sfp)
+ clear_bit(SG_FFD_NO_CMD_Q, o_sfp->ffd_bm);
- if (likely(res == 0 && (this_fp_sent + other_fp_sent) > 0)) {
- s_res = sg_mrq_complets(cop, a_hds, fp, o_sfp, tot_reqs,
- this_fp_sent, other_fp_sent);
- if (unlikely(s_res == -EFAULT || s_res == -ERESTARTSYS))
- res = s_res; /* this may leave orphans */
+ mhp->cdb_ap = cdb_ap;
+ mhp->a_hds = a_hds;
+ mhp->cdb_mxlen = cdb_mxlen;
+
+ if (!mhp->immed && !blocking && share_on_oth) {
+ bool ok;
+
+ /* check for 'shared' variable blocking (svb) */
+ ok = sg_mrq_svb_chk(a_hds, tot_reqs);
+ if (!ok) {
+ SG_LOG(1, fp, "%s: %s failed on req(s)\n", __func__,
+ mrq_name);
+ res = -ERANGE;
+ goto fini;
+ }
+ if (test_and_set_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm)) {
+ SG_LOG(1, fp, "%s: %s already active\n", __func__,
+ mrq_name);
+ res = -EBUSY;
+ goto fini;
+ }
+ res = sg_process_svb_mrq(fp, o_sfp, mhp);
+ clear_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm);
+ } else {
+ res = sg_process_most_mrq(fp, o_sfp, mhp);
}
- if (id_of_mrq) /* can no longer do a mrq abort */
- atomic_set(&fp->mrq_id_abort, 0);
fini:
- if (likely(res == 0) && !immed)
- res = sg_mrq_arr_flush(cop, a_hds, tot_reqs, s_res);
+ if (likely(res == 0) && !mhp->immed)
+ res = sg_mrq_arr_flush(cop, a_hds, tot_reqs, mhp->s_res);
kfree(cdb_ap);
kfree(a_hds);
return res;
@@ -1414,6 +1805,7 @@ sg_submit_v4(struct sg_fd *sfp, void __user *p, struct sg_io_v4 *h4p,
cwr.h4p = h4p;
cwr.timeout = min_t(unsigned long, ul_timeout, INT_MAX);
cwr.cmd_len = h4p->request_len;
+ cwr.rsv_idx = -1;
cwr.u_cmdp = cuptr64(h4p->request);
cwr.cmdp = NULL;
srp = sg_common_write(&cwr);
@@ -1485,11 +1877,12 @@ sg_share_chk_flags(struct sg_fd *sfp, u32 rq_flags, int dxfer_len, int dir,
enum sg_shr_var sh_var = SG_SHR_NONE;
if (rq_flags & SGV4_FLAG_SHARE) {
- if (unlikely(rq_flags & SG_FLAG_DIRECT_IO))
+ if (unlikely(rq_flags & SG_FLAG_DIRECT_IO)) {
result = -EINVAL; /* since no control of data buffer */
- else if (unlikely(dxfer_len < 1))
- result = -ENODATA;
- else if (is_read_side) {
+ } else if (unlikely(dxfer_len < 1)) {
+ sh_var = is_read_side ? SG_SHR_RS_NOT_SRQ :
+ SG_SHR_WS_NOT_SRQ;
+ } else if (is_read_side) {
sh_var = SG_SHR_RS_RQ;
if (unlikely(dir != SG_DXFER_FROM_DEV))
result = -ENOMSG;
@@ -1498,7 +1891,7 @@ sg_share_chk_flags(struct sg_fd *sfp, u32 rq_flags, int dxfer_len, int dir,
if (unlikely(rq_flags & SG_FL_MMAP_DIRECT))
result = -ENODATA;
}
- } else { /* fd is write-side */
+ } else {
sh_var = SG_SHR_WS_RQ;
if (unlikely(dir != SG_DXFER_TO_DEV))
result = -ENOMSG;
@@ -1536,6 +1929,49 @@ sg_rq_state_fail_msg(struct sg_fd *sfp, enum sg_rq_state exp_old_st,
/* Functions ending in '_ulck' assume sfp->xa_lock held by caller. */
static void
+sg_rq_chg_state_force_ulck(struct sg_request *srp, enum sg_rq_state new_st)
+{
+ bool prev, want;
+ struct sg_fd *sfp = srp->parentfp;
+ struct xarray *xafp = &sfp->srp_arr;
+
+ atomic_set(&srp->rq_st, new_st);
+ want = (new_st == SG_RQ_AWAIT_RCV);
+ prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
+ if (prev != want) {
+ if (want)
+ __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
+ else
+ __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
+ }
+ want = (new_st == SG_RQ_INACTIVE);
+ prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
+ if (prev != want) {
+ if (want) {
+ int prev_idx = READ_ONCE(sfp->low_used_idx);
+
+ if (prev_idx < 0 || srp->rq_idx < prev_idx ||
+ !xa_get_mark(xafp, prev_idx, SG_XA_RQ_INACTIVE))
+ WRITE_ONCE(sfp->low_used_idx, srp->rq_idx);
+ __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
+ } else {
+ __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
+ }
+ }
+}
+
+static void
+sg_rq_chg_state_force(struct sg_request *srp, enum sg_rq_state new_st)
+{
+ unsigned long iflags;
+ struct xarray *xafp = &srp->parentfp->srp_arr;
+
+ xa_lock_irqsave(xafp, iflags);
+ sg_rq_chg_state_force_ulck(srp, new_st);
+ xa_unlock_irqrestore(xafp, iflags);
+}
+
+static inline void
sg_rq_chg_state_help(struct xarray *xafp, struct sg_request *srp, int indic)
{
if (indic & 1) /* from inactive state */
@@ -1565,13 +2001,10 @@ static int
sg_rq_chg_state_ulck(struct sg_request *srp, enum sg_rq_state old_st,
enum sg_rq_state new_st)
{
- enum sg_rq_state act_old_st;
- int indic;
+ enum sg_rq_state act_old_st =
+ (enum sg_rq_state)atomic_cmpxchg_relaxed(&srp->rq_st, old_st, new_st);
+ int indic = sg_rq_state_arr[(int)old_st] + sg_rq_state_mul2arr[(int)new_st];
- indic = sg_rq_state_arr[(int)old_st] +
- sg_rq_state_mul2arr[(int)new_st];
- act_old_st = (enum sg_rq_state)atomic_cmpxchg(&srp->rq_st, old_st,
- new_st);
if (unlikely(act_old_st != old_st)) {
#if IS_ENABLED(SG_LOG_ACTIVE)
SG_LOG(1, srp->parentfp, "%s: unexpected old state: %s\n",
@@ -1579,8 +2012,19 @@ sg_rq_chg_state_ulck(struct sg_request *srp, enum sg_rq_state old_st,
#endif
return -EPROTOTYPE; /* only used for this error type */
}
- if (indic)
- sg_rq_chg_state_help(&srp->parentfp->srp_arr, srp, indic);
+ if (indic) {
+ struct sg_fd *sfp = srp->parentfp;
+
+ if (new_st == SG_RQ_INACTIVE) {
+ int prev_idx = READ_ONCE(sfp->low_used_idx);
+ struct xarray *xafp = &sfp->srp_arr;
+
+ if (prev_idx < 0 || srp->rq_idx < prev_idx ||
+ !xa_get_mark(xafp, prev_idx, SG_XA_RQ_INACTIVE))
+ WRITE_ONCE(sfp->low_used_idx, srp->rq_idx);
+ }
+ sg_rq_chg_state_help(&sfp->srp_arr, srp, indic);
+ }
return 0;
}
@@ -1625,47 +2069,139 @@ sg_rq_chg_state(struct sg_request *srp, enum sg_rq_state old_st,
return 0;
}
-static void
-sg_rq_chg_state_force_ulck(struct sg_request *srp, enum sg_rq_state new_st)
+/*
+ * Returns index of an unused element in sfp's rsv_arr, or -1 if it is full.
+ * Marks that element's rsv_srp with ERR_PTR(-EBUSY) to reserve that index.
+ */
+static int
+sg_get_idx_new(struct sg_fd *sfp)
{
- bool prev, want;
- struct sg_fd *sfp = srp->parentfp;
- struct xarray *xafp = &sfp->srp_arr;
+ int k;
+ struct sg_request **rapp = sfp->rsv_arr;
- atomic_set(&srp->rq_st, new_st);
- want = (new_st == SG_RQ_AWAIT_RCV);
- prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
- if (prev != want) {
- if (want)
- __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
- else
- __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_AWAIT);
+ for (k = 0; k < SG_MAX_RSV_REQS; ++k, ++rapp) {
+ if (!*rapp) {
+ *rapp = ERR_PTR(-EBUSY);
+ return k;
+ }
}
- want = (new_st == SG_RQ_INACTIVE);
- prev = xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
- if (prev != want) {
- if (want) {
- int prev_idx = READ_ONCE(sfp->low_used_idx);
+ return -1;
+}
- if (prev_idx < 0 || srp->rq_idx < prev_idx ||
- !xa_get_mark(xafp, prev_idx, SG_XA_RQ_INACTIVE))
- WRITE_ONCE(sfp->low_used_idx, srp->rq_idx);
- __xa_set_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
- } else {
- __xa_clear_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE);
+static int
+sg_get_idx_new_lck(struct sg_fd *sfp)
+{
+ int res;
+ unsigned long iflags;
+
+ xa_lock_irqsave(&sfp->srp_arr, iflags);
+ res = sg_get_idx_new(sfp);
+ xa_unlock_irqrestore(&sfp->srp_arr, iflags);
+ return res;
+}
+
+/*
+ * Looks for an available element index in sfp's rsv_arr. That element's
+ * sh_srp must be NULL and will be set to ERR_PTR(-EBUSY). If no element
+ * is available then returns -1.
+ */
+static int
+sg_get_idx_available(struct sg_fd *sfp)
+{
+ int k;
+ struct sg_request **rapp = sfp->rsv_arr;
+ struct sg_request *srp;
+
+ for (k = 0; k < SG_MAX_RSV_REQS; ++k, ++rapp) {
+ srp = *rapp;
+ if (!IS_ERR_OR_NULL(srp)) {
+ if (!srp->sh_srp && !SG_RQ_ACTIVE(srp)) {
+ srp->sh_srp = ERR_PTR(-EBUSY);
+ return k;
+ }
}
}
+ return -1;
}
-static void
-sg_rq_chg_state_force(struct sg_request *srp, enum sg_rq_state new_st)
+static struct sg_request *
+sg_get_probable_read_side(struct sg_fd *sfp)
+{
+ struct sg_request **rapp = sfp->rsv_arr;
+ struct sg_request **end_rapp = rapp + SG_MAX_RSV_REQS;
+ struct sg_request *rs_srp;
+
+ for ( ; rapp < end_rapp; ++rapp) {
+ rs_srp = *rapp;
+ if (IS_ERR_OR_NULL(rs_srp) || rs_srp->sh_srp)
+ continue;
+ switch (atomic_read(&rs_srp->rq_st)) {
+ case SG_RQ_INFLIGHT:
+ case SG_RQ_AWAIT_RCV:
+ case SG_RQ_BUSY:
+ case SG_RQ_SHR_SWAP:
+ return rs_srp;
+ default:
+ break;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Returns string of the form: <leadin>rsv<num><leadout> if srp is one of
+ * the reserve requests. Otherwise a blank string of length <leadin> plus
+ * length of <leadout> is returned.
+ */
+static const char *
+sg_get_rsv_str(struct sg_request *srp, const char *leadin,
+ const char *leadout, int b_len, char *b)
+{
+ int k, i_len, o_len, len;
+ struct sg_fd *sfp;
+ struct sg_request **rapp;
+
+ if (!b || b_len < 1)
+ return b;
+ if (!leadin)
+ leadin = "";
+ if (!leadout)
+ leadout = "";
+ i_len = strlen(leadin);
+ o_len = strlen(leadout);
+ if (!srp)
+ goto blank;
+ sfp = srp->parentfp;
+ if (!sfp)
+ goto blank;
+ rapp = sfp->rsv_arr;
+ for (k = 0; k < SG_MAX_RSV_REQS; ++k, ++rapp) {
+ if (srp == *rapp)
+ break;
+ }
+ if (k >= SG_MAX_RSV_REQS)
+ goto blank;
+ scnprintf(b, b_len, "%srsv%d%s", leadin, k, leadout);
+ return b;
+blank:
+ len = min_t(int, i_len + o_len, b_len - 1);
+ for (k = 0; k < len; ++k)
+ b[k] = ' ';
+ b[len] = '\0';
+ return b;
+}
+
+static inline const char *
+sg_get_rsv_str_lck(struct sg_request *srp, const char *leadin,
+ const char *leadout, int b_len, char *b)
{
unsigned long iflags;
- struct xarray *xafp = &srp->parentfp->srp_arr;
+ const char *cp;
- xa_lock_irqsave(xafp, iflags);
- sg_rq_chg_state_force_ulck(srp, new_st);
- xa_unlock_irqrestore(xafp, iflags);
+ xa_lock_irqsave(&srp->parentfp->srp_arr, iflags);
+ cp = sg_get_rsv_str(srp, leadin, leadout, b_len, b);
+ xa_unlock_irqrestore(&srp->parentfp->srp_arr, iflags);
+ return cp;
}
static void
@@ -1691,9 +2227,8 @@ sg_execute_cmd(struct sg_fd *sfp, struct sg_request *srp)
else /* this sfd is defaulting to head */
at_head = !(srp->rq_flags & SG_FLAG_Q_AT_TAIL);
- kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */
+ kref_get(&sfp->f_ref); /* put usually in: sg_rq_end_io() */
sg_rq_chg_state_force(srp, SG_RQ_INFLIGHT);
-
/* >>>>>>> send cmd/req off to other levels <<<<<<<< */
if (!sync) {
atomic_inc(&sfp->submitted);
@@ -1761,7 +2296,7 @@ sg_common_write(struct sg_comm_wr_t *cwrp)
} else {
sh_var = SG_SHR_NONE;
if (unlikely(rq_flags & SGV4_FLAG_SHARE))
- return ERR_PTR(-ENOMSG);
+ return ERR_PTR(-ENOMSG); /* no file share found */
}
if (unlikely(dxfr_len >= SZ_256M))
return ERR_PTR(-EINVAL);
@@ -1779,6 +2314,7 @@ sg_common_write(struct sg_comm_wr_t *cwrp)
srp->s_hdr4.cmd_len = h4p->request_len;
srp->s_hdr4.dir = dir;
srp->s_hdr4.out_resid = 0;
+ srp->s_hdr4.mrq_ind = 0;
} else { /* v3 interface active */
memcpy(&srp->s_hdr3, hi_p, sizeof(srp->s_hdr3));
}
@@ -1873,7 +2409,6 @@ sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active)
int err = 0;
u32 rq_res = srp->rq_result;
enum sg_shr_var sh_var = srp->sh_var;
- struct sg_fd *sh_sfp;
if (unlikely(srp->rq_result & 0xff)) {
int sb_len_wr = sg_copy_sense(srp, v4_active);
@@ -1886,30 +2421,40 @@ sg_rec_state_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool v4_active)
if (unlikely(test_bit(SG_FRQ_ABORTING, srp->frq_bm)))
srp->rq_info |= SG_INFO_ABORTED;
- sh_sfp = sg_fd_share_ptr(sfp);
if (sh_var == SG_SHR_WS_RQ && sg_fd_is_shared(sfp)) {
- struct sg_request *rs_srp = sh_sfp->rsv_srp;
- enum sg_rq_state mar_st = atomic_read(&rs_srp->rq_st);
+ enum sg_rq_state rs_st;
+ struct sg_request *rs_srp = srp->sh_srp;
+
+ if (!rs_srp)
+ return -EPROTO;
+ rs_st = atomic_read(&rs_srp->rq_st);
- switch (mar_st) {
+ switch (rs_st) {
case SG_RQ_SHR_SWAP:
case SG_RQ_SHR_IN_WS:
/* make read-side request available for re-use */
rs_srp->tag = SG_TAG_WILDCARD;
rs_srp->sh_var = SG_SHR_NONE;
sg_rq_chg_state_force(rs_srp, SG_RQ_INACTIVE);
- atomic_inc(&sh_sfp->inactives);
+ atomic_inc(&rs_srp->parentfp->inactives);
+ rs_srp->frq_bm[0] = 0;
+ __set_bit(SG_FRQ_RESERVED, rs_srp->frq_bm);
+ rs_srp->in_resid = 0;
+ rs_srp->rq_info = 0;
+ rs_srp->sense_len = 0;
+ rs_srp->sh_srp = NULL;
break;
case SG_RQ_AWAIT_RCV:
break;
case SG_RQ_INACTIVE:
- sh_sfp->ws_srp = NULL;
- break; /* nothing to do */
+ /* remove request share mapping */
+ rs_srp->sh_srp = NULL;
+ break;
default:
err = -EPROTO; /* Logic error */
SG_LOG(1, sfp,
"%s: SHR_WS_RQ, bad read-side state: %s\n",
- __func__, sg_rq_st_str(mar_st, true));
+ __func__, sg_rq_st_str(rs_st, true));
break; /* nothing to do */
}
}
@@ -1924,6 +2469,8 @@ sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
enum sg_rq_state sr_st = atomic_read(&srp->rq_st);
/* advance state machine, send signal to write-side if appropriate */
+ SG_LOG(4, sfp, "%s: %pK: sh_var=%s\n", __func__, srp,
+ sg_shr_str(srp->sh_var, true));
switch (srp->sh_var) {
case SG_SHR_RS_RQ:
{
@@ -1939,29 +2486,32 @@ sg_complete_v3v4(struct sg_fd *sfp, struct sg_request *srp, bool other_err)
} else if (sr_st != SG_RQ_SHR_SWAP) {
sg_rq_chg_state_force(srp, SG_RQ_SHR_SWAP);
}
- if (ws_sfp && ws_sfp->async_qp &&
+ if (ws_sfp && ws_sfp->async_qp && !srp->sh_srp &&
(!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
(srp->rq_flags & SGV4_FLAG_SIGNAL)))
kill_fasync(&ws_sfp->async_qp, SIGPOLL,
poll_type);
}
break;
- case SG_SHR_WS_RQ: /* cleanup both on write-side completion */
- {
- struct sg_fd *rs_sfp = sg_fd_share_ptr(sfp);
+ case SG_SHR_WS_RQ: /* cleanup both on write-side completion */
+ if (likely(sg_fd_is_shared(sfp))) {
+ struct sg_request *rs_srp = srp->sh_srp;
- if (likely(rs_sfp)) {
- rs_sfp->ws_srp = NULL;
- if (rs_sfp->rsv_srp)
- rs_sfp->rsv_srp->sh_var =
- SG_SHR_RS_NOT_SRQ;
+ if (rs_srp) {
+ rs_srp->sh_srp = NULL;
+ rs_srp->sh_var = SG_SHR_RS_NOT_SRQ;
+ } else {
+ SG_LOG(2, sfp, "%s: write-side's paired read is missing\n",
+ __func__);
}
}
srp->sh_var = SG_SHR_WS_NOT_SRQ;
+ srp->sh_srp = NULL;
srp->sgatp = &srp->sgat_h;
if (sr_st != SG_RQ_BUSY)
sg_rq_chg_state_force(srp, SG_RQ_BUSY);
break;
+ case SG_SHR_WS_NOT_SRQ:
default:
if (sr_st != SG_RQ_BUSY)
sg_rq_chg_state_force(srp, SG_RQ_BUSY);
@@ -2017,6 +2567,7 @@ sg_receive_v4(struct sg_fd *sfp, struct sg_request *srp, void __user *p,
}
/*
+ * Invoked when user calls ioctl(SG_IORECEIVE, SGV4_FLAG_MULTIPLE_REQS).
* Returns negative on error including -ENODATA if there are no mrqs submitted
* nor waiting. Otherwise it returns the number of elements written to
* rsp_arr, which may be 0 if mrqs submitted but none waiting
@@ -2059,7 +2610,7 @@ sg_mrq_iorec_complets(struct sg_fd *sfp, bool non_block, int max_mrqs,
/*
* Invoked when user calls ioctl(SG_IORECEIVE, SGV4_FLAG_MULTIPLE_REQS).
- * Expected race as multiple concurrent calls with the same pack_id/tag can
+ * Expected race as many concurrent calls with the same pack_id/tag can
* occur. Only one should succeed per request (more may succeed but will get
* different requests).
*/
@@ -2541,7 +3092,7 @@ sg_change_after_read_side_rq(struct sg_fd *sfp, bool fini1_again0)
goto fini;
if (xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_RS_SHARE))
rs_sfp = sfp;
- rs_rsv_srp = sfp->rsv_srp;
+ rs_rsv_srp = rs_sfp->rsv_arr[0];
if (IS_ERR_OR_NULL(rs_rsv_srp))
goto fini;
@@ -2592,18 +3143,27 @@ sg_change_after_read_side_rq(struct sg_fd *sfp, bool fini1_again0)
static void
sg_unshare_rs_fd(struct sg_fd *rs_sfp, bool lck)
{
+ int k;
unsigned long iflags = 0;
struct sg_device *sdp = rs_sfp->parentdp;
+ struct sg_request **rapp = rs_sfp->rsv_arr;
struct xarray *xadp = &sdp->sfp_arr;
+ struct sg_request *r_srp;
- rcu_assign_pointer(rs_sfp->share_sfp, NULL);
if (lck)
- xa_lock_irqsave(xadp, iflags);
- rs_sfp->ws_srp = NULL;
+ xa_lock_irqsave_nested(xadp, iflags, 1);
+ __clear_bit(SG_FFD_RESHARE, rs_sfp->ffd_bm);
+ for (k = 0; k < SG_MAX_RSV_REQS; ++k, ++rapp) {
+ r_srp = *rapp;
+ if (IS_ERR_OR_NULL(r_srp))
+ continue;
+ r_srp->sh_srp = NULL;
+ }
__xa_set_mark(xadp, rs_sfp->idx, SG_XA_FD_UNSHARED);
__xa_clear_mark(xadp, rs_sfp->idx, SG_XA_FD_RS_SHARE);
if (lck)
xa_unlock_irqrestore(xadp, iflags);
+ rcu_assign_pointer(rs_sfp->share_sfp, NULL);
kref_put(&rs_sfp->f_ref, sg_remove_sfp);/* get: sg_find_sfp_by_fd() */
}
@@ -2614,13 +3174,13 @@ sg_unshare_ws_fd(struct sg_fd *ws_sfp, bool lck)
struct sg_device *sdp = ws_sfp->parentdp;
struct xarray *xadp = &sdp->sfp_arr;
- rcu_assign_pointer(ws_sfp->share_sfp, NULL);
if (lck)
- xa_lock_irqsave(xadp, iflags);
+ xa_lock_irqsave_nested(xadp, iflags, 1);
__xa_set_mark(xadp, ws_sfp->idx, SG_XA_FD_UNSHARED);
/* SG_XA_FD_RS_SHARE mark should be already clear */
if (lck)
xa_unlock_irqrestore(xadp, iflags);
+ rcu_assign_pointer(ws_sfp->share_sfp, NULL);
kref_put(&ws_sfp->f_ref, sg_remove_sfp);/* get: sg_find_sfp_by_fd() */
}
@@ -2633,74 +3193,95 @@ sg_unshare_ws_fd(struct sg_fd *ws_sfp, bool lck)
*/
static void
sg_remove_sfp_share(struct sg_fd *sfp, bool is_rd_side)
+ __must_hold(sfp->parentdp->open_rel_lock)
{
__maybe_unused int res = 0;
+ int k, retry_count;
unsigned long iflags;
enum sg_rq_state sr_st;
+ struct sg_request **rapp;
struct sg_device *sdp = sfp->parentdp;
struct sg_device *sh_sdp;
struct sg_fd *sh_sfp;
struct sg_request *rsv_srp = NULL;
struct sg_request *ws_srp;
struct xarray *xadp = &sdp->sfp_arr;
+ struct xarray *xafp = &sfp->srp_arr;
SG_LOG(3, sfp, "%s: sfp=%pK %s\n", __func__, sfp,
(is_rd_side ? "read-side" : "write-side"));
xa_lock_irqsave(xadp, iflags);
+ retry_count = 0;
+try_again:
+ if (is_rd_side && !xa_get_mark(xadp, sfp->idx, SG_XA_FD_RS_SHARE))
+ goto fini;
sh_sfp = sg_fd_share_ptr(sfp);
- if (!sg_fd_is_shared(sfp))
- goto err_out;
+ if (unlikely(!sh_sfp))
+ goto fini;
sh_sdp = sh_sfp->parentdp;
- if (is_rd_side) {
- bool set_inactive = false;
-
- if (unlikely(!xa_get_mark(xadp, sfp->idx,
- SG_XA_FD_RS_SHARE))) {
- xa_unlock_irqrestore(xadp, iflags);
+ if (!xa_trylock(xafp)) {
+ /*
+ * The other side of the share might be closing as well, avoid
+ * deadlock. Should clear relatively quickly.
+ */
+ xa_unlock_irqrestore(xadp, iflags);
+ if (++retry_count > SG_ADD_RQ_MAX_RETRIES) {
+ SG_LOG(1, sfp, "%s: retry_count>>\n", __func__);
return;
}
- rsv_srp = sfp->rsv_srp;
- if (unlikely(!rsv_srp))
- goto fini;
- if (unlikely(rsv_srp->sh_var != SG_SHR_RS_RQ))
- goto fini;
- sr_st = atomic_read(&rsv_srp->rq_st);
- switch (sr_st) {
- case SG_RQ_SHR_SWAP:
- set_inactive = true;
- break;
- case SG_RQ_SHR_IN_WS:
- ws_srp = sfp->ws_srp;
- if (ws_srp && !IS_ERR(ws_srp)) {
- ws_srp->sh_var = SG_SHR_WS_NOT_SRQ;
- sfp->ws_srp = NULL;
+ mutex_unlock(&sdp->open_rel_lock);
+ cpu_relax();
+ mutex_lock(&sdp->open_rel_lock);
+ xa_lock_irqsave(xadp, iflags);
+ goto try_again;
+ }
+ /* have acquired xafp lock */
+ if (is_rd_side) {
+ rapp = sfp->rsv_arr;
+ for (k = 0; k < SG_MAX_RSV_REQS; ++k, ++rapp) {
+ bool set_inactive = false;
+
+ rsv_srp = *rapp;
+ if (IS_ERR_OR_NULL(rsv_srp) ||
+ rsv_srp->sh_var != SG_SHR_RS_RQ)
+ continue;
+ sr_st = atomic_read(&rsv_srp->rq_st);
+ switch (sr_st) {
+ case SG_RQ_SHR_SWAP:
+ set_inactive = true;
+ break;
+ case SG_RQ_SHR_IN_WS:
+ ws_srp = rsv_srp->sh_srp;
+ if (!IS_ERR_OR_NULL(ws_srp) &&
+ !test_bit(SG_FFD_RELEASE,
+ sh_sfp->ffd_bm)) {
+ ws_srp->sh_var = SG_SHR_WS_NOT_SRQ;
+ }
+ rsv_srp->sh_srp = NULL;
+ set_inactive = true;
+ break;
+ default:
+ break;
+ }
+ rsv_srp->sh_var = SG_SHR_NONE;
+ if (set_inactive) {
+ res = sg_rq_chg_state_ulck(rsv_srp, sr_st, SG_RQ_INACTIVE);
+ if (!res)
+ atomic_inc(&sfp->inactives);
}
- set_inactive = true;
- break;
- default:
- break;
- }
- rsv_srp->sh_var = SG_SHR_NONE;
- if (set_inactive) {
- res = sg_rq_chg_state_ulck(rsv_srp, sr_st, SG_RQ_INACTIVE);
- if (!res)
- atomic_inc(&sfp->inactives);
}
-fini:
if (!xa_get_mark(&sh_sdp->sfp_arr, sh_sfp->idx,
SG_XA_FD_FREE) && sg_fd_is_shared(sh_sfp))
sg_unshare_ws_fd(sh_sfp, sdp != sh_sdp);
sg_unshare_rs_fd(sfp, false);
- } else {
- if (unlikely(!sg_fd_is_shared(sfp))) {
- xa_unlock_irqrestore(xadp, iflags);
- return;
- } else if (!xa_get_mark(&sh_sdp->sfp_arr, sh_sfp->idx,
- SG_XA_FD_FREE))
+ } else { /* is write-side of share */
+ if (!xa_get_mark(&sh_sdp->sfp_arr, sh_sfp->idx,
+ SG_XA_FD_FREE) && sg_fd_is_shared(sh_sfp))
sg_unshare_rs_fd(sh_sfp, sdp != sh_sdp);
sg_unshare_ws_fd(sfp, false);
}
-err_out:
+ xa_unlock(xafp);
+fini:
xa_unlock_irqrestore(xadp, iflags);
}
@@ -2713,27 +3294,31 @@ static void
sg_do_unshare(struct sg_fd *sfp, bool unshare_val)
__must_hold(sfp->f_mutex)
{
- bool retry;
+ bool retry, same_sdp_s;
int retry_count = 0;
+ unsigned long iflags;
struct sg_request *rs_rsv_srp;
struct sg_fd *rs_sfp;
struct sg_fd *ws_sfp;
struct sg_fd *o_sfp = sg_fd_share_ptr(sfp);
struct sg_device *sdp = sfp->parentdp;
+ struct xarray *xadp = &sdp->sfp_arr;
- if (!sg_fd_is_shared(sfp)) {
+ if (unlikely(!o_sfp)) {
SG_LOG(1, sfp, "%s: not shared ? ?\n", __func__);
return; /* no share to undo */
}
if (!unshare_val)
return; /* when unshare value is zero, it's a NOP */
+ same_sdp_s = (o_sfp && sfp->parentdp == o_sfp->parentdp);
again:
retry = false;
if (xa_get_mark(&sdp->sfp_arr, sfp->idx, SG_XA_FD_RS_SHARE)) {
rs_sfp = sfp;
ws_sfp = o_sfp;
- rs_rsv_srp = rs_sfp->rsv_srp;
- if (rs_rsv_srp && rs_rsv_srp->sh_var != SG_SHR_RS_RQ) {
+ rs_rsv_srp = rs_sfp->rsv_arr[0];
+ if (!IS_ERR_OR_NULL(rs_rsv_srp) &&
+ rs_rsv_srp->sh_var != SG_SHR_RS_RQ) {
if (unlikely(!mutex_trylock(&ws_sfp->f_mutex))) {
if (++retry_count > SG_ADD_RQ_MAX_RETRIES)
SG_LOG(1, sfp,
@@ -2743,7 +3328,16 @@ sg_do_unshare(struct sg_fd *sfp, bool unshare_val)
retry = true;
goto fini;
}
- sg_unshare_rs_fd(rs_sfp, true);
+ if (same_sdp_s) {
+ xa_lock_irqsave(xadp, iflags);
+ /* write-side is 'other' so do first */
+ sg_unshare_ws_fd(ws_sfp, false);
+ sg_unshare_rs_fd(rs_sfp, false);
+ xa_unlock_irqrestore(xadp, iflags);
+ } else {
+ sg_unshare_ws_fd(ws_sfp, true);
+ sg_unshare_rs_fd(rs_sfp, true);
+ }
mutex_unlock(&ws_sfp->f_mutex);
}
} else { /* called on write-side fd */
@@ -2757,10 +3351,19 @@ sg_do_unshare(struct sg_fd *sfp, bool unshare_val)
retry = true;
goto fini;
}
- rs_rsv_srp = rs_sfp->rsv_srp;
- if (rs_rsv_srp->sh_var != SG_SHR_RS_RQ) {
- sg_unshare_rs_fd(rs_sfp, true);
- sg_unshare_ws_fd(ws_sfp, true);
+ rs_rsv_srp = rs_sfp->rsv_arr[0];
+ if (!IS_ERR_OR_NULL(rs_rsv_srp) &&
+ rs_rsv_srp->sh_var != SG_SHR_RS_RQ) {
+ if (same_sdp_s) {
+ xa_lock_irqsave(xadp, iflags);
+ /* read-side is 'other' so do first */
+ sg_unshare_rs_fd(rs_sfp, false);
+ sg_unshare_ws_fd(ws_sfp, false);
+ xa_unlock_irqrestore(xadp, iflags);
+ } else {
+ sg_unshare_rs_fd(rs_sfp, true);
+ sg_unshare_ws_fd(ws_sfp, true);
+ }
}
mutex_unlock(&rs_sfp->f_mutex);
}
@@ -2970,6 +3573,16 @@ sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
return res;
}
+static inline int
+sg_num_waiting_maybe_acquire(struct sg_fd *sfp)
+{
+ int num = atomic_read(&sfp->waiting);
+
+ if (num < 1)
+ num = atomic_read_acquire(&sfp->waiting);
+ return num;
+}
+
/*
* When use_tag is true then id is a tag, else it is a pack_id. Returns
* valid srp if match, else returns NULL.
@@ -2977,15 +3590,11 @@ sg_ctl_sg_io(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
static struct sg_request *
sg_match_request(struct sg_fd *sfp, bool use_tag, int id)
{
- int num_waiting = atomic_read(&sfp->waiting);
unsigned long idx;
struct sg_request *srp;
- if (num_waiting < 1) {
- num_waiting = atomic_read_acquire(&sfp->waiting);
- if (num_waiting < 1)
- return NULL;
- }
+ if (sg_num_waiting_maybe_acquire(sfp) < 1)
+ return NULL;
if (id == SG_PACK_ID_WILDCARD) {
xa_for_each_marked(&sfp->srp_arr, idx, srp, SG_XA_RQ_AWAIT)
return srp;
@@ -3019,14 +3628,10 @@ sg_match_first_mrq_after(struct sg_fd *sfp, int pack_id,
unsigned long idx;
struct sg_request *srp;
- if (atomic_read(&sfp->waiting) < 1) {
- if (atomic_read_acquire(&sfp->waiting) < 1)
- return NULL;
- }
+ if (sg_num_waiting_maybe_acquire(sfp) < 1)
+ return NULL;
once_more:
xa_for_each_marked(&sfp->srp_arr, idx, srp, SG_XA_RQ_AWAIT) {
- if (unlikely(!srp))
- continue;
if (look_for_after) {
if (after_rp == srp)
look_for_after = false;
@@ -3095,16 +3700,15 @@ sg_abort_req(struct sg_fd *sfp, struct sg_request *srp)
return res;
}
+/* Holding xa_lock_irq(&sfp->srp_arr) */
static int
sg_mrq_abort_inflight(struct sg_fd *sfp, int pack_id)
{
bool got_ebusy = false;
int res = 0;
- unsigned long iflags;
struct sg_request *srp;
struct sg_request *prev_srp;
- xa_lock_irqsave(&sfp->srp_arr, iflags);
for (prev_srp = NULL; true; prev_srp = srp) {
srp = sg_match_first_mrq_after(sfp, pack_id, prev_srp);
if (!srp)
@@ -3115,7 +3719,6 @@ sg_mrq_abort_inflight(struct sg_fd *sfp, int pack_id)
else if (res)
break;
}
- xa_unlock_irqrestore(&sfp->srp_arr, iflags);
if (res)
return res;
return got_ebusy ? -EBUSY : 0;
@@ -3135,7 +3738,7 @@ sg_mrq_abort(struct sg_fd *sfp, int pack_id, bool dev_scope)
{
int existing_id;
int res = 0;
- unsigned long idx;
+ unsigned long idx, iflags;
struct sg_device *sdp;
struct sg_fd *o_sfp;
struct sg_fd *s_sfp;
@@ -3167,7 +3770,7 @@ sg_mrq_abort(struct sg_fd *sfp, int pack_id, bool dev_scope)
__func__, pack_id);
/* now look for inflight requests matching that mrq pack_id */
- xa_lock(&sfp->srp_arr);
+ xa_lock_irqsave(&sfp->srp_arr, iflags);
res = sg_mrq_abort_inflight(sfp, pack_id);
if (res == -EBUSY) {
res = sg_mrq_abort_inflight(sfp, pack_id);
@@ -3175,11 +3778,11 @@ sg_mrq_abort(struct sg_fd *sfp, int pack_id, bool dev_scope)
goto fini;
}
s_sfp = sg_fd_share_ptr(sfp);
- if (s_sfp) { /* SGV4_FLAG_DO_ON_OTHER may have been used */
- xa_unlock(&sfp->srp_arr);
- sfp = s_sfp; /* if share, check other fd */
- xa_lock(&sfp->srp_arr);
- if (sg_fd_is_shared(sfp))
+ if (s_sfp) { /* SGV4_FLAG_DO_ON_OTHER possible */
+ xa_unlock_irqrestore(&sfp->srp_arr, iflags);
+ sfp = s_sfp; /* if share, switch to other fd */
+ xa_lock_irqsave(&sfp->srp_arr, iflags);
+ if (!sg_fd_is_shared(sfp))
goto fini;
/* tough luck if other fd used same mrq pack_id */
res = sg_mrq_abort_inflight(sfp, pack_id);
@@ -3187,7 +3790,7 @@ sg_mrq_abort(struct sg_fd *sfp, int pack_id, bool dev_scope)
res = sg_mrq_abort_inflight(sfp, pack_id);
}
fini:
- xa_unlock(&sfp->srp_arr);
+ xa_unlock_irqrestore(&sfp->srp_arr, iflags);
return res;
check_whole_dev:
@@ -3196,10 +3799,10 @@ sg_mrq_abort(struct sg_fd *sfp, int pack_id, bool dev_scope)
xa_for_each(&sdp->sfp_arr, idx, o_sfp) {
if (o_sfp == sfp)
continue; /* already checked */
- xa_lock(&o_sfp->srp_arr);
+ mutex_lock(&o_sfp->f_mutex);
/* recurse, dev_scope==false is stopping condition */
res = sg_mrq_abort(o_sfp, pack_id, false);
- xa_unlock(&o_sfp->srp_arr);
+ mutex_unlock(&o_sfp->f_mutex);
if (res == 0)
break;
}
@@ -3235,12 +3838,13 @@ sg_ctl_abort(struct sg_device *sdp, struct sg_fd *sfp, void __user *p)
if (h4p->flags & SGV4_FLAG_MULTIPLE_REQS) {
if (pack_id == 0)
return -ENOSTR;
- return sg_mrq_abort(sfp, pack_id, dev_scope);
+ res = sg_mrq_abort(sfp, pack_id, dev_scope);
+ return res;
}
+ xa_lock_irqsave(&sfp->srp_arr, iflags);
use_tag = test_bit(SG_FFD_PREFER_TAG, sfp->ffd_bm);
id = use_tag ? (int)h4p->request_tag : pack_id;
- xa_lock_irqsave(&sfp->srp_arr, iflags);
srp = sg_match_request(sfp, use_tag, id);
if (!srp) { /* assume device (not just fd) scope */
xa_unlock_irqrestore(&sfp->srp_arr, iflags);
@@ -3311,7 +3915,7 @@ sg_find_sfp_by_fd(const struct file *search_for, struct sg_fd *from_sfp,
__xa_set_mark(&from_sdp->sfp_arr, from_sfp->idx,
SG_XA_FD_RS_SHARE);
else
- kref_get(&from_sfp->f_ref);/* so unshare done before release */
+ kref_get(&from_sfp->f_ref); /* undone: sg_unshare_*_fd() */
if (from_sdp != sdp) {
xa_unlock_irqrestore(&from_sdp->sfp_arr, iflags);
xa_lock_irqsave(&sdp->sfp_arr, iflags);
@@ -3338,7 +3942,6 @@ sg_fd_share(struct sg_fd *ws_sfp, int m_fd)
{
bool found = false;
int res = 0;
- int retry_count = 0;
struct file *filp;
struct sg_fd *rs_sfp;
@@ -3360,22 +3963,9 @@ sg_fd_share(struct sg_fd *ws_sfp, int m_fd)
}
SG_LOG(6, ws_sfp, "%s: read-side fd okay, scan for filp=0x%pK\n",
__func__, filp);
-again:
rs_sfp = sg_find_sfp_by_fd(filp, ws_sfp, false);
- if (IS_ERR(rs_sfp)) {
- res = PTR_ERR(rs_sfp);
- if (res == -EPROBE_DEFER) {
- if (unlikely(++retry_count > SG_ADD_RQ_MAX_RETRIES)) {
- res = -EBUSY;
- } else {
- res = 0;
- cpu_relax();
- goto again;
- }
- }
- } else {
+ if (!IS_ERR(rs_sfp))
found = !!rs_sfp;
- }
fini:
/* paired with filp=fget(m_fd) above */
fput(filp);
@@ -3395,8 +3985,6 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
{
bool found = false;
int res = 0;
- int retry_count = 0;
- enum sg_rq_state rq_st;
struct file *filp;
struct sg_fd *ws_sfp = sg_fd_share_ptr(rs_sfp);
@@ -3408,17 +3996,7 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
if (unlikely(!xa_get_mark(&rs_sfp->parentdp->sfp_arr, rs_sfp->idx,
SG_XA_FD_RS_SHARE)))
return -EINVAL;
- if (unlikely(!ws_sfp))
- return -EINVAL;
- if (unlikely(!rs_sfp->rsv_srp))
- res = -EPROTO; /* Internal error */
- rq_st = atomic_read(&rs_sfp->rsv_srp->rq_st);
- if (!(rq_st == SG_RQ_INACTIVE || rq_st == SG_RQ_SHR_SWAP))
- res = -EBUSY; /* read-side reserve buffer busy */
- if (rs_sfp->ws_srp)
- res = -EBUSY; /* previous write-side request not finished */
- if (unlikely(res))
- return res;
+ /* SG_XA_FD_RS_SHARE set impiles ws_sfp is valid */
/* Alternate approach: fcheck_files(current->files, m_fd) */
filp = fget(new_ws_fd);
@@ -3430,28 +4008,22 @@ sg_fd_reshare(struct sg_fd *rs_sfp, int new_ws_fd)
}
SG_LOG(6, ws_sfp, "%s: write-side fd ok, scan for filp=0x%pK\n", __func__,
filp);
- sg_unshare_ws_fd(ws_sfp, false);
-again:
+ sg_unshare_ws_fd(ws_sfp, true);
ws_sfp = sg_find_sfp_by_fd(filp, rs_sfp, true);
- if (IS_ERR(ws_sfp)) {
- res = PTR_ERR(ws_sfp);
- if (res == -EPROBE_DEFER) {
- if (unlikely(++retry_count > SG_ADD_RQ_MAX_RETRIES)) {
- res = -EBUSY;
- } else {
- res = 0;
- cpu_relax();
- goto again;
- }
- }
- } else {
+ if (!IS_ERR(ws_sfp))
found = !!ws_sfp;
- }
fini:
/* paired with filp=fget(new_ws_fd) above */
fput(filp);
if (unlikely(res))
return res;
+ if (found) { /* can only reshare rsv_arr[0] */
+ struct sg_request *rs_srp = rs_sfp->rsv_arr[0];
+
+ if (!IS_ERR_OR_NULL(rs_srp))
+ rs_srp->sh_srp = NULL;
+ set_bit(SG_FFD_RESHARE, rs_sfp->ffd_bm);
+ }
return found ? 0 : -ENOTSOCK; /* ENOTSOCK for fd exists but not sg */
}
@@ -3469,76 +4041,92 @@ sg_set_reserved_sz(struct sg_fd *sfp, int want_rsv_sz)
{
bool use_new_srp = false;
int res = 0;
- int new_sz, blen;
- unsigned long idx, iflags;
+ int k, new_sz, blen;
+ unsigned long idx = 0;
+ unsigned long iflags;
struct sg_request *o_srp; /* prior reserve sg_request */
struct sg_request *n_srp; /* new sg_request, may be used */
struct sg_request *t_srp; /* other fl entries */
struct sg_device *sdp = sfp->parentdp;
+ struct sg_request **rapp = &sfp->rsv_arr[SG_MAX_RSV_REQS - 1];
struct xarray *xafp = &sfp->srp_arr;
if (unlikely(sg_fd_is_shared(sfp)))
return -EBUSY; /* this fd can't be either side of share */
- o_srp = sfp->rsv_srp;
- if (unlikely(!o_srp))
- return -EPROTO;
new_sz = min_t(int, want_rsv_sz, sdp->max_sgat_sz);
new_sz = max_t(int, new_sz, sfp->sgat_elem_sz);
- blen = o_srp->sgatp->buflen;
SG_LOG(3, sfp, "%s: was=%d, ask=%d, new=%d (sgat_elem_sz=%d)\n",
- __func__, blen, want_rsv_sz, new_sz, sfp->sgat_elem_sz);
- if (blen == new_sz)
- return 0;
- n_srp = sg_mk_srp_sgat(sfp, true /* can take time */, new_sz);
- if (IS_ERR(n_srp))
- return PTR_ERR(n_srp);
- /* new sg_request object, sized correctly is now available */
+ __func__, *rapp ? (*rapp)->sgatp->buflen : -1,
+ want_rsv_sz, new_sz, sfp->sgat_elem_sz);
+ if (unlikely(sfp->mmap_sz > 0))
+ return -EBUSY; /* existing pages possibly pinned */
+
+ for (k = SG_MAX_RSV_REQS - 1; k >= 0; --k, --rapp) {
+ o_srp = *rapp;
+ if (IS_ERR_OR_NULL(o_srp))
+ continue;
+ blen = o_srp->sgatp->buflen;
+ if (blen == new_sz)
+ continue;
+ /* new sg_request object, sized correctly is now available */
+ n_srp = sg_mk_srp_sgat(sfp, true /* can take time */, new_sz);
+ if (IS_ERR(n_srp))
+ return PTR_ERR(n_srp);
try_again:
- o_srp = sfp->rsv_srp;
- if (unlikely(!o_srp)) {
- res = -EPROTO;
- goto fini;
- }
- if (unlikely(SG_RQ_ACTIVE(o_srp) || sfp->mmap_sz > 0)) {
- res = -EBUSY;
- goto fini;
- }
- use_new_srp = true;
- xa_for_each(xafp, idx, t_srp) {
- if (t_srp != o_srp && new_sz <= t_srp->sgatp->buflen &&
- !SG_RQ_ACTIVE(t_srp)) {
- use_new_srp = false;
- sfp->rsv_srp = t_srp;
- break;
+ o_srp = *rapp;
+ if (unlikely(SG_RQ_ACTIVE(o_srp))) {
+ res = -EBUSY;
+ goto fini;
}
- }
- if (use_new_srp) {
- struct sg_request *cxc_srp;
+ use_new_srp = true;
+ xa_for_each_marked(xafp, idx, t_srp, SG_XA_RQ_INACTIVE) {
+ if (t_srp != o_srp && new_sz <= t_srp->sgatp->buflen) {
+ use_new_srp = false;
+ xa_lock_irqsave(xafp, iflags);
+ __clear_bit(SG_FRQ_RESERVED, o_srp->frq_bm);
+ __set_bit(SG_FRQ_RESERVED, t_srp->frq_bm);
+ *rapp = t_srp;
+ xa_unlock_irqrestore(xafp, iflags);
+ sg_remove_srp(n_srp);
+ kfree(n_srp);
+ n_srp = NULL;
+ break;
+ }
+ }
+ if (use_new_srp) {
+ struct sg_request *cxc_srp;
- xa_lock_irqsave(xafp, iflags);
- n_srp->rq_idx = o_srp->rq_idx;
- idx = o_srp->rq_idx;
- cxc_srp = __xa_cmpxchg(xafp, idx, o_srp, n_srp, GFP_ATOMIC);
- if (o_srp == cxc_srp) {
- sfp->rsv_srp = n_srp;
- sg_rq_chg_state_force_ulck(n_srp, SG_RQ_INACTIVE);
- /* don't bump inactives, since replaced an inactive */
- xa_unlock_irqrestore(xafp, iflags);
- SG_LOG(6, sfp, "%s: new rsv srp=0x%pK ++\n", __func__,
- n_srp);
- sg_remove_sgat(o_srp);
- kfree(o_srp);
- } else {
- xa_unlock_irqrestore(xafp, iflags);
- SG_LOG(1, sfp, "%s: xa_cmpxchg() failed, again\n",
- __func__);
- goto try_again;
+ xa_lock_irqsave(xafp, iflags);
+ n_srp->rq_idx = o_srp->rq_idx;
+ idx = o_srp->rq_idx;
+ cxc_srp = __xa_cmpxchg(xafp, idx, o_srp, n_srp,
+ GFP_ATOMIC);
+ if (o_srp == cxc_srp) {
+ __assign_bit(SG_FRQ_RESERVED, n_srp->frq_bm,
+ test_bit(SG_FRQ_RESERVED,
+ o_srp->frq_bm));
+ *rapp = n_srp;
+ sg_rq_chg_state_force_ulck(n_srp, SG_RQ_INACTIVE);
+ xa_unlock_irqrestore(xafp, iflags);
+ SG_LOG(6, sfp, "%s: new rsv srp=0x%pK ++\n",
+ __func__, n_srp);
+ n_srp = NULL;
+ sg_remove_srp(o_srp);
+ kfree(o_srp);
+ o_srp = NULL;
+ } else {
+ xa_unlock_irqrestore(xafp, iflags);
+ SG_LOG(1, sfp, "%s: xa_cmpxchg()-->retry\n",
+ __func__);
+ goto try_again;
+ }
}
}
+ return res;
fini:
- if (!use_new_srp) {
- sg_remove_sgat(n_srp);
- kfree(n_srp); /* no-one else has seen n_srp, so safe */
+ if (n_srp) {
+ sg_remove_srp(n_srp);
+ kfree(n_srp); /* nothing has seen n_srp, so safe */
}
return res;
}
@@ -3574,16 +4162,12 @@ static bool
sg_any_persistent_orphans(struct sg_fd *sfp)
{
if (test_bit(SG_FFD_KEEP_ORPHAN, sfp->ffd_bm)) {
- int num_waiting = atomic_read(&sfp->waiting);
unsigned long idx;
struct sg_request *srp;
struct xarray *xafp = &sfp->srp_arr;
- if (num_waiting < 1) {
- num_waiting = atomic_read_acquire(&sfp->waiting);
- if (num_waiting < 1)
- return false;
- }
+ if (sg_num_waiting_maybe_acquire(sfp) < 1)
+ return false;
xa_for_each_marked(xafp, idx, srp, SG_XA_RQ_AWAIT) {
if (test_bit(SG_FRQ_IS_ORPHAN, srp->frq_bm))
return true;
@@ -3592,9 +4176,17 @@ sg_any_persistent_orphans(struct sg_fd *sfp)
return false;
}
-/* Ignore append if size already over half of available buffer */
+/*
+ * Will clear_first if size already over half of available buffer.
+ *
+ * N.B. This function is a useful debug aid to be called inline with its
+ * output going to /sys/kernel/debug/scsi_generic/snapped for later
+ * examination. Best to call it with no locks held and that implies that
+ * the driver state may change while it is processing. Interpret the
+ * result with this in mind.
+ */
static void
-sg_take_snap(struct sg_fd *sfp, bool dont_append)
+sg_take_snap(struct sg_fd *sfp, bool clear_first)
{
u32 hour, minute, second;
u64 n;
@@ -3619,7 +4211,7 @@ sg_take_snap(struct sg_fd *sfp, bool dont_append)
GFP_KERNEL | __GFP_NOWARN);
if (!snapped_buf)
goto fini;
- } else if (dont_append) {
+ } else if (clear_first) {
memset(snapped_buf, 0, SG_SNAP_BUFF_SZ);
}
#if IS_ENABLED(SG_PROC_OR_DEBUG_FS)
@@ -3653,10 +4245,11 @@ sg_take_snap(struct sg_fd *sfp, bool dont_append)
* of boolean flags. Access abbreviations: [rw], read-write; [ro], read-only;
* [wo], write-only; [raw], read after write; [rbw], read before write.
*/
-static void
+static int
sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
{
bool flg = false;
+ int res = 0;
const u32 c_flgs_wm = seip->ctl_flags_wr_mask;
const u32 c_flgs_rm = seip->ctl_flags_rd_mask;
const u32 c_flgs_val_in = seip->ctl_flags;
@@ -3740,10 +4333,10 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
* reading: read-side is finished, awaiting action by write-side;
* when written: 1 --> write-side doesn't want to continue
*/
- if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_FINI) {
+ if ((c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_FINI) && sg_fd_is_shared(sfp)) {
rs_sfp = sg_fd_share_ptr(sfp);
- if (rs_sfp && rs_sfp->rsv_srp) {
- struct sg_request *res_srp = rs_sfp->rsv_srp;
+ if (rs_sfp && !IS_ERR_OR_NULL(rs_sfp->rsv_arr[0])) {
+ struct sg_request *res_srp = rs_sfp->rsv_arr[0];
if (atomic_read(&res_srp->rq_st) == SG_RQ_SHR_SWAP)
c_flgs_val_out |= SG_CTL_FLAGM_READ_SIDE_FINI;
@@ -3756,7 +4349,7 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
if (c_flgs_wm & SG_CTL_FLAGM_READ_SIDE_FINI) {
bool rs_fini_wm = !!(c_flgs_val_in & SG_CTL_FLAGM_READ_SIDE_FINI);
- sg_change_after_read_side_rq(sfp, rs_fini_wm);
+ res = sg_change_after_read_side_rq(sfp, rs_fini_wm);
}
/* READ_SIDE_ERR boolean, [ro] share: read-side finished with error */
if (c_flgs_rm & SG_CTL_FLAGM_READ_SIDE_ERR) {
@@ -3819,6 +4412,7 @@ sg_extended_bool_flags(struct sg_fd *sfp, struct sg_extended_info *seip)
if (c_flgs_val_in != c_flgs_val_out)
seip->ctl_flags = c_flgs_val_out;
+ return res;
}
static void
@@ -3865,6 +4459,9 @@ sg_extended_read_value(struct sg_fd *sfp, struct sg_extended_info *seip)
uv += (u32)atomic_read(&a_sfp->submitted);
seip->read_value = uv;
break;
+ case SG_SEIRV_MAX_RSV_REQS:
+ seip->read_value = SG_MAX_RSV_REQS;
+ break;
default:
SG_LOG(6, sfp, "%s: can't decode %d --> read_value\n",
__func__, seip->read_value);
@@ -3911,8 +4508,11 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
seip->tot_fd_thresh = hold;
}
/* check all boolean flags for either wr or rd mask set in or_mask */
- if (or_masks & SG_SEIM_CTL_FLAGS)
- sg_extended_bool_flags(sfp, seip);
+ if (or_masks & SG_SEIM_CTL_FLAGS) {
+ result = sg_extended_bool_flags(sfp, seip);
+ if (ret == 0 && unlikely(result))
+ ret = result;
+ }
/* yields minor_index (type: u32) [ro] */
if (or_masks & SG_SEIM_MINOR_INDEX) {
if (s_wr_mask & SG_SEIM_MINOR_INDEX) {
@@ -3937,7 +4537,7 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
struct sg_fd *sh_sfp = sg_fd_share_ptr(sfp);
seip->share_fd = sh_sfp ? sh_sfp->parentdp->index :
- U32_MAX;
+ U32_MAX;
}
mutex_unlock(&sfp->f_mutex);
}
@@ -3998,10 +4598,12 @@ sg_ctl_extended(struct sg_fd *sfp, void __user *p)
ret = result;
mutex_unlock(&sfp->f_mutex);
}
- if (s_rd_mask & SG_SEIM_RESERVED_SIZE)
- seip->reserved_sz = (u32)min_t(int,
- sfp->rsv_srp->sgatp->buflen,
+ if (s_rd_mask & SG_SEIM_RESERVED_SIZE) {
+ struct sg_request *r_srp = sfp->rsv_arr[0];
+
+ seip->reserved_sz = (u32)min_t(int, r_srp->sgatp->buflen,
sdp->max_sgat_sz);
+ }
/* copy to user space if int or boolean read mask non-zero */
if (s_rd_mask || seip->ctl_flags_rd_mask) {
if (copy_to_user(p, seip, SZ_SG_EXTENDED_INFO))
@@ -4096,11 +4698,20 @@ sg_ioctl_common(struct file *filp, struct sg_device *sdp, struct sg_fd *sfp,
unsigned long idx;
__maybe_unused const char *pmlp = ", pass to mid-level";
- SG_LOG(6, sfp, "%s: cmd=0x%x, O_NONBLOCK=%d\n", __func__, cmd_in,
- !!(filp->f_flags & O_NONBLOCK));
+ SG_LOG(6, sfp, "%s: cmd=0x%x, O_NONBLOCK=%d%s\n", __func__, cmd_in,
+ !!(filp->f_flags & O_NONBLOCK),
+ (cmd_in == SG_GET_NUM_WAITING ? ", SG_GET_NUM_WAITING" : ""));
sdev = sdp->device;
switch (cmd_in) {
+ case SG_GET_NUM_WAITING:
+ /* Want as fast as possible, with a useful result */
+ if (test_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm))
+ sg_sfp_blk_poll(sfp, 0); /* LLD may have some ready */
+ val = atomic_read(&sfp->waiting);
+ if (val)
+ return put_user(val, ip);
+ return put_user(atomic_read_acquire(&sfp->waiting), ip);
case SG_IO:
if (SG_IS_DETACHING(sdp))
return -ENODEV;
@@ -4169,18 +4780,10 @@ sg_ioctl_common(struct file *filp, struct sg_device *sdp, struct sg_fd *sfp,
}
SG_LOG(3, sfp, "%s: SG_GET_PACK_ID=%d\n", __func__, val);
return put_user(val, ip);
- case SG_GET_NUM_WAITING:
- /* Want as fast as possible, with a useful result */
- if (test_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm))
- sg_sfp_blk_poll(sfp, 0); /* LLD may have some ready */
- val = atomic_read(&sfp->waiting);
- if (val)
- return put_user(val, ip);
- return put_user(atomic_read_acquire(&sfp->waiting), ip);
case SG_GET_SG_TABLESIZE:
SG_LOG(3, sfp, "%s: SG_GET_SG_TABLESIZE=%d\n", __func__,
- sdp->max_sgat_sz);
- return put_user(sdp->max_sgat_sz, ip);
+ sdp->max_sgat_elems);
+ return put_user(sdp->max_sgat_elems, ip);
case SG_SET_RESERVED_SIZE:
res = get_user(val, ip);
if (likely(!res)) {
@@ -4195,13 +4798,17 @@ sg_ioctl_common(struct file *filp, struct sg_device *sdp, struct sg_fd *sfp,
}
return res;
case SG_GET_RESERVED_SIZE:
- mutex_lock(&sfp->f_mutex);
- val = min_t(int, sfp->rsv_srp->sgatp->buflen,
- sdp->max_sgat_sz);
- mutex_unlock(&sfp->f_mutex);
+ {
+ struct sg_request *r_srp = sfp->rsv_arr[0];
+
+ mutex_lock(&sfp->f_mutex);
+ val = min_t(int, r_srp->sgatp->buflen,
+ sdp->max_sgat_sz);
+ mutex_unlock(&sfp->f_mutex);
+ res = put_user(val, ip);
+ }
SG_LOG(3, sfp, "%s: SG_GET_RESERVED_SIZE=%d\n", __func__,
val);
- res = put_user(val, ip);
return res;
case SG_SET_COMMAND_Q: /* set by driver whenever v3 or v4 req seen */
SG_LOG(3, sfp, "%s: SG_SET_COMMAND_Q\n", __func__);
@@ -4495,7 +5102,7 @@ sg_vma_open(struct vm_area_struct *vma)
pr_warn("%s: sfp null\n", __func__);
return;
}
- kref_get(&sfp->f_ref);
+ kref_get(&sfp->f_ref); /* put in: sg_vma_close() */
}
static void
@@ -4540,8 +5147,8 @@ sg_vma_fault(struct vm_fault *vmf)
SG_LOG(1, sfp, "%s: device detaching\n", __func__);
goto out_err;
}
- srp = sfp->rsv_srp;
- if (unlikely(!srp)) {
+ srp = sfp->rsv_arr[0];
+ if (IS_ERR_OR_NULL(srp)) {
SG_LOG(1, sfp, "%s: srp%s\n", __func__, nbp);
goto out_err;
}
@@ -4594,7 +5201,8 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
pr_warn("sg: %s: sfp is NULL\n", __func__);
return -ENXIO;
}
- mutex_lock(&sfp->f_mutex);
+ if (unlikely(!mutex_trylock(&sfp->f_mutex)))
+ return -EBUSY;
req_sz = vma->vm_end - vma->vm_start;
SG_LOG(3, sfp, "%s: vm_start=%pK, len=%d\n", __func__,
(void *)vma->vm_start, (int)req_sz);
@@ -4603,7 +5211,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
goto fini;
}
/* Check reserve request is inactive and has large enough buffer */
- srp = sfp->rsv_srp;
+ srp = sfp->rsv_arr[0];
if (SG_RQ_ACTIVE(srp)) {
res = -EBUSY;
goto fini;
@@ -4620,7 +5228,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
}
if (srp->sgat_h.page_order > 0 ||
req_sz > (unsigned long)srp->sgat_h.buflen) {
- sg_remove_sgat(srp);
+ sg_remove_srp(srp);
set_bit(SG_FRQ_FOR_MMAP, srp->frq_bm);
res = sg_mk_sgat(srp, sfp, req_sz);
if (res) {
@@ -4661,7 +5269,7 @@ sg_uc_rq_end_io_orphaned(struct work_struct *work)
sg_finish_scsi_blk_rq(srp); /* clean up orphan case */
sg_deact_request(sfp, srp);
}
- kref_put(&sfp->f_ref, sg_remove_sfp);
+ kref_put(&sfp->f_ref, sg_remove_sfp); /* get in: sg_execute_cmd() */
}
/*
@@ -4748,7 +5356,7 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
}
xa_lock_irqsave(&sfp->srp_arr, iflags);
__set_bit(SG_FRQ_ISSUED, srp->frq_bm);
- sg_rq_chg_state_force_ulck(srp, rqq_state);
+ sg_rq_chg_state_force_ulck(srp, rqq_state); /* normally --> SG_RQ_AWAIT_RCV */
WRITE_ONCE(srp->rqq, NULL);
if (test_bit(SG_FRQ_COUNT_ACTIVE, srp->frq_bm)) {
int num = atomic_inc_return(&sfp->waiting);
@@ -4775,16 +5383,15 @@ sg_rq_end_io(struct request *rqq, blk_status_t status)
/* clean up orphaned request that aren't being kept */
INIT_WORK(&srp->ew_orph.work, sg_uc_rq_end_io_orphaned);
schedule_work(&srp->ew_orph.work);
+ /* kref_put(f_ref) done in sg_uc_rq_end_io_orphaned() */
return;
}
- /* Wake any sg_read()/ioctl(SG_IORECEIVE) awaiting this req */
if (!(srp->rq_flags & SGV4_FLAG_HIPRI))
wake_up_interruptible(&sfp->cmpl_wait);
if (sfp->async_qp && (!test_bit(SG_FRQ_IS_V4I, srp->frq_bm) ||
(srp->rq_flags & SGV4_FLAG_SIGNAL)))
kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN);
- kref_put(&sfp->f_ref, sg_remove_sfp);
- return;
+ kref_put(&sfp->f_ref, sg_remove_sfp); /* get in: sg_execute_cmd() */
}
static const struct file_operations sg_fops = {
@@ -4851,6 +5458,7 @@ sg_add_device_helper(struct gendisk *disk, struct scsi_device *scsidp)
clear_bit(SG_FDEV_DETACHING, sdp->fdev_bm);
atomic_set(&sdp->open_cnt, 0);
sdp->index = k;
+ /* set d_ref to 1; corresponding put in: sg_remove_device() */
kref_init(&sdp->d_ref);
error = 0;
@@ -4977,12 +5585,13 @@ sg_remove_device(struct device *cl_dev, struct class_interface *cl_intf)
if (unlikely(!sdp))
return;
/* set this flag as soon as possible as it could be a surprise */
- if (test_and_set_bit(SG_FDEV_DETACHING, sdp->fdev_bm))
+ if (test_and_set_bit(SG_FDEV_DETACHING, sdp->fdev_bm)) {
+ pr_warn("%s: multiple entries: sg%u\n", __func__, sdp->index);
return; /* only want to do following once per device */
-
+ }
SCSI_LOG_TIMEOUT(3, sdev_printk(KERN_INFO, sdp->device,
- "%s: 0x%pK\n", __func__, sdp));
-
+ "%s: sg%u 0x%pK\n", __func__,
+ sdp->index, sdp));
xa_for_each(&sdp->sfp_arr, idx, sfp) {
wake_up_interruptible_all(&sfp->cmpl_wait);
if (sfp->async_qp)
@@ -4995,6 +5604,7 @@ sg_remove_device(struct device *cl_dev, struct class_interface *cl_intf)
cdev_del(sdp->cdev);
sdp->cdev = NULL;
+ /* init to 1: kref_init() in sg_add_device_helper() */
kref_put(&sdp->d_ref, sg_device_destroy);
}
@@ -5135,21 +5745,10 @@ sg_rq_map_kern(struct sg_request *srp, struct request_queue *q, struct request *
return res;
}
-static inline void
-sg_set_map_data(const struct sg_scatter_hold *schp, bool up_valid,
- struct rq_map_data *mdp)
-{
- mdp->pages = schp->pages;
- mdp->page_order = schp->page_order;
- mdp->nr_entries = schp->num_sgat;
- mdp->offset = 0;
- mdp->null_mapped = !up_valid;
-}
-
static int
sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
{
- bool reserved, no_xfer, us_xfer;
+ bool no_dxfer, us_xfer;
int res = 0;
int dxfer_len = 0;
int r0w = READ;
@@ -5172,7 +5771,7 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
long_cmdp = kzalloc(cwrp->cmd_len, GFP_KERNEL);
if (unlikely(!long_cmdp)) {
res = -ENOMEM;
- goto err_out;
+ goto err_pre_blk_get;
}
SG_LOG(5, sfp, "%s: long_cmdp=0x%pK ++\n", __func__, long_cmdp);
}
@@ -5199,8 +5798,8 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
iov_count = sh3p->iovec_count;
r0w = dxfer_dir == SG_DXFER_TO_DEV ? WRITE : READ;
}
- SG_LOG(4, sfp, "%s: dxfer_len=%d, data-%s\n", __func__, dxfer_len,
- (r0w ? "OUT" : "IN"));
+ SG_LOG(4, sfp, "%s: dxfer_len=%d%s\n", __func__, dxfer_len,
+ (dxfer_len ? (r0w ? ", data-OUT" : ", data-IN") : ""));
q = sdp->device->request_queue;
/*
@@ -5213,9 +5812,8 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
(test_bit(SG_FFD_MORE_ASYNC, sfp->ffd_bm) ?
BLK_MQ_REQ_NOWAIT : 0));
if (IS_ERR(rqq)) {
- kfree(long_cmdp);
res = PTR_ERR(rqq);
- goto err_out;
+ goto err_pre_blk_get;
}
/* current sg_request protected by SG_RQ_BUSY state */
scsi_rp = scsi_req(rqq);
@@ -5224,8 +5822,11 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
srp->tag = rqq->tag;
if (rq_flags & SGV4_FLAG_HIPRI)
set_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm);
- if (cwrp->cmd_len > BLK_MAX_CDB)
+ if (cwrp->cmd_len > BLK_MAX_CDB) {
scsi_rp->cmd = long_cmdp; /* transfer ownership */
+ /* this heap freed in scsi_req_free_cmd() */
+ long_cmdp = NULL;
+ }
if (cwrp->u_cmdp)
res = sg_fetch_cmnd(sfp, cwrp->u_cmdp, cwrp->cmd_len,
scsi_rp->cmd);
@@ -5234,18 +5835,17 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
else
res = -EPROTO;
if (unlikely(res))
- goto err_out;
+ goto fini;
scsi_rp->cmd_len = cwrp->cmd_len;
srp->cmd_opcode = scsi_rp->cmd[0];
- no_xfer = dxfer_len <= 0 || dxfer_dir == SG_DXFER_NONE;
+ no_dxfer = dxfer_len <= 0 || dxfer_dir == SG_DXFER_NONE;
us_xfer = !(rq_flags & (SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO));
- __assign_bit(SG_FRQ_US_XFER, srp->frq_bm, !no_xfer && us_xfer);
- reserved = (sfp->rsv_srp == srp);
+ __assign_bit(SG_FRQ_US_XFER, srp->frq_bm, !no_dxfer && us_xfer);
rqq->end_io_data = srp;
scsi_rp->retries = SG_DEFAULT_RETRIES;
req_schp = srp->sgatp;
- if (no_xfer) {
+ if (no_dxfer) {
SG_LOG(4, sfp, "%s: no data xfer [0x%pK]\n", __func__, srp);
goto fini; /* path of reqs with no din nor dout */
} else if (unlikely(rq_flags & SG_FLAG_DIRECT_IO) && iov_count == 0 &&
@@ -5262,9 +5862,13 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
if (likely(md)) { /* normal, "indirect" IO */
if (unlikely(rq_flags & SG_FLAG_MMAP_IO)) {
/* mmap IO must use and fit in reserve request */
- if (unlikely(!reserved ||
+ bool reserve0;
+ struct sg_request *r_srp = sfp->rsv_arr[0];
+
+ reserve0 = (r_srp == srp);
+ if (unlikely(!reserve0 ||
dxfer_len > req_schp->buflen))
- res = reserved ? -ENOMEM : -EBUSY;
+ res = reserve0 ? -ENOMEM : -EBUSY;
} else if (req_schp->buflen == 0) {
int up_sz = max_t(int, dxfer_len, sfp->sgat_elem_sz);
@@ -5272,8 +5876,11 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
}
if (unlikely(res))
goto fini;
-
- sg_set_map_data(req_schp, !!up, md);
+ md->pages = req_schp->pages;
+ md->page_order = req_schp->page_order;
+ md->nr_entries = req_schp->num_sgat;
+ md->offset = 0;
+ md->null_mapped = !up;
md->from_user = (dxfer_dir == SG_DXFER_TO_FROM_DEV);
}
@@ -5282,7 +5889,7 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
struct iov_iter i;
res = import_iovec(r0w, up, iov_count, 0, &iov, &i);
- if (res < 0)
+ if (unlikely(res < 0))
goto fini;
iov_iter_truncate(&i, dxfer_len);
@@ -5317,9 +5924,10 @@ sg_start_req(struct sg_request *srp, struct sg_comm_wr_t *cwrp, int dxfer_dir)
} else {
srp->bio = rqq->bio;
}
-err_out:
+err_pre_blk_get:
SG_LOG((res ? 1 : 4), sfp, "%s: %s %s res=%d [0x%pK]\n", __func__,
sg_shr_str(srp->sh_var, false), cp, res, srp);
+ kfree(long_cmdp);
return res;
}
@@ -5336,13 +5944,14 @@ sg_finish_scsi_blk_rq(struct sg_request *srp)
int ret;
struct sg_fd *sfp = srp->parentfp;
struct request *rqq = READ_ONCE(srp->rqq);
+ __maybe_unused char b[32];
SG_LOG(4, sfp, "%s: srp=0x%pK%s\n", __func__, srp,
- (srp->parentfp->rsv_srp == srp) ? " rsv" : "");
+ sg_get_rsv_str_lck(srp, " ", "", sizeof(b), b));
if (test_and_clear_bit(SG_FRQ_COUNT_ACTIVE, srp->frq_bm)) {
if (atomic_dec_and_test(&sfp->submitted))
clear_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm);
- atomic_dec(&sfp->waiting);
+ atomic_dec_return_release(&sfp->waiting);
}
/* Expect blk_put_request(rqq) already called in sg_rq_end_io() */
@@ -5443,7 +6052,7 @@ sg_mk_sgat(struct sg_request *srp, struct sg_fd *sfp, int minlen)
}
static void
-sg_remove_sgat_helper(struct sg_fd *sfp, struct sg_scatter_hold *schp)
+sg_remove_sgat(struct sg_fd *sfp, struct sg_scatter_hold *schp)
{
int k;
void *p;
@@ -5464,15 +6073,19 @@ sg_remove_sgat_helper(struct sg_fd *sfp, struct sg_scatter_hold *schp)
/* Remove the data (possibly a sgat list) held by srp, not srp itself */
static void
-sg_remove_sgat(struct sg_request *srp)
+sg_remove_srp(struct sg_request *srp)
{
- struct sg_scatter_hold *schp = &srp->sgat_h; /* care: remove own data */
- struct sg_fd *sfp = srp->parentfp;
+ struct sg_scatter_hold *schp;
+ struct sg_fd *sfp;
+ __maybe_unused char b[48];
+ if (!srp)
+ return;
+ schp = &srp->sgat_h; /* care: remove own data */
+ sfp = srp->parentfp;
SG_LOG(4, sfp, "%s: num_sgat=%d%s\n", __func__, schp->num_sgat,
- ((srp->parentfp ? (sfp->rsv_srp == srp) : false) ?
- " [rsv]" : ""));
- sg_remove_sgat_helper(sfp, schp);
+ sg_get_rsv_str_lck(srp, " [", "]", sizeof(b), b));
+ sg_remove_sgat(sfp, schp);
if (sfp->tot_fd_thresh > 0) {
/* this is a subtraction, error if it goes negative */
@@ -5527,7 +6140,7 @@ sg_read_append(struct sg_request *srp, void __user *outp, int num_xfer)
}
/*
- * If there are multiple requests outstanding, the speed of this function is
+ * If there are many requests outstanding, the speed of this function is
* important. 'id' is pack_id when is_tag=false, otherwise it is a tag. Both
* SG_PACK_ID_WILDCARD and SG_TAG_WILDCARD are -1 and that case is typically
* the fast path. This function is only used in the non-blocking cases.
@@ -5543,7 +6156,6 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag)
bool second = false;
enum sg_rq_state sr_st;
int res;
- int num_waiting = atomic_read(&sfp->waiting);
int l_await_idx = READ_ONCE(sfp->low_await_idx);
unsigned long idx, s_idx;
unsigned long end_idx = ULONG_MAX;
@@ -5552,11 +6164,8 @@ sg_find_srp_by_id(struct sg_fd *sfp, int id, bool is_tag)
if (test_bit(SG_FFD_HIPRI_SEEN, sfp->ffd_bm))
sg_sfp_blk_poll(sfp, 0); /* LLD may have some ready to push up */
- if (num_waiting < 1) {
- num_waiting = atomic_read_acquire(&sfp->waiting);
- if (num_waiting < 1)
- return NULL;
- }
+ if (sg_num_waiting_maybe_acquire(sfp) < 1)
+ return NULL;
s_idx = (l_await_idx < 0) ? 0 : l_await_idx;
idx = s_idx;
@@ -5670,7 +6279,7 @@ static bool
sg_mrq_get_ready_srp(struct sg_fd *sfp, struct sg_request **srpp)
{
bool second = false;
- int num_waiting, res;
+ int res;
int l_await_idx = READ_ONCE(sfp->low_await_idx);
unsigned long idx, s_idx, end_idx;
struct sg_request *srp;
@@ -5684,12 +6293,8 @@ sg_mrq_get_ready_srp(struct sg_fd *sfp, struct sg_request **srpp)
*srpp = ERR_PTR(-ENODATA);
return true;
}
- num_waiting = atomic_read(&sfp->waiting);
- if (num_waiting < 1) {
- num_waiting = atomic_read_acquire(&sfp->waiting);
- if (num_waiting < 1)
- goto fini;
- }
+ if (sg_num_waiting_maybe_acquire(sfp) < 1)
+ goto fini;
s_idx = (l_await_idx < 0) ? 0 : l_await_idx;
idx = s_idx;
@@ -5727,9 +6332,10 @@ sg_mrq_get_ready_srp(struct sg_fd *sfp, struct sg_request **srpp)
* may take time but has improved chance of success, otherwise use GFP_ATOMIC.
* Note that basic initialization is done but srp is not added to either sfp
* list. On error returns twisted negated errno value (not NULL).
+ * N.B. Initializes new srp state to SG_RQ_BUSY.
*/
static struct sg_request *
-sg_mk_srp(struct sg_fd *sfp, bool first)
+sg_mk_only_srp(struct sg_fd *sfp, bool first)
{
struct sg_request *srp;
gfp_t gfp = __GFP_NOWARN;
@@ -5754,7 +6360,7 @@ static struct sg_request *
sg_mk_srp_sgat(struct sg_fd *sfp, bool first, int db_len)
{
int res;
- struct sg_request *n_srp = sg_mk_srp(sfp, first);
+ struct sg_request *n_srp = sg_mk_only_srp(sfp, first);
if (IS_ERR(n_srp))
return n_srp;
@@ -5779,14 +6385,22 @@ static struct sg_request *
sg_build_reserve(struct sg_fd *sfp, int buflen)
{
bool go_out = false;
- int res;
+ int res, idx;
struct sg_request *srp;
+ struct sg_request **rapp;
SG_LOG(3, sfp, "%s: buflen=%d\n", __func__, buflen);
- srp = sg_mk_srp(sfp, xa_empty(&sfp->srp_arr));
- if (IS_ERR(srp))
+ idx = sg_get_idx_new_lck(sfp);
+ if (idx < 0) {
+ SG_LOG(1, sfp, "%s: sg_get_idx_new_lck() failed\n", __func__);
+ return ERR_PTR(-EFBIG);
+ }
+ rapp = sfp->rsv_arr + idx;
+ srp = sg_mk_only_srp(sfp, xa_empty(&sfp->srp_arr));
+ if (IS_ERR(srp)) {
+ *rapp = NULL;
return srp;
- sfp->rsv_srp = srp;
+ }
do {
if (buflen < (int)PAGE_SIZE) {
buflen = PAGE_SIZE;
@@ -5794,14 +6408,18 @@ sg_build_reserve(struct sg_fd *sfp, int buflen)
}
res = sg_mk_sgat(srp, sfp, buflen);
if (likely(res == 0)) {
- SG_LOG(4, sfp, "%s: final buflen=%d, srp=0x%pK ++\n",
- __func__, buflen, srp);
+ *rapp = srp;
+ SG_LOG(4, sfp,
+ "%s: rsv%d: final buflen=%d, srp=0x%pK ++\n",
+ __func__, idx, buflen, srp);
return srp;
}
- if (go_out)
+ if (go_out) {
+ *rapp = NULL;
return ERR_PTR(res);
+ }
/* failed so remove, halve buflen, try again */
- sg_remove_sgat(srp);
+ sg_remove_srp(srp);
buflen >>= 1; /* divide by 2 */
} while (true);
}
@@ -5820,19 +6438,21 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
bool act_empty = false;
bool allow_rsv = true; /* see note above */
bool mk_new_srp = true;
+ bool new_rsv_srp = false;
bool ws_rq = false;
bool try_harder = false;
bool second = false;
bool has_inactive = false;
+ bool is_rsv;
+ int ra_idx = 0;
int res, l_used_idx;
u32 sum_dlen;
unsigned long idx, s_idx, end_idx, iflags;
enum sg_rq_state sr_st;
- enum sg_rq_state rs_sr_st = SG_RQ_INACTIVE;
+ enum sg_rq_state rs_st = SG_RQ_INACTIVE;
struct sg_fd *fp = cwrp->sfp;
struct sg_request *r_srp = NULL; /* returned value won't be NULL */
struct sg_request *low_srp = NULL;
- __maybe_unused struct sg_request *rsv_srp;
struct sg_request *rs_rsv_srp = NULL;
struct sg_fd *rs_sfp = NULL;
struct xarray *xafp = &fp->srp_arr;
@@ -5840,25 +6460,33 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
__maybe_unused char b[64];
b[0] = '\0';
- rsv_srp = fp->rsv_srp;
-
switch (sh_var) {
case SG_SHR_NONE:
case SG_SHR_WS_NOT_SRQ:
break;
case SG_SHR_RS_RQ:
- sr_st = atomic_read(&rsv_srp->rq_st);
+ if (test_bit(SG_FFD_RESHARE, fp->ffd_bm))
+ ra_idx = 0;
+ else
+ ra_idx = sg_get_idx_available(fp);
+ if (ra_idx < 0) {
+ new_rsv_srp = true;
+ cp = "m_rq";
+ goto good_fini;
+ }
+ r_srp = fp->rsv_arr[ra_idx];
+ sr_st = atomic_read(&r_srp->rq_st);
if (sr_st == SG_RQ_INACTIVE) {
- res = sg_rq_chg_state(rsv_srp, sr_st, SG_RQ_BUSY);
+ res = sg_rq_chg_state(r_srp, sr_st, SG_RQ_BUSY);
if (likely(res == 0)) {
- r_srp = rsv_srp;
+ r_srp->sh_srp = NULL;
mk_new_srp = false;
cp = "rs_rq";
goto good_fini;
}
}
/* Did not find the reserve request available */
- r_srp = ERR_PTR(-EBUSY);
+ r_srp = ERR_PTR(-EFBIG);
break;
case SG_SHR_RS_NOT_SRQ:
allow_rsv = false;
@@ -5875,26 +6503,36 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
* EADDRINUSE errno. The winner advances read-side's rq_state:
* SG_RQ_SHR_SWAP --> SG_RQ_SHR_IN_WS
*/
- rs_rsv_srp = rs_sfp->rsv_srp;
- rs_sr_st = atomic_read(&rs_rsv_srp->rq_st);
- switch (rs_sr_st) {
+ if (cwrp->rsv_idx >= 0)
+ rs_rsv_srp = rs_sfp->rsv_arr[cwrp->rsv_idx];
+ else
+ rs_rsv_srp = sg_get_probable_read_side(rs_sfp);
+ if (!rs_rsv_srp) {
+ r_srp = ERR_PTR(-ENOSTR);
+ break;
+ }
+ rs_st = atomic_read(&rs_rsv_srp->rq_st);
+ switch (rs_st) {
case SG_RQ_AWAIT_RCV:
if (unlikely(rs_rsv_srp->rq_result & SG_ML_RESULT_MSK)) {
/* read-side done but error occurred */
r_srp = ERR_PTR(-ENOSTR);
break;
}
- fallthrough;
+ ws_rq = true;
+ break;
case SG_RQ_SHR_SWAP:
ws_rq = true;
- if (unlikely(rs_sr_st == SG_RQ_AWAIT_RCV))
+ if (unlikely(rs_st == SG_RQ_AWAIT_RCV))
break;
- res = sg_rq_chg_state(rs_rsv_srp, rs_sr_st, SG_RQ_SHR_IN_WS);
+ res = sg_rq_chg_state(rs_rsv_srp, rs_st, SG_RQ_SHR_IN_WS);
if (unlikely(res))
r_srp = ERR_PTR(-EADDRINUSE);
break;
case SG_RQ_INFLIGHT:
case SG_RQ_BUSY:
+ SG_LOG(6, fp, "%s: write-side finds read-side: %s\n", __func__,
+ sg_rq_st_str(rs_st, true));
r_srp = ERR_PTR(-EBUSY);
break;
case SG_RQ_INACTIVE:
@@ -5911,15 +6549,24 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
if (PTR_ERR(r_srp) == -EBUSY)
goto err_out;
#if IS_ENABLED(SG_LOG_ACTIVE)
- if (sh_var == SG_SHR_RS_RQ)
+ if (sh_var == SG_SHR_RS_RQ) {
snprintf(b, sizeof(b), "SG_SHR_RS_RQ --> sr_st=%s",
sg_rq_st_str(sr_st, false));
- else if (sh_var == SG_SHR_WS_RQ && rs_sfp)
- snprintf(b, sizeof(b), "SG_SHR_WS_RQ-->rs_sr_st=%s",
- sg_rq_st_str(rs_sr_st, false));
- else
+ } else if (sh_var == SG_SHR_WS_RQ && rs_sfp) {
+ char c[32];
+ const char *ccp;
+
+ if (rs_rsv_srp)
+ ccp = sg_get_rsv_str(rs_rsv_srp, "[", "]",
+ sizeof(c), c);
+ else
+ ccp = "? ";
+ snprintf(b, sizeof(b), "SHR_WS_RQ --> rs_sr%s_st=%s",
+ ccp, sg_rq_st_str(rs_st, false));
+ } else {
snprintf(b, sizeof(b), "sh_var=%s",
sg_shr_str(sh_var, false));
+ }
#endif
goto err_out;
}
@@ -5947,7 +6594,7 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
s_idx = (l_used_idx < 0) ? 0 : l_used_idx;
if (l_used_idx >= 0 && xa_get_mark(xafp, s_idx, SG_XA_RQ_INACTIVE)) {
r_srp = xa_load(xafp, s_idx);
- if (r_srp && (allow_rsv || rsv_srp != r_srp)) {
+ if (r_srp && (allow_rsv || !test_bit(SG_FRQ_RESERVED, r_srp->frq_bm))) {
if (r_srp->sgat_h.buflen <= SG_DEF_SECTOR_SZ) {
if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE,
SG_RQ_BUSY) == 0) {
@@ -5960,7 +6607,8 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
}
xa_for_each_marked(xafp, idx, r_srp, SG_XA_RQ_INACTIVE) {
has_inactive = true;
- if (!allow_rsv && rsv_srp == r_srp)
+ if (!allow_rsv &&
+ test_bit(SG_FRQ_RESERVED, r_srp->frq_bm))
continue;
if (!low_srp && dxfr_len < SG_DEF_SECTOR_SZ) {
low_srp = r_srp;
@@ -5985,7 +6633,8 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
for (r_srp = xa_find(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE);
r_srp;
r_srp = xa_find_after(xafp, &idx, end_idx, SG_XA_RQ_INACTIVE)) {
- if (!allow_rsv && rsv_srp == r_srp)
+ if (!allow_rsv &&
+ test_bit(SG_FRQ_RESERVED, r_srp->frq_bm))
continue;
if (r_srp->sgat_h.buflen >= dxfr_len) {
if (sg_rq_chg_state(r_srp, SG_RQ_INACTIVE, SG_RQ_BUSY))
@@ -6025,7 +6674,7 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
r_srp = ERR_PTR(-EDOM);
SG_LOG(6, fp, "%s: trying 2nd req but cmd_q=false\n",
__func__);
- goto fini;
+ goto err_out;
} else if (fp->tot_fd_thresh > 0) {
sum_dlen = atomic_read(&fp->sum_fd_dlens) + dxfr_len;
if (unlikely(sum_dlen > (u32)fp->tot_fd_thresh)) {
@@ -6034,6 +6683,20 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
__func__, sum_dlen, "tot_fd_thresh");
}
}
+ if (!IS_ERR(r_srp) && new_rsv_srp) {
+ ra_idx = sg_get_idx_new(fp);
+ if (ra_idx < 0) {
+ ra_idx = sg_get_idx_available(fp);
+ if (ra_idx < 0) {
+ SG_LOG(1, fp,
+ "%s: no read-side reqs available\n",
+ __func__);
+ r_srp = ERR_PTR(-EFBIG);
+ }
+ }
+ }
+ if (IS_ERR(r_srp)) /* NULL is _not_ an ERR here */
+ goto err_out;
r_srp = sg_mk_srp_sgat(fp, act_empty, dxfr_len);
if (IS_ERR(r_srp)) {
if (!try_harder && dxfr_len < SG_DEF_SECTOR_SZ &&
@@ -6041,46 +6704,70 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
try_harder = true;
goto start_again;
}
- goto fini;
+ goto err_out;
+ }
+ SG_LOG(4, fp, "%s: %smk_new_srp=0x%pK ++\n", __func__,
+ (new_rsv_srp ? "rsv " : ""), r_srp);
+ if (new_rsv_srp) {
+ fp->rsv_arr[ra_idx] = r_srp;
+ set_bit(SG_FRQ_RESERVED, r_srp->frq_bm);
+ r_srp->sh_srp = NULL;
}
xa_lock_irqsave(xafp, iflags);
- res = __xa_alloc(xafp, &n_idx, r_srp, xa_limit_32b, GFP_KERNEL);
+ res = __xa_alloc(xafp, &n_idx, r_srp, xa_limit_32b, GFP_ATOMIC);
xa_unlock_irqrestore(xafp, iflags);
if (unlikely(res < 0)) {
- sg_remove_sgat(r_srp);
+ xa_unlock_irqrestore(xafp, iflags);
+ sg_remove_srp(r_srp);
kfree(r_srp);
r_srp = ERR_PTR(-EPROTOTYPE);
SG_LOG(1, fp, "%s: xa_alloc() failed, errno=%d\n",
__func__, -res);
- goto fini;
+ goto err_out;
}
- idx = n_idx;
- r_srp->rq_idx = idx;
+ r_srp->rq_idx = n_idx;
r_srp->parentfp = fp;
- sg_rq_chg_state_force(r_srp, SG_RQ_BUSY);
- SG_LOG(4, fp, "%s: mk_new_srp=0x%pK ++\n", __func__, r_srp);
+ xa_unlock_irqrestore(xafp, iflags);
}
- /* following copes with unlikely case where frq_bm > one ulong */
- WRITE_ONCE(r_srp->frq_bm[0], cwrp->frq_bm[0]); /* assumes <= 32 req flags */
+ /* keep SG_FRQ_RESERVED setting from prior/new r_srp; clear rest */
+ is_rsv = test_bit(SG_FRQ_RESERVED, r_srp->frq_bm);
+ WRITE_ONCE(r_srp->frq_bm[0], 0);
+ if (is_rsv)
+ set_bit(SG_FRQ_RESERVED, r_srp->frq_bm);
+ /* r_srp inherits these 3 flags from cwrp->frq_bm */
+ if (test_bit(SG_FRQ_IS_V4I, cwrp->frq_bm))
+ set_bit(SG_FRQ_IS_V4I, r_srp->frq_bm);
+ if (test_bit(SG_FRQ_SYNC_INVOC, cwrp->frq_bm))
+ set_bit(SG_FRQ_SYNC_INVOC, r_srp->frq_bm);
r_srp->sgatp->dlen = dxfr_len;/* must be <= r_srp->sgat_h.buflen */
r_srp->sh_var = sh_var;
r_srp->cmd_opcode = 0xff; /* set invalid opcode (VS), 0x0 is TUR */
-fini:
/* If setup stalls (e.g. blk_get_request()) debug shows 'elap=1 ns' */
if (test_bit(SG_FFD_TIME_IN_NS, fp->ffd_bm))
r_srp->start_ns = S64_MAX;
if (ws_rq && rs_rsv_srp) {
- rs_sfp->ws_srp = r_srp;
/* write-side "shares" the read-side reserve request's data buffer */
r_srp->sgatp = &rs_rsv_srp->sgat_h;
- } else if (sh_var == SG_SHR_RS_RQ && test_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm))
+ rs_rsv_srp->sh_srp = r_srp;
+ r_srp->sh_srp = rs_rsv_srp;
+ } else if (sh_var == SG_SHR_RS_RQ && test_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm)) {
clear_bit(SG_FFD_READ_SIDE_ERR, fp->ffd_bm);
+ }
err_out:
- if (IS_ERR(r_srp) && PTR_ERR(r_srp) != -EBUSY && b[0])
- SG_LOG(1, fp, "%s: bad %s\n", __func__, b);
- if (!IS_ERR(r_srp))
+#if IS_ENABLED(SG_LOG_ACTIVE)
+ if (IS_ERR(r_srp)) {
+ int err = -PTR_ERR(r_srp);
+
+ if (err == EBUSY)
+ SG_LOG(4, fp, "%s: EBUSY (as ptr err)\n", __func__);
+ else
+ SG_LOG(1, fp, "%s: %s err=%d\n", __func__, b, err);
+ } else {
SG_LOG(4, fp, "%s: %s %sr_srp=0x%pK\n", __func__, cp,
- ((r_srp == fp->rsv_srp) ? "[rsv] " : ""), r_srp);
+ sg_get_rsv_str_lck(r_srp, "[", "] ", sizeof(b), b),
+ r_srp);
+ }
+#endif
return r_srp;
}
@@ -6094,25 +6781,31 @@ sg_setup_req(struct sg_comm_wr_t *cwrp, enum sg_shr_var sh_var, int dxfr_len)
static void
sg_deact_request(struct sg_fd *sfp, struct sg_request *srp)
{
+ bool is_rsv;
enum sg_rq_state sr_st;
u8 *sbp;
if (WARN_ON(!sfp || !srp))
return;
+ SG_LOG(3, sfp, "%s: srp=%pK\n", __func__, srp);
sbp = srp->sense_bp;
srp->sense_bp = NULL;
sr_st = atomic_read(&srp->rq_st);
- if (sr_st != SG_RQ_SHR_SWAP) { /* mark _BUSY then _INACTIVE at end */
+ if (sr_st != SG_RQ_SHR_SWAP) {
/*
* Can be called from many contexts and it is hard to know
* whether xa locks held. So assume not.
*/
sg_rq_chg_state_force(srp, SG_RQ_INACTIVE);
atomic_inc(&sfp->inactives);
+ is_rsv = test_bit(SG_FRQ_RESERVED, srp->frq_bm);
WRITE_ONCE(srp->frq_bm[0], 0);
+ if (is_rsv)
+ __set_bit(SG_FRQ_RESERVED, srp->frq_bm);
srp->tag = SG_TAG_WILDCARD;
srp->in_resid = 0;
srp->rq_info = 0;
+ srp->sense_len = 0;
}
/* maybe orphaned req, thus never read */
if (sbp)
@@ -6130,16 +6823,15 @@ sg_add_sfp(struct sg_device *sdp, struct file *filp)
unsigned long iflags;
struct sg_fd *sfp;
struct sg_request *srp = NULL;
- struct xarray *xadp = &sdp->sfp_arr;
struct xarray *xafp;
+ struct xarray *xadp;
sfp = kzalloc(sizeof(*sfp), GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!sfp))
return ERR_PTR(-ENOMEM);
init_waitqueue_head(&sfp->cmpl_wait);
xa_init_flags(&sfp->srp_arr, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
- xafp = &sfp->srp_arr;
- kref_init(&sfp->f_ref);
+ kref_init(&sfp->f_ref); /* init to 1; put: sg_release() */
mutex_init(&sfp->f_mutex);
sfp->timeout = SG_DEFAULT_TIMEOUT;
sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER;
@@ -6152,6 +6844,9 @@ sg_add_sfp(struct sg_device *sdp, struct file *filp)
__assign_bit(SG_FFD_Q_AT_TAIL, sfp->ffd_bm, SG_DEFAULT_Q_AT);
sfp->tot_fd_thresh = SG_TOT_FD_THRESHOLD;
atomic_set(&sfp->sum_fd_dlens, 0);
+ atomic_set(&sfp->submitted, 0);
+ atomic_set(&sfp->waiting, 0);
+ atomic_set(&sfp->inactives, 0);
/*
* SG_SCATTER_SZ initializes scatter_elem_sz but different value may
* be given as driver/module parameter (e.g. 'scatter_elem_sz=8192').
@@ -6161,12 +6856,9 @@ sg_add_sfp(struct sg_device *sdp, struct file *filp)
*/
sfp->sgat_elem_sz = scatter_elem_sz;
sfp->parentdp = sdp;
- atomic_set(&sfp->submitted, 0);
- atomic_set(&sfp->waiting, 0);
- atomic_set(&sfp->inactives, 0);
if (SG_IS_DETACHING(sdp)) {
- SG_LOG(1, sfp, "%s: detaching\n", __func__);
+ SG_LOG(1, sfp, "%s: sg%u detaching\n", __func__, sdp->index);
kfree(sfp);
return ERR_PTR(-ENODEV);
}
@@ -6175,6 +6867,7 @@ sg_add_sfp(struct sg_device *sdp, struct file *filp)
rbuf_len = min_t(int, sg_big_buff, sdp->max_sgat_sz);
if (rbuf_len > 0) {
+ xafp = &sfp->srp_arr;
srp = sg_build_reserve(sfp, rbuf_len);
if (IS_ERR(srp)) {
err = PTR_ERR(srp);
@@ -6191,41 +6884,44 @@ sg_add_sfp(struct sg_device *sdp, struct file *filp)
}
xa_lock_irqsave(xafp, iflags);
res = __xa_alloc(xafp, &idx, srp, xa_limit_32b, GFP_ATOMIC);
- if (!res) {
- srp->rq_idx = idx;
- srp->parentfp = sfp;
- sg_rq_chg_state_force_ulck(srp, SG_RQ_INACTIVE);
- atomic_inc(&sfp->inactives);
- }
- xa_unlock_irqrestore(xafp, iflags);
if (res < 0) {
SG_LOG(1, sfp, "%s: xa_alloc(srp) bad, errno=%d\n",
__func__, -res);
- sg_remove_sgat(srp);
+ xa_unlock_irqrestore(xafp, iflags);
+ sg_remove_srp(srp);
kfree(srp);
kfree(sfp);
return ERR_PTR(-EPROTOTYPE);
}
+ srp->rq_idx = idx;
+ srp->parentfp = sfp;
+ sg_rq_chg_state_force_ulck(srp, SG_RQ_INACTIVE);
+ atomic_inc(&sfp->inactives);
+ __set_bit(SG_FRQ_RESERVED, srp->frq_bm);
+ xa_unlock_irqrestore(xafp, iflags);
}
if (!reduced) {
SG_LOG(4, sfp, "%s: built reserve buflen=%d\n", __func__,
rbuf_len);
}
+ xadp = &sdp->sfp_arr;
xa_lock_irqsave(xadp, iflags);
- res = __xa_alloc(xadp, &idx, sfp, xa_limit_32b, GFP_KERNEL);
- xa_unlock_irqrestore(xadp, iflags);
+ res = __xa_alloc(xadp, &idx, sfp, xa_limit_32b, GFP_ATOMIC);
if (unlikely(res < 0)) {
+ xa_unlock_irqrestore(xadp, iflags);
pr_warn("%s: xa_alloc(sdp) bad, o_count=%d, errno=%d\n",
__func__, atomic_read(&sdp->open_cnt), -res);
if (srp) {
- sg_remove_sgat(srp);
+ sg_remove_srp(srp);
kfree(srp);
}
kfree(sfp);
return ERR_PTR(res);
}
sfp->idx = idx;
- kref_get(&sdp->d_ref);
+ __xa_set_mark(xadp, idx, SG_XA_FD_UNSHARED);
+ xa_unlock_irqrestore(xadp, iflags);
+ kref_get(&sdp->d_ref); /* put in: sg_uc_remove_sfp() */
__module_get(THIS_MODULE);
SG_LOG(3, sfp, "%s: success, sfp=0x%pK ++\n", __func__, sfp);
return sfp;
@@ -6259,14 +6955,13 @@ sg_uc_remove_sfp(struct work_struct *work)
return;
}
sdp = sfp->parentdp;
- xadp = &sdp->sfp_arr;
/* Cleanup any responses which were never read(). */
xa_for_each(xafp, idx, srp) {
if (!xa_get_mark(xafp, srp->rq_idx, SG_XA_RQ_INACTIVE))
sg_finish_scsi_blk_rq(srp);
if (srp->sgatp->buflen > 0)
- sg_remove_sgat(srp);
+ sg_remove_srp(srp);
if (unlikely(srp->sense_bp)) {
mempool_free(srp->sense_bp, sg_sense_pool);
srp->sense_bp = NULL;
@@ -6285,6 +6980,7 @@ sg_uc_remove_sfp(struct work_struct *work)
SG_LOG(1, sfp, "%s: expected submitted=0 got %d\n",
__func__, subm);
xa_destroy(xafp);
+ xadp = &sdp->sfp_arr;
xa_lock_irqsave(xadp, iflags);
e_sfp = __xa_erase(xadp, sfp->idx);
xa_unlock_irqrestore(xadp, iflags);
@@ -6297,7 +6993,7 @@ sg_uc_remove_sfp(struct work_struct *work)
kfree(sfp);
scsi_device_put(sdp->device);
- kref_put(&sdp->d_ref, sg_device_destroy);
+ kref_put(&sdp->d_ref, sg_device_destroy); /* get: sg_add_sfp() */
module_put(THIS_MODULE);
}
@@ -6337,7 +7033,7 @@ sg_get_dev(int min_dev)
*/
sdp = ERR_PTR(-ENODEV);
} else
- kref_get(&sdp->d_ref);
+ kref_get(&sdp->d_ref); /* put: sg_open() */
read_unlock_irqrestore(&sg_index_lock, iflags);
return sdp;
}
@@ -6607,23 +7303,26 @@ sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp,
enum sg_rq_state rq_st;
const char *cp;
const char *tp = t_in_ns ? "ns" : "ms";
+ char b[32];
if (unlikely(len < 1))
return 0;
v4 = test_bit(SG_FRQ_IS_V4I, srp->frq_bm);
is_v3v4 = v4 ? true : (srp->s_hdr3.interface_id != '\0');
- if (srp->parentfp->rsv_srp == srp)
+ sg_get_rsv_str(srp, " ", "", sizeof(b), b);
+ if (strlen(b) > 5)
cp = (is_v3v4 && (srp->rq_flags & SG_FLAG_MMAP_IO)) ?
- " mmap>> " : " rsv>> ";
+ " mmap" : "";
else
- cp = (srp->rq_info & SG_INFO_DIRECT_IO_MASK) ?
- " dio>> " : " ";
+ cp = (srp->rq_info & SG_INFO_DIRECT_IO_MASK) ? " dio" : "";
rq_st = atomic_read(&srp->rq_st);
dur = sg_get_dur(srp, &rq_st, t_in_ns, &is_dur);
- n += scnprintf(obp + n, len - n, "%s%s: dlen=%d/%d id=%d", cp,
- sg_rq_st_str(rq_st, false), srp->sgatp->dlen,
+ n += scnprintf(obp + n, len - n, "%s%s>> %s:%d dlen=%d/%d id=%d", b,
+ cp, sg_rq_st_str(rq_st, false), srp->rq_idx, srp->sgatp->dlen,
srp->sgatp->buflen, (int)srp->pack_id);
- if (is_dur) /* cmd/req has completed, waiting for ... */
+ if (test_bit(SG_FFD_NO_DURATION, srp->parentfp->ffd_bm))
+ ;
+ else if (is_dur) /* cmd/req has completed, waiting for ... */
n += scnprintf(obp + n, len - n, " dur=%u%s", dur, tp);
else if (dur < U32_MAX) { /* in-flight or busy (so ongoing) */
if ((srp->rq_flags & SGV4_FLAG_YIELD_TAG) &&
@@ -6636,9 +7335,10 @@ sg_proc_debug_sreq(struct sg_request *srp, int to, bool t_in_ns, char *obp,
if (srp->sh_var != SG_SHR_NONE)
n += scnprintf(obp + n, len - n, " shr=%s",
sg_shr_str(srp->sh_var, false));
+ if (srp->sgatp->num_sgat > 1)
+ n += scnprintf(obp + n, len - n, " sgat=%d", srp->sgatp->num_sgat);
cp = (srp->rq_flags & SGV4_FLAG_HIPRI) ? "hipri " : "";
- n += scnprintf(obp + n, len - n, " sgat=%d %sop=0x%02x\n",
- srp->sgatp->num_sgat, cp, srp->cmd_opcode);
+ n += scnprintf(obp + n, len - n, " %sop=0x%02x\n", cp, srp->cmd_opcode);
return n;
}
@@ -6653,7 +7353,7 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx,
int to, k;
unsigned long iflags;
const char *cp;
- struct sg_request *srp;
+ struct sg_request *srp = fp->rsv_arr[0];
struct sg_device *sdp = fp->parentdp;
if (sg_fd_is_shared(fp))
@@ -6671,14 +7371,19 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx,
n += scnprintf(obp + n, len - n, "timeout=%dms rs", to);
else
n += scnprintf(obp + n, len - n, "timeout=%ds rs", to / 1000);
- n += scnprintf(obp + n, len - n, "v_buflen=%d%s idx=%lu\n cmd_q=%d ",
- fp->rsv_srp->sgatp->buflen, cp, idx,
- (int)!test_bit(SG_FFD_NO_CMD_Q, fp->ffd_bm));
- n += scnprintf(obp + n, len - n,
- "f_packid=%d k_orphan=%d ffd_bm=0x%lx\n",
- (int)test_bit(SG_FFD_FORCE_PACKID, fp->ffd_bm),
- (int)test_bit(SG_FFD_KEEP_ORPHAN, fp->ffd_bm),
- fp->ffd_bm[0]);
+ n += scnprintf(obp + n, len - n, "v_buflen=%d%s fd_idx=%lu\n ",
+ (srp ? srp->sgatp->buflen : -1), cp, idx);
+ if (test_bit(SG_FFD_NO_CMD_Q, fp->ffd_bm))
+ n += scnprintf(obp + n, len - n, " no_cmd_q");
+ if (test_bit(SG_FFD_FORCE_PACKID, fp->ffd_bm))
+ n += scnprintf(obp + n, len - n, " force_packid");
+ if (test_bit(SG_FFD_KEEP_ORPHAN, fp->ffd_bm))
+ n += scnprintf(obp + n, len - n, " keep_orphan");
+ if (test_bit(SG_FFD_EXCL_WAITQ, fp->ffd_bm))
+ n += scnprintf(obp + n, len - n, " excl_waitq");
+ if (test_bit(SG_FFD_SVB_ACTIVE, fp->ffd_bm))
+ n += scnprintf(obp + n, len - n, " svb");
+ n += scnprintf(obp + n, len - n, " fd_bm=0x%lx\n", fp->ffd_bm[0]);
n += scnprintf(obp + n, len - n,
" mmap_sz=%d low_used_idx=%d low_await_idx=%d sum_fd_dlens=%u\n",
fp->mmap_sz, READ_ONCE(fp->low_used_idx), READ_ONCE(fp->low_await_idx),
@@ -6699,7 +7404,7 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx,
if (xa_get_mark(&fp->srp_arr, idx, SG_XA_RQ_INACTIVE))
continue;
if (set_debug)
- n += scnprintf(obp + n, len - n, " frq_bm=0x%lx ",
+ n += scnprintf(obp + n, len - n, " rq_bm=0x%lx",
srp->frq_bm[0]);
else if (test_bit(SG_FRQ_ABORTING, srp->frq_bm))
n += scnprintf(obp + n, len - n,
@@ -6720,7 +7425,7 @@ sg_proc_debug_fd(struct sg_fd *fp, char *obp, int len, unsigned long idx,
if (k == 0)
n += scnprintf(obp + n, len - n, " Inactives:\n");
if (set_debug)
- n += scnprintf(obp + n, len - n, " frq_bm=0x%lx ",
+ n += scnprintf(obp + n, len - n, " rq_bm=0x%lx",
srp->frq_bm[0]);
n += sg_proc_debug_sreq(srp, fp->timeout, t_in_ns,
obp + n, len - n);
diff --git a/include/uapi/scsi/sg.h b/include/uapi/scsi/sg.h
index bf947ebe06dd..a1f35fd34816 100644
--- a/include/uapi/scsi/sg.h
+++ b/include/uapi/scsi/sg.h
@@ -222,6 +222,7 @@ typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
#define SG_SEIRV_DEV_INACT_RQS 0x4 /* sum(inactive rqs) on owning dev */
#define SG_SEIRV_SUBMITTED 0x5 /* number of mrqs submitted+unread */
#define SG_SEIRV_DEV_SUBMITTED 0x6 /* sum(submitted) on all dev's fds */
+#define SG_SEIRV_MAX_RSV_REQS 0x7 /* maximum reserve requests */
/*
* A pointer to the following structure is passed as the third argument to
--
2.25.1
next prev parent reply other threads:[~2021-04-27 21:59 UTC|newest]
Thread overview: 86+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-27 21:56 [PATCH v18 00/83] sg: add v4 interface, request sharing Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 00/45] sg: add v4 interface Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 01/83] sg: move functions around Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 02/83] sg: remove typedefs, type+formatting cleanup Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 03/83] sg: sg_log and is_enabled Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 04/83] sg: rework sg_poll(), minor changes Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 05/83] sg: bitops in sg_device Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 06/83] sg: make open count an atomic Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 07/83] sg: move header to uapi section Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 08/83] sg: speed sg_poll and sg_get_num_waiting Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 09/83] sg: sg_allow_if_err_recovery and renames Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 10/83] sg: improve naming Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 11/83] sg: change rwlock to spinlock Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 12/83] sg: ioctl handling Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 13/83] sg: split sg_read Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 14/83] sg: sg_common_write add structure for arguments Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 15/83] sg: rework sg_vma_fault Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 16/83] sg: rework sg_mmap Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 17/83] sg: replace sg_allow_access Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 18/83] sg: rework scatter gather handling Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 19/83] sg: introduce request state machine Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 20/83] sg: sg_find_srp_by_id Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 21/83] sg: sg_fill_request_element Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 22/83] sg: printk change %p to %pK Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 23/83] sg: xarray for fds in device Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 24/83] sg: xarray for reqs in fd Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 25/83] sg: replace rq array with xarray Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 26/83] sg: sense buffer rework Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 27/83] sg: add sg v4 interface support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 28/83] sg: rework debug info Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 29/83] sg: add 8 byte SCSI LUN to sg_scsi_id Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 30/83] sg: expand sg_comm_wr_t Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 31/83] sg: add sg_iosubmit_v3 and sg_ioreceive_v3 ioctls Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 32/83] sg: add some __must_hold macros Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 33/83] sg: move procfs objects to avoid forward decls Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 34/83] sg: protect multiple receivers Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 35/83] sg: first debugfs support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 36/83] sg: rework mmap support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 37/83] sg: defang allow_dio Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 38/83] sg: warn v3 write system call users Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 39/83] sg: add mmap_sz tracking Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 40/83] sg: remove rcv_done request state Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 41/83] sg: track lowest inactive and await indexes Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 42/83] sg: remove unit attention check for device changed Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 43/83] sg: no_dxfer: move to/from kernel buffers Douglas Gilbert
2021-04-28 7:07 ` Hannes Reinecke
2021-04-27 21:56 ` [PATCH v18 44/83] sg: add blk_poll support Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 45/83] sg: bump version to 4.0.12 Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 46/83] sg: add sg_ioabort ioctl Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 47/83] sg: add sg_set_get_extended ioctl Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 48/83] sg: sgat_elem_sz and sum_fd_dlens Douglas Gilbert
2021-04-27 21:56 ` [PATCH v18 49/83] sg: tag and more_async Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 50/83] sg: add fd sharing , change, unshare Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 51/83] sg: add shared requests Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 52/83] sg: add multiple request support Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 53/83] sg: rename some mrq variables Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 54/83] sg: unlikely likely Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 55/83] sg: mrq abort Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 56/83] sg: reduce atomic operations Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 57/83] sg: add excl_wait flag Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 58/83] sg: tweak sg_find_sfp_by_fd() Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 59/83] sg: add snap_dev flag and snapped in debugfs Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 60/83] sg: compress usercontext to uc Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 61/83] sg: optionally output sg_request.frq_bm flags Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 62/83] sg: work on sg_mrq_sanity() Douglas Gilbert
2021-04-27 21:57 ` Douglas Gilbert [this message]
2021-04-27 21:57 ` [PATCH v18 64/83] sg: device timestamp Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 65/83] sg: condition met is not an error Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 66/83] sg: split sg_setup_req Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 67/83] sg: finish after read-side request Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 68/83] sg: keep share and dout offset flags Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 69/83] sg: add dlen to sg_comm_wr_t Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 70/83] sg: make use of struct sg_mrq_hold Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 71/83] sg: add mmap IO option for mrq metadata Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 72/83] sg: add eventfd support Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 73/83] sg: table of error number explanations Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 74/83] sg: add ordered write flag Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 75/83] sg: expand source line length to 100 characters Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 76/83] sg: add no_attach_msg parameter Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 77/83] sg: add SGV4_FLAG_REC_ORDER Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 78/83] sg: max to read for mrq sg_ioreceive Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 79/83] sg: mrq: if uniform svb then re-use bio_s Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 80/83] sg: expand bvec usage; " Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 81/83] sg: blk_poll/hipri work for mrq Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 82/83] sg: pollable and non-pollable requests Douglas Gilbert
2021-04-27 21:57 ` [PATCH v18 83/83] sg: bump version to 4.0.47 Douglas Gilbert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210427215733.417746-65-dgilbert@interlog.com \
--to=dgilbert@interlog.com \
--cc=hare@suse.de \
--cc=jejb@linux.vnet.ibm.com \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.