* [PATCH 5/10] convert st to use scsi_execute_async
@ 2005-11-08 10:06 Mike Christie
2005-11-12 17:03 ` Kai Makisara
0 siblings, 1 reply; 12+ messages in thread
From: Mike Christie @ 2005-11-08 10:06 UTC (permalink / raw)
To: axboe, linux-scsi
convert st to always send scatterlists and kill scsi_request
usage.
This is the same as last time as it was posted, but with Kai's patches
merged and we now pass the bytes value to scsi_execute_async.
TODO:
- move DIO code to common place or make block layers usable for ULDs.
- move buffer allocation code to common place for all ULDs to use. And
make buffer allocation code handle all queue limits so we can find
out about problems before calling scsi_execute_async.
- move indirect (copy_to/from_user) paths commone place or make block
layers usable for ULDs.
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 6b85f84..9c5ef70 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -50,7 +50,6 @@ static char *verstr = "20050830";
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_ioctl.h>
-#include <scsi/scsi_request.h>
#include <scsi/sg.h>
@@ -188,8 +187,6 @@ static int from_buffer(struct st_buffer
static void move_buffer_data(struct st_buffer *, int);
static void buf_to_sg(struct st_buffer *, unsigned int);
-static int st_map_user_pages(struct scatterlist *, const unsigned int,
- unsigned long, size_t, int, unsigned long);
static int sgl_map_user_pages(struct scatterlist *, const unsigned int,
unsigned long, size_t, int);
static int sgl_unmap_user_pages(struct scatterlist *, const unsigned int, int);
@@ -313,12 +310,13 @@ static inline char *tape_name(struct scs
}
-static void st_analyze_sense(struct scsi_request *SRpnt, struct st_cmdstatus *s)
+static void st_analyze_sense(struct st_request *SRpnt, struct st_cmdstatus *s)
{
const u8 *ucp;
- const u8 *sense = SRpnt->sr_sense_buffer;
+ const u8 *sense = SRpnt->sense;
- s->have_sense = scsi_request_normalize_sense(SRpnt, &s->sense_hdr);
+ s->have_sense = scsi_normalize_sense(SRpnt->sense,
+ SCSI_SENSE_BUFFERSIZE, &s->sense_hdr);
s->flags = 0;
if (s->have_sense) {
@@ -345,9 +343,9 @@ static void st_analyze_sense(struct scsi
/* Convert the result to success code */
-static int st_chk_result(struct scsi_tape *STp, struct scsi_request * SRpnt)
+static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt)
{
- int result = SRpnt->sr_result;
+ int result = SRpnt->result;
u8 scode;
DEB(const char *stp;)
char *name = tape_name(STp);
@@ -366,13 +364,12 @@ static int st_chk_result(struct scsi_tap
DEB(
if (debugging) {
- printk(ST_DEB_MSG "%s: Error: %x, cmd: %x %x %x %x %x %x Len: %d\n",
+ printk(ST_DEB_MSG "%s: Error: %x, cmd: %x %x %x %x %x %x\n",
name, result,
- SRpnt->sr_cmnd[0], SRpnt->sr_cmnd[1], SRpnt->sr_cmnd[2],
- SRpnt->sr_cmnd[3], SRpnt->sr_cmnd[4], SRpnt->sr_cmnd[5],
- SRpnt->sr_bufflen);
+ SRpnt->cmd[0], SRpnt->cmd[1], SRpnt->cmd[2],
+ SRpnt->cmd[3], SRpnt->cmd[4], SRpnt->cmd[5]);
if (cmdstatp->have_sense)
- scsi_print_req_sense("st", SRpnt);
+ __scsi_print_sense("st", SRpnt->sense, SCSI_SENSE_BUFFERSIZE);
} ) /* end DEB */
if (!debugging) { /* Abnormal conditions for tape */
if (!cmdstatp->have_sense)
@@ -386,20 +383,21 @@ static int st_chk_result(struct scsi_tap
/* scode != UNIT_ATTENTION && */
scode != BLANK_CHECK &&
scode != VOLUME_OVERFLOW &&
- SRpnt->sr_cmnd[0] != MODE_SENSE &&
- SRpnt->sr_cmnd[0] != TEST_UNIT_READY) {
+ SRpnt->cmd[0] != MODE_SENSE &&
+ SRpnt->cmd[0] != TEST_UNIT_READY) {
printk(KERN_WARNING "%s: Error with sense data: ", name);
- scsi_print_req_sense("st", SRpnt);
+ __scsi_print_sense("st", SRpnt->sense,
+ SCSI_SENSE_BUFFERSIZE);
}
}
if (cmdstatp->fixed_format &&
STp->cln_mode >= EXTENDED_SENSE_START) { /* Only fixed format sense */
if (STp->cln_sense_value)
- STp->cleaning_req |= ((SRpnt->sr_sense_buffer[STp->cln_mode] &
+ STp->cleaning_req |= ((SRpnt->sense[STp->cln_mode] &
STp->cln_sense_mask) == STp->cln_sense_value);
else
- STp->cleaning_req |= ((SRpnt->sr_sense_buffer[STp->cln_mode] &
+ STp->cleaning_req |= ((SRpnt->sense[STp->cln_mode] &
STp->cln_sense_mask) != 0);
}
if (cmdstatp->have_sense &&
@@ -411,8 +409,8 @@ static int st_chk_result(struct scsi_tap
if (cmdstatp->have_sense &&
scode == RECOVERED_ERROR
#if ST_RECOVERED_WRITE_FATAL
- && SRpnt->sr_cmnd[0] != WRITE_6
- && SRpnt->sr_cmnd[0] != WRITE_FILEMARKS
+ && SRpnt->cmd[0] != WRITE_6
+ && SRpnt->cmd[0] != WRITE_FILEMARKS
#endif
) {
STp->recover_count++;
@@ -420,9 +418,9 @@ static int st_chk_result(struct scsi_tap
DEB(
if (debugging) {
- if (SRpnt->sr_cmnd[0] == READ_6)
+ if (SRpnt->cmd[0] == READ_6)
stp = "read";
- else if (SRpnt->sr_cmnd[0] == WRITE_6)
+ else if (SRpnt->cmd[0] == WRITE_6)
stp = "write";
else
stp = "ioctl";
@@ -438,28 +436,37 @@ static int st_chk_result(struct scsi_tap
/* Wakeup from interrupt */
-static void st_sleep_done(struct scsi_cmnd * SCpnt)
+static void st_sleep_done(void *data, char *sense, int result, int resid)
{
- struct scsi_tape *STp = container_of(SCpnt->request->rq_disk->private_data,
- struct scsi_tape, driver);
+ struct st_request *SRpnt = data;
+ struct scsi_tape *STp = SRpnt->stp;
- (STp->buffer)->cmdstat.midlevel_result = SCpnt->result;
- SCpnt->request->rq_status = RQ_SCSI_DONE;
+ memcpy(SRpnt->sense, sense, SCSI_SENSE_BUFFERSIZE);
+ (STp->buffer)->cmdstat.midlevel_result = SRpnt->result = result;
DEB( STp->write_pending = 0; )
- if (SCpnt->request->waiting)
- complete(SCpnt->request->waiting);
+ if (SRpnt->waiting)
+ complete(SRpnt->waiting);
+}
+
+static struct st_request *st_allocate_request(void)
+{
+ return kzalloc(sizeof(struct st_request), GFP_KERNEL);
+}
+
+static void st_release_request(struct st_request *streq)
+{
+ kfree(streq);
}
/* Do the scsi command. Waits until command performed if do_wait is true.
Otherwise write_behind_check() is used to check that the command
has finished. */
-static struct scsi_request *
-st_do_scsi(struct scsi_request * SRpnt, struct scsi_tape * STp, unsigned char *cmd,
+static struct st_request *
+st_do_scsi(struct st_request * SRpnt, struct scsi_tape * STp, unsigned char *cmd,
int bytes, int direction, int timeout, int retries, int do_wait)
{
struct completion *waiting;
- unsigned char *bp;
/* if async, make sure there's no command outstanding */
if (!do_wait && ((STp->buffer)->last_SRpnt)) {
@@ -473,7 +480,7 @@ st_do_scsi(struct scsi_request * SRpnt,
}
if (SRpnt == NULL) {
- SRpnt = scsi_allocate_request(STp->device, GFP_ATOMIC);
+ SRpnt = st_allocate_request();
if (SRpnt == NULL) {
DEBC( printk(KERN_ERR "%s: Can't get SCSI request.\n",
tape_name(STp)); );
@@ -483,6 +490,7 @@ st_do_scsi(struct scsi_request * SRpnt,
(STp->buffer)->syscall_result = (-EBUSY);
return NULL;
}
+ SRpnt->stp = STp;
}
/* If async IO, set last_SRpnt. This ptr tells write_behind_check
@@ -492,32 +500,26 @@ st_do_scsi(struct scsi_request * SRpnt,
waiting = &STp->wait;
init_completion(waiting);
- SRpnt->sr_use_sg = STp->buffer->do_dio || (bytes > (STp->buffer)->frp[0].length);
- if (SRpnt->sr_use_sg) {
- if (!STp->buffer->do_dio)
- buf_to_sg(STp->buffer, bytes);
- SRpnt->sr_use_sg = (STp->buffer)->sg_segs;
- bp = (char *) &((STp->buffer)->sg[0]);
- } else
- bp = (STp->buffer)->b_data;
- SRpnt->sr_data_direction = direction;
- SRpnt->sr_cmd_len = 0;
- SRpnt->sr_request->waiting = waiting;
- SRpnt->sr_request->rq_status = RQ_SCSI_BUSY;
- SRpnt->sr_request->rq_disk = STp->disk;
- SRpnt->sr_request->end_io = blk_end_sync_rq;
- STp->buffer->cmdstat.have_sense = 0;
+ SRpnt->waiting = waiting;
- scsi_do_req(SRpnt, (void *) cmd, bp, bytes,
- st_sleep_done, timeout, retries);
+ if (!STp->buffer->do_dio)
+ buf_to_sg(STp->buffer, bytes);
+
+ memcpy(SRpnt->cmd, cmd, sizeof(SRpnt->cmd));
+ STp->buffer->cmdstat.have_sense = 0;
+ STp->buffer->syscall_result = 0;
- if (do_wait) {
+ if (scsi_execute_async(STp->device, cmd, direction,
+ &((STp->buffer)->sg[0]), bytes, (STp->buffer)->sg_segs,
+ timeout, retries, SRpnt, st_sleep_done, GFP_KERNEL))
+ /* could not allocate the buffer or request was too large */
+ (STp->buffer)->syscall_result = (-EBUSY);
+ else if (do_wait) {
wait_for_completion(waiting);
- SRpnt->sr_request->waiting = NULL;
- if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
- SRpnt->sr_result |= (DRIVER_ERROR << 24);
+ SRpnt->waiting = NULL;
(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
}
+
return SRpnt;
}
@@ -532,7 +534,7 @@ static int write_behind_check(struct scs
struct st_buffer *STbuffer;
struct st_partstat *STps;
struct st_cmdstatus *cmdstatp;
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
STbuffer = STp->buffer;
if (!STbuffer->writing)
@@ -548,12 +550,10 @@ static int write_behind_check(struct scs
wait_for_completion(&(STp->wait));
SRpnt = STbuffer->last_SRpnt;
STbuffer->last_SRpnt = NULL;
- SRpnt->sr_request->waiting = NULL;
- if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
- SRpnt->sr_result |= (DRIVER_ERROR << 24);
+ SRpnt->waiting = NULL;
(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
STbuffer->buffer_bytes -= STbuffer->writing;
STps = &(STp->ps[STp->partition]);
@@ -593,7 +593,7 @@ static int write_behind_check(struct scs
it messes up the block number). */
static int cross_eof(struct scsi_tape * STp, int forward)
{
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
unsigned char cmd[MAX_COMMAND_SIZE];
cmd[0] = SPACE;
@@ -613,7 +613,7 @@ static int cross_eof(struct scsi_tape *
if (!SRpnt)
return (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
if ((STp->buffer)->cmdstat.midlevel_result != 0)
@@ -630,7 +630,7 @@ static int flush_write_buffer(struct scs
int offset, transfer, blks;
int result;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct st_partstat *STps;
result = write_behind_check(STp);
@@ -688,7 +688,7 @@ static int flush_write_buffer(struct scs
STp->dirty = 0;
(STp->buffer)->buffer_bytes = 0;
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
}
return result;
@@ -785,7 +785,7 @@ static int set_mode_densblk(struct scsi_
}
-/* Lock or unlock the drive door. Don't use when scsi_request allocated. */
+/* Lock or unlock the drive door. Don't use when st_request allocated. */
static int do_door_lock(struct scsi_tape * STp, int do_lock)
{
int retval, cmd;
@@ -844,7 +844,7 @@ static int test_ready(struct scsi_tape *
int attentions, waits, max_wait, scode;
int retval = CHKRES_READY, new_session = 0;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct st_cmdstatus *cmdstatp = &STp->buffer->cmdstat;
max_wait = do_wait ? ST_BLOCK_SECONDS : 0;
@@ -903,7 +903,7 @@ static int test_ready(struct scsi_tape *
}
if (SRpnt != NULL)
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
return retval;
}
@@ -918,7 +918,7 @@ static int check_tape(struct scsi_tape *
int i, retval, new_session = 0, do_wait;
unsigned char cmd[MAX_COMMAND_SIZE], saved_cleaning;
unsigned short st_flags = filp->f_flags;
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct st_modedef *STm;
struct st_partstat *STps;
char *name = tape_name(STp);
@@ -993,7 +993,7 @@ static int check_tape(struct scsi_tape *
goto err_out;
}
- if (!SRpnt->sr_result && !STp->buffer->cmdstat.have_sense) {
+ if (!SRpnt->result && !STp->buffer->cmdstat.have_sense) {
STp->max_block = ((STp->buffer)->b_data[1] << 16) |
((STp->buffer)->b_data[2] << 8) | (STp->buffer)->b_data[3];
STp->min_block = ((STp->buffer)->b_data[4] << 8) |
@@ -1045,7 +1045,7 @@ static int check_tape(struct scsi_tape *
}
STp->drv_write_prot = ((STp->buffer)->b_data[2] & 0x80) != 0;
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
STp->inited = 1;
@@ -1196,7 +1196,7 @@ static int st_flush(struct file *filp)
{
int result = 0, result2;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct scsi_tape *STp = filp->private_data;
struct st_modedef *STm = &(STp->modes[STp->current_mode]);
struct st_partstat *STps = &(STp->ps[STp->partition]);
@@ -1249,7 +1249,7 @@ static int st_flush(struct file *filp)
cmdstatp->sense_hdr.sense_key == RECOVERED_ERROR) &&
(!cmdstatp->remainder_valid || cmdstatp->uremainder64 == 0))) {
/* Write successful at EOM */
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
if (STps->drv_file >= 0)
STps->drv_file++;
@@ -1259,7 +1259,7 @@ static int st_flush(struct file *filp)
STps->eof = ST_FM;
}
else { /* Write error */
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
printk(KERN_ERR "%s: Error on write filemark.\n", name);
if (result == 0)
@@ -1400,11 +1400,11 @@ static int setup_buffering(struct scsi_t
i = STp->try_dio && try_rdio;
else
i = STp->try_dio && try_wdio;
+
if (i && ((unsigned long)buf & queue_dma_alignment(
STp->device->request_queue)) == 0) {
- i = st_map_user_pages(&(STbp->sg[0]), STbp->use_sg,
- (unsigned long)buf, count, (is_read ? READ : WRITE),
- STp->max_pfn);
+ i = sgl_map_user_pages(&(STbp->sg[0]), STbp->use_sg,
+ (unsigned long)buf, count, (is_read ? READ : WRITE));
if (i > 0) {
STbp->do_dio = i;
STbp->buffer_bytes = 0; /* can be used as transfer counter */
@@ -1472,7 +1472,7 @@ st_write(struct file *filp, const char _
int async_write;
unsigned char cmd[MAX_COMMAND_SIZE];
const char __user *b_point;
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct scsi_tape *STp = filp->private_data;
struct st_modedef *STm;
struct st_partstat *STps;
@@ -1624,7 +1624,7 @@ st_write(struct file *filp, const char _
retval = STbp->syscall_result;
goto out;
}
- if (async_write) {
+ if (async_write && !STbp->syscall_result) {
STbp->writing = transfer;
STp->dirty = !(STbp->writing ==
STbp->buffer_bytes);
@@ -1698,7 +1698,7 @@ st_write(struct file *filp, const char _
} else {
count += do_count;
STps->drv_block = (-1); /* Too cautious? */
- retval = (-EIO);
+ retval = STbp->syscall_result;
}
}
@@ -1728,7 +1728,7 @@ st_write(struct file *filp, const char _
out:
if (SRpnt != NULL)
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
release_buffering(STp);
up(&STp->lock);
@@ -1742,11 +1742,11 @@ st_write(struct file *filp, const char _
Does release user buffer mapping if it is set.
*/
static long read_tape(struct scsi_tape *STp, long count,
- struct scsi_request ** aSRpnt)
+ struct st_request ** aSRpnt)
{
int transfer, blks, bytes;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct st_modedef *STm;
struct st_partstat *STps;
struct st_buffer *STbp;
@@ -1802,10 +1802,10 @@ static long read_tape(struct scsi_tape *
retval = 1;
DEBC(printk(ST_DEB_MSG "%s: Sense: %2x %2x %2x %2x %2x %2x %2x %2x\n",
name,
- SRpnt->sr_sense_buffer[0], SRpnt->sr_sense_buffer[1],
- SRpnt->sr_sense_buffer[2], SRpnt->sr_sense_buffer[3],
- SRpnt->sr_sense_buffer[4], SRpnt->sr_sense_buffer[5],
- SRpnt->sr_sense_buffer[6], SRpnt->sr_sense_buffer[7]));
+ SRpnt->sense[0], SRpnt->sense[1],
+ SRpnt->sense[2], SRpnt->sense[3],
+ SRpnt->sense[4], SRpnt->sense[5],
+ SRpnt->sense[6], SRpnt->sense[7]));
if (cmdstatp->have_sense) {
if (cmdstatp->sense_hdr.sense_key == BLANK_CHECK)
@@ -1835,7 +1835,7 @@ static long read_tape(struct scsi_tape *
}
STbp->buffer_bytes = bytes - transfer;
} else {
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = *aSRpnt = NULL;
if (transfer == blks) { /* We did not get anything, error */
printk(KERN_NOTICE "%s: Incorrect block size.\n", name);
@@ -1929,7 +1929,7 @@ st_read(struct file *filp, char __user *
ssize_t retval = 0;
ssize_t i, transfer;
int special, do_dio = 0;
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct scsi_tape *STp = filp->private_data;
struct st_modedef *STm;
struct st_partstat *STps;
@@ -2054,7 +2054,7 @@ st_read(struct file *filp, char __user *
out:
if (SRpnt != NULL) {
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
}
if (do_dio) {
@@ -2284,7 +2284,7 @@ static int st_set_options(struct scsi_ta
static int read_mode_page(struct scsi_tape *STp, int page, int omit_block_descs)
{
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
memset(cmd, 0, MAX_COMMAND_SIZE);
cmd[0] = MODE_SENSE;
@@ -2298,7 +2298,7 @@ static int read_mode_page(struct scsi_ta
if (SRpnt == NULL)
return (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
return (STp->buffer)->syscall_result;
}
@@ -2310,7 +2310,7 @@ static int write_mode_page(struct scsi_t
{
int pgo;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
memset(cmd, 0, MAX_COMMAND_SIZE);
cmd[0] = MODE_SELECT;
@@ -2329,7 +2329,7 @@ static int write_mode_page(struct scsi_t
if (SRpnt == NULL)
return (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
return (STp->buffer)->syscall_result;
}
@@ -2412,7 +2412,7 @@ static int do_load_unload(struct scsi_ta
DEB( char *name = tape_name(STp); )
unsigned char cmd[MAX_COMMAND_SIZE];
struct st_partstat *STps;
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
if (STp->ready != ST_READY && !load_code) {
if (STp->ready == ST_NO_TAPE)
@@ -2455,7 +2455,7 @@ static int do_load_unload(struct scsi_ta
return (STp->buffer)->syscall_result;
retval = (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
if (!retval) { /* SCSI command successful */
@@ -2503,7 +2503,7 @@ static int st_int_ioctl(struct scsi_tape
int ioctl_result;
int chg_eof = 1;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct st_partstat *STps;
int fileno, blkno, at_sm, undone;
int datalen = 0, direction = DMA_NONE;
@@ -2757,7 +2757,7 @@ static int st_int_ioctl(struct scsi_tape
ioctl_result = (STp->buffer)->syscall_result;
if (!ioctl_result) { /* SCSI command successful */
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
STps->drv_block = blkno;
STps->drv_file = fileno;
@@ -2872,7 +2872,7 @@ static int st_int_ioctl(struct scsi_tape
/* Try the other possible state of Page Format if not
already tried */
STp->use_pf = !STp->use_pf | PF_TESTED;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
return st_int_ioctl(STp, cmd_in, arg);
}
@@ -2882,7 +2882,7 @@ static int st_int_ioctl(struct scsi_tape
if (cmdstatp->sense_hdr.sense_key == BLANK_CHECK)
STps->eof = ST_EOD;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
}
@@ -2898,7 +2898,7 @@ static int get_location(struct scsi_tape
{
int result;
unsigned char scmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
DEB( char *name = tape_name(STp); )
if (STp->ready != ST_READY)
@@ -2944,7 +2944,7 @@ static int get_location(struct scsi_tape
DEBC(printk(ST_DEB_MSG "%s: Got tape pos. blk %d part %d.\n", name,
*block, *partition));
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
return result;
@@ -2961,7 +2961,7 @@ static int set_location(struct scsi_tape
unsigned int blk;
int timeout;
unsigned char scmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
DEB( char *name = tape_name(STp); )
if (STp->ready != ST_READY)
@@ -3047,7 +3047,7 @@ static int set_location(struct scsi_tape
result = 0;
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
return result;
@@ -3577,7 +3577,7 @@ static long st_compat_ioctl(struct file
static struct st_buffer *
new_tape_buffer(int from_initialization, int need_dma, int max_sg)
{
- int i, got = 0, segs = 0;
+ int i, got = 0;
gfp_t priority;
struct st_buffer *tb;
@@ -3594,10 +3594,8 @@ static struct st_buffer *
return NULL;
}
memset(tb, 0, i);
- tb->frp_segs = tb->orig_frp_segs = segs;
+ tb->frp_segs = tb->orig_frp_segs = 0;
tb->use_sg = max_sg;
- if (segs > 0)
- tb->b_data = page_address(tb->sg[0].page);
tb->frp = (struct st_buf_fragment *)(&(tb->sg[0]) + max_sg);
tb->in_use = 1;
@@ -3628,7 +3626,7 @@ static int enlarge_buffer(struct st_buff
priority = GFP_KERNEL | __GFP_NOWARN;
if (need_dma)
priority |= GFP_DMA;
- for (b_size = PAGE_SIZE, order=0;
+ for (b_size = PAGE_SIZE, order=0; order <= 6 &&
b_size < new_size - STbuffer->buffer_size;
order++, b_size *= 2)
; /* empty */
@@ -3882,7 +3880,6 @@ static int st_probe(struct device *dev)
struct st_buffer *buffer;
int i, j, mode, dev_num, error;
char *stp;
- u64 bounce_limit;
if (SDp->type != TYPE_TAPE)
return -ENODEV;
@@ -3994,11 +3991,6 @@ static int st_probe(struct device *dev)
tpnt->long_timeout = ST_LONG_TIMEOUT;
tpnt->try_dio = try_direct_io && !SDp->host->unchecked_isa_dma;
- bounce_limit = scsi_calculate_bounce_limit(SDp->host) >> PAGE_SHIFT;
- if (bounce_limit > ULONG_MAX)
- bounce_limit = ULONG_MAX;
- tpnt->max_pfn = bounce_limit;
-
for (i = 0; i < ST_NBR_MODES; i++) {
STm = &(tpnt->modes[i]);
STm->defined = 0;
@@ -4077,9 +4069,9 @@ static int st_probe(struct device *dev)
sdev_printk(KERN_WARNING, SDp,
"Attached scsi tape %s", tape_name(tpnt));
- printk(KERN_WARNING "%s: try direct i/o: %s (alignment %d B), max page reachable by HBA %lu\n",
+ printk(KERN_WARNING "%s: try direct i/o: %s (alignment %d B)\n",
tape_name(tpnt), tpnt->try_dio ? "yes" : "no",
- queue_dma_alignment(SDp->request_queue) + 1, tpnt->max_pfn);
+ queue_dma_alignment(SDp->request_queue) + 1);
return 0;
@@ -4408,34 +4400,6 @@ static void do_create_class_files(struct
return;
}
-
-/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if
- - mapping of all pages not successful
- - any page is above max_pfn
- (i.e., either completely successful or fails)
-*/
-static int st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
- unsigned long uaddr, size_t count, int rw,
- unsigned long max_pfn)
-{
- int i, nr_pages;
-
- nr_pages = sgl_map_user_pages(sgl, max_pages, uaddr, count, rw);
- if (nr_pages <= 0)
- return nr_pages;
-
- for (i=0; i < nr_pages; i++) {
- if (page_to_pfn(sgl[i].page) > max_pfn)
- goto out_unmap;
- }
- return nr_pages;
-
- out_unmap:
- sgl_unmap_user_pages(sgl, nr_pages, 0);
- return 0;
-}
-
-
/* The following functions may be useful for a larger audience. */
static int sgl_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
unsigned long uaddr, size_t count, int rw)
diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h
index 790acac..4112090 100644
--- a/drivers/scsi/st.h
+++ b/drivers/scsi/st.h
@@ -4,6 +4,7 @@
#include <linux/completion.h>
#include <linux/kref.h>
+#include <scsi/scsi_cmnd.h>
/* Descriptor for analyzed sense data */
struct st_cmdstatus {
@@ -17,6 +18,17 @@ struct st_cmdstatus {
u8 deferred;
};
+struct scsi_tape;
+
+/* scsi tape command */
+struct st_request {
+ unsigned char cmd[MAX_COMMAND_SIZE];
+ unsigned char sense[SCSI_SENSE_BUFFERSIZE];
+ int result;
+ struct scsi_tape *stp;
+ struct completion *waiting;
+};
+
/* The tape buffer descriptor. */
struct st_buffer {
unsigned char in_use;
@@ -28,7 +40,7 @@ struct st_buffer {
int read_pointer;
int writing;
int syscall_result;
- struct scsi_request *last_SRpnt;
+ struct st_request *last_SRpnt;
struct st_cmdstatus cmdstat;
unsigned char *b_data;
unsigned short use_sg; /* zero or max number of s/g segments for this adapter */
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-08 10:06 [PATCH 5/10] convert st to use scsi_execute_async Mike Christie
@ 2005-11-12 17:03 ` Kai Makisara
2005-11-12 19:12 ` Mike Christie
0 siblings, 1 reply; 12+ messages in thread
From: Kai Makisara @ 2005-11-12 17:03 UTC (permalink / raw)
To: Mike Christie; +Cc: Jens Axboe, linux-scsi
On Tue, 8 Nov 2005, Mike Christie wrote:
> convert st to always send scatterlists and kill scsi_request
> usage.
>
> This is the same as last time as it was posted, but with Kai's patches
> merged and we now pass the bytes value to scsi_execute_async.
>
This does not work "out of the box". The changes in st are almost the same
ones than before but the world around st has changed ;-) This has revealed
at least one bug in st that is fixed by the patch at the end of this
message. After this patch some simple tests succeed but there seem to be
still problems with larger requests. I don't have time to continue
debugging just now but I should be able to post more results during this
weekend.
Kai
--- linux-2.6.14-blk1/drivers/scsi/st.c.org 2005-11-09 22:36:56.000000000 +0200
+++ linux-2.6.14-blk1/drivers/scsi/st.c 2005-11-12 17:59:22.000000000 +0200
@@ -3666,6 +3666,7 @@
}
STbuffer->frp_segs = STbuffer->orig_frp_segs;
STbuffer->frp_sg_current = 0;
+ STbuffer->sg_segs = 0;
}
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-12 17:03 ` Kai Makisara
@ 2005-11-12 19:12 ` Mike Christie
2005-11-12 19:54 ` Mike Christie
0 siblings, 1 reply; 12+ messages in thread
From: Mike Christie @ 2005-11-12 19:12 UTC (permalink / raw)
To: Kai Makisara; +Cc: Jens Axboe, linux-scsi
Kai Makisara wrote:
> On Tue, 8 Nov 2005, Mike Christie wrote:
>
>
>>convert st to always send scatterlists and kill scsi_request
>>usage.
>>
>>This is the same as last time as it was posted, but with Kai's patches
>>merged and we now pass the bytes value to scsi_execute_async.
>>
>
> This does not work "out of the box". The changes in st are almost the same
> ones than before but the world around st has changed ;-) This has revealed
> at least one bug in st that is fixed by the patch at the end of this
ok thanks.
> message. After this patch some simple tests succeed but there seem to be
> still problems with larger requests. I don't have time to continue
> debugging just now but I should be able to post more results during this
> weekend.
>
Was there a oops or lockup or any debug output you can send me? I will
try some more large request tests with scsi_debug. You also have to
compile your kernel with SCSI_MAX_PHYS_SEGMENTS == 255 to get larger
requests now.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-12 19:12 ` Mike Christie
@ 2005-11-12 19:54 ` Mike Christie
2005-11-13 8:04 ` Kai Makisara
0 siblings, 1 reply; 12+ messages in thread
From: Mike Christie @ 2005-11-12 19:54 UTC (permalink / raw)
To: Mike Christie; +Cc: Kai Makisara, Jens Axboe, linux-scsi, dougg
Mike Christie wrote:
> Kai Makisara wrote:
>
>> On Tue, 8 Nov 2005, Mike Christie wrote:
>>
>>
>>> convert st to always send scatterlists and kill scsi_request
>>> usage.
>>>
>>> This is the same as last time as it was posted, but with Kai's patches
>>> merged and we now pass the bytes value to scsi_execute_async.
>>>
>>
>> This does not work "out of the box". The changes in st are almost the
>> same ones than before but the world around st has changed ;-) This has
>> revealed at least one bug in st that is fixed by the patch at the end
>> of this
>
>
> ok thanks.
>
>> message. After this patch some simple tests succeed but there seem to
>> be still problems with larger requests. I don't have time to continue
>> debugging just now but I should be able to post more results during
>> this weekend.
>>
>
> Was there a oops or lockup or any debug output you can send me? I will
> try some more large request tests with scsi_debug. You also have to
> compile your kernel with SCSI_MAX_PHYS_SEGMENTS == 255 to get larger
> requests now.
Oh yeah I put the patches Christoph asked me to make, and changes that
Jens and Pat Mansfield asked for, and a patch to increase
SCSI_MAX_PHYS_SEGMENTS here
http://www.cs.wisc.edu/~michaelc/block/use-sg/v11/
They were made against a scsi-misc tree I got today. But I was not
actually able to test against that tree becuase the current git trees
(scsi-misc, linux-2.6.git, etc) do not boot for me.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-12 19:54 ` Mike Christie
@ 2005-11-13 8:04 ` Kai Makisara
2005-11-13 17:07 ` Doug Ledford
0 siblings, 1 reply; 12+ messages in thread
From: Kai Makisara @ 2005-11-13 8:04 UTC (permalink / raw)
To: Mike Christie; +Cc: Jens Axboe, linux-scsi, Douglas Gilbert
On Sat, 12 Nov 2005, Mike Christie wrote:
> Mike Christie wrote:
> > Kai Makisara wrote:
> >
> > > On Tue, 8 Nov 2005, Mike Christie wrote:
> > >
> > >
> > > > convert st to always send scatterlists and kill scsi_request
> > > > usage.
> > > >
> > > > This is the same as last time as it was posted, but with Kai's patches
> > > > merged and we now pass the bytes value to scsi_execute_async.
> > > >
> > >
> > > This does not work "out of the box". The changes in st are almost the same
> > > ones than before but the world around st has changed ;-) This has revealed
> > > at least one bug in st that is fixed by the patch at the end of this
> >
> >
> > ok thanks.
> >
> > > message. After this patch some simple tests succeed but there seem to be
> > > still problems with larger requests. I don't have time to continue
> > > debugging just now but I should be able to post more results during this
> > > weekend.
> > >
> >
> > Was there a oops or lockup or any debug output you can send me? I will try
> > some more large request tests with scsi_debug. You also have to compile your
> > kernel with SCSI_MAX_PHYS_SEGMENTS == 255 to get larger requests now.
>
It was an oops in sgl_unmap_user_pages(). The reason is this:
/* XXX: just for debug. Remove when PageReserved is removed */
BUG_ON(PageReserved(page));
I was using /dev/zero as input and it triggers this. When I used a file as
input, this did not trigger. Should this BUG_ON be removed?
In the same log I noticed that there was another ->sg_segs inconsistency.
Also, the field ->last_SRpnt was not reset when scsi_execute_async()
failed. This caused the error message "Async command already active"
later and prevented proper close.
While doing the changes, I noticed that the current code (since
2.6.0-test4) does not set the pages dirty when reading with direct i/o.
All of these st problems (including the one I sent earlier) are fixed in
the patch at the end of this message. These fixes should probably be
included already in 2.6.15.
After these fixes, the tape seems to operate as expected. Without other
changes, the largest block size with sym53c896 SCSI adapter is 384 kB. The
maximum number of sg segments is set to 96 and clustering is disabled in
the driver. 96 x 4 kB = 384 kB. OK.
I enabled clustering and set max_sectors to 10000 in the SCSI HBA driver.
Now the block size limit is 5000 kB as expected.
>
> Oh yeah I put the patches Christoph asked me to make, and changes that Jens
> and Pat Mansfield asked for, and a patch to increase SCSI_MAX_PHYS_SEGMENTS
> here http://www.cs.wisc.edu/~michaelc/block/use-sg/v11/
>
> They were made against a scsi-misc tree I got today. But I was not actually
> able to test against that tree becuase the current git trees (scsi-misc,
> linux-2.6.git, etc) do not boot for me.
>
I will test the new version together with my changes later today (if it
boots for me).
Kai
--------------------------------------8<------------------------------------
--- linux-2.6.14-blk1/drivers/scsi/st.c.org 2005-11-09 22:36:56.000000000 +0200
+++ linux-2.6.14-blk1/drivers/scsi/st.c 2005-11-13 09:30:11.000000000 +0200
@@ -509,9 +509,11 @@
if (scsi_execute_async(STp->device, cmd, direction,
&((STp->buffer)->sg[0]), bytes, (STp->buffer)->sg_segs,
- timeout, retries, SRpnt, st_sleep_done, GFP_KERNEL))
+ timeout, retries, SRpnt, st_sleep_done, GFP_KERNEL)) {
/* could not allocate the buffer or request was too large */
(STp->buffer)->syscall_result = (-EBUSY);
+ (STp->buffer)->last_SRpnt = NULL;
+ }
else if (do_wait) {
wait_for_completion(waiting);
SRpnt->waiting = NULL;
@@ -1447,14 +1449,15 @@
/* Can be called more than once after each setup_buffer() */
-static void release_buffering(struct scsi_tape *STp)
+static void release_buffering(struct scsi_tape *STp, int is_read)
{
struct st_buffer *STbp;
STbp = STp->buffer;
if (STbp->do_dio) {
- sgl_unmap_user_pages(&(STbp->sg[0]), STbp->do_dio, 0);
+ sgl_unmap_user_pages(&(STbp->sg[0]), STbp->do_dio, is_read);
STbp->do_dio = 0;
+ STbp->sg_segs = 0;
}
}
@@ -1727,7 +1730,7 @@
out:
if (SRpnt != NULL)
st_release_request(SRpnt);
- release_buffering(STp);
+ release_buffering(STp, 0);
up(&STp->lock);
return retval;
@@ -1785,7 +1788,7 @@
SRpnt = *aSRpnt;
SRpnt = st_do_scsi(SRpnt, STp, cmd, bytes, DMA_FROM_DEVICE,
STp->device->timeout, MAX_RETRIES, 1);
- release_buffering(STp);
+ release_buffering(STp, 1);
*aSRpnt = SRpnt;
if (!SRpnt)
return STbp->syscall_result;
@@ -2056,7 +2059,7 @@
SRpnt = NULL;
}
if (do_dio) {
- release_buffering(STp);
+ release_buffering(STp, 1);
STbp->buffer_bytes = 0;
}
up(&STp->lock);
@@ -3666,6 +3669,7 @@
}
STbuffer->frp_segs = STbuffer->orig_frp_segs;
STbuffer->frp_sg_current = 0;
+ STbuffer->sg_segs = 0;
}
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-13 8:04 ` Kai Makisara
@ 2005-11-13 17:07 ` Doug Ledford
2005-11-13 18:08 ` Kai Makisara
2005-11-14 7:55 ` Douglas Gilbert
0 siblings, 2 replies; 12+ messages in thread
From: Doug Ledford @ 2005-11-13 17:07 UTC (permalink / raw)
To: Kai Makisara; +Cc: Mike Christie, Jens Axboe, linux-scsi, Douglas Gilbert
[-- Attachment #1: Type: text/plain, Size: 1788 bytes --]
Kai Makisara wrote:
> On Sat, 12 Nov 2005, Mike Christie wrote:
I noticed that these patches still have the same bug that the 2.4 kernel
st driver has, namely the holding of the st's SCSI request struct until
write_behind_check is called. This behavior is responsible for at least
two bugs with tape systems under 2.4 that we've fixed. The first bug is
that if you perform a write to a tape device that involves an async
write behind request, then attempt to access the device via the sg
mechanism without performing any intervening read or ioctl commands on
the st device, the sg access will hang. This only happens on SCSI
controllers that set the cmd_per_lun value == 1 (eg. mptscsih). In
order to replicate this problem you need one application writing to the
tape device, then pausing, then something as simple as attempting to do
an INQUIRY to the tape while the writer is paused causes the hang. This
happens at least with NetBackup, possibly with others as well. The
second bug is related to multiple tape usage on the same system. It
only happens on x86_64, not i686, but with multiple tapes in use the
system eventually attempts to dma map a null pointer resulting in a
BUG(). I didn't root cause the dma mapping issue, but I did verify that
once the initial bug was fixed, the dma mapping bug went away as well
(either because whatever race window existed was reduced to so small
that we no longer hit it or the problem was in fact fixed). The patch
we used to solve the problem is attached. As a side note, holding on to
a command without any upper bound on when it will be released is simply
a *bad* idea. Get the information you need from the command and free it.
--
Doug Ledford <dledford@redhat.com>
http://people.redhat.com/dledford
[-- Attachment #2: st-tape.patch --]
[-- Type: text/x-patch, Size: 1089 bytes --]
--- drivers/scsi-orig/st.c 2005-09-14 17:44:16.000000000 -0400
+++ drivers/scsi/st.c 2005-09-15 17:36:52.000000000 -0400
@@ -353,7 +353,14 @@
(STp->buffer)->last_SRpnt = SCpnt->sc_request;
DEB( STp->write_pending = 0; )
- complete(SCpnt->request.waiting);
+ if ((STp->buffer)->writing) {
+ /* This is a write-behind request, we need to release the
+ * scsi request struct */
+ (STp->buffer)->syscall_result = st_chk_result(STp, SCpnt->sc_request);
+ SCpnt->sc_request->sr_request.waiting = NULL;
+ scsi_release_request(SCpnt->sc_request);
+ }
+ complete(&(STp->wait));
}
@@ -423,10 +430,6 @@
) /* end DEB */
wait_for_completion(&(STp->wait));
- (STp->buffer)->last_SRpnt->sr_request.waiting = NULL;
-
- (STp->buffer)->syscall_result = st_chk_result(STp, (STp->buffer)->last_SRpnt);
- scsi_release_request((STp->buffer)->last_SRpnt);
STbuffer->buffer_bytes -= STbuffer->writing;
STps = &(STp->ps[STp->partition]);
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-13 17:07 ` Doug Ledford
@ 2005-11-13 18:08 ` Kai Makisara
2005-11-13 19:49 ` Doug Ledford
2005-11-14 7:55 ` Douglas Gilbert
1 sibling, 1 reply; 12+ messages in thread
From: Kai Makisara @ 2005-11-13 18:08 UTC (permalink / raw)
To: Doug Ledford; +Cc: Mike Christie, Jens Axboe, linux-scsi, Douglas Gilbert
On Sun, 13 Nov 2005, Doug Ledford wrote:
> Kai Makisara wrote:
> > On Sat, 12 Nov 2005, Mike Christie wrote:
>
> I noticed that these patches still have the same bug that the 2.4 kernel st
> driver has, namely the holding of the st's SCSI request struct until
> write_behind_check is called. This behavior is responsible for at least two
> bugs with tape systems under 2.4 that we've fixed. The first bug is that if
> you perform a write to a tape device that involves an async write behind
> request, then attempt to access the device via the sg mechanism without
> performing any intervening read or ioctl commands on the st device, the sg
> access will hang. This only happens on SCSI controllers that set the
> cmd_per_lun value == 1 (eg. mptscsih). In order to replicate this problem you
> need one application writing to the tape device, then pausing, then something
> as simple as attempting to do an INQUIRY to the tape while the writer is
> paused causes the hang. This happens at least with NetBackup, possibly with
> others as well. The second bug is related to multiple tape usage on the same
> system. It only happens on x86_64, not i686, but with multiple tapes in use
> the system eventually attempts to dma map a null pointer resulting in a BUG().
> I didn't root cause the dma mapping issue, but I did verify that once the
> initial bug was fixed, the dma mapping bug went away as well (either because
> whatever race window existed was reduced to so small that we no longer hit it
> or the problem was in fact fixed). The patch we used to solve the problem is
> attached. As a side note, holding on to a command without any upper bound on
> when it will be released is simply a *bad* idea. Get the information you need
> from the command and free it.
>
You are complaining about one feature and reporting a possible bug without
much information. It seems that you (RedHat) have been sitting on this
report for a long time and have shipped the fix for your own clients only.
Not very nice!
Originally there was a reason why the SCSI request struct was held until
write_behind_check. The reason was to execute minimum amount of code in
interrupt context. For a very long time, scsi_done has been called outside
interrupt and this reason is not valid any more. The reason why this has
not changed is that nobody has asked for it.
I don't see any reason why the change you suggest should not be done. Does
anyone else? If nobody complains, I will do the change for 2.6.16.
The dma bug you are talking about is interesting but I don't have any idea
why it is happening. Releasing the SCSI request earlier should not have
anything to do with that.
Mixing sg access with ULD operation is almost always a bad idea.
Thanks for the report and fix.
--
Kai
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-13 18:08 ` Kai Makisara
@ 2005-11-13 19:49 ` Doug Ledford
2005-11-13 22:12 ` Kai Makisara
0 siblings, 1 reply; 12+ messages in thread
From: Doug Ledford @ 2005-11-13 19:49 UTC (permalink / raw)
To: Kai Makisara; +Cc: Mike Christie, Jens Axboe, linux-scsi, Douglas Gilbert
Kai Makisara wrote:
> On Sun, 13 Nov 2005, Doug Ledford wrote:
>
>
>>Kai Makisara wrote:
>>
>>>On Sat, 12 Nov 2005, Mike Christie wrote:
>>
>>I noticed that these patches still have the same bug that the 2.4 kernel st
>>driver has, namely the holding of the st's SCSI request struct until
>>write_behind_check is called. This behavior is responsible for at least two
>>bugs with tape systems under 2.4 that we've fixed. The first bug is that if
>>you perform a write to a tape device that involves an async write behind
>>request, then attempt to access the device via the sg mechanism without
>>performing any intervening read or ioctl commands on the st device, the sg
>>access will hang. This only happens on SCSI controllers that set the
>>cmd_per_lun value == 1 (eg. mptscsih). In order to replicate this problem you
>>need one application writing to the tape device, then pausing, then something
>>as simple as attempting to do an INQUIRY to the tape while the writer is
>>paused causes the hang. This happens at least with NetBackup, possibly with
>>others as well. The second bug is related to multiple tape usage on the same
>>system. It only happens on x86_64, not i686, but with multiple tapes in use
>>the system eventually attempts to dma map a null pointer resulting in a BUG().
>>I didn't root cause the dma mapping issue, but I did verify that once the
>>initial bug was fixed, the dma mapping bug went away as well (either because
>>whatever race window existed was reduced to so small that we no longer hit it
>>or the problem was in fact fixed). The patch we used to solve the problem is
>>attached. As a side note, holding on to a command without any upper bound on
>>when it will be released is simply a *bad* idea. Get the information you need
>>from the command and free it.
>>
>
>
> You are complaining about one feature and reporting a possible bug without
> much information.
I was going to put in a simple reproducer, but the reproducer was based
on code from someone else and I don't know the redistribution status of
it since it wasn't my bug report.
> It seems that you (RedHat) have been sitting on this
> report for a long time and have shipped the fix for your own clients only.
> Not very nice!
Not true at all. The fix hasn't even shipped yet except in a HOTFIX
kernel, it will be released in our next update.
> Originally there was a reason why the SCSI request struct was held until
> write_behind_check. The reason was to execute minimum amount of code in
> interrupt context. For a very long time, scsi_done has been called outside
> interrupt and this reason is not valid any more.
For 2.4 that's not entirely true. The old error handler drivers in 2.4
still do their work in interrupt context, and the driver I referenced,
mptscsih, is an old error handler driver.
> The reason why this has
> not changed is that nobody has asked for it.
>
> I don't see any reason why the change you suggest should not be done. Does
> anyone else? If nobody complains, I will do the change for 2.6.16.
>
> The dma bug you are talking about is interesting but I don't have any idea
> why it is happening. Releasing the SCSI request earlier should not have
> anything to do with that.
I never isolated it down to a root cause. I suspect the more NUMA like
nature of x86_64 compared to i686 SMP has something to do with it.
> Mixing sg access with ULD operation is almost always a bad idea.
Maybe. If you are talking about doing an sg write operation while also
doing st writes, then yeah, that's bad. But there's no inherent reason
that something like INQUIRY or some status command can't be intermixed
into the overall command stream via sg, and I would consider it a bug in
the core scsi layer if it didn't handle that properly.
> Thanks for the report and fix.
No problem.
--
Doug Ledford <dledford@redhat.com>
http://people.redhat.com/dledford
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-13 19:49 ` Doug Ledford
@ 2005-11-13 22:12 ` Kai Makisara
0 siblings, 0 replies; 12+ messages in thread
From: Kai Makisara @ 2005-11-13 22:12 UTC (permalink / raw)
To: Doug Ledford; +Cc: Mike Christie, Jens Axboe, linux-scsi, Douglas Gilbert
On Sun, 13 Nov 2005, Doug Ledford wrote:
> Kai Makisara wrote:
...
> > It seems that you (RedHat) have been sitting on this report for a long time
> > and have shipped the fix for your own clients only. Not very nice!
>
> Not true at all. The fix hasn't even shipped yet except in a HOTFIX kernel,
> it will be released in our next update.
>
OK. I do apologize. I misinterpreted your message.
--
Kai
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-13 17:07 ` Doug Ledford
2005-11-13 18:08 ` Kai Makisara
@ 2005-11-14 7:55 ` Douglas Gilbert
2005-11-14 15:15 ` Jens Axboe
2005-11-14 15:39 ` Doug Ledford
1 sibling, 2 replies; 12+ messages in thread
From: Douglas Gilbert @ 2005-11-14 7:55 UTC (permalink / raw)
To: Doug Ledford; +Cc: Kai Makisara, Mike Christie, Jens Axboe, linux-scsi
Doug Ledford wrote:
> Kai Makisara wrote:
>
>> On Sat, 12 Nov 2005, Mike Christie wrote:
>
>
> I noticed that these patches still have the same bug that the 2.4 kernel
> st driver has, namely the holding of the st's SCSI request struct until
> write_behind_check is called. This behavior is responsible for at least
> two bugs with tape systems under 2.4 that we've fixed. The first bug is
> that if you perform a write to a tape device that involves an async
> write behind request, then attempt to access the device via the sg
> mechanism without performing any intervening read or ioctl commands on
> the st device, the sg access will hang. This only happens on SCSI
> controllers that set the cmd_per_lun value == 1 (eg. mptscsih). In
> order to replicate this problem you need one application writing to the
> tape device, then pausing, then something as simple as attempting to do
> an INQUIRY to the tape while the writer is paused causes the hang. This
> happens at least with NetBackup, possibly with others as well. The
> second bug is related to multiple tape usage on the same system. It
> only happens on x86_64, not i686, but with multiple tapes in use the
> system eventually attempts to dma map a null pointer resulting in a
> BUG(). I didn't root cause the dma mapping issue, but I did verify that
> once the initial bug was fixed, the dma mapping bug went away as well
> (either because whatever race window existed was reduced to so small
> that we no longer hit it or the problem was in fact fixed). The patch
> we used to solve the problem is attached. As a side note, holding on to
> a command without any upper bound on when it will be released is simply
> a *bad* idea. Get the information you need from the command and free it.
Doug,
It might indeed be a bad idea, but there is the odd SCSI
command that is defined that way. I wonder if any cd/dvd
drive implements the GET EVENT STATUS NOTIFICATION command
in asynchronous notification mode (see MMC-4)?
INQUIRY and REPORT LUNS have implicit "head of queue"
task attribute and should not be blocked by the scsi
subsystem in response to a TASK SET FULL status. In the
case of the mptscsih driver, the limit seems to be in
the HBA.
OTOH while formatting SCSI disks in foreground
(immed=0) I noticed that sending an innocent INQUIRY
or TEST UNIT READY can be fatal (for the format). This
occurred because the disk being formatted didn't respond
to the INQUIRY (perhaps it should have returned BUSY),
the INQUIRY timed out and the disk ended up being reset
which aborted the format.
In some cases I think a "fire and forget" timeout would
be useful: when the timeout goes off, just report it
back to the caller, clean up resources, but do _not_
start issuing, a command abort escalating to a lu/target/bus
reset. If the LLD does see a response to that command later,
then it just consigns it to the bit bucket.
Doug Gilbert
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-14 7:55 ` Douglas Gilbert
@ 2005-11-14 15:15 ` Jens Axboe
2005-11-14 15:39 ` Doug Ledford
1 sibling, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2005-11-14 15:15 UTC (permalink / raw)
To: Douglas Gilbert; +Cc: Doug Ledford, Kai Makisara, Mike Christie, linux-scsi
On Mon, Nov 14 2005, Douglas Gilbert wrote:
> It might indeed be a bad idea, but there is the odd SCSI
> command that is defined that way. I wonder if any cd/dvd
> drive implements the GET EVENT STATUS NOTIFICATION command
> in asynchronous notification mode (see MMC-4)?
I don't think so, at least I've never heard of it actually being
implemented. The async notification requires TCQ support in the cd/dvd
device, I've never seen that either.
--
Jens Axboe
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/10] convert st to use scsi_execute_async
2005-11-14 7:55 ` Douglas Gilbert
2005-11-14 15:15 ` Jens Axboe
@ 2005-11-14 15:39 ` Doug Ledford
1 sibling, 0 replies; 12+ messages in thread
From: Doug Ledford @ 2005-11-14 15:39 UTC (permalink / raw)
To: dougg; +Cc: Kai Makisara, Mike Christie, Jens Axboe, linux-scsi
Douglas Gilbert wrote:
> Doug Ledford wrote:
>
>>Kai Makisara wrote:
>>
>>
>>>On Sat, 12 Nov 2005, Mike Christie wrote:
>>
>>
>>I noticed that these patches still have the same bug that the 2.4 kernel
>>st driver has, namely the holding of the st's SCSI request struct until
>>write_behind_check is called. This behavior is responsible for at least
>>two bugs with tape systems under 2.4 that we've fixed. The first bug is
>>that if you perform a write to a tape device that involves an async
>>write behind request, then attempt to access the device via the sg
>>mechanism without performing any intervening read or ioctl commands on
>>the st device, the sg access will hang. This only happens on SCSI
>>controllers that set the cmd_per_lun value == 1 (eg. mptscsih). In
>>order to replicate this problem you need one application writing to the
>>tape device, then pausing, then something as simple as attempting to do
>>an INQUIRY to the tape while the writer is paused causes the hang. This
>>happens at least with NetBackup, possibly with others as well. The
>>second bug is related to multiple tape usage on the same system. It
>>only happens on x86_64, not i686, but with multiple tapes in use the
>>system eventually attempts to dma map a null pointer resulting in a
>>BUG(). I didn't root cause the dma mapping issue, but I did verify that
>>once the initial bug was fixed, the dma mapping bug went away as well
>>(either because whatever race window existed was reduced to so small
>>that we no longer hit it or the problem was in fact fixed). The patch
>>we used to solve the problem is attached. As a side note, holding on to
>>a command without any upper bound on when it will be released is simply
>>a *bad* idea. Get the information you need from the command and free it.
>
>
> Doug,
> It might indeed be a bad idea, but there is the odd SCSI
> command that is defined that way. I wonder if any cd/dvd
> drive implements the GET EVENT STATUS NOTIFICATION command
> in asynchronous notification mode (see MMC-4)?
>
> INQUIRY and REPORT LUNS have implicit "head of queue"
> task attribute and should not be blocked by the scsi
> subsystem in response to a TASK SET FULL status. In the
> case of the mptscsih driver, the limit seems to be in
> the HBA.
No, the card and driver are doing exactly as they are supposed to. When
the st driver holds onto the command until write_behind_check is called,
it keeps the device's active and busy counters at 1. It isn't until the
scsi_release_request is called that scsi_release_command gets called,
decrementing the busy count. As long as the busy count is 1, and
cmd_per_lun on the host is also 1, scsi_request_fn won't send any other
commands even though this one is complete.
> OTOH while formatting SCSI disks in foreground
> (immed=0) I noticed that sending an innocent INQUIRY
> or TEST UNIT READY can be fatal (for the format). This
> occurred because the disk being formatted didn't respond
> to the INQUIRY (perhaps it should have returned BUSY),
> the INQUIRY timed out and the disk ended up being reset
> which aborted the format.
Which would be a valid case for holding onto the command then, but a
completed write isn't. And if you held onto the command the same way
the st driver does, no other commands would ever time out because they
never make it out of the device queue to the driver.
> In some cases I think a "fire and forget" timeout would
> be useful: when the timeout goes off, just report it
> back to the caller, clean up resources, but do _not_
> start issuing, a command abort escalating to a lu/target/bus
> reset. If the LLD does see a response to that command later,
> then it just consigns it to the bit bucket.
>
> Doug Gilbert
--
Doug Ledford <dledford@redhat.com>
http://people.redhat.com/dledford
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2005-11-14 15:14 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-11-08 10:06 [PATCH 5/10] convert st to use scsi_execute_async Mike Christie
2005-11-12 17:03 ` Kai Makisara
2005-11-12 19:12 ` Mike Christie
2005-11-12 19:54 ` Mike Christie
2005-11-13 8:04 ` Kai Makisara
2005-11-13 17:07 ` Doug Ledford
2005-11-13 18:08 ` Kai Makisara
2005-11-13 19:49 ` Doug Ledford
2005-11-13 22:12 ` Kai Makisara
2005-11-14 7:55 ` Douglas Gilbert
2005-11-14 15:15 ` Jens Axboe
2005-11-14 15:39 ` Doug Ledford
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).