* [PATCH 4/5] convert st to use scsi_execte_async
@ 2005-09-16 4:39 Mike Christie
2005-09-17 11:57 ` Kai Makisara
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-16 4:39 UTC (permalink / raw)
To: Kai.Makisara, linux-scsi
Convert st to use scsi_execute_async.
I made scsi_execute_async take a scatterlist so there was not much todo.
I allocate st_request with GFP_KERNEL but will send another patch to
allocate it within the st_buffer if you want.
The strange thing is that it looks like st could have always been
sending down scatterlists before. This (bytes > (STp->buffer)->frp[0].length)
test here:
SRpnt->sr_use_sg = STp->buffer->do_dio || (bytes > (STp->buffer)->frp[0].length);
if (SRpnt->sr_use_sg) {
if (!STp->buffer->do_dio)
buf_to_sg(STp->buffer, bytes);
SRpnt->sr_use_sg = (STp->buffer)->sg_segs;
bp = (char *) &((STp->buffer)->sg[0]);
} else
bp = (STp->buffer)->b_data;
looks like st was sending down use_sg=0 on purpose for the
one segment case. Is this a leftover from old scsi code? If so
it does not look like there was much to change, so I merged
your patch except the max_pfn part since I reverted all my
related changes.
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -50,7 +50,6 @@ static char *verstr = "20050830";
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_ioctl.h>
-#include <scsi/scsi_request.h>
#include <scsi/sg.h>
@@ -188,8 +187,6 @@ static int from_buffer(struct st_buffer
static void move_buffer_data(struct st_buffer *, int);
static void buf_to_sg(struct st_buffer *, unsigned int);
-static int st_map_user_pages(struct scatterlist *, const unsigned int,
- unsigned long, size_t, int, unsigned long);
static int sgl_map_user_pages(struct scatterlist *, const unsigned int,
unsigned long, size_t, int);
static int sgl_unmap_user_pages(struct scatterlist *, const unsigned int, int);
@@ -313,12 +310,13 @@ static inline char *tape_name(struct scs
}
-static void st_analyze_sense(struct scsi_request *SRpnt, struct st_cmdstatus *s)
+static void st_analyze_sense(struct st_request *SRpnt, struct st_cmdstatus *s)
{
const u8 *ucp;
- const u8 *sense = SRpnt->sr_sense_buffer;
+ const u8 *sense = SRpnt->sense;
- s->have_sense = scsi_request_normalize_sense(SRpnt, &s->sense_hdr);
+ s->have_sense = scsi_normalize_sense(SRpnt->sense,
+ SCSI_SENSE_BUFFERSIZE, &s->sense_hdr);
s->flags = 0;
if (s->have_sense) {
@@ -345,9 +343,9 @@ static void st_analyze_sense(struct scsi
/* Convert the result to success code */
-static int st_chk_result(struct scsi_tape *STp, struct scsi_request * SRpnt)
+static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt)
{
- int result = SRpnt->sr_result;
+ int result = SRpnt->result;
u8 scode;
DEB(const char *stp;)
char *name = tape_name(STp);
@@ -366,13 +364,12 @@ static int st_chk_result(struct scsi_tap
DEB(
if (debugging) {
- printk(ST_DEB_MSG "%s: Error: %x, cmd: %x %x %x %x %x %x Len: %d\n",
+ printk(ST_DEB_MSG "%s: Error: %x, cmd: %x %x %x %x %x %x\n",
name, result,
- SRpnt->sr_cmnd[0], SRpnt->sr_cmnd[1], SRpnt->sr_cmnd[2],
- SRpnt->sr_cmnd[3], SRpnt->sr_cmnd[4], SRpnt->sr_cmnd[5],
- SRpnt->sr_bufflen);
+ SRpnt->cmd[0], SRpnt->cmd[1], SRpnt->cmd[2],
+ SRpnt->cmd[3], SRpnt->cmd[4], SRpnt->cmd[5]);
if (cmdstatp->have_sense)
- scsi_print_req_sense("st", SRpnt);
+ __scsi_print_sense("st", SRpnt->sense, SCSI_SENSE_BUFFERSIZE);
} ) /* end DEB */
if (!debugging) { /* Abnormal conditions for tape */
if (!cmdstatp->have_sense)
@@ -386,20 +383,21 @@ static int st_chk_result(struct scsi_tap
/* scode != UNIT_ATTENTION && */
scode != BLANK_CHECK &&
scode != VOLUME_OVERFLOW &&
- SRpnt->sr_cmnd[0] != MODE_SENSE &&
- SRpnt->sr_cmnd[0] != TEST_UNIT_READY) {
+ SRpnt->cmd[0] != MODE_SENSE &&
+ SRpnt->cmd[0] != TEST_UNIT_READY) {
printk(KERN_WARNING "%s: Error with sense data: ", name);
- scsi_print_req_sense("st", SRpnt);
+ __scsi_print_sense("st", SRpnt->sense,
+ SCSI_SENSE_BUFFERSIZE);
}
}
if (cmdstatp->fixed_format &&
STp->cln_mode >= EXTENDED_SENSE_START) { /* Only fixed format sense */
if (STp->cln_sense_value)
- STp->cleaning_req |= ((SRpnt->sr_sense_buffer[STp->cln_mode] &
+ STp->cleaning_req |= ((SRpnt->sense[STp->cln_mode] &
STp->cln_sense_mask) == STp->cln_sense_value);
else
- STp->cleaning_req |= ((SRpnt->sr_sense_buffer[STp->cln_mode] &
+ STp->cleaning_req |= ((SRpnt->sense[STp->cln_mode] &
STp->cln_sense_mask) != 0);
}
if (cmdstatp->have_sense &&
@@ -411,8 +409,8 @@ static int st_chk_result(struct scsi_tap
if (cmdstatp->have_sense &&
scode == RECOVERED_ERROR
#if ST_RECOVERED_WRITE_FATAL
- && SRpnt->sr_cmnd[0] != WRITE_6
- && SRpnt->sr_cmnd[0] != WRITE_FILEMARKS
+ && SRpnt->cmd[0] != WRITE_6
+ && SRpnt->cmd[0] != WRITE_FILEMARKS
#endif
) {
STp->recover_count++;
@@ -420,9 +418,9 @@ static int st_chk_result(struct scsi_tap
DEB(
if (debugging) {
- if (SRpnt->sr_cmnd[0] == READ_6)
+ if (SRpnt->cmd[0] == READ_6)
stp = "read";
- else if (SRpnt->sr_cmnd[0] == WRITE_6)
+ else if (SRpnt->cmd[0] == WRITE_6)
stp = "write";
else
stp = "ioctl";
@@ -438,28 +436,37 @@ static int st_chk_result(struct scsi_tap
/* Wakeup from interrupt */
-static void st_sleep_done(struct scsi_cmnd * SCpnt)
+static void st_sleep_done(void *data, char *sense, int result, int resid)
{
- struct scsi_tape *STp = container_of(SCpnt->request->rq_disk->private_data,
- struct scsi_tape, driver);
+ struct st_request *SRpnt = data;
+ struct scsi_tape *STp = SRpnt->stp;
- (STp->buffer)->cmdstat.midlevel_result = SCpnt->result;
- SCpnt->request->rq_status = RQ_SCSI_DONE;
+ memcpy(SRpnt->sense, sense, SCSI_SENSE_BUFFERSIZE);
+ (STp->buffer)->cmdstat.midlevel_result = SRpnt->result = result;
DEB( STp->write_pending = 0; )
- if (SCpnt->request->waiting)
- complete(SCpnt->request->waiting);
+ if (SRpnt->waiting)
+ complete(SRpnt->waiting);
+}
+
+static struct st_request *st_allocate_request(void)
+{
+ return kzalloc(sizeof(struct st_request), GFP_KERNEL);
+}
+
+static void st_release_request(struct st_request *streq)
+{
+ kfree(streq);
}
/* Do the scsi command. Waits until command performed if do_wait is true.
Otherwise write_behind_check() is used to check that the command
has finished. */
-static struct scsi_request *
-st_do_scsi(struct scsi_request * SRpnt, struct scsi_tape * STp, unsigned char *cmd,
+static struct st_request *
+st_do_scsi(struct st_request * SRpnt, struct scsi_tape * STp, unsigned char *cmd,
int bytes, int direction, int timeout, int retries, int do_wait)
{
struct completion *waiting;
- unsigned char *bp;
/* if async, make sure there's no command outstanding */
if (!do_wait && ((STp->buffer)->last_SRpnt)) {
@@ -473,7 +480,7 @@ st_do_scsi(struct scsi_request * SRpnt,
}
if (SRpnt == NULL) {
- SRpnt = scsi_allocate_request(STp->device, GFP_ATOMIC);
+ SRpnt = st_allocate_request();
if (SRpnt == NULL) {
DEBC( printk(KERN_ERR "%s: Can't get SCSI request.\n",
tape_name(STp)); );
@@ -483,6 +490,7 @@ st_do_scsi(struct scsi_request * SRpnt,
(STp->buffer)->syscall_result = (-EBUSY);
return NULL;
}
+ SRpnt->stp = STp;
}
/* If async IO, set last_SRpnt. This ptr tells write_behind_check
@@ -492,32 +500,25 @@ st_do_scsi(struct scsi_request * SRpnt,
waiting = &STp->wait;
init_completion(waiting);
- SRpnt->sr_use_sg = STp->buffer->do_dio || (bytes > (STp->buffer)->frp[0].length);
- if (SRpnt->sr_use_sg) {
- if (!STp->buffer->do_dio)
- buf_to_sg(STp->buffer, bytes);
- SRpnt->sr_use_sg = (STp->buffer)->sg_segs;
- bp = (char *) &((STp->buffer)->sg[0]);
- } else
- bp = (STp->buffer)->b_data;
- SRpnt->sr_data_direction = direction;
- SRpnt->sr_cmd_len = 0;
- SRpnt->sr_request->waiting = waiting;
- SRpnt->sr_request->rq_status = RQ_SCSI_BUSY;
- SRpnt->sr_request->rq_disk = STp->disk;
- SRpnt->sr_request->end_io = blk_end_sync_rq;
- STp->buffer->cmdstat.have_sense = 0;
+ SRpnt->waiting = waiting;
+
+ if (!STp->buffer->do_dio)
+ buf_to_sg(STp->buffer, bytes);
- scsi_do_req(SRpnt, (void *) cmd, bp, bytes,
- st_sleep_done, timeout, retries);
+ memcpy(SRpnt->cmd, cmd, sizeof(SRpnt->cmd));
+ STp->buffer->cmdstat.have_sense = 0;
- if (do_wait) {
+ if (scsi_execute_async(STp->device, cmd, direction,
+ &((STp->buffer)->sg[0]), 0, (STp->buffer)->sg_segs,
+ timeout, retries, SRpnt, st_sleep_done, GFP_KERNEL))
+ /* could not allocate the buffer probably */
+ (STp->buffer)->syscall_result = (-EBUSY);
+ else if (do_wait) {
wait_for_completion(waiting);
- SRpnt->sr_request->waiting = NULL;
- if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
- SRpnt->sr_result |= (DRIVER_ERROR << 24);
+ SRpnt->waiting = NULL;
(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
}
+
return SRpnt;
}
@@ -532,7 +533,7 @@ static int write_behind_check(struct scs
struct st_buffer *STbuffer;
struct st_partstat *STps;
struct st_cmdstatus *cmdstatp;
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
STbuffer = STp->buffer;
if (!STbuffer->writing)
@@ -548,12 +549,10 @@ static int write_behind_check(struct scs
wait_for_completion(&(STp->wait));
SRpnt = STbuffer->last_SRpnt;
STbuffer->last_SRpnt = NULL;
- SRpnt->sr_request->waiting = NULL;
- if (SRpnt->sr_request->rq_status != RQ_SCSI_DONE)
- SRpnt->sr_result |= (DRIVER_ERROR << 24);
+ SRpnt->waiting = NULL;
(STp->buffer)->syscall_result = st_chk_result(STp, SRpnt);
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
STbuffer->buffer_bytes -= STbuffer->writing;
STps = &(STp->ps[STp->partition]);
@@ -593,7 +592,7 @@ static int write_behind_check(struct scs
it messes up the block number). */
static int cross_eof(struct scsi_tape * STp, int forward)
{
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
unsigned char cmd[MAX_COMMAND_SIZE];
cmd[0] = SPACE;
@@ -613,7 +612,7 @@ static int cross_eof(struct scsi_tape *
if (!SRpnt)
return (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
if ((STp->buffer)->cmdstat.midlevel_result != 0)
@@ -630,7 +629,7 @@ static int flush_write_buffer(struct scs
int offset, transfer, blks;
int result;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct st_partstat *STps;
result = write_behind_check(STp);
@@ -688,7 +687,7 @@ static int flush_write_buffer(struct scs
STp->dirty = 0;
(STp->buffer)->buffer_bytes = 0;
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
}
return result;
@@ -785,7 +784,7 @@ static int set_mode_densblk(struct scsi_
}
-/* Lock or unlock the drive door. Don't use when scsi_request allocated. */
+/* Lock or unlock the drive door. Don't use when st_request allocated. */
static int do_door_lock(struct scsi_tape * STp, int do_lock)
{
int retval, cmd;
@@ -844,7 +843,7 @@ static int test_ready(struct scsi_tape *
int attentions, waits, max_wait, scode;
int retval = CHKRES_READY, new_session = 0;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct st_cmdstatus *cmdstatp = &STp->buffer->cmdstat;
max_wait = do_wait ? ST_BLOCK_SECONDS : 0;
@@ -903,7 +902,7 @@ static int test_ready(struct scsi_tape *
}
if (SRpnt != NULL)
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
return retval;
}
@@ -918,7 +917,7 @@ static int check_tape(struct scsi_tape *
int i, retval, new_session = 0, do_wait;
unsigned char cmd[MAX_COMMAND_SIZE], saved_cleaning;
unsigned short st_flags = filp->f_flags;
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct st_modedef *STm;
struct st_partstat *STps;
char *name = tape_name(STp);
@@ -993,7 +992,7 @@ static int check_tape(struct scsi_tape *
goto err_out;
}
- if (!SRpnt->sr_result && !STp->buffer->cmdstat.have_sense) {
+ if (!SRpnt->result && !STp->buffer->cmdstat.have_sense) {
STp->max_block = ((STp->buffer)->b_data[1] << 16) |
((STp->buffer)->b_data[2] << 8) | (STp->buffer)->b_data[3];
STp->min_block = ((STp->buffer)->b_data[4] << 8) |
@@ -1045,7 +1044,7 @@ static int check_tape(struct scsi_tape *
}
STp->drv_write_prot = ((STp->buffer)->b_data[2] & 0x80) != 0;
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
STp->inited = 1;
@@ -1196,7 +1195,7 @@ static int st_flush(struct file *filp)
{
int result = 0, result2;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct scsi_tape *STp = filp->private_data;
struct st_modedef *STm = &(STp->modes[STp->current_mode]);
struct st_partstat *STps = &(STp->ps[STp->partition]);
@@ -1249,7 +1248,7 @@ static int st_flush(struct file *filp)
cmdstatp->sense_hdr.sense_key == RECOVERED_ERROR) &&
(!cmdstatp->remainder_valid || cmdstatp->uremainder64 == 0))) {
/* Write successful at EOM */
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
if (STps->drv_file >= 0)
STps->drv_file++;
@@ -1259,7 +1258,7 @@ static int st_flush(struct file *filp)
STps->eof = ST_FM;
}
else { /* Write error */
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
printk(KERN_ERR "%s: Error on write filemark.\n", name);
if (result == 0)
@@ -1400,11 +1399,11 @@ static int setup_buffering(struct scsi_t
i = STp->try_dio && try_rdio;
else
i = STp->try_dio && try_wdio;
+
if (i && ((unsigned long)buf & queue_dma_alignment(
STp->device->request_queue)) == 0) {
- i = st_map_user_pages(&(STbp->sg[0]), STbp->use_sg,
- (unsigned long)buf, count, (is_read ? READ : WRITE),
- STp->max_pfn);
+ i = sgl_map_user_pages(&(STbp->sg[0]), STbp->use_sg,
+ (unsigned long)buf, count, (is_read ? READ : WRITE));
if (i > 0) {
STbp->do_dio = i;
STbp->buffer_bytes = 0; /* can be used as transfer counter */
@@ -1472,7 +1471,7 @@ st_write(struct file *filp, const char _
int async_write;
unsigned char cmd[MAX_COMMAND_SIZE];
const char __user *b_point;
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct scsi_tape *STp = filp->private_data;
struct st_modedef *STm;
struct st_partstat *STps;
@@ -1728,7 +1727,7 @@ st_write(struct file *filp, const char _
out:
if (SRpnt != NULL)
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
release_buffering(STp);
up(&STp->lock);
@@ -1742,11 +1741,11 @@ st_write(struct file *filp, const char _
Does release user buffer mapping if it is set.
*/
static long read_tape(struct scsi_tape *STp, long count,
- struct scsi_request ** aSRpnt)
+ struct st_request ** aSRpnt)
{
int transfer, blks, bytes;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct st_modedef *STm;
struct st_partstat *STps;
struct st_buffer *STbp;
@@ -1802,10 +1801,10 @@ static long read_tape(struct scsi_tape *
retval = 1;
DEBC(printk(ST_DEB_MSG "%s: Sense: %2x %2x %2x %2x %2x %2x %2x %2x\n",
name,
- SRpnt->sr_sense_buffer[0], SRpnt->sr_sense_buffer[1],
- SRpnt->sr_sense_buffer[2], SRpnt->sr_sense_buffer[3],
- SRpnt->sr_sense_buffer[4], SRpnt->sr_sense_buffer[5],
- SRpnt->sr_sense_buffer[6], SRpnt->sr_sense_buffer[7]));
+ SRpnt->sense[0], SRpnt->sense[1],
+ SRpnt->sense[2], SRpnt->sense[3],
+ SRpnt->sense[4], SRpnt->sense[5],
+ SRpnt->sense[6], SRpnt->sense[7]));
if (cmdstatp->have_sense) {
if (cmdstatp->sense_hdr.sense_key == BLANK_CHECK)
@@ -1835,7 +1834,7 @@ static long read_tape(struct scsi_tape *
}
STbp->buffer_bytes = bytes - transfer;
} else {
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = *aSRpnt = NULL;
if (transfer == blks) { /* We did not get anything, error */
printk(KERN_NOTICE "%s: Incorrect block size.\n", name);
@@ -1929,7 +1928,7 @@ st_read(struct file *filp, char __user *
ssize_t retval = 0;
ssize_t i, transfer;
int special, do_dio = 0;
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
struct scsi_tape *STp = filp->private_data;
struct st_modedef *STm;
struct st_partstat *STps;
@@ -2054,7 +2053,7 @@ st_read(struct file *filp, char __user *
out:
if (SRpnt != NULL) {
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
}
if (do_dio) {
@@ -2284,7 +2283,7 @@ static int st_set_options(struct scsi_ta
static int read_mode_page(struct scsi_tape *STp, int page, int omit_block_descs)
{
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
memset(cmd, 0, MAX_COMMAND_SIZE);
cmd[0] = MODE_SENSE;
@@ -2298,7 +2297,7 @@ static int read_mode_page(struct scsi_ta
if (SRpnt == NULL)
return (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
return (STp->buffer)->syscall_result;
}
@@ -2310,7 +2309,7 @@ static int write_mode_page(struct scsi_t
{
int pgo;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt = NULL;
+ struct st_request *SRpnt = NULL;
memset(cmd, 0, MAX_COMMAND_SIZE);
cmd[0] = MODE_SELECT;
@@ -2329,7 +2328,7 @@ static int write_mode_page(struct scsi_t
if (SRpnt == NULL)
return (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
return (STp->buffer)->syscall_result;
}
@@ -2412,7 +2411,7 @@ static int do_load_unload(struct scsi_ta
DEB( char *name = tape_name(STp); )
unsigned char cmd[MAX_COMMAND_SIZE];
struct st_partstat *STps;
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
if (STp->ready != ST_READY && !load_code) {
if (STp->ready == ST_NO_TAPE)
@@ -2455,7 +2454,7 @@ static int do_load_unload(struct scsi_ta
return (STp->buffer)->syscall_result;
retval = (STp->buffer)->syscall_result;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
if (!retval) { /* SCSI command successful */
@@ -2503,7 +2502,7 @@ static int st_int_ioctl(struct scsi_tape
int ioctl_result;
int chg_eof = 1;
unsigned char cmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
struct st_partstat *STps;
int fileno, blkno, at_sm, undone;
int datalen = 0, direction = DMA_NONE;
@@ -2757,7 +2756,7 @@ static int st_int_ioctl(struct scsi_tape
ioctl_result = (STp->buffer)->syscall_result;
if (!ioctl_result) { /* SCSI command successful */
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
STps->drv_block = blkno;
STps->drv_file = fileno;
@@ -2872,7 +2871,7 @@ static int st_int_ioctl(struct scsi_tape
/* Try the other possible state of Page Format if not
already tried */
STp->use_pf = !STp->use_pf | PF_TESTED;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
return st_int_ioctl(STp, cmd_in, arg);
}
@@ -2882,7 +2881,7 @@ static int st_int_ioctl(struct scsi_tape
if (cmdstatp->sense_hdr.sense_key == BLANK_CHECK)
STps->eof = ST_EOD;
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
}
@@ -2898,7 +2897,7 @@ static int get_location(struct scsi_tape
{
int result;
unsigned char scmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
DEB( char *name = tape_name(STp); )
if (STp->ready != ST_READY)
@@ -2944,7 +2943,7 @@ static int get_location(struct scsi_tape
DEBC(printk(ST_DEB_MSG "%s: Got tape pos. blk %d part %d.\n", name,
*block, *partition));
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
return result;
@@ -2961,7 +2960,7 @@ static int set_location(struct scsi_tape
unsigned int blk;
int timeout;
unsigned char scmd[MAX_COMMAND_SIZE];
- struct scsi_request *SRpnt;
+ struct st_request *SRpnt;
DEB( char *name = tape_name(STp); )
if (STp->ready != ST_READY)
@@ -3047,7 +3046,7 @@ static int set_location(struct scsi_tape
result = 0;
}
- scsi_release_request(SRpnt);
+ st_release_request(SRpnt);
SRpnt = NULL;
return result;
@@ -3577,7 +3576,7 @@ static long st_compat_ioctl(struct file
static struct st_buffer *
new_tape_buffer(int from_initialization, int need_dma, int max_sg)
{
- int i, priority, got = 0, segs = 0;
+ int i, priority, got = 0;
struct st_buffer *tb;
if (from_initialization)
@@ -3593,10 +3592,8 @@ static struct st_buffer *
return NULL;
}
memset(tb, 0, i);
- tb->frp_segs = tb->orig_frp_segs = segs;
+ tb->frp_segs = tb->orig_frp_segs = 0;
tb->use_sg = max_sg;
- if (segs > 0)
- tb->b_data = page_address(tb->sg[0].page);
tb->frp = (struct st_buf_fragment *)(&(tb->sg[0]) + max_sg);
tb->in_use = 1;
@@ -3880,7 +3877,6 @@ static int st_probe(struct device *dev)
struct st_buffer *buffer;
int i, j, mode, dev_num, error;
char *stp;
- u64 bounce_limit;
if (SDp->type != TYPE_TAPE)
return -ENODEV;
@@ -3994,11 +3990,6 @@ static int st_probe(struct device *dev)
tpnt->long_timeout = ST_LONG_TIMEOUT;
tpnt->try_dio = try_direct_io && !SDp->host->unchecked_isa_dma;
- bounce_limit = scsi_calculate_bounce_limit(SDp->host) >> PAGE_SHIFT;
- if (bounce_limit > ULONG_MAX)
- bounce_limit = ULONG_MAX;
- tpnt->max_pfn = bounce_limit;
-
for (i = 0; i < ST_NBR_MODES; i++) {
STm = &(tpnt->modes[i]);
STm->defined = 0;
@@ -4078,9 +4069,9 @@ static int st_probe(struct device *dev)
printk(KERN_WARNING
"Attached scsi tape %s at scsi%d, channel %d, id %d, lun %d\n",
tape_name(tpnt), SDp->host->host_no, SDp->channel, SDp->id, SDp->lun);
- printk(KERN_WARNING "%s: try direct i/o: %s (alignment %d B), max page reachable by HBA %lu\n",
+ printk(KERN_WARNING "%s: try direct i/o: %s (alignment %d B)\n",
tape_name(tpnt), tpnt->try_dio ? "yes" : "no",
- queue_dma_alignment(SDp->request_queue) + 1, tpnt->max_pfn);
+ queue_dma_alignment(SDp->request_queue) + 1);
return 0;
@@ -4409,34 +4400,6 @@ static void do_create_class_files(struct
return;
}
-
-/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if
- - mapping of all pages not successful
- - any page is above max_pfn
- (i.e., either completely successful or fails)
-*/
-static int st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
- unsigned long uaddr, size_t count, int rw,
- unsigned long max_pfn)
-{
- int i, nr_pages;
-
- nr_pages = sgl_map_user_pages(sgl, max_pages, uaddr, count, rw);
- if (nr_pages <= 0)
- return nr_pages;
-
- for (i=0; i < nr_pages; i++) {
- if (page_to_pfn(sgl[i].page) > max_pfn)
- goto out_unmap;
- }
- return nr_pages;
-
- out_unmap:
- sgl_unmap_user_pages(sgl, nr_pages, 0);
- return 0;
-}
-
-
/* The following functions may be useful for a larger audience. */
static int sgl_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
unsigned long uaddr, size_t count, int rw)
diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h
--- a/drivers/scsi/st.h
+++ b/drivers/scsi/st.h
@@ -4,6 +4,7 @@
#include <linux/completion.h>
#include <linux/kref.h>
+#include <scsi/scsi_cmnd.h>
/* Descriptor for analyzed sense data */
struct st_cmdstatus {
@@ -17,6 +18,17 @@ struct st_cmdstatus {
u8 deferred;
};
+struct scsi_tape;
+
+/* scsi tape command */
+struct st_request {
+ unsigned char cmd[MAX_COMMAND_SIZE];
+ unsigned char sense[SCSI_SENSE_BUFFERSIZE];
+ int result;
+ struct scsi_tape *stp;
+ struct completion *waiting;
+};
+
/* The tape buffer descriptor. */
struct st_buffer {
unsigned char in_use;
@@ -28,7 +40,7 @@ struct st_buffer {
int read_pointer;
int writing;
int syscall_result;
- struct scsi_request *last_SRpnt;
+ struct st_request *last_SRpnt;
struct st_cmdstatus cmdstat;
unsigned char *b_data;
unsigned short use_sg; /* zero or max number of s/g segments for this adapter */
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-16 4:39 [PATCH 4/5] convert st to use scsi_execte_async Mike Christie
@ 2005-09-17 11:57 ` Kai Makisara
2005-09-17 15:43 ` Mike Christie
2005-09-17 15:57 ` Kai Makisara
0 siblings, 2 replies; 27+ messages in thread
From: Kai Makisara @ 2005-09-17 11:57 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Thu, 15 Sep 2005, Mike Christie wrote:
> Convert st to use scsi_execute_async.
>
> I made scsi_execute_async take a scatterlist so there was not much todo.
> I allocate st_request with GFP_KERNEL but will send another patch to
> allocate it within the st_buffer if you want.
>
I can think about this and change it later if necessary. Both approaches
have pros and cons.
> The strange thing is that it looks like st could have always been
> sending down scatterlists before. This (bytes > (STp->buffer)->frp[0].length)
> test here:
>
> SRpnt->sr_use_sg = STp->buffer->do_dio || (bytes > (STp->buffer)->frp[0].length);
> if (SRpnt->sr_use_sg) {
> if (!STp->buffer->do_dio)
> buf_to_sg(STp->buffer, bytes);
> SRpnt->sr_use_sg = (STp->buffer)->sg_segs;
> bp = (char *) &((STp->buffer)->sg[0]);
> } else
> bp = (STp->buffer)->b_data;
>
> looks like st was sending down use_sg=0 on purpose for the
> one segment case. Is this a leftover from old scsi code? If so
> it does not look like there was much to change, so I merged
> your patch except the max_pfn part since I reverted all my
> related changes.
>
Yes, it is a leftover from the time when not all HBA drivers supported
scatterlists. I should have eliminated the non-scatterlist case while
doing other changes but I have never remembered it at the right time ;-)
I have continued testing with your new patch. No oopses but it does not
pass all of the tests.
I think I may have found the problem: scsi_execute_async does not use the
parameter retries (the same applies to scsi_execute btw). This leads to
retrying the reads at filemark and this is not correct. I added the hack
below to scsi_execute_async and after this the simple tests succeed.
--- linux-2.6.14-rc1-blk3/drivers/scsi/scsi_lib.c~ 2005-09-16 21:44:05.000000000 +0300
+++ linux-2.6.14-rc1-blk3/drivers/scsi/scsi_lib.c 2005-09-17 14:21:14.000000000 +0300
@@ -428,6 +428,8 @@ int scsi_execute_async(struct scsi_devic
req->sense_len = 0;
req->timeout = timeout;
req->flags |= REQ_BLOCK_PC | REQ_QUIET;
+ if (retries == 0)
+ req->flags |= REQ_FAILFAST;
req->end_io_data = sioc;
sioc->data = privdata;
This is not a real solution though. There should be some control over
retries from the scsi ULDs but I did not directly see how to do it.
Looking at the tests I did a couple of days, the number of retries seemed
to be variable (i.e., the number of retries I saw in the same test today
and a couple of days ago didn't match).
--
Kai
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-17 11:57 ` Kai Makisara
@ 2005-09-17 15:43 ` Mike Christie
2005-09-17 15:55 ` Mike Christie
2005-09-17 15:57 ` Kai Makisara
1 sibling, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-17 15:43 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
>
> I think I may have found the problem: scsi_execute_async does not use the
> parameter retries (the same applies to scsi_execute btw). This leads to
> retrying the reads at filemark and this is not correct. I added the hack
> below to scsi_execute_async and after this the simple tests succeed.
ah ok. I think when we prep the command we need to copy the retries from
the command. So in st.c st_init_command callout we need to copy that
value (we are just copying the timeout today).
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-17 15:43 ` Mike Christie
@ 2005-09-17 15:55 ` Mike Christie
2005-09-17 16:25 ` Mike Christie
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-17 15:55 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Mike Christie wrote:
> Kai Makisara wrote:
>
>>
>> I think I may have found the problem: scsi_execute_async does not use
>> the parameter retries (the same applies to scsi_execute btw). This
>> leads to retrying the reads at filemark and this is not correct. I
>> added the hack below to scsi_execute_async and after this the simple
>> tests succeed.
>
>
> ah ok. I think when we prep the command we need to copy the retries from
> the command. So in st.c st_init_command callout we need to copy that
> value (we are just copying the timeout today).
oh I guess there is not retries count on the request like there is for
timeout :)
But it looks like retried is always 0. I guess st's init_command could
just do
SCpnt->allowed = 0;
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-17 11:57 ` Kai Makisara
2005-09-17 15:43 ` Mike Christie
@ 2005-09-17 15:57 ` Kai Makisara
2005-09-17 16:48 ` Kai Makisara
1 sibling, 1 reply; 27+ messages in thread
From: Kai Makisara @ 2005-09-17 15:57 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Sat, 17 Sep 2005, Kai Makisara wrote:
> On Thu, 15 Sep 2005, Mike Christie wrote:
>
...
> This is not a real solution though. There should be some control over
> retries from the scsi ULDs but I did not directly see how to do it.
> Looking at the tests I did a couple of days, the number of retries seemed
> to be variable (i.e., the number of retries I saw in the same test today
> and a couple of days ago didn't match).
>
I have looked the retries more and there seems to be several problems
somewhere, not necessarily in your new code. I don't have any answers but
here is data from one of my experiments. I had debugging enabled in
st and added a printk to st_sleep_done to see what results st gets from
the requests.
The tape contained four files, 10 10240 byte blocks each. The drive was in
variable block mode and reading starts from the beginning of the tape.
The first command was
dd if=/dev/nst0 of=/dev/null bs=10240 count=10
i.e., read the data from the first file. The system log contained the
following
Sep 17 14:04:03 box kernel: st: cmd=0x00 result=0x0 resid=0 sense[0]=0x00
Sep 17 14:04:03 box kernel: st: cmd=0x05 result=0x0 resid=0 sense[0]=0x00
Sep 17 14:04:03 box kernel: st0: Block limits 1 - 16777215 bytes.
Sep 17 14:04:03 box kernel: st: cmd=0x1a result=0x0 resid=0 sense[0]=0x00
Sep 17 14:04:03 box kernel: st0: Mode sense. Length 11, medium 0, WBS 10, BLL 8
Sep 17 14:04:03 box kernel: st0: Density 24, tape length: 0, drv buffer: 1
Sep 17 14:04:03 box kernel: st0: Block size: 0, buffer size: 4096 (1 blocks).
Sep 17 14:04:03 box kernel: st: cmd=0x08 result=0x0 resid=0 sense[0]=0x00
Sep 17 14:04:03 box last message repeated 9 times
Sep 17 14:04:03 box kernel: st0: Number of r/w requests 10, dio used in 10, pages 30 (0).
This was correct. Now the tape was positioned before the filemark. The
next command was the same as before
dd if=/dev/nst0 of=/dev/null bs=10240 count=10
Sep 17 14:04:29 box kernel: st: cmd=0x00 result=0x0 resid=10240 sense[0]=0x00
Sep 17 14:04:29 box kernel: st: cmd=0x05 result=0x0 resid=0 sense[0]=0x00
Sep 17 14:04:29 box kernel: st0: Block limits 1 - 16777215 bytes.
Sep 17 14:04:29 box kernel: st: cmd=0x1a result=0x0 resid=0 sense[0]=0x00
Sep 17 14:04:29 box kernel: st0: Mode sense. Length 11, medium 0, WBS 10, BLL 8
Sep 17 14:04:29 box kernel: st0: Density 24, tape length: 0, drv buffer: 1
Sep 17 14:04:29 box kernel: st0: Block size: 0, buffer size: 4096 (1 blocks).
The first SCSI read command of 10240 bytes should have terminated with
sense data showing that a filemark has been detected (sense code NO
SENSE). What happened was that the system attempted to retry the command
six times. This retry did not happen correctly because the sym53c8xx_2
driver printed the following:
Sep 17 14:04:29 box kernel: st 1:0:5:0: extraneous data discarded.
Sep 17 14:04:29 box kernel: st 1:0:5:0: COMMAND FAILED (87 0 1).
Sep 17 14:04:29 box kernel: st 1:0:5:0: extraneous data discarded.
Sep 17 14:04:29 box kernel: st 1:0:5:0: COMMAND FAILED (87 0 1).
Sep 17 14:04:29 box kernel: st 1:0:5:0: extraneous data discarded.
Sep 17 14:04:29 box kernel: st 1:0:5:0: COMMAND FAILED (87 0 1).
Sep 17 14:04:29 box kernel: st 1:0:5:0: extraneous data discarded.
Sep 17 14:04:29 box kernel: st 1:0:5:0: COMMAND FAILED (87 0 1).
Sep 17 14:04:29 box kernel: st 1:0:5:0: extraneous data discarded.
Sep 17 14:04:29 box kernel: st 1:0:5:0: COMMAND FAILED (87 0 1).
Sep 17 14:04:29 box kernel: st 1:0:5:0: extraneous data discarded.
Sep 17 14:04:29 box kernel: st 1:0:5:0: COMMAND FAILED (87 0 1).
Sep 17 14:04:29 box kernel: st: cmd=0x08 result=0x70000 resid=10240 sense[0]=0xfffffff0
Sep 17 14:04:29 box kernel: st0: Error: 70000, cmd: 8 0 0 28 0 0
Sep 17 14:04:29 box kernel: st: Current: sense key: No Sense
Sep 17 14:04:29 box kernel: Additional sense: Filemark detected
Sep 17 14:04:29 box kernel: Info fld=0x2800, FMK
Sep 17 14:04:29 box kernel: st0: Sense: f0 0 80 0 0 28 0 e
The result shows the correct return data from the first read command. The
retried commands should have succeeded (well, the first one).
Sep 17 14:04:29 box kernel: st0: EOF detected (0 bytes read).
Sep 17 14:04:29 box kernel: st0: Number of r/w requests 1, dio used in 1, pages 3 (0).
OK. Next I tried
mt tell
to see where the tape was.
Sep 17 14:12:10 box kernel: st: cmd=0x00 result=0x0 resid=10240 sense[0]=0x00
^^^^^^^^^^^
The resid here seems to have been "inherited" from the previous command.
Sep 17 14:12:10 box kernel: st: cmd=0x05 result=0x0 resid=0 sense[0]=0x00
Sep 17 14:12:10 box kernel: st0: Block limits 1 - 16777215 bytes.
Sep 17 14:12:10 box kernel: st: cmd=0x1a result=0x0 resid=0 sense[0]=0x00
Sep 17 14:12:10 box kernel: st0: Mode sense. Length 11, medium 0, WBS 10, BLL 8
Sep 17 14:12:10 box kernel: st0: Density 24, tape length: 0, drv buffer: 1
Sep 17 14:12:10 box kernel: st0: Block size: 0, buffer size: 4096 (1 blocks).
Sep 17 14:12:10 box kernel: st: cmd=0x34 result=0x0 resid=0 sense[0]=0x00
Sep 17 14:12:10 box kernel: st0: Got tape pos. blk 16 part 0.
The tape is positioned after the sixth block in the second file, i.e., the
retried commands have moved the tape.
So, the retries did not happen correctly. One can also say that apparently
the first read command returned sense data and it should not have been
retried. I don't know how old these problems are because earlier the SCSI
subsystem has used the retries parameter and not even "thought" about
retrying the tape commands.
--
Kai
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-17 15:55 ` Mike Christie
@ 2005-09-17 16:25 ` Mike Christie
2005-09-18 12:01 ` Kai Makisara
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-17 16:25 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
On Sat, 2005-09-17 at 10:55 -0500, Mike Christie wrote:
> Mike Christie wrote:
> > Kai Makisara wrote:
> >
> >>
> >> I think I may have found the problem: scsi_execute_async does not use
> >> the parameter retries (the same applies to scsi_execute btw). This
> >> leads to retrying the reads at filemark and this is not correct. I
> >> added the hack below to scsi_execute_async and after this the simple
> >> tests succeed.
> >
> >
> > ah ok. I think when we prep the command we need to copy the retries from
> > the command. So in st.c st_init_command callout we need to copy that
> > value (we are just copying the timeout today).
>
> oh I guess there is not retries count on the request like there is for
> timeout :)
>
> But it looks like retried is always 0. I guess st's init_command could
> just do
>
> SCpnt->allowed = 0;
Or how about this patch it was made against my updated patches here:
http://www.cs.wisc.edu/~michaelc/block/use-sg/v6/
It is the 06-add-retry-field.patch patch. All it does it add a retries
count onto the request so it can be passed around like the timeout
value.
diff -aurp scsi-rc-fixes.tmp/drivers/block/scsi_ioctl.c scsi-rc-fixes/drivers/block/scsi_ioctl.c
--- scsi-rc-fixes.tmp/drivers/block/scsi_ioctl.c 2005-09-17 11:16:42.000000000 -0500
+++ scsi-rc-fixes/drivers/block/scsi_ioctl.c 2005-09-17 11:05:22.000000000 -0500
@@ -302,6 +302,7 @@ static int sg_io(struct file *file, requ
rq->timeout = q->sg_timeout;
if (!rq->timeout)
rq->timeout = BLK_DEFAULT_TIMEOUT;
+ rq->retries = 1;
start_time = jiffies;
@@ -412,6 +413,7 @@ static int sg_scsi_ioctl(struct file *fi
rq->timeout = BLK_DEFAULT_TIMEOUT;
break;
}
+ rq->retries = 1;
memset(sense, 0, sizeof(sense));
rq->sense = sense;
@@ -570,6 +572,7 @@ int scsi_cmd_ioctl(struct file *file, st
rq->data = NULL;
rq->data_len = 0;
rq->timeout = BLK_DEFAULT_TIMEOUT;
+ rq->retries = 1;
memset(rq->cmd, 0, sizeof(rq->cmd));
rq->cmd[0] = GPCMD_START_STOP_UNIT;
rq->cmd[4] = 0x02 + (close != 0);
diff -aurp scsi-rc-fixes.tmp/drivers/scsi/scsi_lib.c scsi-rc-fixes/drivers/scsi/scsi_lib.c
--- scsi-rc-fixes.tmp/drivers/scsi/scsi_lib.c 2005-09-17 11:17:08.000000000 -0500
+++ scsi-rc-fixes/drivers/scsi/scsi_lib.c 2005-09-17 11:06:17.000000000 -0500
@@ -261,6 +261,7 @@ int scsi_execute(struct scsi_device *sde
memcpy(req->cmd, cmd, req->cmd_len);
req->sense = sense;
req->sense_len = 0;
+ req->retries = retries;
req->timeout = timeout;
req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
@@ -427,6 +428,7 @@ int scsi_execute_async(struct scsi_devic
req->sense = sioc->sense;
req->sense_len = 0;
req->timeout = timeout;
+ req->retries = retries;
req->flags |= REQ_BLOCK_PC | REQ_QUIET;
req->end_io_data = sioc;
@@ -1340,7 +1342,7 @@ static int scsi_prep_fn(struct request_q
cmd->sc_data_direction = DMA_NONE;
cmd->transfersize = req->data_len;
- cmd->allowed = 3;
+ cmd->allowed = req->retries;
cmd->timeout_per_command = req->timeout;
cmd->done = scsi_generic_done;
}
diff -aurp scsi-rc-fixes.tmp/drivers/scsi/sd.c scsi-rc-fixes/drivers/scsi/sd.c
--- scsi-rc-fixes.tmp/drivers/scsi/sd.c 2005-09-17 11:16:49.000000000 -0500
+++ scsi-rc-fixes/drivers/scsi/sd.c 2005-09-17 11:05:32.000000000 -0500
@@ -86,7 +86,6 @@
* Number of allowed retries
*/
#define SD_MAX_RETRIES 5
-#define SD_PASSTHROUGH_RETRIES 1
static void scsi_disk_release(struct kref *kref);
@@ -248,7 +247,7 @@ static int sd_init_command(struct scsi_c
timeout = rq->timeout;
SCpnt->transfersize = rq->data_len;
- SCpnt->allowed = SD_PASSTHROUGH_RETRIES;
+ SCpnt->allowed = rq->retries;
goto queue;
}
diff -aurp scsi-rc-fixes.tmp/drivers/scsi/st.c scsi-rc-fixes/drivers/scsi/st.c
--- scsi-rc-fixes.tmp/drivers/scsi/st.c 2005-09-17 11:17:08.000000000 -0500
+++ scsi-rc-fixes/drivers/scsi/st.c 2005-09-17 11:06:00.000000000 -0500
@@ -4206,6 +4206,7 @@ static int st_init_command(struct scsi_c
else
SCpnt->sc_data_direction = DMA_NONE;
+ SCpnt->allowed = rq->retries;
SCpnt->timeout_per_command = rq->timeout;
SCpnt->transfersize = rq->data_len;
SCpnt->done = st_intr;
diff -aurp scsi-rc-fixes.tmp/include/linux/blkdev.h scsi-rc-fixes/include/linux/blkdev.h
--- scsi-rc-fixes.tmp/include/linux/blkdev.h 2005-09-17 11:17:08.000000000 -0500
+++ scsi-rc-fixes/include/linux/blkdev.h 2005-09-17 10:59:44.000000000 -0500
@@ -184,6 +184,7 @@ struct request {
void *sense;
unsigned int timeout;
+ int retries;
/*
* For Power Management requests
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-17 15:57 ` Kai Makisara
@ 2005-09-17 16:48 ` Kai Makisara
0 siblings, 0 replies; 27+ messages in thread
From: Kai Makisara @ 2005-09-17 16:48 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Sat, 17 Sep 2005, Kai Makisara wrote:
> On Sat, 17 Sep 2005, Kai Makisara wrote:
>
> > On Thu, 15 Sep 2005, Mike Christie wrote:
> >
> ...
> > This is not a real solution though. There should be some control over
> > retries from the scsi ULDs but I did not directly see how to do it.
> > Looking at the tests I did a couple of days, the number of retries seemed
> > to be variable (i.e., the number of retries I saw in the same test today
> > and a couple of days ago didn't match).
> >
> I have looked the retries more and there seems to be several problems
> somewhere, not necessarily in your new code. I don't have any answers but
> here is data from one of my experiments. I had debugging enabled in
> st and added a printk to st_sleep_done to see what results st gets from
> the requests.
>
...
> Sep 17 14:12:10 box kernel: st: cmd=0x00 result=0x0 resid=10240 sense[0]=0x00
> ^^^^^^^^^^^
> The resid here seems to have been "inherited" from the previous command.
Can be a bug in sym53c8xx_2.
> I don't know how old these problems are because earlier the SCSI
> subsystem has used the retries parameter and not even "thought" about
> retrying the tape commands.
>
I tried 2.6.14-rc1 (changed st.c to allow 5 read retries). This did
not show any errors (no retries were done).
--
Kai
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-17 16:25 ` Mike Christie
@ 2005-09-18 12:01 ` Kai Makisara
2005-09-18 15:03 ` Mike Christie
0 siblings, 1 reply; 27+ messages in thread
From: Kai Makisara @ 2005-09-18 12:01 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Sat, 17 Sep 2005, Mike Christie wrote:
> On Sat, 2005-09-17 at 10:55 -0500, Mike Christie wrote:
> > Mike Christie wrote:
> > > Kai Makisara wrote:
> > >
> > >>
> > >> I think I may have found the problem: scsi_execute_async does not use
> > >> the parameter retries (the same applies to scsi_execute btw). This
> > >> leads to retrying the reads at filemark and this is not correct. I
> > >> added the hack below to scsi_execute_async and after this the simple
> > >> tests succeed.
> > >
> > >
> > > ah ok. I think when we prep the command we need to copy the retries from
> > > the command. So in st.c st_init_command callout we need to copy that
> > > value (we are just copying the timeout today).
> >
> > oh I guess there is not retries count on the request like there is for
> > timeout :)
> >
> > But it looks like retried is always 0. I guess st's init_command could
> > just do
> >
> > SCpnt->allowed = 0;
This does not work because st_init_command() is not called (called only
with SG_IO ioctls).
>
>
> Or how about this patch it was made against my updated patches here:
> http://www.cs.wisc.edu/~michaelc/block/use-sg/v6/
>
> It is the 06-add-retry-field.patch patch. All it does it add a retries
> count onto the request so it can be passed around like the timeout
> value.
>
[patch cut]
This looks like a clean approach because it allows passing the retries
parameter of scsi_execute_* to the request processing.
I tested this but it does not quite work: there are now two retries in the
same test that previously gave six retries. Unfortunately I don't just now
have time to study the problem further.
--
Kai
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 12:01 ` Kai Makisara
@ 2005-09-18 15:03 ` Mike Christie
2005-09-18 15:17 ` Mike Christie
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-18 15:03 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
> On Sat, 17 Sep 2005, Mike Christie wrote:
>
>
>>On Sat, 2005-09-17 at 10:55 -0500, Mike Christie wrote:
>>
>>>Mike Christie wrote:
>>>
>>>>Kai Makisara wrote:
>>>>
>>>>
>>>>>I think I may have found the problem: scsi_execute_async does not use
>>>>>the parameter retries (the same applies to scsi_execute btw). This
>>>>>leads to retrying the reads at filemark and this is not correct. I
>>>>>added the hack below to scsi_execute_async and after this the simple
>>>>>tests succeed.
>>>>
>>>>
>>>>ah ok. I think when we prep the command we need to copy the retries from
>>>>the command. So in st.c st_init_command callout we need to copy that
>>>>value (we are just copying the timeout today).
>>>
>>>oh I guess there is not retries count on the request like there is for
>>>timeout :)
>>>
>>>But it looks like retried is always 0. I guess st's init_command could
>>>just do
>>>
>>>SCpnt->allowed = 0;
>
>
> This does not work because st_init_command() is not called (called only
> with SG_IO ioctls).
The scsi_execute* functions actually send all commands down as
REQ_BLOCK_PC commands like SG_IO so we are always going through the
st_init_commad().
>
>
>>
>>Or how about this patch it was made against my updated patches here:
>>http://www.cs.wisc.edu/~michaelc/block/use-sg/v6/
>>
>>It is the 06-add-retry-field.patch patch. All it does it add a retries
>>count onto the request so it can be passed around like the timeout
>>value.
>>
>
> [patch cut]
>
> This looks like a clean approach because it allows passing the retries
> parameter of scsi_execute_* to the request processing.
>
> I tested this but it does not quite work: there are now two retries in the
> same test that previously gave six retries. Unfortunately I don't just now
> have time to study the problem further.
>
ok I will try to look into it.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 15:03 ` Mike Christie
@ 2005-09-18 15:17 ` Mike Christie
2005-09-18 17:40 ` Kai Makisara
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-18 15:17 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Mike Christie wrote:
>> I tested this but it does not quite work: there are now two retries in
>> the same test that previously gave six retries. Unfortunately I don't
>> just now have time to study the problem further.
>>
>
> ok I will try to look into it.
>
Oh I bet it is from the change that James B noticed. With scsi_do_req()
we passed in the sr_done function, but with scsi_execute* we pass in the
requests end_io function. So with scsi_do_req, when we complete a
command in scsi_finish_command() (where it calls cmd->done) we would
call the done function passed into scsi_do_req(), but with scsi_execute*
we call scsi_io_completion.
Maybe it could be solved by having st implement a cmd->done function
like sd.c that gets set in the init_command callout. Since all the
commands will be coming down as REQ_BLOCK_PC commands, st can set the
scsi_cmnd->done function and decide what to do there. Or maybe some of
the scsi_io_completion logic needs to be fixed. What was the command
that is causing the problems?
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 17:40 ` Kai Makisara
@ 2005-09-18 15:46 ` Mike Christie
2005-09-18 16:13 ` Mike Christie
2005-09-18 16:08 ` Mike Christie
2005-09-18 16:36 ` Mike Christie
2 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-18 15:46 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
>
> init_command() is _not_ called. I added a printk to it and it did not
> print anything. If you look at scsi_prep_fn(), init_command() is called
> only if req->rq_disk is set. scsi_execute_async() does not set it.
oh yeah, sorry. I meant to add that back but would have had to add
another argument to the scsi_execute* functions since I could not figure
out how to go from scsi_device or request_queue to disk.
>
> Setting command parameters through the separate init_command() function
> seems like a horrible idea. You have to do hacks to pass the information.
This is how sd works :(
> The command triggering these problems is a 6-byte read of 10240 bytes. It
> should not a this point return anything but finish with some sense data.
>
> I am debugging the problem but it is going slowly because the system disk
> is a SCSI disk. I have to be careful with the debugging output and changes
> outside st.c require a reboot ;-) So far I have found out that
> scsi_check_sense() is called correctly and returns SUCCESS as it should.
> What happens after that is a mystery.
>
ok
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 17:40 ` Kai Makisara
2005-09-18 15:46 ` Mike Christie
@ 2005-09-18 16:08 ` Mike Christie
2005-09-18 16:36 ` Mike Christie
2 siblings, 0 replies; 27+ messages in thread
From: Mike Christie @ 2005-09-18 16:08 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
> The command triggering these problems is a 6-byte read of 10240 bytes. It
> should not a this point return anything but finish with some sense data.
>
> I am debugging the problem but it is going slowly because the system disk
> is a SCSI disk. I have to be careful with the debugging output and changes
> outside st.c require a reboot ;-) So far I have found out that
> scsi_check_sense() is called correctly and returns SUCCESS as it should.
> What happens after that is a mystery.
>
If we sent the command as multiple segments then since scsi_generic_done
just does this
scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
we could up gettting here in scsi_io_completion() (or depding on the
sense we could get retired further up in that function).
/*
* Mark a single buffer as not uptodate. Queue the remainder.
* We sometimes get this cruft in the event that a medium error
* isn't properly reported.
*/
block_bytes = req->hard_cur_sectors << 9;
if (!block_bytes)
block_bytes = req->data_len;
scsi_end_request(cmd, 0, block_bytes, 1);
and if this command has multiple segments we only partially complete the
command and the rest gets retried.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 15:46 ` Mike Christie
@ 2005-09-18 16:13 ` Mike Christie
0 siblings, 0 replies; 27+ messages in thread
From: Mike Christie @ 2005-09-18 16:13 UTC (permalink / raw)
To: Mike Christie; +Cc: Kai Makisara, linux-scsi
Mike Christie wrote:
> Kai Makisara wrote:
>
>>
>> init_command() is _not_ called. I added a printk to it and it did not
>> print anything. If you look at scsi_prep_fn(), init_command() is
>> called only if req->rq_disk is set. scsi_execute_async() does not set it.
>
>
> oh yeah, sorry. I meant to add that back but would have had to add
> another argument to the scsi_execute* functions since I could not figure
> out how to go from scsi_device or request_queue to disk.
>
>>
>> Setting command parameters through the separate init_command()
>> function seems like a horrible idea. You have to do hacks to pass the
>> information.
>
>
> This is how sd works :(
>
But I guess if every ULD has to add a done() then scsi-ml could set it
if we just add a complete_command on the scsi_driver.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 17:40 ` Kai Makisara
2005-09-18 15:46 ` Mike Christie
2005-09-18 16:08 ` Mike Christie
@ 2005-09-18 16:36 ` Mike Christie
2005-09-18 16:38 ` Mike Christie
2 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-18 16:36 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
On Sun, 2005-09-18 at 20:40 +0300, Kai Makisara wrote:
> The command triggering these problems is a 6-byte read of 10240 bytes. It
> should not a this point return anything but finish with some sense data.
>
> I am debugging the problem but it is going slowly because the system disk
> is a SCSI disk. I have to be careful with the debugging output and changes
> outside st.c require a reboot ;-) So far I have found out that
> scsi_check_sense() is called correctly and returns SUCCESS as it should.
> What happens after that is a mystery.
>
I think something like this would give us something like the old
behavior back. This way we do not let scsi_io_completion further process
the error.
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1126,7 +1126,7 @@ static int scsi_issue_flush_fn(request_q
static void scsi_generic_done(struct scsi_cmnd *cmd)
{
BUG_ON(!blk_pc_request(cmd->request));
- scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
+ scsi_io_completion(cmd, cmd->bufflen, 0);
}
static int scsi_prep_fn(struct request_queue *q, struct request *req)
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 16:36 ` Mike Christie
@ 2005-09-18 16:38 ` Mike Christie
2005-09-18 19:03 ` Kai Makisara
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-18 16:38 UTC (permalink / raw)
To: Mike Christie; +Cc: Kai Makisara, linux-scsi
Mike Christie wrote:
> On Sun, 2005-09-18 at 20:40 +0300, Kai Makisara wrote:
>
>>The command triggering these problems is a 6-byte read of 10240 bytes. It
>>should not a this point return anything but finish with some sense data.
>>
>>I am debugging the problem but it is going slowly because the system disk
>>is a SCSI disk. I have to be careful with the debugging output and changes
>>outside st.c require a reboot ;-) So far I have found out that
>>scsi_check_sense() is called correctly and returns SUCCESS as it should.
>>What happens after that is a mystery.
>>
>
>
> I think something like this would give us something like the old
> behavior back. This way we do not let scsi_io_completion further process
> the error.
>
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1126,7 +1126,7 @@ static int scsi_issue_flush_fn(request_q
> static void scsi_generic_done(struct scsi_cmnd *cmd)
> {
> BUG_ON(!blk_pc_request(cmd->request));
> - scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
> + scsi_io_completion(cmd, cmd->bufflen, 0);
> }
>
> static int scsi_prep_fn(struct request_queue *q, struct request *req)
>
this is over 1-6 of my other patches.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 19:03 ` Kai Makisara
@ 2005-09-18 17:01 ` Mike Christie
2005-09-19 18:39 ` Kai Makisara
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-18 17:01 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
>
> This solves the problem. All works properly and if I try some mt command
> after filemark detection, the residual is correctly zero.
ok thanks for having patience with me and testing so much.
>
> This change solving problems also explains what I have seen today. Before
> your message I had found out that all three commands (the one that should
> have been executed and the two retries) were fetched from the queue in the
> normal way. Eh did not have anything to do with them. The command was
> using dio and 10240 bytes needs three pages => two retries.
>
ahh :)
> Fixing passing the retries did not actually have anything to do with this
> exact problem. However, I think that the retries parameter should be
> obeyed if it is in the interface.
>
Yes, I think we need both fixes for the users of scsi_execte and
scsi_execute_req that was added to scsi-rc-fixes. I will make a patch
and send if off.
Thanks again for testing.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 15:17 ` Mike Christie
@ 2005-09-18 17:40 ` Kai Makisara
2005-09-18 15:46 ` Mike Christie
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: Kai Makisara @ 2005-09-18 17:40 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Sun, 18 Sep 2005, Mike Christie wrote:
> Mike Christie wrote:
> > > I tested this but it does not quite work: there are now two retries
> > > in the same test that previously gave six retries. Unfortunately I
> > > don't just now have time to study the problem further.
> > >
> >
> > ok I will try to look into it.
> >
>
> Oh I bet it is from the change that James B noticed. With scsi_do_req() we
> passed in the sr_done function, but with scsi_execute* we pass in the requests
> end_io function. So with scsi_do_req, when we complete a command in
> scsi_finish_command() (where it calls cmd->done) we would call the done
> function passed into scsi_do_req(), but with scsi_execute* we call
> scsi_io_completion.
>
Possible.
> Maybe it could be solved by having st implement a cmd->done function like sd.c
> that gets set in the init_command callout. Since all the commands will be
> coming down as REQ_BLOCK_PC commands, st can set the scsi_cmnd->done function
> and decide what to do there. Or maybe some of the scsi_io_completion logic
> needs to be fixed. What was the command that is causing the problems?
init_command() is _not_ called. I added a printk to it and it did not
print anything. If you look at scsi_prep_fn(), init_command() is called
only if req->rq_disk is set. scsi_execute_async() does not set it.
Setting command parameters through the separate init_command() function
seems like a horrible idea. You have to do hacks to pass the information.
The command triggering these problems is a 6-byte read of 10240 bytes. It
should not a this point return anything but finish with some sense data.
I am debugging the problem but it is going slowly because the system disk
is a SCSI disk. I have to be careful with the debugging output and changes
outside st.c require a reboot ;-) So far I have found out that
scsi_check_sense() is called correctly and returns SUCCESS as it should.
What happens after that is a mystery.
--
Kai
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 16:38 ` Mike Christie
@ 2005-09-18 19:03 ` Kai Makisara
2005-09-18 17:01 ` Mike Christie
0 siblings, 1 reply; 27+ messages in thread
From: Kai Makisara @ 2005-09-18 19:03 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Sun, 18 Sep 2005, Mike Christie wrote:
> Mike Christie wrote:
> > On Sun, 2005-09-18 at 20:40 +0300, Kai Makisara wrote:
> >
> > > The command triggering these problems is a 6-byte read of 10240 bytes.
> > > It should not a this point return anything but finish with some sense
> > > data.
> > >
> > > I am debugging the problem but it is going slowly because the system
> > > disk is a SCSI disk. I have to be careful with the debugging output
> > > and changes outside st.c require a reboot ;-) So far I have found out
> > > that scsi_check_sense() is called correctly and returns SUCCESS as it
> > > should. What happens after that is a mystery.
> > >
> >
> >
> > I think something like this would give us something like the old
> > behavior back. This way we do not let scsi_io_completion further process
> > the error.
> >
> > diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> > --- a/drivers/scsi/scsi_lib.c
> > +++ b/drivers/scsi/scsi_lib.c
> > @@ -1126,7 +1126,7 @@ static int scsi_issue_flush_fn(request_q
> > static void scsi_generic_done(struct scsi_cmnd *cmd)
> > {
> > BUG_ON(!blk_pc_request(cmd->request));
> > - scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
> > + scsi_io_completion(cmd, cmd->bufflen, 0);
> > }
> >
> > static int scsi_prep_fn(struct request_queue *q, struct request *req)
> >
>
> this is over 1-6 of my other patches.
>
This solves the problem. All works properly and if I try some mt command
after filemark detection, the residual is correctly zero.
This change solving problems also explains what I have seen today. Before
your message I had found out that all three commands (the one that should
have been executed and the two retries) were fetched from the queue in the
normal way. Eh did not have anything to do with them. The command was
using dio and 10240 bytes needs three pages => two retries.
Fixing passing the retries did not actually have anything to do with this
exact problem. However, I think that the retries parameter should be
obeyed if it is in the interface.
--
Kai
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-18 17:01 ` Mike Christie
@ 2005-09-19 18:39 ` Kai Makisara
2005-09-19 19:22 ` Mike Christie
0 siblings, 1 reply; 27+ messages in thread
From: Kai Makisara @ 2005-09-19 18:39 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Sun, 18 Sep 2005, Mike Christie wrote:
> Kai Makisara wrote:
> >
> > This solves the problem. All works properly and if I try some mt command
> > after filemark detection, the residual is correctly zero.
>
> ok thanks for having patience with me and testing so much.
>
Thanks for making the code. But we still have some problems left.
I have tested writing big blocks. The writing fails if the block
size exceeds 512 kB. Below are the details I have found out so far.
The system uses sym53c8xx_2. It does not enable clustering by default but
I changed this. The driver supports up to 96 s/g segments.
I have added some printks to see where the process hangs. The following is
output related to the write command when the command
dd if=tdata of=/dev/nst0 bs=512k count=1
is used (this succeeds):
Sep 19 20:27:48 box kernel: st: about to send cmd 0x0a segs 1
Sep 19 20:27:48 box kernel: queuecommand: cmd[0]=0x0a
Sep 19 20:27:48 box kernel: sym_scatter: use_sg=8
Sep 19 20:27:48 box kernel: st: async ccmd sent.
Sep 19 20:27:48 box kernel: st: cmd=0x0a result=0x0 resid=0 sense[0]=0x00
sense[2]=0x00
st sends the command using a buffer of one segment. The command is passed
to the HBA driver and it sees 8 segments. Clustering seems to work
properly (the maximum segment size is set to 65536 bytes by default).
Here is what is seen when the block size is increased to 513 kB:
dd if=tdata of=/dev/nst0 bs=513k count=1
The dd process hangs in device wait. It turns out that
scsi_execute_async() fails. This is an async write and the process later
waits for the failed write to finish. The patch at the end of this message
fixes this st bug (don't worry about the line shifts, I have some
debugging printks in this driver).
The real problem is that scsi_execute_async() fails. The 513 kB request is
129 pages. Could the reason be related to these defaults in include/linux?
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
--
Kai
-------------------------------8<------------------------------------------
--- linux-2.6.14-rc1-blk3/drivers/scsi/st.c.org 2005-09-16 20:37:19.000000000 +0300
+++ linux-2.6.14-rc1-blk3/drivers/scsi/st.c 2005-09-19 21:21:02.000000000 +0300
@@ -507,6 +510,7 @@
memcpy(SRpnt->cmd, cmd, sizeof(SRpnt->cmd));
STp->buffer->cmdstat.have_sense = 0;
+ STp->buffer->syscall_result = 0;
if (scsi_execute_async(STp->device, cmd, direction,
&((STp->buffer)->sg[0]), 0, (STp->buffer)->sg_segs,
@@ -1623,7 +1627,7 @@
retval = STbp->syscall_result;
goto out;
}
- if (async_write) {
+ if (async_write && !STbp->syscall_result) {
STbp->writing = transfer;
STp->dirty = !(STbp->writing ==
STbp->buffer_bytes);
@@ -1697,7 +1701,7 @@
} else {
count += do_count;
STps->drv_block = (-1); /* Too cautious? */
- retval = (-EIO);
+ retval = STbp->syscall_result;
}
}
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-19 18:39 ` Kai Makisara
@ 2005-09-19 19:22 ` Mike Christie
2005-09-20 19:23 ` Kai Makisara
0 siblings, 1 reply; 27+ messages in thread
From: Mike Christie @ 2005-09-19 19:22 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
>
> st sends the command using a buffer of one segment. The command is passed
> to the HBA driver and it sees 8 segments. Clustering seems to work
> properly (the maximum segment size is set to 65536 bytes by default).
>
> Here is what is seen when the block size is increased to 513 kB:
> dd if=tdata of=/dev/nst0 bs=513k count=1
>
> The dd process hangs in device wait. It turns out that
> scsi_execute_async() fails. This is an async write and the process later
> waits for the failed write to finish. The patch at the end of this message
> fixes this st bug (don't worry about the line shifts, I have some
> debugging printks in this driver).
>
> The real problem is that scsi_execute_async() fails. The 513 kB request is
> 129 pages. Could the reason be related to these defaults in include/linux?
> #define MAX_PHYS_SEGMENTS 128
> #define MAX_HW_SEGMENTS 128
>
Yeah I think this is due to the MAX_PHYS_SEGMENTS limit. The hw segments
is set by scsi_lib in scsi_alloc_queue and is the sg_tablesize value on
the host. Right now all I do is return a error when someone violates one
of the limits, but I think the right thing to do is have the ULDs take
some of these values into account when they build their lists. However
if I do that we will not be able to make large requests since the
MAX_PHYS_SEGMENTS/SCSI_MAX_PHYS_SEGMENTS will limit them. Umm let me
rethink.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-19 19:22 ` Mike Christie
@ 2005-09-20 19:23 ` Kai Makisara
2005-09-20 19:55 ` Mike Christie
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: Kai Makisara @ 2005-09-20 19:23 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Mon, 19 Sep 2005, Mike Christie wrote:
> Kai Makisara wrote:
> >
> > st sends the command using a buffer of one segment. The command is passed
> > to the HBA driver and it sees 8 segments. Clustering seems to work
> > properly (the maximum segment size is set to 65536 bytes by default).
> > Here is what is seen when the block size is increased to 513 kB:
> > dd if=tdata of=/dev/nst0 bs=513k count=1
> >
> > The dd process hangs in device wait. It turns out that
> > scsi_execute_async() fails. This is an async write and the process later
> > waits for the failed write to finish. The patch at the end of this
> > message fixes this st bug (don't worry about the line shifts, I have some
> > debugging printks in this driver).
> >
> > The real problem is that scsi_execute_async() fails. The 513 kB request
> > is 129 pages. Could the reason be related to these defaults in
> > include/linux?
> > # define MAX_PHYS_SEGMENTS 128
> > # define MAX_HW_SEGMENTS 128
> >
>
> Yeah I think this is due to the MAX_PHYS_SEGMENTS limit. The hw segments is
> set by scsi_lib in scsi_alloc_queue and is the sg_tablesize value on the host.
> Right now all I do is return a error when someone violates one of the limits,
> but I think the right thing to do is have the ULDs take some of these values
> into account when they build their lists. However if I do that we will not be
> able to make large requests since the MAX_PHYS_SEGMENTS/SCSI_MAX_PHYS_SEGMENTS
> will limit them. Umm let me rethink.
>
I have done some additional debugging. Submitting the large write fails in
bio_map_pages() called from scsi_req_map_sg(). The first reason is not
phys_segments or hw_segments limit but max_sectors. The sym53c8xx_2 uses
the default SCSI_DEFAULT_MAX_SECTORS which is 1024 (512 kB).
I increased max_sectors in sym_glue.c to 10240 in sym53c8xx and now I can
write blocks up to 1024 kB. Then bio_map_page() fails again but this time
in bio_alloc(). This is because st can allocate chunks of more than 1 MB
and this is too much for one bio. I added the code in the patch at the end
of this message to limit the chunk size and this allowed writing of blocks
up to 5120 kB.
If I try 5121 kB, write fails as is expected but not completely nicely.
Here are some test printks:
scsi_req_map_sg: q->back_merge failed, i=10
st 1:0:5:0: extraneous data discarded.
st 1:0:5:0: COMMAND FAILED (87 0 1).
sym: cmd: 0x0a 0x00 0x50 0x04 0x00 0x00
st: cmd=0x0a result=0x70000 resid=0 sense[0]=0x00 sense[2]=0x00
scsi_req_map_sg fails as it should but still a bogus SCSI command is sent.
I think the reason for this is simple but I don't want to delay the good
news by trying to debug this.
So, now st can write as large blocks as it should. Good work, Mike!
What I don't quite like is that being able to do this requires setting
SCSI adapter parameters (use_clustering, max_sectors) to values that are
not used by most drivers today. Changing is in most cases trivial but this
has to be done. Otherwise the users needing large block sizes feel that
these enhancements are a regression.
--
Kai
--------------------------------8<--------------------------------------
--- linux-2.6.14-rc1-blk3/drivers/scsi/st.c.org 2005-09-16 20:37:19.000000000 +0300
+++ linux-2.6.14-rc1-blk3/drivers/scsi/st.c 2005-09-20 21:52:22.000000000 +0300
@@ -3623,7 +3627,7 @@
priority = GFP_KERNEL | __GFP_NOWARN;
if (need_dma)
priority |= GFP_DMA;
- for (b_size = PAGE_SIZE, order=0;
+ for (b_size = PAGE_SIZE, order=0; order <= 6 &&
b_size < new_size - STbuffer->buffer_size;
order++, b_size *= 2)
; /* empty */
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-20 19:23 ` Kai Makisara
@ 2005-09-20 19:55 ` Mike Christie
2005-09-20 20:20 ` James Bottomley
2005-09-22 20:12 ` Kai Makisara
2 siblings, 0 replies; 27+ messages in thread
From: Mike Christie @ 2005-09-20 19:55 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
> On Mon, 19 Sep 2005, Mike Christie wrote:
>
>
>>Kai Makisara wrote:
>>
>>>st sends the command using a buffer of one segment. The command is passed
>>>to the HBA driver and it sees 8 segments. Clustering seems to work
>>>properly (the maximum segment size is set to 65536 bytes by default).
>>>Here is what is seen when the block size is increased to 513 kB:
>>>dd if=tdata of=/dev/nst0 bs=513k count=1
>>>
>>>The dd process hangs in device wait. It turns out that
>>>scsi_execute_async() fails. This is an async write and the process later
>>>waits for the failed write to finish. The patch at the end of this
>>>message fixes this st bug (don't worry about the line shifts, I have some
>>>debugging printks in this driver).
>>>
>>>The real problem is that scsi_execute_async() fails. The 513 kB request
>>>is 129 pages. Could the reason be related to these defaults in
>>>include/linux?
>>># define MAX_PHYS_SEGMENTS 128
>>># define MAX_HW_SEGMENTS 128
>>>
>>
>>Yeah I think this is due to the MAX_PHYS_SEGMENTS limit. The hw segments is
>>set by scsi_lib in scsi_alloc_queue and is the sg_tablesize value on the host.
>>Right now all I do is return a error when someone violates one of the limits,
>>but I think the right thing to do is have the ULDs take some of these values
>>into account when they build their lists. However if I do that we will not be
>>able to make large requests since the MAX_PHYS_SEGMENTS/SCSI_MAX_PHYS_SEGMENTS
>>will limit them. Umm let me rethink.
>>
>
> I have done some additional debugging. Submitting the large write fails in
> bio_map_pages() called from scsi_req_map_sg(). The first reason is not
> phys_segments or hw_segments limit but max_sectors. The sym53c8xx_2 uses
> the default SCSI_DEFAULT_MAX_SECTORS which is 1024 (512 kB).
>
> I increased max_sectors in sym_glue.c to 10240 in sym53c8xx and now I can
> write blocks up to 1024 kB. Then bio_map_page() fails again but this time
> in bio_alloc(). This is because st can allocate chunks of more than 1 MB
> and this is too much for one bio. I added the code in the patch at the end
> of this message to limit the chunk size and this allowed writing of blocks
> up to 5120 kB.
>
> If I try 5121 kB, write fails as is expected but not completely nicely.
> Here are some test printks:
>
> scsi_req_map_sg: q->back_merge failed, i=10
> st 1:0:5:0: extraneous data discarded.
> st 1:0:5:0: COMMAND FAILED (87 0 1).
> sym: cmd: 0x0a 0x00 0x50 0x04 0x00 0x00
> st: cmd=0x0a result=0x70000 resid=0 sense[0]=0x00 sense[2]=0x00
>
> scsi_req_map_sg fails as it should but still a bogus SCSI command is sent.
> I think the reason for this is simple but I don't want to delay the good
> news by trying to debug this.
>
> So, now st can write as large blocks as it should. Good work, Mike!
>
> What I don't quite like is that being able to do this requires setting
> SCSI adapter parameters (use_clustering, max_sectors) to values that are
> not used by most drivers today. Changing is in most cases trivial but this
> has to be done. Otherwise the users needing large block sizes feel that
> these enhancements are a regression.
Yes, I agree. I will try to see what I can do to make those functions
handle some things for the ULDs. I made some bad assumtions, so i did
not handle some failures gracefully. I think it should be possible to
start new bios when we hit a queue sector limit then ignore the
max_sectors when creating requests so that you do not have to manually
adjust the queues limits for example (this will be useful when sectors
do not make sense for commands like with SG too). This was one of the
reasons Jens wanted those functions in the scsi layer after all.
I think we are screwed if someone does not support clustering and they
hit the phys segments limits though. This is due to the scsi sg pools
that are preallocated. Without my patches you would have not hit the
phys segment limits becuase st and sg were allocating the scatterlist
that get sent.
An alternative would be to just set the scatterlist getting passed to
scsi_execute_async to the requests data field then add some bits so that
that scatterlist gets used later instead of blk_rq_map_sg making a new
one. This would completely remove the need for scsi_req_map_sg and allow
SG and ST to avoid all those limits like before. I think some may
consider this to be hack on the block layer though?
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-20 19:23 ` Kai Makisara
2005-09-20 19:55 ` Mike Christie
@ 2005-09-20 20:20 ` James Bottomley
2005-09-20 21:17 ` Kai Makisara
2005-09-22 20:12 ` Kai Makisara
2 siblings, 1 reply; 27+ messages in thread
From: James Bottomley @ 2005-09-20 20:20 UTC (permalink / raw)
To: Kai Makisara; +Cc: Mike Christie, SCSI Mailing List
On Tue, 2005-09-20 at 22:23 +0300, Kai Makisara wrote:
> > Yeah I think this is due to the MAX_PHYS_SEGMENTS limit. The hw segments is
> > set by scsi_lib in scsi_alloc_queue and is the sg_tablesize value on the host.
> > Right now all I do is return a error when someone violates one of the limits,
> > but I think the right thing to do is have the ULDs take some of these values
> > into account when they build their lists. However if I do that we will not be
> > able to make large requests since the MAX_PHYS_SEGMENTS/SCSI_MAX_PHYS_SEGMENTS
> > will limit them. Umm let me rethink.
There's already the code in SCSI to increase this to 256 (and
potentially beyond) if it becomes necessary.
> I have done some additional debugging. Submitting the large write fails in
> bio_map_pages() called from scsi_req_map_sg(). The first reason is not
> phys_segments or hw_segments limit but max_sectors. The sym53c8xx_2 uses
> the default SCSI_DEFAULT_MAX_SECTORS which is 1024 (512 kB).
>
> I increased max_sectors in sym_glue.c to 10240 in sym53c8xx and now I can
> write blocks up to 1024 kB. Then bio_map_page() fails again but this time
> in bio_alloc(). This is because st can allocate chunks of more than 1 MB
> and this is too much for one bio. I added the code in the patch at the end
> of this message to limit the chunk size and this allowed writing of blocks
> up to 5120 kB.
>
> If I try 5121 kB, write fails as is expected but not completely nicely.
> Here are some test printks:
>
> scsi_req_map_sg: q->back_merge failed, i=10
> st 1:0:5:0: extraneous data discarded.
> st 1:0:5:0: COMMAND FAILED (87 0 1).
> sym: cmd: 0x0a 0x00 0x50 0x04 0x00 0x00
> st: cmd=0x0a result=0x70000 resid=0 sense[0]=0x00 sense[2]=0x00
>
> scsi_req_map_sg fails as it should but still a bogus SCSI command is sent.
> I think the reason for this is simple but I don't want to delay the good
> news by trying to debug this.
>
> So, now st can write as large blocks as it should. Good work, Mike!
>
> What I don't quite like is that being able to do this requires setting
> SCSI adapter parameters (use_clustering, max_sectors) to values that are
> not used by most drivers today. Changing is in most cases trivial but this
> has to be done. Otherwise the users needing large block sizes feel that
> these enhancements are a regression.
OK, but this is troubling either way: Previously the LLD was simply
lying about its ability to support more sectors and clustering and you
called its bluff by sending such commands. Now the bio layer is
actually enforcing these limits.
The fix has got to be to update the LLDs so they're reporting their
capabilities truthfully.
James
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-20 20:20 ` James Bottomley
@ 2005-09-20 21:17 ` Kai Makisara
2005-09-20 22:39 ` Douglas Gilbert
0 siblings, 1 reply; 27+ messages in thread
From: Kai Makisara @ 2005-09-20 21:17 UTC (permalink / raw)
To: James Bottomley; +Cc: Mike Christie, SCSI Mailing List
On Tue, 20 Sep 2005, James Bottomley wrote:
> On Tue, 2005-09-20 at 22:23 +0300, Kai Makisara wrote:
> > > Yeah I think this is due to the MAX_PHYS_SEGMENTS limit. The hw segments is
> > > set by scsi_lib in scsi_alloc_queue and is the sg_tablesize value on the host.
> > > Right now all I do is return a error when someone violates one of the limits,
> > > but I think the right thing to do is have the ULDs take some of these values
> > > into account when they build their lists. However if I do that we will not be
> > > able to make large requests since the MAX_PHYS_SEGMENTS/SCSI_MAX_PHYS_SEGMENTS
> > > will limit them. Umm let me rethink.
>
> There's already the code in SCSI to increase this to 256 (and
> potentially beyond) if it becomes necessary.
>
The current default segment size limit is 64 kB. This multiplied by 128
gives 8 MB. This should be large enough for all tape usage I know.
...
> > What I don't quite like is that being able to do this requires setting
> > SCSI adapter parameters (use_clustering, max_sectors) to values that are
> > not used by most drivers today. Changing is in most cases trivial but this
> > has to be done. Otherwise the users needing large block sizes feel that
> > these enhancements are a regression.
>
> OK, but this is troubling either way: Previously the LLD was simply
> lying about its ability to support more sectors and clustering and you
> called its bluff by sending such commands. Now the bio layer is
> actually enforcing these limits.
>
I agree that it is good to have a common layer to enforce the limits.
Currently st has had to check the limits (it checks some and for others
relies on that all decent SCSI HBAs support what st tries to do). Based
on my earlier experience, I just doubt a little that the limits will be
updated but I hope to be proven wrong here :-)
> The fix has got to be to update the LLDs so they're reporting their
> capabilities truthfully.
>
If this happens, it is very good. (I hope this includes the alignment
constraints, too :-)
--
Kai
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-20 21:17 ` Kai Makisara
@ 2005-09-20 22:39 ` Douglas Gilbert
0 siblings, 0 replies; 27+ messages in thread
From: Douglas Gilbert @ 2005-09-20 22:39 UTC (permalink / raw)
To: Kai Makisara; +Cc: James Bottomley, Mike Christie, SCSI Mailing List
Kai Makisara wrote:
> On Tue, 20 Sep 2005, James Bottomley wrote:
>
>
>>On Tue, 2005-09-20 at 22:23 +0300, Kai Makisara wrote:
>>
>>>>Yeah I think this is due to the MAX_PHYS_SEGMENTS limit. The hw segments is
>>>>set by scsi_lib in scsi_alloc_queue and is the sg_tablesize value on the host.
>>>>Right now all I do is return a error when someone violates one of the limits,
>>>>but I think the right thing to do is have the ULDs take some of these values
>>>>into account when they build their lists. However if I do that we will not be
>>>>able to make large requests since the MAX_PHYS_SEGMENTS/SCSI_MAX_PHYS_SEGMENTS
>>>>will limit them. Umm let me rethink.
>>
>>There's already the code in SCSI to increase this to 256 (and
>>potentially beyond) if it becomes necessary.
>>
>
> The current default segment size limit is 64 kB. This multiplied by 128
> gives 8 MB. This should be large enough for all tape usage I know.
>
> ...
>
>>>What I don't quite like is that being able to do this requires setting
>>>SCSI adapter parameters (use_clustering, max_sectors) to values that are
>>>not used by most drivers today. Changing is in most cases trivial but this
>>>has to be done. Otherwise the users needing large block sizes feel that
>>>these enhancements are a regression.
>>
>>OK, but this is troubling either way: Previously the LLD was simply
>>lying about its ability to support more sectors and clustering and you
>>called its bluff by sending such commands. Now the bio layer is
>>actually enforcing these limits.
>>
>
> I agree that it is good to have a common layer to enforce the limits.
> Currently st has had to check the limits (it checks some and for others
> relies on that all decent SCSI HBAs support what st tries to do). Based
> on my earlier experience, I just doubt a little that the limits will be
> updated but I hope to be proven wrong here :-)
>
>
>>The fix has got to be to update the LLDs so they're reporting their
>>capabilities truthfully.
>>
>
> If this happens, it is very good. (I hope this includes the alignment
> constraints, too :-)
... or perhaps the LLD could complain (rather than
the block layer) if it gets a scatter gather list
that it can't handle, since that may be dynamic
(e.g. depend on other requests the LLD is processing
at the time). Both the st and sg drivers could produce
a reasonable error to their application clients in this
case.
The SCSI device may also have data size limits
(e.g. the Block Limits VPD page in SBC-2). And that
VPD page makes an interesting distinction: between
maximum and optimum transfer size. Also these limits
apply to a disk's block commands (e.g. READ and WRITE)
but not to other data bearing commands (e.g. WRITE
BUFFER for uploading firmware).
Basically if an app is using st to talk to a SCSI
tape device, the kernel block layer and the LLD
should only be heard from if they can't do what was
requested. No second guessing please.
Doug Gilbert
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-20 19:23 ` Kai Makisara
2005-09-20 19:55 ` Mike Christie
2005-09-20 20:20 ` James Bottomley
@ 2005-09-22 20:12 ` Kai Makisara
2005-09-23 3:20 ` Mike Christie
2 siblings, 1 reply; 27+ messages in thread
From: Kai Makisara @ 2005-09-22 20:12 UTC (permalink / raw)
To: Mike Christie; +Cc: linux-scsi
On Tue, 20 Sep 2005, Kai Makisara wrote:
...
> If I try 5121 kB, write fails as is expected but not completely nicely.
> Here are some test printks:
>
> scsi_req_map_sg: q->back_merge failed, i=10
> st 1:0:5:0: extraneous data discarded.
> st 1:0:5:0: COMMAND FAILED (87 0 1).
> sym: cmd: 0x0a 0x00 0x50 0x04 0x00 0x00
> st: cmd=0x0a result=0x70000 resid=0 sense[0]=0x00 sense[2]=0x00
>
> scsi_req_map_sg fails as it should but still a bogus SCSI command is sent.
> I think the reason for this is simple but I don't want to delay the good
> news by trying to debug this.
>
The reason was simple: scsi_req_map_sg did not set err when q->back_merge
failed. The patch at the end fixes the problem (I am not sure what would
be the correct error code).
--
Kai
--- linux-2.6.14-rc1-blk3/drivers/scsi/scsi_lib.c.old 2005-09-22 22:57:53.000000000 +0300
+++ linux-2.6.14-rc1-blk3/drivers/scsi/scsi_lib.c 2005-09-22 22:58:14.000000000 +0300
@@ -360,6 +360,7 @@
blk_rq_bio_prep(q, rq, bio);
else if (!q->back_merge_fn(q, rq, bio)) {
bio_endio(bio, bio->bi_size, 0);
+ err = -ENOMEM;
goto free_bios;
} else {
rq->biotail->bi_next = bio;
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 4/5] convert st to use scsi_execte_async
2005-09-22 20:12 ` Kai Makisara
@ 2005-09-23 3:20 ` Mike Christie
0 siblings, 0 replies; 27+ messages in thread
From: Mike Christie @ 2005-09-23 3:20 UTC (permalink / raw)
To: Kai Makisara; +Cc: linux-scsi
Kai Makisara wrote:
> On Tue, 20 Sep 2005, Kai Makisara wrote:
>
> ...
>
>>If I try 5121 kB, write fails as is expected but not completely nicely.
>>Here are some test printks:
>>
>>scsi_req_map_sg: q->back_merge failed, i=10
>>st 1:0:5:0: extraneous data discarded.
>>st 1:0:5:0: COMMAND FAILED (87 0 1).
>>sym: cmd: 0x0a 0x00 0x50 0x04 0x00 0x00
>>st: cmd=0x0a result=0x70000 resid=0 sense[0]=0x00 sense[2]=0x00
>>
>>scsi_req_map_sg fails as it should but still a bogus SCSI command is sent.
>>I think the reason for this is simple but I don't want to delay the good
>>news by trying to debug this.
>>
>
> The reason was simple: scsi_req_map_sg did not set err when q->back_merge
> failed. The patch at the end fixes the problem (I am not sure what would
> be the correct error code).
yeah :) thanks.
^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2005-09-23 3:20 UTC | newest]
Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-09-16 4:39 [PATCH 4/5] convert st to use scsi_execte_async Mike Christie
2005-09-17 11:57 ` Kai Makisara
2005-09-17 15:43 ` Mike Christie
2005-09-17 15:55 ` Mike Christie
2005-09-17 16:25 ` Mike Christie
2005-09-18 12:01 ` Kai Makisara
2005-09-18 15:03 ` Mike Christie
2005-09-18 15:17 ` Mike Christie
2005-09-18 17:40 ` Kai Makisara
2005-09-18 15:46 ` Mike Christie
2005-09-18 16:13 ` Mike Christie
2005-09-18 16:08 ` Mike Christie
2005-09-18 16:36 ` Mike Christie
2005-09-18 16:38 ` Mike Christie
2005-09-18 19:03 ` Kai Makisara
2005-09-18 17:01 ` Mike Christie
2005-09-19 18:39 ` Kai Makisara
2005-09-19 19:22 ` Mike Christie
2005-09-20 19:23 ` Kai Makisara
2005-09-20 19:55 ` Mike Christie
2005-09-20 20:20 ` James Bottomley
2005-09-20 21:17 ` Kai Makisara
2005-09-20 22:39 ` Douglas Gilbert
2005-09-22 20:12 ` Kai Makisara
2005-09-23 3:20 ` Mike Christie
2005-09-17 15:57 ` Kai Makisara
2005-09-17 16:48 ` Kai Makisara
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).