linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* sg regression in 2.6.16-rc5
@ 2006-02-28 17:54 Mark Rustad
  2006-02-28 19:53 ` Douglas Gilbert
  0 siblings, 1 reply; 33+ messages in thread
From: Mark Rustad @ 2006-02-28 17:54 UTC (permalink / raw)
  To: linux-scsi; +Cc: Linux Kernel Mailing List

We have encountered some kind of sg regression with kernel 2.6.16-rc5  
relative to 2.6.15. We have a small program that demonstrates the  
failure. On 2.6.15 it produces the output:

Alloced dataptr 0 -> 0xb7d07008
IOS: 0
ios 100

indicating that it did 100 operations successfully. On 2.6.16-rc5, it  
produces the output:

Alloced dataptr 0 -> 0xa7d10008
SG_IO ioctl error 12 Cannot allocate memory
ios 0

indicating that it did 0 operations successfully. This program is  
attempting to do 1MB reads on a SCSI device. We get the failure both  
on an aic79xx parallel SCSI and on aic94xx SAS. With both types of  
devices, it works fine on the 2.6.15 kernel. We have also seen this  
problem on the 2.6.16-rc4 kernel. In all cases we were running on an  
Intel Xeon-based system.

Below is the source for the program that was used to demonstrate this  
problem:

#include <ctype.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>


/*
 * Print a hex dump of the sense data returned for a failed command,
 * followed by a short summary line.
 *
 * cdb0         - first byte (opcode) of the command that failed
 * sense_buffer - sense bytes to dump; not dereferenced when len <= 0
 * len          - number of valid bytes in sense_buffer
 *
 * The dump starts a new line every 16 bytes and inserts a space every
 * 4 bytes.  The summary prints the low nibble of byte 2 as KEY, byte 12 as
 * ASC and byte 13 as ASCQ (these offsets presumably assume fixed-format
 * sense data -- confirm against the devices in use).  A summary field is
 * printed only if len covers its byte, so bytes that were never written
 * are not read or reported.
 */
void dispsns(unsigned char cdb0, unsigned char *sense_buffer, int len)
{
     int i;

     printf("sense:  cdb0 : %02hhX",cdb0);
     for (i = 0; i < len ; i++) {

         if (!(i % 16)) {
             /* start of a 16-byte row */
             printf("\n%02hhX", sense_buffer[i]);
             continue;
         }
         if (!(i % 4))
             printf(" ");
         printf("%02hhX", sense_buffer[i]);
     }

     /* With len >= 14 the line below is identical to the historic output. */
     if (len > 2)
         printf(" KEY: %02hhX ", sense_buffer[2] & 0x0f);
     if (len > 12)
         printf("ASC: %02hhX ", sense_buffer[12]);
     if (len > 13)
         printf("ASCQ: %02hhX", sense_buffer[13]);
     printf("\n");
}

/*
 * Submit one SCSI command through the SG_IO ioctl.
 *
 * When sglcount is greater than 1 the iovec array itself is handed to the
 * driver as a scatter-gather list.  Otherwise the buffer of the first
 * element is passed directly and SG_FLAG_DIRECT_IO is requested.
 *
 * Returns datalen minus the residual reported by the driver when the
 * command completes with SG_INFO_OK.  Returns -1 if the ioctl itself fails
 * (errno is printed) or if the command did not complete cleanly (the sense
 * data is printed).
 */
int do_scsi_io(int sg_fd, unsigned char *cdb, int cdblen, sg_iovec_t *iovec,
     int dir, int datalen, int sglcount)
{
     unsigned char sense_buffer[32];
     sg_io_hdr_t io_hdr;
     int use_iovec = (sglcount > 1);

     memset(sense_buffer, 0, sizeof(sense_buffer));
     memset(&io_hdr, 0, sizeof(io_hdr));

     /* Command, sense buffer and timeout. */
     io_hdr.interface_id = 'S';
     io_hdr.cmdp = cdb;
     io_hdr.cmd_len = cdblen;
     io_hdr.sbp = sense_buffer;
     io_hdr.mx_sb_len = sizeof(sense_buffer);
     io_hdr.timeout = 10000;     /* 10000 millisecs == 10 seconds */

     /* Data phase. */
     io_hdr.dxfer_direction = dir;
     io_hdr.dxfer_len = datalen;
     if (use_iovec) {
         io_hdr.iovec_count = sglcount;
         io_hdr.dxferp = iovec;
     } else {
         io_hdr.iovec_count = 0;
         io_hdr.dxferp = iovec[0].iov_base;
         io_hdr.flags |= SG_FLAG_DIRECT_IO;
     }

     if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) {
         printf("SG_IO ioctl error %d %s\n", errno, strerror(errno));
         return -1;
     }

     if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK) {
         dispsns(cdb[0], sense_buffer, io_hdr.sb_len_wr);
         return -1;
     }

     return datalen - io_hdr.resid;
}

/* Bytes requested per command; passed to do_scsi_io() as the data length. */
unsigned int IOLEN = 0x100000;
/* Number of scatter-gather elements the transfer buffer is split into. */
unsigned int SGLCOUNT =  1;
/* Upper bound on the number of commands main() issues. */
unsigned int LOOPCOUNT = 100;
/*
 * Issue up to LOOPCOUNT reads of IOLEN bytes each, all at LBA 0, against
 * the sg device named by argv[1], stopping at the first failure, and print
 * how many completed.
 *
 * Returns -1 on a usage error, allocation failure or failure to open the
 * device.  Returns 0 once the I/O loop has run, even if a command failed;
 * the "ios" line reports how many commands succeeded.
 */
int main(int argc, char *argv[])
{
     const char *devpath;
     unsigned int i;
     unsigned int allocated = 0;
     int handle;
     int ret;
     int status = -1;
     /* 0x28 is the READ(10) opcode; LBA and length are filled in below. */
     unsigned char readCmdBlk[10] = {0x28, 0, 0, 0, 0x00, 0, 0, 0, 0, 0};
     unsigned int blkcount;
     sg_iovec_t  iovectable[SGLCOUNT];
     void *dataptr[SGLCOUNT];

     if (argc < 2) {
         printf("Error: no input parms\n");
         printf("  Usage: iotest /dev/sg<n>\n");
         return -1;
     }
     /* Use the argument in place; copying it into a fixed buffer could overflow. */
     devpath = argv[1];

     for (i = 0; i < SGLCOUNT; i++) {
         dataptr[i] = malloc((IOLEN/SGLCOUNT) + 0x2000);
         if (dataptr[i] == NULL) {
             printf("Unable to alloc memory \n");
             goto out_free;
         }
         allocated++;
         printf("Alloced dataptr %u -> %p \n", i, dataptr[i]);
         iovectable[i].iov_base = dataptr[i];
         iovectable[i].iov_len = IOLEN/SGLCOUNT;
     }

     handle = open(devpath, O_RDWR);
     if (handle == -1) {
         printf(" Open of %s failed \n",devpath);
         goto out_free;
     }

     /* NOTE(review): assumes 512-byte logical blocks -- confirm for the target. */
     blkcount = IOLEN /0x200;
     readCmdBlk[2] = 0; /*lba*/
     readCmdBlk[3] = 0; /*lba*/
     readCmdBlk[4] = 0; /*lba*/
     readCmdBlk[5] = 0; /*lba*/
     readCmdBlk[7] = (blkcount & 0xff00) >> 8; /*len*/
     readCmdBlk[8] = blkcount & 0xff; /*len*/

     for (i = 0; i < LOOPCOUNT; i++) {
         ret = do_scsi_io(handle, readCmdBlk, 10, iovectable,
                 SG_DXFER_FROM_DEV, IOLEN,SGLCOUNT);
         if (ret == -1)
             break;
         if ((i & 0xFF) == 0)
             printf(" IOS: %u \n", i);
     }
     printf("ios %u \n", i);
     status = 0;

     close(handle);
out_free:
     /* Only the buffers that were successfully allocated are released. */
     for (i = 0; i < allocated; i++)
         free(dataptr[i]);

     return status;
}

-- 
Mark Rustad, MRustad@mac.com


^ permalink raw reply	[flat|nested] 33+ messages in thread
* RE: sg regression in 2.6.16-rc5
@ 2006-03-02 23:04 Falkinder, David Malcolm
  0 siblings, 0 replies; 33+ messages in thread
From: Falkinder, David Malcolm @ 2006-03-02 23:04 UTC (permalink / raw)
  To: Linus Torvalds, Douglas Gilbert
  Cc: Kai Makisara, Matthias Andree, Mark Rustad, linux-scsi,
	Linux Kernel Mailing List, Dewar, Charles David

Linus,

I contacted Doug off-list, and he asked me to express my concerns here.

Whilst a Linux advocate, I work cross platform, and have but a shallow
knowledge of the kernel, so apologies in advance for any technical
inaccuracies, or misunderstandings ...

Essentially what I conveyed to Doug was :

I guess, I'm not fully aware of the implications of what is being
discussed as there appears to essentially be two implementations of the
SG_IO IOCtl - namely the one in the sg driver, and the one in the block
layer.

One of the key drivers for us using Linux is the ability to do a 16Mb
contiguous single transfer.
i.e. WRITE(6) with 0xFF 0xFF 0xFF as the transfer length. Often we use
patterns like (2^n)-1, 2^n, (2^n)+1, to thoroughly test the SCSI bus, so
ALL transfer sizes are needed.

Certainly a 1Mb limit would be useless, as would 4Mb.

To achieve our goal of 16Mb all we've had to do to date is recompile the
kernel having set SG_SCATTER_SZ to (64 * 4096).

Whilst it would be great to just use a vanilla kernel, this is a
relatively trivial patch to meet our needs. I'd hate to think at any
point anything would be done to move away from this. Certainly we'd have
to either find another proprietary solution, or freeze our Linux
implementation indefinitely. Neither a particularly attractive solution.

-------

I (obviously) support your wish to fix broken code. In my technical
naivety in this area, I obviously can't comment on the ramifications of
a fix/non fix situation other than pertaining directly to the large
transfer situation. However it's obvious we ( and I'm sure others ) are
at the moment exploiting this "defect". I guess I feel to be hearing a
lot of discussion regarding the fix, so it's obviously contentious, and
it's agreed it will effectively reduce large transfer functionality of
the kernel; what I am not hearing is a timeline for restoring that
functionality. Personally I'd be happy to "miss out" on a couple of
kernel releases, if I was confident functionality would be restored.
What does worry me is the potential for this fix to be applied, and the
functionality I need not be restored. For example the SG_IO IoCtl in the
block layer was obviously a laudable project, yet to date does not
provide all the features offered by the SG driver [ that I need at least
].

Can I request therefore, that unless the fix can be extended to retain
the large transfer functionality, or a suitable timeline for its
restoration be resolved; that the patch not be applied.

Many thanks,

	Best Wishes,

	|\
	|/ave




-----Original Message-----
From: linux-scsi-owner@vger.kernel.org
[mailto:linux-scsi-owner@vger.kernel.org] On Behalf Of Linus Torvalds
Sent: 02 March 2006 21:25
To: Douglas Gilbert
Cc: Kai Makisara; Matthias Andree; Mark Rustad;
linux-scsi@vger.kernel.org; Linux Kernel Mailing List
Subject: Re: sg regression in 2.6.16-rc5



On Thu, 2 Mar 2006, Douglas Gilbert wrote:
> 
> As more information has come to light, the worst case "big transfer" 
> of a single SCSI command through sg (and st I suspect) is 512 KB **. 
> With full coalescing that figure goes up to 4 MB **. I am also aware 
> that some users increase SG_SCATTER_SZ in the sg driver to get larger 
> "big transfer"s than sg's current limit of (8MB - 32KB) **.
> That facility has now gone (i.e. upping SG_SCATTER_SZ will have no 
> effect) with no replacement mechanism.
> 
> So I'll add my vote to "revert this change before lk 2.6.16"
> with a view to applying it after some solution to the "big transfer" 
> problem is found.

Considering that the old code was apparently known-broken due to not
honoring the use_clustering flag, I would say that the more likely thing
is that very few people use sg in the first place, and we should wait
and see what the reaction is to actually fixing a real bug.

Doing more than page-sized transfers can be hard/impossible in
virtualized environments, for example.

In contrast, upping the limits should be fairly easy, I assume. Same
goes for if some driver disables clustering even though it shouldn't.
No?

		Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org More majordomo info
at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 33+ messages in thread
* RE: sg regression in 2.6.16-rc5
@ 2006-03-03 20:24 Falkinder, David Malcolm
  2006-03-03 21:54 ` Linus Torvalds
  0 siblings, 1 reply; 33+ messages in thread
From: Falkinder, David Malcolm @ 2006-03-03 20:24 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-scsi, dougg

Linus,

Got completely ignored last time, so time for some Linus stylee
directness ;-)
 
This thread has moved on to a discussion of transfer limits based entirely
upon "max_sectors"! It's cobblers. I'd love to hear how this applies to
a SCSI tape drive, or a SCSI scanner etc - it's a disk centric concept.

With no disrespect intended, disks are pretty dumb - use largely fixed
transfer sizes, and but a small subset of the SCSI command set. You're
not considering the "bigger picture".

To succinctly re-iterate - you're breaking functionality (albeit an
exploited defect), without providing a timeline / plan for its
restoration. Again I don't disagree with the plan to make this change in
the medium term, once the lower levels provide large transfer
capability.

Surely the length of this thread is indicative of a need to go for a
third option to the current in or out plans.


	Regards,

	|\
	|/ave
 
-----Original Message-----
From: linux-scsi-owner@vger.kernel.org
[mailto:linux-scsi-owner@vger.kernel.org] On Behalf Of Linus Torvalds
Sent: 03 March 2006 20:10
To: Jeff Garzik
Cc: Steve Byan; Mark Lord; Matthias Andree; Douglas Gilbert; Mark
Rustad; linux-scsi@vger.kernel.org; Linux Kernel Mailing List
Subject: Re: sg regression in 2.6.16-rc5



On Fri, 3 Mar 2006, Jeff Garzik wrote:
> 
> 256 max sectors IDE driver, 200 max sectors libata (due to driver not 
> hardware).

When I said "lower due to broken hw" I was more thinking about things
like the SiIimage driver, which actually limits the rqsize to 15 sectors
due to some strange hw interactions with seagate SATA devices.

(It will then raise it back up to 128 if it's not a Seagate SATA drive.
I forget what the exact issue was. Some strange corruption in some
limited case, and not allowing big requests worked around it. There's
some strange IDE quirks out there...).

			Linus
-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org More majordomo info
at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2006-03-03 21:54 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-02-28 17:54 sg regression in 2.6.16-rc5 Mark Rustad
2006-02-28 19:53 ` Douglas Gilbert
2006-02-28 20:38   ` Kai Makisara
2006-03-01  2:04     ` Douglas Gilbert
2006-03-01  2:08     ` Mark Rustad
2006-03-01  8:38   ` Matthias Andree
2006-03-01 18:28     ` Linus Torvalds
2006-03-01 18:32       ` Mark Lord
2006-03-01 18:42         ` Linus Torvalds
2006-03-01 18:50           ` Matthew Wilcox
2006-03-03 18:27           ` Steve Byan
2006-03-03 18:55             ` Linus Torvalds
2006-03-03 19:13               ` Steve Byan
2006-03-03 19:42               ` Jeff Garzik
2006-03-03 20:09                 ` Linus Torvalds
2006-03-03 20:30                   ` Jeff Garzik
2006-03-01 19:33       ` Douglas Gilbert
2006-03-01 20:42         ` Mike Christie
2006-03-01 22:30           ` James Bottomley
2006-03-01 22:56             ` Mike Christie
2006-03-01 21:06         ` Kai Makisara
2006-03-02 19:50           ` Douglas Gilbert
2006-03-02 21:25             ` Linus Torvalds
2006-03-02 23:08               ` Matthias Andree
2006-03-02 23:20                 ` Linus Torvalds
2006-03-03  0:02                   ` Linus Torvalds
2006-03-03  0:12                     ` Linus Torvalds
2006-03-03 16:43                   ` Douglas Gilbert
2006-03-03 17:17                     ` Linus Torvalds
2006-03-02 23:06             ` Matthias Andree
  -- strict thread matches above, loose matches on Subject: below --
2006-03-02 23:04 Falkinder, David Malcolm
2006-03-03 20:24 Falkinder, David Malcolm
2006-03-03 21:54 ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).