public inbox for linux-mmc@vger.kernel.org
 help / color / mirror / Atom feed
* How to make kernel block layer generate bigger request in the request queue?
@ 2010-04-09 14:07 Gao, Yunpeng
  2010-04-09 23:54 ` Robert Hancock
  0 siblings, 1 reply; 9+ messages in thread
From: Gao, Yunpeng @ 2010-04-09 14:07 UTC (permalink / raw)
  To: linux-ide@vger.kernel.org, linux-mmc@vger.kernel.org

Hi,

I'm working on a block device driver (a NAND flash driver with an FTL layer) on the 2.6.31 kernel, and I am trying to improve the sequential read/write performance of the block driver.

When I debug the driver, I found that the sector count of every r/w request in the request queue is never bigger than 8. That means, for every r/w request, it only handles 512 * 8 = 4KB bytes at most. And I think the sequential r/w speed can be improved if the Linux block layer generates bigger requests (for example, 64KB) in the request queue.

To implement this, I have added some code as below (My hardware doesn't support scatter/gather, but can do 512KB DMA data transfer):
	...
	blk_queue_max_sectors(dev->queue, 1024);
	blk_queue_max_phys_segments(dev->queue, 128);
	blk_queue_max_hw_segments(dev->queue, 1);
	blk_queue_max_segment_size(dev->queue, 524288);
	...
And also set NOOP as the default IO Scheduler (because the underlying 'block' device is NAND flash, not a real hard disk).

But it seems that it doesn't work. The block layer still generates requests of at most 8 sectors in the request queue, even if I read/write 1GB of data from/to the device with the dd command.

Did I miss something to make the block layer generate bigger size data for every request in the request queue? 
Below is part of my source code. Any comments are highly appreciated. Thank you in advance.

-----------------------------------------------------------------------------------------------------
...

/* Dynamically allocated block-device major number (see GLOB_SBD_init()). */
static int GLOB_SBD_majornum;
/* Per-device state, one entry per exported NAND block device. */
static struct spectra_nand_dev nand_device[NUM_DEVICES];
/* Single global mutex serializing all FTL transfers across devices. */
static struct mutex spectra_lock;
/* Identification/geometry data read from the flash at init time.
 * NOTE(review): "indentfy" looks like a typo for "identify" in the type
 * name; the type is declared elsewhere, so it cannot be fixed here. */
struct spectra_indentfy_dev_tag IdentifyDeviceData;

...

/*
 * Prepare-flush hook installed via blk_queue_ordered(): tag the barrier
 * request as a Linux-block-internal flush command so that do_transfer()
 * can recognize it and flush the FTL cache instead of moving data.
 */
static void SBD_prepare_flush(struct request_queue *q, struct request *rq)
{
	rq->cmd[0] = REQ_LB_OP_FLUSH;
	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
}

/* Transfer a full request. */
/* Transfer a full request. */
/*
 * Execute one block-layer request against the NAND/FTL layer.
 * Handles three cases: a flush request tagged by SBD_prepare_flush(),
 * a filesystem read, or a filesystem write.  Returns 0 on success and
 * -EIO on any failure.  Called from spectra_trans_thread() with
 * spectra_lock held and no spinlocks held, so it may sleep.
 */
static int do_transfer(struct spectra_nand_dev *tr, struct request *req)
{
	u64 start_addr, addr;
	u32 logical_start_sect, hd_start_sect;
	u32 nsect, hd_sects;
	u32 rsect, tsect = 0;
	char *buf;
	/* Sectors (512B) per flash page; assumes PageDataSize is a
	 * multiple of 512 -- TODO confirm against the FTL layer. */
	u32 ratio = IdentifyDeviceData.PageDataSize >> 9;

	/* Byte offset of the request on the 512B-sector virtual disk. */
	start_addr = (u64)(blk_rq_pos(req)) << 9;

	/* Flush request tagged by SBD_prepare_flush(): flush the FTL
	 * cache, no data transfer. */
	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
			req->cmd[0] == REQ_LB_OP_FLUSH) {
		if (force_flush_cache()) /* Fail to flush cache */
			return -EIO;
		else
			return 0;
	}

	/* Anything that is not a filesystem read/write is unsupported. */
	if (!blk_fs_request(req))
		return -EIO;

	/* Reject requests that run past the end of the device. */
	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) > get_capacity(tr->gd)) {
		printk(KERN_ERR "Error: request over the device "
			"capacity!sector %d, current_nr_sectors %d, "
			"while capacity is %d\n",
			(int)blk_rq_pos(req),
			blk_rq_cur_sectors(req),
			(int)get_capacity(tr->gd));
		return -EIO;
	}

	/* Split the 512B-sector address into a flash-page index
	 * (hd_start_sect, in units of 'ratio' sectors) plus the sector
	 * offset within that page (rsect). */
	logical_start_sect = start_addr >> 9;
	hd_start_sect = logical_start_sect / ratio;
	rsect = logical_start_sect - hd_start_sect * ratio;

	/* Page-aligned byte address of the first flash page touched. */
	addr = (u64)hd_start_sect * ratio * 512;
	buf = req->buffer;
	nsect = blk_rq_cur_sectors(req);

	switch (rq_data_dir(req)) {
	case READ:
		/* Call the low level function to read data from NAND device */
		...
		return 0;
	case WRITE:
		/* Call the low level function to write data to NAND device */
		...
		return 0;
	default:
		printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
		return -EIO;
	}
}

/* This function is copied from drivers/mtd/mtd_blkdevs.c */
/* This function is copied from drivers/mtd/mtd_blkdevs.c */
/*
 * Per-device transfer thread.  GLOB_SBD_request() only wakes this
 * thread; the thread itself pulls requests off the queue and runs them
 * through do_transfer(), sleeping when the queue is empty.  Exits when
 * kthread_stop() is called on it.
 */
static int spectra_trans_thread(void *arg)
{
	struct spectra_nand_dev *tr = arg;
	struct request_queue *rq = tr->queue;
	struct request *req = NULL;

	/* we might get involved when memory gets low, so use PF_MEMALLOC */
	current->flags |= PF_MEMALLOC;

	/* The request queue may only be touched with queue_lock held. */
	spin_lock_irq(rq->queue_lock);
	while (!kthread_should_stop()) {
		int res;

		if (!req) {
			req = blk_fetch_request(rq);
			if (!req) {
				/* Queue empty: sleep until the request
				 * callback or kthread_stop() wakes us.
				 * The task state is set before the lock
				 * is dropped so a concurrent wake-up
				 * cannot be lost. */
				set_current_state(TASK_INTERRUPTIBLE);
				spin_unlock_irq(rq->queue_lock);
				schedule();
				spin_lock_irq(rq->queue_lock);
				continue;
			}
		}

		/* Drop the queue lock: the actual transfer may sleep. */
		spin_unlock_irq(rq->queue_lock);

		/* spectra_lock is global, serializing FTL access across
		 * all devices and any other users of the FTL layer. */
		mutex_lock(&spectra_lock);
		res = do_transfer(tr, req);
		mutex_unlock(&spectra_lock);

		spin_lock_irq(rq->queue_lock);

		/* Complete the current chunk; keep 'req' while parts of
		 * it remain, otherwise fetch a fresh request next time. */
		if (!__blk_end_request_cur(req, res))
			req = NULL;
	}

	/* Thread is stopping: fail any request still in flight. */
	if (req)
		__blk_end_request_all(req, -EIO);

	spin_unlock_irq(rq->queue_lock);

	return 0;
}

/*
 * Request-queue callback registered via blk_init_queue().  Runs with
 * the queue lock held, so it must not sleep; it only wakes the
 * per-device transfer thread, which does the real work.
 */
static void GLOB_SBD_request(struct request_queue *rq)
{
	struct spectra_nand_dev *pdev = rq->queuedata;
	wake_up_process(pdev->thread);
}

/* Block device operations.  The open/release/ioctl/getgeo handlers are
 * defined elsewhere in this file (not shown in this excerpt). */
static struct block_device_operations GLOB_SBD_ops = {
	.owner = THIS_MODULE,
	.open = GLOB_SBD_open,
	.release = GLOB_SBD_release,
	.locked_ioctl = GLOB_SBD_ioctl,	/* legacy BKL-protected ioctl hook */
	.getgeo = GLOB_SBD_getgeo,
};

static int SBD_setup_device(struct spectra_nand_dev *dev, int which)
{
	u32 sects;

	memset(dev, 0, sizeof(struct spectra_nand_dev));
	dev->size = (u64)IdentifyDeviceData.PageDataSize * IdentifyDeviceData.PagesPerBlock * IdentifyDeviceData.wDataBlockNum;
	spin_lock_init(&dev->qlock);

	dev->tmp_buf = kmalloc(IdentifyDeviceData.PageDataSize, GFP_ATOMIC);
	if (!dev->tmp_buf) {
		printk(KERN_ERR "Failed to kmalloc memory in %s Line %d, exit.\n",
			__FILE__, __LINE__);
		goto out_vfree;
	}

	dev->queue = blk_init_queue(GLOB_SBD_request, &dev->qlock);
	if (dev->queue == NULL) {
		printk(KERN_ERR
		       "Spectra: Request queue could not be initialized. Aborting\n ");
		goto out_vfree;
	}
	dev->queue->queuedata = dev;

	/* As Linux block layer doens't support >4KB hardware sector,  */
	/* Here we force report 512 byte hardware sector size to Kernel */
	blk_queue_logical_block_size(dev->queue, 512);

	blk_queue_ordered(dev->queue, QUEUE_ORDERED_DRAIN_FLUSH, SBD_prepare_flush);

	/* Set paratmeters to optimize sequential r/w performance */
	blk_queue_max_sectors(dev->queue, 1024);
	blk_queue_max_phys_segments(dev->queue, 128);
	blk_queue_max_hw_segments(dev->queue, 1);
	blk_queue_max_segment_size(dev->queue, 524288);

	dev->thread = kthread_run(spectra_trans_thread, dev, "nand_thd");
	if (IS_ERR(dev->thread)) {
		blk_cleanup_queue(dev->queue);
		unregister_blkdev(GLOB_SBD_majornum, GLOB_SBD_NAME);
		return PTR_ERR(dev->thread);
	}

	dev->gd = alloc_disk(PARTITIONS);
	if (!dev->gd) {
		printk(KERN_ERR
		       "Spectra: Could not allocate disk. Aborting \n ");
		goto out_vfree;
	}
	dev->gd->major = GLOB_SBD_majornum;
	dev->gd->first_minor = which * PARTITIONS;
	dev->gd->fops = &GLOB_SBD_ops;
	dev->gd->queue = dev->queue;
	dev->gd->private_data = dev;
	snprintf(dev->gd->disk_name, 32, "%s%c", GLOB_SBD_NAME, which + 'a');

	sects = dev->size >> 9;
	nand_dbg_print(NAND_DBG_WARN, "Capacity sects: %d\n", sects);
	set_capacity(dev->gd, sects);

	add_disk(dev->gd);

	return 0;
out_vfree:
	return -ENOMEM;
}

/*
 * Module init: register the block major, bring up the flash and FTL
 * layers, then create one block device per entry in nand_device[].
 *
 * Returns 0 on success, -EBUSY if no major number could be obtained,
 * and -ENOMEM for the other failure paths.  (Also removes the unused
 * local variable 'ret' from the original.)
 */
static int GLOB_SBD_init(void)
{
	int i;

	mutex_init(&spectra_lock);

	/* Passing 0 asks the kernel to pick a free major number. */
	GLOB_SBD_majornum = register_blkdev(0, GLOB_SBD_NAME);
	if (GLOB_SBD_majornum <= 0) {
		printk(KERN_ERR "Unable to get the major %d for Spectra",
		       GLOB_SBD_majornum);
		return -EBUSY;
	}

	if (PASS != GLOB_FTL_Flash_Init()) {
		printk(KERN_ERR "Spectra: Unable to Initialize Flash Device. "
		       "Aborting\n");
		goto out_flash_register;
	}

	if (PASS != GLOB_FTL_IdentifyDevice(&IdentifyDeviceData)) {
		printk(KERN_ERR "Spectra: Unable to Read Flash Device. "
		       "Aborting\n");
		goto out_flash_register;
	}

	if (GLOB_FTL_Init() != PASS) {
		printk(KERN_ERR "Spectra: Unable to Initialize FTL Layer. "
		       "Aborting\n");
		goto out_ftl_flash_register;
	}

	/* NOTE(review): only -ENOMEM triggers cleanup here; other error
	 * codes from SBD_setup_device() (e.g. a kthread_run() failure)
	 * are silently ignored, and devices already set up are not torn
	 * down on failure -- confirm whether that is intended. */
	for (i = 0; i < NUM_DEVICES; i++)
		if (SBD_setup_device(&nand_device[i], i) == -ENOMEM)
			goto out_ftl_flash_register;

	return 0;

out_ftl_flash_register:
	GLOB_FTL_Cache_Release();
out_flash_register:
	GLOB_FTL_Flash_Release();
	unregister_blkdev(GLOB_SBD_majornum, GLOB_SBD_NAME);

	return -ENOMEM;
}

/* Module teardown -- body elided in this mailing-list excerpt. */
static void __exit GLOB_SBD_exit(void)
{
	...
}

module_init(GLOB_SBD_init);
module_exit(GLOB_SBD_exit);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: How to make kernel block layer generate bigger request in the request queue?
  2010-04-09 14:07 How to make kernel block layer generate bigger request in the request queue? Gao, Yunpeng
@ 2010-04-09 23:54 ` Robert Hancock
  2010-04-10  2:05   ` Martin K. Petersen
  0 siblings, 1 reply; 9+ messages in thread
From: Robert Hancock @ 2010-04-09 23:54 UTC (permalink / raw)
  To: Gao, Yunpeng; +Cc: linux-ide@vger.kernel.org, linux-mmc@vger.kernel.org

On 04/09/2010 08:07 AM, Gao, Yunpeng wrote:
> Hi,
>
> I'm working on a block device driver (NAND flash driver with FTL layer) on 2.6.31 Kernel. And try to improve sequential read/write performance of the block driver.
>
> When I debug the driver, I found that the sector numbers of every r/w request in the request queue is always not bigger than 8. That means, for every r/w request, it only handle 512 * 8 = 4KB bytes at most. And I think the sequential r/w speed can be improved if the Linux block layer generates bigger size data (for example, 64KB) for every request in the request queue.
>
> To implement this, I have added some code as below (My hardware doesn't support scatter/gather, but can do 512KB DMA data transfer):
> 	...
> 	blk_queue_max_sectors(dev->queue, 1024);
> 	blk_queue_max_phys_segments(dev->queue, 128);
> 	blk_queue_max_hw_segments(dev->queue, 1);
> 	blk_queue_max_segment_size(dev->queue, 524288);
> 	...
> And also set NOOP as the default IO Scheduler (because the underlying 'block' device is NAND flash, not a real hard disk).
>
> But seems it doesn't work. The block layer still generate at most 8 sector r/w request in request queue even if I read/write 1GB data from/to the device with dd command.
>
> Did I miss something to make the block layer generate bigger size data for every request in the request queue?
> Below is part of my source code. Any comments are highly appreciated. Thank you in advance.

8 sectors is 4KB, that's the size of a page. If the pages that are being 
written are not physically contiguous that may be the best the block 
layer can do with the constraints you've given it (not supporting any 
more than 1 DMA segment). I don't think it will try to copy pages around 
in order to try and generate a bigger request.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: How to make kernel block layer generate bigger request in the request queue?
  2010-04-09 23:54 ` Robert Hancock
@ 2010-04-10  2:05   ` Martin K. Petersen
  2010-04-10 14:58     ` James Bottomley
  0 siblings, 1 reply; 9+ messages in thread
From: Martin K. Petersen @ 2010-04-10  2:05 UTC (permalink / raw)
  To: Robert Hancock
  Cc: Gao, Yunpeng, linux-ide@vger.kernel.org,
	linux-mmc@vger.kernel.org

>>>>> "Robert" == Robert Hancock <hancockrwd@gmail.com> writes:

>> Did I miss something to make the block layer generate bigger size
>> data for every request in the request queue?  Below is part of my
>> source code. Any comments are highly appreciated. Thank you in
>> advance.

Robert> 8 sectors is 4KB, that's the size of a page. If the pages that
Robert> are being written are not physically contiguous that may be the
Robert> best the block layer can do with the constraints you've given it
Robert> (not supporting any more than 1 DMA segment). 

Correct.  It's quite unlikely for pages to be contiguous so this is the
best we can do.

Having recently done a cleanup of our segment handling I became aware
that there are many MMC devices that are single-element only.  This was
probably a sufficient approach for 32MB devices but it's time to get
with the program and implement proper scatter-gather.

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: How to make kernel block layer generate bigger request in the request queue?
  2010-04-10  2:05   ` Martin K. Petersen
@ 2010-04-10 14:58     ` James Bottomley
  2010-04-12 18:26       ` Martin K. Petersen
  0 siblings, 1 reply; 9+ messages in thread
From: James Bottomley @ 2010-04-10 14:58 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Robert Hancock, Gao, Yunpeng, linux-ide@vger.kernel.org,
	linux-mmc@vger.kernel.org

On Fri, 2010-04-09 at 22:05 -0400, Martin K. Petersen wrote:
> >>>>> "Robert" == Robert Hancock <hancockrwd@gmail.com> writes:
> 
> >> Did I miss something to make the block layer generate bigger size
> >> data for every request in the request queue?  Below is part of my
> >> source code. Any comments are highly appreciated. Thank you in
> >> advance.
> 
> Robert> 8 sectors is 4KB, that's the size of a page. If the pages that
> Robert> are being written are not physically contiguous that may be the
> Robert> best the block layer can do with the constraints you've given it
> Robert> (not supporting any more than 1 DMA segment). 
> 
> Correct.  It's quite unlikely for pages to be contiguous so this is the
> best we can do.

Actually, average servers do about 50% contiguous on average since we
changed the mm layer to allocate in ascending physical page order ...
this figure is highly sensitive to mm changes though, and can vary from
release to release.

Just in case anyone is thinking of it, there's no way we'd alter the
block layer to copy into physically contiguous since the overhead would
be pretty horrific.  However, if the platform has an iommu, you can use
it to map physically discontinuous to bus contiguous and fit everything
into a single sg element.

> Having recently done a cleanup of our segment handling I became aware
> that there are many MMC devices that are single-element only.  This was
> probably a sufficient approach for 32MB devices but it's time to get
> with the program and implement proper scatter-gather.

James



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: How to make kernel block layer generate bigger request in the request queue?
  2010-04-10 14:58     ` James Bottomley
@ 2010-04-12 18:26       ` Martin K. Petersen
  2010-04-12 19:58         ` James Bottomley
  0 siblings, 1 reply; 9+ messages in thread
From: Martin K. Petersen @ 2010-04-12 18:26 UTC (permalink / raw)
  To: James Bottomley
  Cc: Martin K. Petersen, Robert Hancock, Gao, Yunpeng,
	linux-ide@vger.kernel.org, linux-mmc@vger.kernel.org

>>>>> "James" == James Bottomley <James.Bottomley@suse.de> writes:

>> Correct.  It's quite unlikely for pages to be contiguous so this is
>> the best we can do.

James> Actually, average servers do about 50% contiguous on average
James> since we changed the mm layer to allocate in ascending physical
James> page order ...  this figure is highly sensitive to mm changes
James> though, and can vary from release to release.

Interesting.  When did this happen?

Last time I gathered data on segment merge efficiency (1 year+ ago) I
found that adjacent pages were quite rare for a normal fs type workload.
Certainly not in the 50% ballpark.  I'll take another look when I have a
moment...

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: How to make kernel block layer generate bigger request in the request queue?
  2010-04-12 18:26       ` Martin K. Petersen
@ 2010-04-12 19:58         ` James Bottomley
  2010-04-13 15:06           ` Gao, Yunpeng
  0 siblings, 1 reply; 9+ messages in thread
From: James Bottomley @ 2010-04-12 19:58 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Robert Hancock, Gao, Yunpeng, linux-ide@vger.kernel.org,
	linux-mmc@vger.kernel.org

On Mon, 2010-04-12 at 14:26 -0400, Martin K. Petersen wrote:
> >>>>> "James" == James Bottomley <James.Bottomley@suse.de> writes:
> 
> >> Correct.  It's quite unlikely for pages to be contiguous so this is
> >> the best we can do.
> 
> James> Actually, average servers do about 50% contiguous on average
> James> since we changed the mm layer to allocate in ascending physical
> James> page order ...  this figure is highly sensitive to mm changes
> James> though, and can vary from release to release.
> 
> Interesting.  When did this happen?

The initial work was done by Bill Irwin, years ago.  For a while it was
good, but then after Mel Gorman did the page reclaim code, we became
highly sensitive to the reclaim algorithms for this, so it's fluctuated
a bit ever since.  Even with all this, the efficiency is highly
dependent on the amount of free memory: once the machine starts running
to exhaustion (excluding page cache, since that usually allocates
correctly to begin with) the contiguity really drops.

> Last time I gathered data on segment merge efficiency (1 year+ ago) I
> found that adjacent pages were quite rare for a normal fs type workload.
> Certainly not in the 50% ballpark.  I'll take another look when I have a
> moment...

I got 60% with an I/O bound test with about a gigabyte of free memory a
while ago (2.6.31, I think).  Even for machines approaching memory
starvation, 30% seems achievable.

James



^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: How to make kernel block layer generate bigger request in the request queue?
  2010-04-12 19:58         ` James Bottomley
@ 2010-04-13 15:06           ` Gao, Yunpeng
  2010-04-13 15:20             ` Alan Cox
  0 siblings, 1 reply; 9+ messages in thread
From: Gao, Yunpeng @ 2010-04-13 15:06 UTC (permalink / raw)
  To: James Bottomley, Martin K. Petersen
  Cc: Robert Hancock, linux-ide@vger.kernel.org,
	linux-mmc@vger.kernel.org

Thanks a lot for all the responses to my question.
To be honest, I can not fully understand what you are discussing but just try to understand it better -
Below is part of the log when I debugging my driver code. It outputs the start sector and the sector number of every read/write request in the request queue of block layer.
It seems that many of the requested sectors are contiguous, and I am just curious why the block layer does not merge these contiguous sectors into one single request. For example, if the block layer generated 'start_sect: 48776, nsect: 64, rw: r' instead of the requests below, I think the performance would be better.
...
start_sect: 48776, nsect: 8, rw: r
start_sect: 48784, nsect: 8, rw: r
start_sect: 48792, nsect: 8, rw: r
start_sect: 48800, nsect: 8, rw: r
start_sect: 48808, nsect: 8, rw: r
start_sect: 48816, nsect: 8, rw: r
start_sect: 48824, nsect: 8, rw: r
start_sect: 48832, nsect: 8, rw: r
...

Thanks.

Regards,
Yunpeng

>-----Original Message-----
>From: James Bottomley [mailto:James.Bottomley@suse.de]
>Sent: 2010年4月13日 3:58
>To: Martin K. Petersen
>Cc: Robert Hancock; Gao, Yunpeng; linux-ide@vger.kernel.org;
>linux-mmc@vger.kernel.org
>Subject: Re: How to make kernel block layer generate bigger request in the
>request queue?
>
>On Mon, 2010-04-12 at 14:26 -0400, Martin K. Petersen wrote:
>> >>>>> "James" == James Bottomley <James.Bottomley@suse.de> writes:
>>
>> >> Correct.  It's quite unlikely for pages to be contiguous so this is
>> >> the best we can do.
>>
>> James> Actually, average servers do about 50% contiguous on average
>> James> since we changed the mm layer to allocate in ascending physical
>> James> page order ...  this figure is highly sensitive to mm changes
>> James> though, and can vary from release to release.
>>
>> Interesting.  When did this happen?
>
>The initial work was done by Bill Irwin, years ago.  For a while it was
>good, but then after Mel Gorman did the page reclaim code, we became
>highly sensitive to the reclaim algorithms for this, so it's fluctuated
>a bit ever since.  Even with all this, the efficiency is highly
>dependent on the amount of free memory: once the machine starts running
>to exhaustion (excluding page cache, since that usually allocates
>correctly to begin with) the contiguity really drops.
>
>> Last time I gathered data on segment merge efficiency (1 year+ ago) I
>> found that adjacent pages were quite rare for a normal fs type workload.
>> Certainly not in the 50% ballpark.  I'll take another look when I have a
>> moment...
>
>I got 60% with an I/O bound test with about a gigabyte of free memory a
>while ago (2.6.31, I think).  Even for machines approaching memory
>starvation, 30% seems achievable.
>
>James
>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: How to make kernel block layer generate bigger request in the request queue?
  2010-04-13 15:06           ` Gao, Yunpeng
@ 2010-04-13 15:20             ` Alan Cox
  2010-04-19  6:42               ` Gao, Yunpeng
  0 siblings, 1 reply; 9+ messages in thread
From: Alan Cox @ 2010-04-13 15:20 UTC (permalink / raw)
  To: Gao, Yunpeng
  Cc: James Bottomley, Martin K. Petersen, Robert Hancock,
	linux-ide@vger.kernel.org, linux-mmc@vger.kernel.org

> And I just curious why the block layer does not merge these contiguous sectors into one single request? For example, if > the block layer generate 'start_sect: 48776, nsect: 64, rw: r' instead of below requests, I think the performance will 
> be better.

You said earlier "My hardware doesn't support scatter/gather"

> start_sect: 48776, nsect: 8, rw: r
> start_sect: 48784, nsect: 8, rw: r
> start_sect: 48792, nsect: 8, rw: r
> start_sect: 48800, nsect: 8, rw: r
> start_sect: 48808, nsect: 8, rw: r
> start_sect: 48816, nsect: 8, rw: r
> start_sect: 48824, nsect: 8, rw: r
> start_sect: 48832, nsect: 8, rw: r

Print the bus address of each request and you will probably find they are
not contiguous so they have not been merged because your hardware could
not do that transfer and you have no IOMMU.

If the overhead per command is really really huge you can preallocate an
internal buffer of say 32K or 64K in your driver and tell the block layer
you do scatter gather, then copy the buffers into a linear chunk. I'd be
very surprised if that was a win overall on any vaguely sane hardware but
flash with erase block overhead and the like might be one of the less
sane cases.

Alan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: How to make kernel block layer generate bigger request in the request queue?
  2010-04-13 15:20             ` Alan Cox
@ 2010-04-19  6:42               ` Gao, Yunpeng
  0 siblings, 0 replies; 9+ messages in thread
From: Gao, Yunpeng @ 2010-04-19  6:42 UTC (permalink / raw)
  To: Alan Cox
  Cc: James Bottomley, Martin K. Petersen, Robert Hancock,
	linux-ide@vger.kernel.org, linux-mmc@vger.kernel.org

Thanks a lot to Alan for this suggestion. I think it makes sense to simulate a scatter gather in driver for this case. I'll try it later and expect to see the improved performance.

>-----Original Message-----
>From: Alan Cox [mailto:alan@lxorguk.ukuu.org.uk]
>Sent: 2010年4月13日 23:21
>To: Gao, Yunpeng
>Cc: James Bottomley; Martin K. Petersen; Robert Hancock;
>linux-ide@vger.kernel.org; linux-mmc@vger.kernel.org
>Subject: Re: How to make kernel block layer generate bigger request in the
>request queue?
>
>> And I just curious why the block layer does not merge these contiguous sectors
>into one single request? For example, if > the block layer generate 'start_sect:
>48776, nsect: 64, rw: r' instead of below requests, I think the performance will
>> be better.
>
>You said earlier "My hardware doesn't support scatter/gather"
>
>> start_sect: 48776, nsect: 8, rw: r
>> start_sect: 48784, nsect: 8, rw: r
>> start_sect: 48792, nsect: 8, rw: r
>> start_sect: 48800, nsect: 8, rw: r
>> start_sect: 48808, nsect: 8, rw: r
>> start_sect: 48816, nsect: 8, rw: r
>> start_sect: 48824, nsect: 8, rw: r
>> start_sect: 48832, nsect: 8, rw: r
>
>Print the bus address of each request and you will probably find they are
>not contiguous so they have not been merged because your hardware could
>not do that transfer and you have no IOMMU.
>
>If the overhead per command is really really huge you can preallocate an
>internal buffer of say 32K or 64K in your driver and tell the block layer
>you do scatter gather, then copy the buffers into a linear chunk. I'd be
>very surprised if that was a win overall on any vaguely sane hardware but
>flash with erase block overhead and the like might be one of the less
>sane cases.
>
>Alan

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-04-19  6:42 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-04-09 14:07 How to make kernel block layer generate bigger request in the request queue? Gao, Yunpeng
2010-04-09 23:54 ` Robert Hancock
2010-04-10  2:05   ` Martin K. Petersen
2010-04-10 14:58     ` James Bottomley
2010-04-12 18:26       ` Martin K. Petersen
2010-04-12 19:58         ` James Bottomley
2010-04-13 15:06           ` Gao, Yunpeng
2010-04-13 15:20             ` Alan Cox
2010-04-19  6:42               ` Gao, Yunpeng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox