All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@digeo.com>
To: Doug Ledford <dledford@redhat.com>
Cc: linux-kernel@vger.kernel.org
Subject: Re: balance_dirty_pages broken
Date: Wed, 16 Oct 2002 22:09:41 -0700	[thread overview]
Message-ID: <3DAE4615.F98B6DE@digeo.com> (raw)
In-Reply-To: 20021017043623.GR8159@redhat.com

Doug Ledford wrote:
> 
> Actually, I don't know if it's balance_dirty_pages fault or some other
> part of the kernel's fault, but it is broken here.  Failure mode is that
> balance_dirty_pages would loop forever.  Reason it would loop forever is
> because the ps struct had a negative entry for nr_dirty.

I've seen one other report of this.  Something has gone wrong
with the page accounting.  Could you please send me your
.config and a description of what filesytems you're using?

err.  ramdisk?   initrd? If so, does this fix?



 drivers/block/rd.c |  173 +++++++++++++++++++++++------------------------------
 fs/buffer.c        |   18 +++--
 kernel/ksyms.c     |    1 
 3 files changed, 90 insertions(+), 102 deletions(-)

--- 2.5.43/drivers/block/rd.c~rd-cleanup	Tue Oct 15 21:10:15 2002
+++ 2.5.43-akpm/drivers/block/rd.c	Tue Oct 15 21:12:13 2002
@@ -45,12 +45,14 @@
 #include <linux/config.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <asm/atomic.h>
+#include <linux/highmem.h>
 #include <linux/bio.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/buffer_head.h>		/* for invalidate_bdev() */
+#include <linux/backing-dev.h>
+
 #include <asm/uaccess.h>
 
 /*
@@ -73,10 +75,7 @@ unsigned long initrd_start, initrd_end;
 int initrd_below_start_ok;
 #endif
 
-/* Various static variables go here.  Most are used only in the RAM disk code.
- */
-
-static unsigned long rd_length[NUM_RAMDISKS];	/* Size of RAM disks in bytes   */
+static unsigned long rd_length[NUM_RAMDISKS];	/* Size of RAM disks in bytes */
 static struct gendisk *rd_disks[NUM_RAMDISKS];
 static devfs_handle_t devfs_handle;
 static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */
@@ -87,7 +86,7 @@ static struct block_device *rd_bdev[NUM_
  * architecture-specific setup routine (from the stored boot sector
  * information). 
  */
-int rd_size = CONFIG_BLK_DEV_RAM_SIZE;		/* Size of the RAM disks */
+
 /*
  * It would be very desiderable to have a soft-blocksize (that in the case
  * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because
@@ -101,26 +100,17 @@ int rd_size = CONFIG_BLK_DEV_RAM_SIZE;		
  */
 int rd_blocksize = BLOCK_SIZE;			/* blocksize of the RAM disks */
 
+/* Size of the RAM disks */
+int rd_size = (CONFIG_BLK_DEV_RAM_SIZE + (PAGE_CACHE_SIZE >> 10) - 1) &
+			(PAGE_CACHE_MASK >> 10);
+
 /*
  * Copyright (C) 2000 Linus Torvalds.
  *               2000 Transmeta Corp.
  * aops copied from ramfs.
  */
-static int ramdisk_readpage(struct file *file, struct page * page)
-{
-	if (!PageUptodate(page)) {
-		void *kaddr = kmap_atomic(page, KM_USER0);
 
-		memset(kaddr, 0, PAGE_CACHE_SIZE);
-		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
-		SetPageUptodate(page);
-	}
-	unlock_page(page);
-	return 0;
-}
-
-static int ramdisk_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static void wipe_page(struct page *page)
 {
 	if (!PageUptodate(page)) {
 		void *kaddr = kmap_atomic(page, KM_USER0);
@@ -130,43 +120,26 @@ static int ramdisk_prepare_write(struct 
 		kunmap_atomic(kaddr, KM_USER0);
 		SetPageUptodate(page);
 	}
-	SetPageDirty(page);
-	return 0;
 }
 
-static int ramdisk_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static int
+rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, int minor)
 {
-	return 0;
-}
-
-static struct address_space_operations ramdisk_aops = {
-	readpage: ramdisk_readpage,
-	writepage: fail_writepage,
-	prepare_write: ramdisk_prepare_write,
-	commit_write: ramdisk_commit_write,
-};
-
-static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec,
-				  sector_t sector, int minor)
-{
-	struct address_space * mapping;
+	struct address_space *mapping;
 	unsigned long index;
 	unsigned int vec_offset;
 	int offset, size, err;
 
 	err = 0;
 	mapping = rd_bdev[minor]->bd_inode->i_mapping;
-
 	index = sector >> (PAGE_CACHE_SHIFT - 9);
 	offset = (sector << 9) & ~PAGE_CACHE_MASK;
 	size = vec->bv_len;
 	vec_offset = vec->bv_offset;
 
 	do {
+		struct page *page;
 		int count;
-		struct page * page;
-		char * src, * dst;
-		int unlock = 0;
 
 		count = PAGE_CACHE_SIZE - offset;
 		if (count > size)
@@ -176,53 +149,39 @@ static int rd_blkdev_pagecache_IO(int rw
 		page = find_get_page(mapping, index);
 		if (!page) {
 			page = grab_cache_page(mapping, index);
-			err = -ENOMEM;
-			if (!page)
+			if (!page) {
+				err = -ENOMEM;
 				goto out;
-			err = 0;
-
-			if (!PageUptodate(page)) {
-				void *kaddr = kmap_atomic(page, KM_USER0);
-
-				memset(kaddr, 0, PAGE_CACHE_SIZE);
-				flush_dcache_page(page);
-				kunmap_atomic(kaddr, KM_USER0);
-				SetPageUptodate(page);
 			}
-
-			unlock = 1;
+			wipe_page(page);
+			set_page_dirty(page);
+			unlock_page(page);
 		}
+		if (page != vec->bv_page || vec_offset != offset) {
+			if (rw == READ) {
+				char *src = kmap_atomic(page, KM_USER0);
+				char *dst = kmap_atomic(vec->bv_page, KM_USER1);
+
+				memcpy(dst + vec_offset, src + offset, count);
+				flush_dcache_page(vec->bv_page);
+				kunmap_atomic(src, KM_USER0);
+				kunmap_atomic(dst, KM_USER1);
+			} else {
+				char *src = kmap_atomic(vec->bv_page, KM_USER0);
+				char *dst = kmap_atomic(page, KM_USER1);
 
-		index++;
-
-		if (rw == READ) {
-			src = kmap(page);
-			src += offset;
-			dst = kmap(vec->bv_page) + vec_offset;
-		} else {
-			dst = kmap(page);
-			dst += offset;
-			src = kmap(vec->bv_page) + vec_offset;
+				memcpy(dst + offset, src + vec_offset, count);
+				flush_dcache_page(page);
+				kunmap_atomic(src, KM_USER0);
+				kunmap_atomic(dst, KM_USER1);
+			}
 		}
+		page_cache_release(page);
 		offset = 0;
 		vec_offset += count;
-
-		memcpy(dst, src, count);
-
-		kunmap(page);
-		kunmap(vec->bv_page);
-
-		if (rw == READ) {
-			flush_dcache_page(sbh->b_page);
-		} else {
-			SetPageDirty(page);
-		}
-		if (unlock)
-			unlock_page(page);
-		__free_page(page);
+		index++;
 	} while (size);
-
- out:
+out:
 	return err;
 }
 
@@ -324,7 +283,6 @@ static ssize_t initrd_read(struct file *
 	return count;
 }
 
-
 static int initrd_release(struct inode *inode,struct file *file)
 {
 	extern void free_initrd_mem(unsigned long, unsigned long);
@@ -343,14 +301,19 @@ static int initrd_release(struct inode *
 	return 0;
 }
 
-
 static struct file_operations initrd_fops = {
-	read:		initrd_read,
-	release:	initrd_release,
+	.read		= initrd_read,
+	.release	= initrd_release,
 };
 
 #endif
 
+struct address_space_operations ramdisk_aops;
+
+static struct backing_dev_info rd_backing_dev_info = {
+	.ra_pages	= 0,	/* No readahead */
+	.memory_backed	= 1,	/* Does not contribute to dirty memory */
+};
 
 static int rd_open(struct inode * inode, struct file * filp)
 {
@@ -375,22 +338,25 @@ static int rd_open(struct inode * inode,
 	 * Immunize device against invalidate_buffers() and prune_icache().
 	 */
 	if (rd_bdev[unit] == NULL) {
-		rd_bdev[unit] = bdget(kdev_t_to_nr(inode->i_rdev));
-		rd_bdev[unit]->bd_openers++;
-		rd_bdev[unit]->bd_block_size = rd_blocksize;
-		rd_bdev[unit]->bd_inode->i_mapping->a_ops = &ramdisk_aops;
-		rd_bdev[unit]->bd_inode->i_size = rd_length[unit];
-		rd_bdev[unit]->bd_queue = &blk_dev[MAJOR_NR].request_queue;
-		rd_bdev[unit]->bd_disk = get_disk(rd_disks[unit]);
+		struct block_device *b = bdget(kdev_t_to_nr(inode->i_rdev));
+
+		rd_bdev[unit] = b;
+		b->bd_openers++;
+		b->bd_block_size = rd_blocksize;
+		b->bd_inode->i_mapping->a_ops = &ramdisk_aops;
+		b->bd_inode->i_mapping->backing_dev_info = &rd_backing_dev_info;
+		b->bd_inode->i_size = rd_length[unit];
+		b->bd_queue = &blk_dev[MAJOR_NR].request_queue;
+		b->bd_disk = get_disk(rd_disks[unit]);
 	}
 
 	return 0;
 }
 
 static struct block_device_operations rd_bd_op = {
-	owner:		THIS_MODULE,
-	open:		rd_open,
-	ioctl:		rd_ioctl,
+	.owner	= THIS_MODULE,
+	.open	= rd_open,
+	.ioctl	= rd_ioctl,
 };
 
 /* Before freeing the module, invalidate all of the protected buffers! */
@@ -416,6 +382,19 @@ static void __exit rd_cleanup (void)
 	unregister_blkdev( MAJOR_NR, "ramdisk" );
 }
 
+/*
+ * If someone writes a ramdisk page with submit_bh(), we have a dirty page
+ * with clean buffers.  try_to_free_buffers() will then propagate the buffer
+ * cleanness up into page-cleaness and the VM will evict the page.
+ *
+ * To stop that happening, the ramdisk address_space has a ->releasepage()
+ * which always fails.
+ */
+static int fail_releasepage(struct page *page, int offset)
+{
+	return 0;
+}
+
 /* This is the registration and initialization section of the RAM disk driver */
 static int __init rd_init (void)
 {
@@ -428,6 +407,9 @@ static int __init rd_init (void)
 		       rd_blocksize);
 		rd_blocksize = BLOCK_SIZE;
 	}
+	ramdisk_aops = def_blk_aops;
+	ramdisk_aops.writepage = fail_writepage;
+	ramdisk_aops.releasepage = fail_releasepage;
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	initrd_disk = alloc_disk(1);
@@ -519,9 +501,8 @@ __setup("ramdisk_blocksize=", ramdisk_bl
 #endif
 
 /* options - modular */
-MODULE_PARM     (rd_size, "1i");
+MODULE_PARM(rd_size, "1i");
 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
 MODULE_PARM     (rd_blocksize, "i");
 MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes.");
-
 MODULE_LICENSE("GPL");
--- 2.5.43/fs/buffer.c~rd-cleanup	Tue Oct 15 21:10:15 2002
+++ 2.5.43-akpm/fs/buffer.c	Tue Oct 15 21:10:15 2002
@@ -388,14 +388,21 @@ __find_get_block_slow(struct block_devic
 	head = page_buffers(page);
 	bh = head;
 	do {
-		if (bh->b_blocknr == block) {
+		if (bh->b_blocknr == block && buffer_mapped(bh)) {
 			ret = bh;
 			get_bh(bh);
 			goto out_unlock;
 		}
 		bh = bh->b_this_page;
 	} while (bh != head);
-	buffer_error();
+	/*
+	 * This path can happen if the page had some unmapped buffers, which
+	 * will have b_blocknr == -1.  When a ramdisk mapping's page was brought
+	 * partially uptodate by mkfs and unmap_underlying_metadata searches
+	 * for blocks in part of the page which wasn't touched by mkfs.
+	 *
+	 * buffer_error();
+	 */
 out_unlock:
 	spin_unlock(&bd_mapping->private_lock);
 	page_cache_release(page);
@@ -1463,7 +1470,7 @@ int try_to_release_page(struct page *pag
  * @offset: the index of the truncation point
  *
  * block_invalidatepage() is called when all or part of the page has become
- * invalidatedby a truncate operation.
+ * invalidated by a truncate operation.
  *
  * block_invalidatepage() does not have to release all buffers, but it must
  * ensure that no dirty buffer is left outside @offset and that no I/O
@@ -1617,8 +1624,6 @@ static int __block_write_full_page(struc
 	last_block = (inode->i_size - 1) >> inode->i_blkbits;
 
 	if (!page_has_buffers(page)) {
-		if (S_ISBLK(inode->i_mode))
-			buffer_error();
 		if (!PageUptodate(page))
 			buffer_error();
 		create_empty_buffers(page, 1 << inode->i_blkbits,
@@ -2605,7 +2610,8 @@ void free_buffer_head(struct buffer_head
 }
 EXPORT_SYMBOL(free_buffer_head);
 
-static void init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
+static void
+init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
 {
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 			    SLAB_CTOR_CONSTRUCTOR) {
--- 2.5.43/kernel/ksyms.c~rd-cleanup	Tue Oct 15 21:10:15 2002
+++ 2.5.43-akpm/kernel/ksyms.c	Tue Oct 15 21:10:15 2002
@@ -138,6 +138,7 @@ EXPORT_SYMBOL(page_address);
 EXPORT_SYMBOL(get_user_pages);
 
 /* filesystem internal functions */
+EXPORT_SYMBOL_GPL(def_blk_aops);
 EXPORT_SYMBOL(def_blk_fops);
 EXPORT_SYMBOL(update_atime);
 EXPORT_SYMBOL(get_fs_type);

.

  reply	other threads:[~2002-10-17  5:03 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-10-17  4:36 balance_dirty_pages broken Doug Ledford
2002-10-17  5:09 ` Andrew Morton [this message]
2002-10-17  5:22   ` Doug Ledford
2002-10-17  5:41     ` Doug Ledford
2002-10-17  5:54       ` Andrew Morton
2002-10-17  6:01         ` Doug Ledford
2002-10-17  6:09           ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3DAE4615.F98B6DE@digeo.com \
    --to=akpm@digeo.com \
    --cc=dledford@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.