Beta quality write out daemon - Eric W. Biederman

All of lore.kernel.org
 help / color / mirror / Atom feed

From: ebiederm+eric@ccr.net (Eric W. Biederman)
To: linux-mm@kvack.org
Cc: Hans Eric Sandstrom <hes@xinit.se>
Subject: Beta quality write out daemon
Date: 17 Jan 1999 00:12:25 -0600	[thread overview]
Message-ID: <m1k8ym4ed2.fsf_-_@flinx.ccr.net> (raw)
In-Reply-To: ebiederm+eric@ccr.net's message of "15 Jan 1999 03:02:07 -0600"

O.k. For all who want to play with it here is my write out daemon.
It currently appears to be stable but untuned.

In particular I probably need a:
wait_for_any_page(unsigned long timeout);
primitive.

Currently a really fast memory allocator temporarly halts all processes that
request memory because it can't get any memory, and it doesn't really stall waiting
on I/O.  It just spins trying to get memory until it's spinning fails.
The memory allocation failure rate is also a little high.

I also seem to have detected a bug in some other part of the kernel.
Where I can find a dirty pte pointing to a swap cache page.

I'm going to attempt to reproduce this on a clean pre7 with added detection logic,
and I should have a small detection patch, against vanilla pre7 shortly.

We currently don't notice this case because we assume if a page is in the swap cache
it must be clean.

Eric

diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/include/linux/mm.h linux-2.2.0-pre5.eb1.2/include/linux/mm.h
--- linux-2.2.0-pre5.eb1.1/include/linux/mm.h	Thu Jan  7 00:25:38 1999
+++ linux-2.2.0-pre5.eb1.2/include/linux/mm.h	Wed Jan 13 08:53:22 1999
@@ -100,6 +100,7 @@
 		unsigned long page);
 	int (*swapout)(struct vm_area_struct *,  unsigned long, pte_t *);
 	pte_t (*swapin)(struct vm_area_struct *, unsigned long, unsigned long);
+	int (*writeout)(struct vm_area_struct *, unsigned long offset, unsigned long page);
 };
 
 /*
@@ -392,6 +393,8 @@
 				buffer_mem.min_percent * num_physpages)
 #define pgcache_under_min()	(page_cache_size * 100 < \
 				page_cache.min_percent * num_physpages)
+
+extern void wakeup_pgflush(void);
 
 #endif /* __KERNEL__ */
 
diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/include/linux/sched.h linux-2.2.0-pre5.eb1.2/include/linux/sched.h
--- linux-2.2.0-pre5.eb1.1/include/linux/sched.h	Wed Jan  6 22:51:44 1999
+++ linux-2.2.0-pre5.eb1.2/include/linux/sched.h	Wed Jan 13 08:53:16 1999
@@ -168,6 +168,9 @@
 	unsigned long rss, total_vm, locked_vm;
 	unsigned long def_flags;
 	unsigned long cpu_vm_mask;
+	unsigned long swap_write_pass;
+	unsigned long swap_address;
+	unsigned long swap_cnt;		/* number of pages to swap on next pass */
 	/*
 	 * This is an architecture-specific pointer: the portable
 	 * part of Linux does not know about any segments.
@@ -184,7 +187,9 @@
 		0, 0, 0, 				\
 		0, 0, 0, 0,				\
 		0, 0, 0,				\
-		0, 0, NULL }
+		0, 0, 					\
+		0, 0, 0,				\
+	        NULL }
 
 struct signal_struct {
 	atomic_t		count;
@@ -269,10 +274,8 @@
 	unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
 	int swappable:1;
 	int trashing_memory:1;
-	unsigned long swap_address;
 	unsigned long old_maj_flt;	/* old value of maj_flt */
 	unsigned long dec_flt;		/* page fault count of the last time */
-	unsigned long swap_cnt;		/* number of pages to swap on next pass */
 /* process credentials */
 	uid_t uid,euid,suid,fsuid;
 	gid_t gid,egid,sgid,fsgid;
@@ -354,7 +357,7 @@
 /* utime */	{0,0,0,0},0, \
 /* per CPU times */ {0, }, {0, }, \
 /* flt */	0,0,0,0,0,0, \
-/* swp */	0,0,0,0,0,0, \
+/* swp */	0,0,0,0, \
 /* process credentials */					\
 /* uid etc */	0,0,0,0,0,0,0,0,				\
 /* suppl grps*/ 0, {0,},					\
diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/init/main.c linux-2.2.0-pre5.eb1.2/init/main.c
--- linux-2.2.0-pre5.eb1.1/init/main.c	Wed Jan  6 22:46:42 1999
+++ linux-2.2.0-pre5.eb1.2/init/main.c	Mon Jan 11 19:13:57 1999
@@ -65,6 +65,8 @@
 extern int bdflush(void *);
 extern int kswapd(void *);
 extern void kswapd_setup(void);
+extern int pgflush(void *);
+extern void pgflush_init(void);
 
 extern void init_IRQ(void);
 extern void init_modules(void);
@@ -1272,6 +1274,10 @@
 	/* Start the background pageout daemon. */
 	kswapd_setup();
 	kernel_thread(kswapd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+	/* Start the backgroud writeout daemon. */
+	pgflush_init();
+	kernel_thread(pgflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ 
 
 #if CONFIG_AP1000
 	/* Start the async paging daemon. */
diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/mm/Makefile linux-2.2.0-pre5.eb1.2/mm/Makefile
--- linux-2.2.0-pre5.eb1.1/mm/Makefile	Tue May 12 14:17:54 1998
+++ linux-2.2.0-pre5.eb1.2/mm/Makefile	Mon Jan 11 22:09:47 1999
@@ -9,7 +9,7 @@
 
 O_TARGET := mm.o
 O_OBJS	 := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
-	    vmalloc.o slab.o \
+	    vmalloc.o slab.o vmclean.o \
 	    swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o
 
 include $(TOPDIR)/Rules.make
diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/mm/filemap.c linux-2.2.0-pre5.eb1.2/mm/filemap.c
--- linux-2.2.0-pre5.eb1.1/mm/filemap.c	Mon Jan 11 23:19:16 1999
+++ linux-2.2.0-pre5.eb1.2/mm/filemap.c	Fri Jan 15 00:41:40 1999
@@ -1115,58 +1115,14 @@
 	 */
 	file->f_count++;
 	down(&inode->i_sem);
+	atomic_inc(&page_map->count);
 	result = do_write_page(inode, file, (const char *) page, offset);
+	__free_page(page_map);
 	up(&inode->i_sem);
 	fput(file);
 	return result;
 }
 
-
-/*
- * Swapping to a shared file: while we're busy writing out the page
- * (and the page still exists in memory), we save the page information
- * in the page table, so that "filemap_swapin()" can re-use the page
- * immediately if it is called while we're busy swapping it out..
- *
- * Once we've written it all out, we mark the page entry "empty", which
- * will result in a normal page-in (instead of a swap-in) from the now
- * up-to-date disk file.
- */
-int filemap_swapout(struct vm_area_struct * vma,
-	unsigned long offset,
-	pte_t *page_table)
-{
-	int error;
-	unsigned long page = pte_page(*page_table);
-	unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
-
-	flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset));
-	set_pte(page_table, __pte(entry));
-	flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset));
-	error = filemap_write_page(vma, offset, page);
-	if (pte_val(*page_table) == entry)
-		pte_clear(page_table);
-	return error;
-}
-
-/*
- * filemap_swapin() is called only if we have something in the page
- * tables that is non-zero (but not present), which we know to be the
- * page index of a page that is busy being swapped out (see above).
- * So we just use it directly..
- */
-static pte_t filemap_swapin(struct vm_area_struct * vma,
-	unsigned long offset,
-	unsigned long entry)
-{
-	unsigned long page = SWP_OFFSET(entry);
-
-	atomic_inc(&mem_map[page].count);
-	page = (page << PAGE_SHIFT) + PAGE_OFFSET;
-	return mk_pte(page,vma->vm_page_prot);
-}
-
-
 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 	unsigned long address, unsigned int flags)
 {
@@ -1306,8 +1262,9 @@
 	NULL,			/* advise */
 	filemap_nopage,		/* nopage */
 	NULL,			/* wppage */
-	filemap_swapout,	/* swapout */
-	filemap_swapin,		/* swapin */
+	NULL,			/* swapout */
+	NULL,			/* swapin */
+	filemap_write_page,	/* writeout */
 };
 
 /*
diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/mm/page_io.c linux-2.2.0-pre5.eb1.2/mm/page_io.c
--- linux-2.2.0-pre5.eb1.1/mm/page_io.c	Wed Jan  6 22:46:43 1999
+++ linux-2.2.0-pre5.eb1.2/mm/page_io.c	Thu Jan 14 11:02:41 1999
@@ -18,6 +18,10 @@
 
 #include <asm/pgtable.h>
 
+#ifdef DEBUG_SWAP
+#include <linux/pagemap.h>
+#endif
+
 static struct wait_queue * lock_queue = NULL;
 
 /*
@@ -197,7 +201,7 @@
 #ifdef DEBUG_SWAP
 	printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
 		(rw == READ) ? "read" : "write", 
-		(char *) page_adddress(page), 
+		(char *) page_address(page), 
 		atomic_read(&page->count));
 #endif
 }
diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/mm/vmclean.c linux-2.2.0-pre5.eb1.2/mm/vmclean.c
--- linux-2.2.0-pre5.eb1.1/mm/vmclean.c	Wed Dec 31 18:00:00 1969
+++ linux-2.2.0-pre5.eb1.2/mm/vmclean.c	Sat Jan 16 23:24:10 1999
@@ -0,0 +1,415 @@
+/*
+ *  linux/mm/vmclean.c
+ *
+ *  Copyright (C) 1999 Eric Biederman
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/kernel_stat.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
+#include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+
+/*
+ * The vm-clean  functions return 1 if they successfully
+ * cleaned something. It returns zero if it couldn't do anything, and
+ * any other value indicates it decreased rss, but the page was shared.
+ *
+ * NOTE! If it sleeps, it *must* return 1 to make sure we
+ * don't continue with the clean. Otherwise we may be
+ * using a process that no longer actually exists (it might
+ * have died while we slept).
+ */
+static int try_to_clean(struct vm_area_struct* vma, unsigned long address, pte_t * page_table)
+{
+	pte_t pte;
+	unsigned long entry;
+	unsigned long page;
+	struct page * page_map;
+	int result = 0;
+
+	pte = *page_table;
+	if (!pte_present(pte))
+		return result;
+	page = pte_page(pte);
+	if (MAP_NR(page) >= max_mapnr)
+		return result;
+
+	page_map = mem_map + MAP_NR(page);
+	if ((PageReserved(page_map))
+	    || PageLocked(page_map))
+		return result;
+
+	if (!pte_dirty(pte)) {
+		goto out;
+	}
+	flush_cache_page(vma, address);
+
+	if (vma->vm_ops && vma->vm_ops->writeout) {
+		set_pte(page_table, pte_mkclean(pte));
+		if (vma->vm_ops->writeout(vma, address - vma->vm_start + vma->vm_offset, page)) {
+			struct task_struct *tsk;
+			/* Find some appropriate process to tell,
+			 * anyone with the same mm_struct is fine
+			 */
+			read_lock(&tasklist_lock);
+			for_each_task(tsk) {
+				if (!tsk->swappable)
+					continue;
+				if (tsk->mm == vma->vm_mm) {
+					kill_proc(tsk->pid, SIGBUS, 1);
+					break;
+				}
+			}
+			read_unlock(&tasklist_lock);
+			result = 0;
+		} else {
+			result = 1;
+		}
+		goto out;
+	}
+	if (PageSwapCache(page_map)) {
+		printk(KERN_ERR "vm_clean: dirty page %08lx in swap cache?\n",
+		       page_address(page_map));
+		goto out;
+		       
+	}
+	/*
+	 * This is a dirty, swappable page.  First of all,
+	 * get a suitable swap entry for it, and make sure
+	 * we have the swap cache set up to associate the
+	 * page with that swap entry.
+	 */
+	entry = get_swap_page();
+	if (!entry)
+		goto out; /* No swap space left */
+
+	/* We checked we were unlocked way up above, and we
+	   have been careful not to stall until here */
+	set_bit(PG_locked, &page_map->flags);
+
+	/* get an extra reference in case we rw_swap_page does an unasked for
+	 * synchronous write
+	 */
+	atomic_inc(&page_map->count);
+
+	if (!add_to_swap_cache(page_map, entry)) {
+		printk(KERN_ERR "vm_clean: can't add page %08lx to swap_cache!\n",
+		       page_address(page_map));
+		swap_free(entry);
+		clear_bit(PG_locked, &page_map->flags);
+		wake_up(&page_map->wait);
+		__free_page(page_map);
+		goto out;
+	}
+
+	/* Note:  We make the page read only here to maintain the invariant
+	 * that swap cache pages are always read only.
+	 * Once we have PG_dirty or a similar mechanism implemented we
+	 * can relax this.
+	 *
+	 * We still need to be sure and clear the dirty bit before any I/O
+	 * occurs however.
+	 */
+	set_pte(page_table, pte_wrprotect(pte_mkclean(pte)));
+
+	/* OK, do a physical asynchronous write to swap.  */
+	rw_swap_page(WRITE, entry, (char *) page, 0);
+
+	__free_page(page_map);
+
+	result = 1; /* Could we have slept? Play it safe */
+
+out:
+	return result;
+}
+
+/*
+ * A new implementation of swap_out().  We do not swap complete processes,
+ * but only a small number of blocks, before we continue with the next
+ * process.  The number of blocks actually swapped is determined on the
+ * number of page faults, that this process actually had in the last time,
+ * so we won't swap heavily used processes all the time ...
+ *
+ * Note: the priority argument is a hint on much CPU to waste with the
+ *       swap block search, not a hint, of how much blocks to swap with
+ *       each process.
+ *
+ * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
+ */
+
+static inline int clean_pmd(struct vm_area_struct * vma,
+	pmd_t *dir, unsigned long address, unsigned long end,
+	unsigned long *paddress)
+{
+	pte_t * pte;
+	unsigned long pmd_end;
+
+	if (pmd_none(*dir))
+		return 0;
+	if (pmd_bad(*dir)) {
+		printk("clean_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
+		pmd_clear(dir);
+		return 0;
+	}
+	
+	pte = pte_offset(dir, address);
+	
+	pmd_end = (address + PMD_SIZE) & PMD_MASK;
+	if (end > pmd_end)
+		end = pmd_end;
+
+	do {
+		int result;
+		*paddress = address + PAGE_SIZE;
+		result = try_to_clean(vma, address, pte);
+		if (result)
+			return result;
+		address += PAGE_SIZE;
+		pte++;
+	} while (address < end);
+	return 0;
+}
+
+static inline int clean_pgd(struct vm_area_struct * vma,
+	pgd_t *dir, unsigned long address, unsigned long end, unsigned long *paddress)
+{
+	pmd_t * pmd;
+	unsigned long pgd_end;
+
+	if (pgd_none(*dir))
+		return 0;
+	if (pgd_bad(*dir)) {
+		printk("clean_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
+		pgd_clear(dir);
+		return 0;
+	}
+
+	pmd = pmd_offset(dir, address);
+
+	pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;	
+	if (end > pgd_end)
+		end = pgd_end;
+	
+	do {
+		int result;
+		result = clean_pmd(vma, pmd, address, end, paddress);
+		if (result)
+			return result;
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address < end);
+	return 0;
+}
+
+static int clean_vma(struct vm_area_struct *vma, 
+		     unsigned long address, unsigned long *paddress)
+{
+	pgd_t *pgdir;
+	unsigned long end;
+
+	/* Don't write out areas like shared memory which have their
+	 * own separate swapping mechanism
+	 */
+	if (vma->vm_flags & VM_SHM)  {
+		return 0;
+	}
+
+	/* Don't write out locked anonymous memory */
+	if (vma->vm_flags & VM_LOCKED && 
+	    (!vma->vm_ops || !vma->vm_ops->writeout)) {
+		return 0;
+	}
+
+	pgdir = pgd_offset(vma->vm_mm, address);
+
+	end = vma->vm_end;
+	while (address < end) {
+		int result;
+		result = clean_pgd(vma, pgdir, address, end, paddress);
+		if (result)
+			return result;
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		pgdir++;
+	}
+	return 0;
+}
+
+static int clean_mm(struct mm_struct *mm, unsigned long *paddress)
+{
+	struct vm_area_struct *vma;
+	unsigned long address = *paddress;
+
+	vma = find_vma(mm, address);
+	if (vma) {
+		if (address < vma->vm_start) {
+			address = vma->vm_start;
+		}
+		for(;atomic_read(&mm->count) > 1;) {
+			int result = clean_vma(vma, address, paddress);
+			if (result) 
+				return result;
+			vma = vma->vm_next;
+			if (!vma)
+				break;
+			address = vma->vm_start;
+		}
+	}
+	/* We didn't write anything out */
+	return -1;	
+}
+
+/*
+ * Select the task with maximal swap_cnt and try to swap out a page.
+ * N.B. This function returns only 0 or 1.  Return values != 1 from
+ * the lower level routines result in continued processing.
+ */
+static void clean_tsks(void)
+{
+	/* Use write pass so I don't write out the same mm more than once */
+	static unsigned long write_pass = 0;
+
+	for(;;) {	
+		struct mm_struct *mm;
+		int result;
+		unsigned long address;
+		struct task_struct *tsk;
+		mm = NULL;
+
+		read_lock(&tasklist_lock);
+		for_each_task(tsk) {
+			if (!tsk->swappable)  /* the task is being set up */
+				continue;
+			if (tsk->mm->rss == 0) {
+				tsk->mm->swap_write_pass = write_pass; /* bad? */
+				continue;
+			}
+			if (tsk->mm->swap_write_pass == write_pass) 
+				continue;
+			/* don't let the mm struct go away unexpectedly */
+			mm = tsk->mm;
+			mmget(mm);
+			break;
+		}
+		read_unlock(&tasklist_lock);
+		if (!mm) 
+			break;
+		lock_kernel();
+		address = 0;
+#if 0
+		printk(KERN_DEBUG "clean_mm(%ld) starting for %p\n", write_pass, mm);
+#endif
+		do {
+			result = clean_mm(mm, &address);
+			if (result) {
+#if 0
+				printk(KERN_DEBUG "clean_mm:%ld found a page\n", write_pass);
+#endif
+			}
+			if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster) {
+				run_task_queue(&tq_disk);
+			}
+			if (current->need_resched) 
+				schedule();
+		} while((result > 0) && (atomic_read(&mm->count) > 1));
+		mm->swap_write_pass = write_pass;
+#if 0
+		printk(KERN_DEBUG "clean_mm:%ld done\n", write_pass);
+#endif
+		mmput(mm);
+		unlock_kernel();
+	}
+		
+	write_pass = (write_pass != 0)?0:1;
+}
+
+/* ====================== pgflush support =================== */
+
+/* This is a simple kernel daemon, whose job it is to write all dirty pages
+ * in memory.
+ */
+#if 1
+unsigned long pgflush_wait = 30*HZ;
+#else
+unsigned long pgflush_wait = HZ/5;
+#endif
+struct task_struct *pgflush_tsk = 0;
+
+void wakeup_pgflush(void)
+{
+	if (!pgflush_tsk)
+		return;
+	if (current == pgflush_tsk)
+		return;
+	wake_up_process(pgflush_tsk);
+}
+
+/*
+ * Before we start the kernel thread, print out the
+ * pgflushd initialization message (otherwise the init message
+ * may be printed in the middle of another driver's init
+ * message).  It looks very bad when that happens.
+ */
+__initfunc(void pgflush_init(void))
+{
+       printk ("Starting kpgflushd\n");
+}
+
+/* This is the actual pgflush daemon itself. 
+ * We launch it ourselves internally with
+ * kernel_thread(...)  directly after the first thread in init/main.c 
+ */
+
+int pgflush(void *unsused)
+{
+	/*
+	 *	We have a bare-bones task_struct, and really should fill
+	 *	in a few more things so "top" and /proc/2/{exe,root,cwd}
+	 *	display semi-sane things. Not real crucial though...  
+	 */
+
+	current->session = 1;
+	current->pgrp = 1;
+	sprintf(current->comm, "kpgflushd");
+	pgflush_tsk = current;
+
+	/*
+	 * Tell the memory management that we're a "memory allocator",
+	 * and that if we need more memory we should get access to it
+	 * regardless (see "__get_free_pages()"). "kswapd" should
+	 * never get caught in the normal page freeing logic.
+	 *
+	 * (Kswapd normally doesn't need memory anyway, but sometimes
+	 * you need a small amount of memory in order to be able to
+	 * page out something else, and this flag essentially protects
+	 * us from recursively trying to free more memory as we're
+	 * trying to free the first piece of memory in the first place).
+	 */
+	for(;;) {
+		unsigned long left;
+		current->state = TASK_INTERRUPTIBLE;
+
+		if (signal_pending(current)) {
+			spin_lock_irq(&current->sigmask_lock);
+			flush_signals(current);
+			spin_unlock_irq(&current->sigmask_lock);
+		}
+
+		printk(KERN_DEBUG "clean_tsks() starting\n");
+		clean_tsks();  /* fill up the dirty list */
+		printk(KERN_DEBUG "clean_tsks() done\n");
+		/* Then put it all on disk */
+		run_task_queue(&tq_disk);
+		/* Should I run tq_disk more often? */
+
+		printk(KERN_DEBUG "kpgflushd going to sleep\n");
+		left = schedule_timeout(pgflush_wait);
+		printk(KERN_DEBUG "kpgflushd awoke with %ld jiffies remaing\n",
+		       left);
+	}
+}
diff -uNrX linux-ignore-files linux-2.2.0-pre5.eb1.1/mm/vmscan.c linux-2.2.0-pre5.eb1.2/mm/vmscan.c
--- linux-2.2.0-pre5.eb1.1/mm/vmscan.c	Wed Jan  6 22:51:45 1999
+++ linux-2.2.0-pre5.eb1.2/mm/vmscan.c	Tue Jan 12 23:39:42 1999
@@ -79,7 +79,7 @@
 		tsk->nswap++;
 		flush_tlb_page(vma, address);
 		__free_page(page_map);
-		return 0;
+		return 0; /* This is the only work we can do now */
 	}
 
 	/*
@@ -99,61 +99,8 @@
 		pte_clear(page_table);
 		goto drop_pte;
 	}
-
-	/*
-	 * Ok, it's really dirty. That means that
-	 * we should either create a new swap cache
-	 * entry for it, or we should write it back
-	 * to its own backing store.
-	 *
-	 * Note that in neither case do we actually
-	 * know that we make a page available, but
-	 * as we potentially sleep we can no longer
-	 * continue scanning, so we migth as well
-	 * assume we free'd something.
-	 *
-	 * NOTE NOTE NOTE! This should just set a
-	 * dirty bit in page_map, and just drop the
-	 * pte. All the hard work would be done by
-	 * shrink_mmap().
-	 *
-	 * That would get rid of a lot of problems.
-	 */
-	if (vma->vm_ops && vma->vm_ops->swapout) {
-		pid_t pid = tsk->pid;
-		vma->vm_mm->rss--;
-		if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
-			kill_proc(pid, SIGBUS, 1);
-		__free_page(page_map);
-		return 1;
-	}
-
-	/*
-	 * This is a dirty, swappable page.  First of all,
-	 * get a suitable swap entry for it, and make sure
-	 * we have the swap cache set up to associate the
-	 * page with that swap entry.
-	 */
-	entry = get_swap_page();
-	if (!entry)
-		return 0; /* No swap space left */
-		
-	vma->vm_mm->rss--;
-	tsk->nswap++;
-	flush_cache_page(vma, address);
-	set_pte(page_table, __pte(entry));
-	flush_tlb_page(vma, address);
-	swap_duplicate(entry);	/* One for the process, one for the swap cache */
-	add_to_swap_cache(page_map, entry);
-	/* We checked we were unlocked way up above, and we
-	   have been careful not to stall until here */
-	set_bit(PG_locked, &page_map->flags);
-
-	/* OK, do a physical asynchronous write to swap.  */
-	rw_swap_page(WRITE, entry, (char *) page, 0);
-
-	__free_page(page_map);
-	return 1;
+	/* wakeup_pgflush here? */
+	return 0;
 }
 
 /*
@@ -192,7 +139,7 @@
 
 	do {
 		int result;
-		tsk->swap_address = address + PAGE_SIZE;
+		tsk->mm->swap_address = address + PAGE_SIZE;
 		result = try_to_swap_out(tsk, vma, address, pte, gfp_mask);
 		if (result)
 			return result;
@@ -264,7 +211,7 @@
 	/*
 	 * Go through process' page directory.
 	 */
-	address = p->swap_address;
+	address = p->mm->swap_address;
 
 	/*
 	 * Find the proper vm-area
@@ -286,8 +233,8 @@
 	}
 
 	/* We didn't find anything for the process */
-	p->swap_cnt = 0;
-	p->swap_address = 0;
+	p->mm->swap_cnt = 0;
+	p->mm->swap_address = 0;
 	return 0;
 }
 
@@ -335,9 +282,9 @@
 				continue;
 			/* Refresh swap_cnt? */
 			if (assign)
-				p->swap_cnt = p->mm->rss;
-			if (p->swap_cnt > max_cnt) {
-				max_cnt = p->swap_cnt;
+				p->mm->swap_cnt = p->mm->rss;
+			if (p->mm->swap_cnt > max_cnt) {
+				max_cnt = p->mm->swap_cnt;
 				pbest = p;
 			}
 		}
@@ -456,6 +403,9 @@
 	/* Always trim SLAB caches when memory gets low. */
 	kmem_cache_reap(gfp_mask);
 
+	/* Also invest in clean pages */
+	wakeup_pgflush();
+
 	priority = 6;
 	do {
 		while (shrink_mmap(priority, gfp_mask)) {
@@ -476,6 +426,9 @@
 		}
 
 		shrink_dcache_memory(priority, gfp_mask);
+
+		schedule();
+
 	} while (--priority >= 0);
 done:
 	unlock_kernel();

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

next prev parent reply	other threads:[~1999-01-17  6:44 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
1999-01-14 10:08 Alpha quality write out daemon Eric W. Biederman
     [not found] ` <369E0501.987D2B3B@xinit.se>
1999-01-14 16:49   ` Eric W. Biederman
1999-01-15  7:31 ` Eric W. Biederman
1998-01-14 18:44   ` Hans Eric Sandström
1999-01-15  9:02     ` Eric W. Biederman
1999-01-17  6:12       ` Eric W. Biederman [this message]
1999-01-18  5:05         ` Beta " Eric W. Biederman
1999-01-19 15:15 ` Alpha " Stephen C. Tweedie
1999-01-20 14:52   ` Eric W. Biederman
1999-01-20 18:46     ` Zlatko Calusic
1999-01-25  1:40       ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m1k8ym4ed2.fsf_-_@flinx.ccr.net \
    --to=ebiederm+eric@ccr.net \
    --cc=hes@xinit.se \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.