From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
jeremy@goop.org, hughd@google.com, ngupta@vflare.org,
JBeulich@novell.com, kurt.hackel@oracle.com, npiggin@suse.de,
akpm@linux-foundation.org, riel@redhat.com, hannes@cmpxchg.org,
matthew@wil.cx, chris.mason@oracle.com
Subject: Re: [PATCH V4 2/4] mm: frontswap: core code
Date: Tue, 7 Jun 2011 13:53:20 -0400 [thread overview]
Message-ID: <20110607175320.GA32207@dumpdata.com> (raw)
In-Reply-To: <20110527194845.GA27141@ca-server1.us.oracle.com>
On Fri, May 27, 2011 at 12:48:45PM -0700, Dan Magenheimer wrote:
> [PATCH V4 2/4] mm: frontswap: core code
>
> Core frontswap interface between swap subsystem hooks and
> frontswap backend via frontswap_ops.
>
> Credits: Frontswap_ops design derived from Jeremy Fitzhardinge
> design for tmem; sysfs code modelled after mm/ksm.c
>
> Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
>
> Diffstat:
> include/linux/frontswap.h | 86 +++++
> mm/frontswap.c | 331 +++++++++++++++++++++
> 2 files changed, 417 insertions(+)
>
> --- linux-2.6.39/include/linux/frontswap.h 1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.39-frontswap/include/linux/frontswap.h 2011-05-26 15:37:25.198065401 -0600
> @@ -0,0 +1,86 @@
> +#ifndef _LINUX_FRONTSWAP_H
> +#define _LINUX_FRONTSWAP_H
> +
> +#include <linux/swap.h>
> +#include <linux/mm.h>
> +
> +struct frontswap_ops {
> + void (*init)(unsigned);
> + int (*put_page)(unsigned, pgoff_t, struct page *);
> + int (*get_page)(unsigned, pgoff_t, struct page *);
> + void (*flush_page)(unsigned, pgoff_t);
> + void (*flush_area)(unsigned);
> +};
> +
> +extern int frontswap_enabled;
> +extern struct frontswap_ops
> + frontswap_register_ops(struct frontswap_ops *ops);
> +extern void frontswap_shrink(unsigned long);
> +extern unsigned long frontswap_curr_pages(void);
> +
> +extern void frontswap_init(unsigned type);
> +extern int __frontswap_put_page(struct page *page);
> +extern int __frontswap_get_page(struct page *page);
> +extern void __frontswap_flush_page(unsigned, pgoff_t);
> +extern void __frontswap_flush_area(unsigned);
> +
> +#ifndef CONFIG_FRONTSWAP
> +/* all inline routines become no-ops and all externs are ignored */
> +#define frontswap_enabled (0)
> +#endif
> +
> +static inline int frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
> +{
> + int ret = 0;
> +
> + if (frontswap_enabled && sis->frontswap_map)
> + ret = test_bit(offset % BITS_PER_LONG,
> + &sis->frontswap_map[offset/BITS_PER_LONG]);
> + return ret;
> +}
> +
> +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
> +{
> + if (frontswap_enabled && sis->frontswap_map)
> + set_bit(offset % BITS_PER_LONG,
> + &sis->frontswap_map[offset/BITS_PER_LONG]);
> +}
> +
> +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
> +{
> + if (frontswap_enabled && sis->frontswap_map)
> + clear_bit(offset % BITS_PER_LONG,
> + &sis->frontswap_map[offset/BITS_PER_LONG]);
> +}
> +
> +static inline int frontswap_put_page(struct page *page)
> +{
> + int ret = -1;
> +
> + if (frontswap_enabled)
> + ret = __frontswap_put_page(page);
> + return ret;
> +}
> +
> +static inline int frontswap_get_page(struct page *page)
> +{
> + int ret = -1;
> +
> + if (frontswap_enabled)
> + ret = __frontswap_get_page(page);
> + return ret;
> +}
> +
> +static inline void frontswap_flush_page(unsigned type, pgoff_t offset)
> +{
> + if (frontswap_enabled)
> + __frontswap_flush_page(type, offset);
> +}
> +
> +static inline void frontswap_flush_area(unsigned type)
> +{
> + if (frontswap_enabled)
> + __frontswap_flush_area(type);
> +}
> +
> +#endif /* _LINUX_FRONTSWAP_H */
> --- linux-2.6.39/mm/frontswap.c 1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.39-frontswap/mm/frontswap.c 2011-05-26 15:37:25.254816281 -0600
> @@ -0,0 +1,331 @@
> +/*
> + * Frontswap frontend
> + *
> + * This code provides the generic "frontend" layer to call a matching
> + * "backend" driver implementation of frontswap. See
> + * Documentation/vm/frontswap.txt for more information.
> + *
> + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
> + * Author: Dan Magenheimer
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/mman.h>
> +#include <linux/sysctl.h>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
> +#include <linux/proc_fs.h>
> +#include <linux/security.h>
> +#include <linux/capability.h>
> +#include <linux/module.h>
> +#include <linux/uaccess.h>
> +#include <linux/frontswap.h>
> +#include <linux/swapfile.h>
> +
> +/*
> + * frontswap_ops is set by frontswap_register_ops to contain the pointers
> + * to the frontswap "backend" implementation functions.
> + */
> +static struct frontswap_ops frontswap_ops;
> +
> +/*
> + * This global enablement flag reduces overhead on systems where frontswap_ops
> + * has not been registered, so is preferred to the slower alternative: a
> + * function call that checks a non-global.
> + */
> +int frontswap_enabled;
> +EXPORT_SYMBOL(frontswap_enabled);
> +
> +/* useful stats available in /sys/kernel/mm/frontswap */
> +static unsigned long frontswap_gets;
> +static unsigned long frontswap_succ_puts;
> +static unsigned long frontswap_failed_puts;
> +static unsigned long frontswap_flushes;
> +
> +/*
> + * register operations for frontswap, returning previous thus allowing
> + * detection of multiple backends and possible nesting
> + */
> +struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
> +{
> + struct frontswap_ops old = frontswap_ops;
> +
> + frontswap_ops = *ops;
> + frontswap_enabled = 1;
> + return old;
> +}
> +EXPORT_SYMBOL(frontswap_register_ops);
> +
> +/* Called when a swap device is swapon'd */
> +void frontswap_init(unsigned type)
> +{
> + if (frontswap_enabled)
> + (*frontswap_ops.init)(type);
> +}
> +EXPORT_SYMBOL(frontswap_init);
> +
> +/*
> + * "Put" data from a page to frontswap and associate it with the page's
> + * swaptype and offset. Page must be locked and in the swap cache.
> + * If frontswap already contains a page with matching swaptype and
> + * offset, the frontswap implmentation may either overwrite the data
> + * and return success or flush the page from frontswap and return failure
> + */
> +int __frontswap_put_page(struct page *page)
> +{
> + int ret = -1, dup = 0;
> + swp_entry_t entry = { .val = page_private(page), };
> + int type = swp_type(entry);
> + struct swap_info_struct *sis = swap_info[type];
There is no danger that swap_info[type] returns NULL?
Should you get the get the swap_lock lock? Or is that not
neccessary b/c the caller already holds the lock? If so it might
be better to name the function __frontswap_put_page_locked(..)?
> + pgoff_t offset = swp_offset(entry);
> +
> + BUG_ON(!PageLocked(page));
> + if (frontswap_test(sis, offset))
> + dup = 1;
> + ret = (*frontswap_ops.put_page)(type, offset, page);
> + if (ret == 0) {
> + frontswap_set(sis, offset);
> + frontswap_succ_puts++;
> + if (!dup)
> + sis->frontswap_pages++;
> + } else if (dup) {
> + /*
> + failed dup always results in automatic flush of
> + the (older) page from frontswap
> + */
> + frontswap_clear(sis, offset);
> + sis->frontswap_pages--;
> + frontswap_failed_puts++;
> + } else
> + frontswap_failed_puts++;
> + return ret;
> +}
> +
> +/*
> + * "Get" data from frontswap associated with swaptype and offset that were
> + * specified when the data was put to frontswap and use it to fill the
> + * specified page with data. Page must be locked and in the swap cache
> + */
> +int __frontswap_get_page(struct page *page)
> +{
> + int ret = -1;
> + swp_entry_t entry = { .val = page_private(page), };
> + int type = swp_type(entry);
> + struct swap_info_struct *sis = swap_info[type];
> + pgoff_t offset = swp_offset(entry);
> +
> + BUG_ON(!PageLocked(page));
Looks like swap_readpage already does this?
> + if (frontswap_test(sis, offset))
So no checking if sis == NULL b/c the caller already does that? You might
want to stick a comment here.
> + ret = (*frontswap_ops.get_page)(type, offset, page);
> + if (ret == 0)
> + frontswap_gets++;
> + return ret;
> +}
> +
> +/*
> + * Flush any data from frontswap associated with the specified swaptype
> + * and offset so that a subsequent "get" will fail.
> + */
> +void __frontswap_flush_page(unsigned type, pgoff_t offset)
> +{
> + struct swap_info_struct *sis = swap_info[type];
> +
> + if (frontswap_test(sis, offset)) {
> + (*frontswap_ops.flush_page)(type, offset);
> + sis->frontswap_pages--;
> + frontswap_clear(sis, offset);
> + frontswap_flushes++;
> + }
> +}
> +
> +/*
> + * Flush all data from frontswap associated with all offsets for the
> + * specified swaptype.
> + */
> +void __frontswap_flush_area(unsigned type)
> +{
> + struct swap_info_struct *sis = swap_info[type];
> +
> + (*frontswap_ops.flush_area)(type);
> + sis->frontswap_pages = 0;
> + memset(sis->frontswap_map, 0, sis->max / sizeof(long));
> +}
> +
> +/*
> + * Frontswap, like a true swap device, may unnecessarily retain pages
> + * under certain circumstances; "shrink" frontswap is essentially a
> + * "partial swapoff" and works by calling try_to_unuse to attempt to
> + * unuse enough frontswap pages to attempt to -- subject to memory
> + * constraints -- reduce the number of pages in frontswap
> + */
> +void frontswap_shrink(unsigned long target_pages)
> +{
> + int wrapped = 0;
> + bool locked = false;
> +
> + for (wrapped = 0; wrapped <= 3; wrapped++) {
3? Is this value special? You might want to provide a comment
of why '3' is _the_ choice.
> +
> + struct swap_info_struct *si = NULL;
> + unsigned long total_pages = 0, total_pages_to_unuse;
> + unsigned long pages = 0, unuse_pages = 0;
> + int type;
> +
> + /*
> + * we don't want to hold swap_lock while doing a very
> + * lengthy try_to_unuse, but swap_list may change
> + * so restart scan from swap_list.head each time
> + */
> + spin_lock(&swap_lock);
> + locked = true;
> + total_pages = 0;
> + for (type = swap_list.head; type >= 0; type = si->next) {
> + si = swap_info[type];
> + total_pages += si->frontswap_pages;
> + }
> + if (total_pages <= target_pages)
> + goto out;
> + total_pages_to_unuse = total_pages - target_pages;
> + for (type = swap_list.head; type >= 0; type = si->next) {
> + si = swap_info[type];
> + if (total_pages_to_unuse < si->frontswap_pages)
> + pages = unuse_pages = total_pages_to_unuse;
> + else {
> + pages = si->frontswap_pages;
unused_pages sounds better than 'pages'.
> + unuse_pages = 0; /* unuse all */
pages_unuse? pages_to_unuse? Yeah, that sounds better.
> + }
> + if (security_vm_enough_memory_kern(pages))
> + continue;
> + vm_unacct_memory(pages);
> + break;
> + }
> + if (type < 0)
> + goto out;
> + locked = false;
> + spin_unlock(&swap_lock);
> + try_to_unuse(type, true, unuse_pages);
> + }
> +
> +out:
> + if (locked)
> + spin_unlock(&swap_lock);
> + return;
> +}
> +EXPORT_SYMBOL(frontswap_shrink);
> +
> +/*
> + * count and return the number of pages frontswap pages across all
> + * swap devices. This is exported so that a kernel module can
> + * determine current usage without reading sysfs.
> + */
> +unsigned long frontswap_curr_pages(void)
> +{
> + int type;
> + unsigned long totalpages = 0;
> + struct swap_info_struct *si = NULL;
> +
> + spin_lock(&swap_lock);
> + for (type = swap_list.head; type >= 0; type = si->next) {
> + si = swap_info[type];
> + totalpages += si->frontswap_pages;
Uh, you don't need to check 'si' for NULL?
> + }
> + spin_unlock(&swap_lock);
> + return totalpages;
> +}
> +EXPORT_SYMBOL(frontswap_curr_pages);
> +
> +#ifdef CONFIG_SYSFS
> +
> +/* see Documentation/ABI/xxx/sysfs-kernel-mm-frontswap */
> +
> +#define FRONTSWAP_ATTR_RO(_name) \
> + static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> +#define FRONTSWAP_ATTR(_name) \
> + static struct kobj_attribute _name##_attr = \
> + __ATTR(_name, 0644, _name##_show, _name##_store)
> +
> +static ssize_t curr_pages_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_curr_pages());
> +}
> +
> +static ssize_t curr_pages_store(struct kobject *kobj,
> + struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + unsigned long target_pages;
> + int err;
> +
> + err = strict_strtoul(buf, 10, &target_pages);
> + if (err)
> + return -EINVAL;
How about just "if (strict..) return -EINVAL" ) and ditch the 'int err'?
> +
> + frontswap_shrink(target_pages);
> +
> + return count;
> +}
> +FRONTSWAP_ATTR(curr_pages);
> +
> +static ssize_t succ_puts_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_succ_puts);
> +}
> +FRONTSWAP_ATTR_RO(succ_puts);
> +
> +static ssize_t failed_puts_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_failed_puts);
> +}
> +FRONTSWAP_ATTR_RO(failed_puts);
> +
> +static ssize_t gets_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_gets);
> +}
> +FRONTSWAP_ATTR_RO(gets);
> +
> +static ssize_t flushes_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_flushes);
> +}
> +FRONTSWAP_ATTR_RO(flushes);
> +
> +static struct attribute *frontswap_attrs[] = {
> + &curr_pages_attr.attr,
> + &succ_puts_attr.attr,
> + &failed_puts_attr.attr,
> + &gets_attr.attr,
> + &flushes_attr.attr,
> + NULL,
> +};
> +
> +static struct attribute_group frontswap_attr_group = {
> + .attrs = frontswap_attrs,
> + .name = "frontswap",
> +};
> +
> +#endif /* CONFIG_SYSFS */
> +
> +static int __init init_frontswap(void)
> +{
> +#ifdef CONFIG_SYSFS
> + int err;
> +
> + err = sysfs_create_group(mm_kobj, &frontswap_attr_group);
So shouldn't you return 'err'?
> +#endif /* CONFIG_SYSFS */
> + return 0;
> +}
> +
> +static void __exit exit_frontswap(void)
> +{
> + frontswap_shrink(0UL);
> +}
> +
> +module_init(init_frontswap);
> +module_exit(exit_frontswap);
WARNING: multiple messages have this Message-ID (diff)
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
jeremy@goop.org, hughd@google.com, ngupta@vflare.org,
JBeulich@novell.com, kurt.hackel@oracle.com, npiggin@suse.de,
akpm@linux-foundation.org, riel@redhat.com, hannes@cmpxchg.org,
matthew@wil.cx, chris.mason@oracle.com
Subject: Re: [PATCH V4 2/4] mm: frontswap: core code
Date: Tue, 7 Jun 2011 13:53:20 -0400 [thread overview]
Message-ID: <20110607175320.GA32207@dumpdata.com> (raw)
In-Reply-To: <20110527194845.GA27141@ca-server1.us.oracle.com>
On Fri, May 27, 2011 at 12:48:45PM -0700, Dan Magenheimer wrote:
> [PATCH V4 2/4] mm: frontswap: core code
>
> Core frontswap interface between swap subsystem hooks and
> frontswap backend via frontswap_ops.
>
> Credits: Frontswap_ops design derived from Jeremy Fitzhardinge
> design for tmem; sysfs code modelled after mm/ksm.c
>
> Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
>
> Diffstat:
> include/linux/frontswap.h | 86 +++++
> mm/frontswap.c | 331 +++++++++++++++++++++
> 2 files changed, 417 insertions(+)
>
> --- linux-2.6.39/include/linux/frontswap.h 1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.39-frontswap/include/linux/frontswap.h 2011-05-26 15:37:25.198065401 -0600
> @@ -0,0 +1,86 @@
> +#ifndef _LINUX_FRONTSWAP_H
> +#define _LINUX_FRONTSWAP_H
> +
> +#include <linux/swap.h>
> +#include <linux/mm.h>
> +
> +struct frontswap_ops {
> + void (*init)(unsigned);
> + int (*put_page)(unsigned, pgoff_t, struct page *);
> + int (*get_page)(unsigned, pgoff_t, struct page *);
> + void (*flush_page)(unsigned, pgoff_t);
> + void (*flush_area)(unsigned);
> +};
> +
> +extern int frontswap_enabled;
> +extern struct frontswap_ops
> + frontswap_register_ops(struct frontswap_ops *ops);
> +extern void frontswap_shrink(unsigned long);
> +extern unsigned long frontswap_curr_pages(void);
> +
> +extern void frontswap_init(unsigned type);
> +extern int __frontswap_put_page(struct page *page);
> +extern int __frontswap_get_page(struct page *page);
> +extern void __frontswap_flush_page(unsigned, pgoff_t);
> +extern void __frontswap_flush_area(unsigned);
> +
> +#ifndef CONFIG_FRONTSWAP
> +/* all inline routines become no-ops and all externs are ignored */
> +#define frontswap_enabled (0)
> +#endif
> +
> +static inline int frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
> +{
> + int ret = 0;
> +
> + if (frontswap_enabled && sis->frontswap_map)
> + ret = test_bit(offset % BITS_PER_LONG,
> + &sis->frontswap_map[offset/BITS_PER_LONG]);
> + return ret;
> +}
> +
> +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
> +{
> + if (frontswap_enabled && sis->frontswap_map)
> + set_bit(offset % BITS_PER_LONG,
> + &sis->frontswap_map[offset/BITS_PER_LONG]);
> +}
> +
> +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
> +{
> + if (frontswap_enabled && sis->frontswap_map)
> + clear_bit(offset % BITS_PER_LONG,
> + &sis->frontswap_map[offset/BITS_PER_LONG]);
> +}
> +
> +static inline int frontswap_put_page(struct page *page)
> +{
> + int ret = -1;
> +
> + if (frontswap_enabled)
> + ret = __frontswap_put_page(page);
> + return ret;
> +}
> +
> +static inline int frontswap_get_page(struct page *page)
> +{
> + int ret = -1;
> +
> + if (frontswap_enabled)
> + ret = __frontswap_get_page(page);
> + return ret;
> +}
> +
> +static inline void frontswap_flush_page(unsigned type, pgoff_t offset)
> +{
> + if (frontswap_enabled)
> + __frontswap_flush_page(type, offset);
> +}
> +
> +static inline void frontswap_flush_area(unsigned type)
> +{
> + if (frontswap_enabled)
> + __frontswap_flush_area(type);
> +}
> +
> +#endif /* _LINUX_FRONTSWAP_H */
> --- linux-2.6.39/mm/frontswap.c 1969-12-31 17:00:00.000000000 -0700
> +++ linux-2.6.39-frontswap/mm/frontswap.c 2011-05-26 15:37:25.254816281 -0600
> @@ -0,0 +1,331 @@
> +/*
> + * Frontswap frontend
> + *
> + * This code provides the generic "frontend" layer to call a matching
> + * "backend" driver implementation of frontswap. See
> + * Documentation/vm/frontswap.txt for more information.
> + *
> + * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
> + * Author: Dan Magenheimer
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/mman.h>
> +#include <linux/sysctl.h>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
> +#include <linux/proc_fs.h>
> +#include <linux/security.h>
> +#include <linux/capability.h>
> +#include <linux/module.h>
> +#include <linux/uaccess.h>
> +#include <linux/frontswap.h>
> +#include <linux/swapfile.h>
> +
> +/*
> + * frontswap_ops is set by frontswap_register_ops to contain the pointers
> + * to the frontswap "backend" implementation functions.
> + */
> +static struct frontswap_ops frontswap_ops;
> +
> +/*
> + * This global enablement flag reduces overhead on systems where frontswap_ops
> + * has not been registered, so is preferred to the slower alternative: a
> + * function call that checks a non-global.
> + */
> +int frontswap_enabled;
> +EXPORT_SYMBOL(frontswap_enabled);
> +
> +/* useful stats available in /sys/kernel/mm/frontswap */
> +static unsigned long frontswap_gets;
> +static unsigned long frontswap_succ_puts;
> +static unsigned long frontswap_failed_puts;
> +static unsigned long frontswap_flushes;
> +
> +/*
> + * register operations for frontswap, returning previous thus allowing
> + * detection of multiple backends and possible nesting
> + */
> +struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
> +{
> + struct frontswap_ops old = frontswap_ops;
> +
> + frontswap_ops = *ops;
> + frontswap_enabled = 1;
> + return old;
> +}
> +EXPORT_SYMBOL(frontswap_register_ops);
> +
> +/* Called when a swap device is swapon'd */
> +void frontswap_init(unsigned type)
> +{
> + if (frontswap_enabled)
> + (*frontswap_ops.init)(type);
> +}
> +EXPORT_SYMBOL(frontswap_init);
> +
> +/*
> + * "Put" data from a page to frontswap and associate it with the page's
> + * swaptype and offset. Page must be locked and in the swap cache.
> + * If frontswap already contains a page with matching swaptype and
> + * offset, the frontswap implmentation may either overwrite the data
> + * and return success or flush the page from frontswap and return failure
> + */
> +int __frontswap_put_page(struct page *page)
> +{
> + int ret = -1, dup = 0;
> + swp_entry_t entry = { .val = page_private(page), };
> + int type = swp_type(entry);
> + struct swap_info_struct *sis = swap_info[type];
There is no danger that swap_info[type] returns NULL?
Should you get the get the swap_lock lock? Or is that not
neccessary b/c the caller already holds the lock? If so it might
be better to name the function __frontswap_put_page_locked(..)?
> + pgoff_t offset = swp_offset(entry);
> +
> + BUG_ON(!PageLocked(page));
> + if (frontswap_test(sis, offset))
> + dup = 1;
> + ret = (*frontswap_ops.put_page)(type, offset, page);
> + if (ret == 0) {
> + frontswap_set(sis, offset);
> + frontswap_succ_puts++;
> + if (!dup)
> + sis->frontswap_pages++;
> + } else if (dup) {
> + /*
> + failed dup always results in automatic flush of
> + the (older) page from frontswap
> + */
> + frontswap_clear(sis, offset);
> + sis->frontswap_pages--;
> + frontswap_failed_puts++;
> + } else
> + frontswap_failed_puts++;
> + return ret;
> +}
> +
> +/*
> + * "Get" data from frontswap associated with swaptype and offset that were
> + * specified when the data was put to frontswap and use it to fill the
> + * specified page with data. Page must be locked and in the swap cache
> + */
> +int __frontswap_get_page(struct page *page)
> +{
> + int ret = -1;
> + swp_entry_t entry = { .val = page_private(page), };
> + int type = swp_type(entry);
> + struct swap_info_struct *sis = swap_info[type];
> + pgoff_t offset = swp_offset(entry);
> +
> + BUG_ON(!PageLocked(page));
Looks like swap_readpage already does this?
> + if (frontswap_test(sis, offset))
So no checking if sis == NULL b/c the caller already does that? You might
want to stick a comment here.
> + ret = (*frontswap_ops.get_page)(type, offset, page);
> + if (ret == 0)
> + frontswap_gets++;
> + return ret;
> +}
> +
> +/*
> + * Flush any data from frontswap associated with the specified swaptype
> + * and offset so that a subsequent "get" will fail.
> + */
> +void __frontswap_flush_page(unsigned type, pgoff_t offset)
> +{
> + struct swap_info_struct *sis = swap_info[type];
> +
> + if (frontswap_test(sis, offset)) {
> + (*frontswap_ops.flush_page)(type, offset);
> + sis->frontswap_pages--;
> + frontswap_clear(sis, offset);
> + frontswap_flushes++;
> + }
> +}
> +
> +/*
> + * Flush all data from frontswap associated with all offsets for the
> + * specified swaptype.
> + */
> +void __frontswap_flush_area(unsigned type)
> +{
> + struct swap_info_struct *sis = swap_info[type];
> +
> + (*frontswap_ops.flush_area)(type);
> + sis->frontswap_pages = 0;
> + memset(sis->frontswap_map, 0, sis->max / sizeof(long));
> +}
> +
> +/*
> + * Frontswap, like a true swap device, may unnecessarily retain pages
> + * under certain circumstances; "shrink" frontswap is essentially a
> + * "partial swapoff" and works by calling try_to_unuse to attempt to
> + * unuse enough frontswap pages to attempt to -- subject to memory
> + * constraints -- reduce the number of pages in frontswap
> + */
> +void frontswap_shrink(unsigned long target_pages)
> +{
> + int wrapped = 0;
> + bool locked = false;
> +
> + for (wrapped = 0; wrapped <= 3; wrapped++) {
3? Is this value special? You might want to provide a comment
of why '3' is _the_ choice.
> +
> + struct swap_info_struct *si = NULL;
> + unsigned long total_pages = 0, total_pages_to_unuse;
> + unsigned long pages = 0, unuse_pages = 0;
> + int type;
> +
> + /*
> + * we don't want to hold swap_lock while doing a very
> + * lengthy try_to_unuse, but swap_list may change
> + * so restart scan from swap_list.head each time
> + */
> + spin_lock(&swap_lock);
> + locked = true;
> + total_pages = 0;
> + for (type = swap_list.head; type >= 0; type = si->next) {
> + si = swap_info[type];
> + total_pages += si->frontswap_pages;
> + }
> + if (total_pages <= target_pages)
> + goto out;
> + total_pages_to_unuse = total_pages - target_pages;
> + for (type = swap_list.head; type >= 0; type = si->next) {
> + si = swap_info[type];
> + if (total_pages_to_unuse < si->frontswap_pages)
> + pages = unuse_pages = total_pages_to_unuse;
> + else {
> + pages = si->frontswap_pages;
unused_pages sounds better than 'pages'.
> + unuse_pages = 0; /* unuse all */
pages_unuse? pages_to_unuse? Yeah, that sounds better.
> + }
> + if (security_vm_enough_memory_kern(pages))
> + continue;
> + vm_unacct_memory(pages);
> + break;
> + }
> + if (type < 0)
> + goto out;
> + locked = false;
> + spin_unlock(&swap_lock);
> + try_to_unuse(type, true, unuse_pages);
> + }
> +
> +out:
> + if (locked)
> + spin_unlock(&swap_lock);
> + return;
> +}
> +EXPORT_SYMBOL(frontswap_shrink);
> +
> +/*
> + * count and return the number of pages frontswap pages across all
> + * swap devices. This is exported so that a kernel module can
> + * determine current usage without reading sysfs.
> + */
> +unsigned long frontswap_curr_pages(void)
> +{
> + int type;
> + unsigned long totalpages = 0;
> + struct swap_info_struct *si = NULL;
> +
> + spin_lock(&swap_lock);
> + for (type = swap_list.head; type >= 0; type = si->next) {
> + si = swap_info[type];
> + totalpages += si->frontswap_pages;
Uh, you don't need to check 'si' for NULL?
> + }
> + spin_unlock(&swap_lock);
> + return totalpages;
> +}
> +EXPORT_SYMBOL(frontswap_curr_pages);
> +
> +#ifdef CONFIG_SYSFS
> +
> +/* see Documentation/ABI/xxx/sysfs-kernel-mm-frontswap */
> +
> +#define FRONTSWAP_ATTR_RO(_name) \
> + static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> +#define FRONTSWAP_ATTR(_name) \
> + static struct kobj_attribute _name##_attr = \
> + __ATTR(_name, 0644, _name##_show, _name##_store)
> +
> +static ssize_t curr_pages_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_curr_pages());
> +}
> +
> +static ssize_t curr_pages_store(struct kobject *kobj,
> + struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + unsigned long target_pages;
> + int err;
> +
> + err = strict_strtoul(buf, 10, &target_pages);
> + if (err)
> + return -EINVAL;
How about just "if (strict..) return -EINVAL" ) and ditch the 'int err'?
> +
> + frontswap_shrink(target_pages);
> +
> + return count;
> +}
> +FRONTSWAP_ATTR(curr_pages);
> +
> +static ssize_t succ_puts_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_succ_puts);
> +}
> +FRONTSWAP_ATTR_RO(succ_puts);
> +
> +static ssize_t failed_puts_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_failed_puts);
> +}
> +FRONTSWAP_ATTR_RO(failed_puts);
> +
> +static ssize_t gets_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_gets);
> +}
> +FRONTSWAP_ATTR_RO(gets);
> +
> +static ssize_t flushes_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%lu\n", frontswap_flushes);
> +}
> +FRONTSWAP_ATTR_RO(flushes);
> +
> +static struct attribute *frontswap_attrs[] = {
> + &curr_pages_attr.attr,
> + &succ_puts_attr.attr,
> + &failed_puts_attr.attr,
> + &gets_attr.attr,
> + &flushes_attr.attr,
> + NULL,
> +};
> +
> +static struct attribute_group frontswap_attr_group = {
> + .attrs = frontswap_attrs,
> + .name = "frontswap",
> +};
> +
> +#endif /* CONFIG_SYSFS */
> +
> +static int __init init_frontswap(void)
> +{
> +#ifdef CONFIG_SYSFS
> + int err;
> +
> + err = sysfs_create_group(mm_kobj, &frontswap_attr_group);
So shouldn't you return 'err'?
> +#endif /* CONFIG_SYSFS */
> + return 0;
> +}
> +
> +static void __exit exit_frontswap(void)
> +{
> + frontswap_shrink(0UL);
> +}
> +
> +module_init(init_frontswap);
> +module_exit(exit_frontswap);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2011-06-07 17:54 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-27 19:48 [PATCH V4 2/4] mm: frontswap: core code Dan Magenheimer
2011-05-27 19:48 ` Dan Magenheimer
2011-06-07 17:53 ` Konrad Rzeszutek Wilk [this message]
2011-06-07 17:53 ` Konrad Rzeszutek Wilk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110607175320.GA32207@dumpdata.com \
--to=konrad.wilk@oracle.com \
--cc=JBeulich@novell.com \
--cc=akpm@linux-foundation.org \
--cc=chris.mason@oracle.com \
--cc=dan.magenheimer@oracle.com \
--cc=hannes@cmpxchg.org \
--cc=hughd@google.com \
--cc=jeremy@goop.org \
--cc=kurt.hackel@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=matthew@wil.cx \
--cc=ngupta@vflare.org \
--cc=npiggin@suse.de \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.