> /* is kswapd sleeping prematurely? */
> -static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
> - int classzone_idx)
> +static int sleeping_prematurely(struct kswapd *kswapd, int order,
> + long remaining, int classzone_idx)
> {
> int i;
> unsigned long balanced = 0;
> bool all_zones_ok = true;
> + pg_data_t *pgdat = kswapd->kswapd_pgdat;
>
> /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
> if (remaining)
> @@ -2570,28 +2573,31 @@ out:
> return order;
> }
>
> -static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
> +static void kswapd_try_to_sleep(struct kswapd *kswapd_p, int order,
> + int classzone_idx)
> {
> long remaining = 0;
> DEFINE_WAIT(wait);
> + pg_data_t *pgdat = kswapd_p->kswapd_pgdat;
> + wait_queue_head_t *wait_h = &kswapd_p->kswapd_wait;
>
> if (freezing(current) || kthread_should_stop())
> return;
>
> - prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
> + prepare_to_wait(wait_h, &wait, TASK_INTERRUPTIBLE);
>
> /* Try to sleep for a short interval */
> - if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
> + if (!sleeping_prematurely(kswapd_p, order, remaining, classzone_idx)) {
> remaining = schedule_timeout(HZ/10);
> - finish_wait(&pgdat->kswapd_wait, &wait);
> - prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
> + finish_wait(wait_h, &wait);
> + prepare_to_wait(wait_h, &wait, TASK_INTERRUPTIBLE);
> }
>
> /*
> * After a short sleep, check if it was a premature sleep. If not, then
> * go fully to sleep until explicitly woken up.
> */
> - if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
> + if (!sleeping_prematurely(kswapd_p, order, remaining, classzone_idx)) {
> trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
>
> /*
> @@ -2611,7 +2617,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
> else
> count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
> }
> - finish_wait(&pgdat->kswapd_wait, &wait);
> + finish_wait(wait_h, &wait);
> }
>
> /*
> @@ -2627,20 +2633,24 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
> * If there are applications that are active memory-allocators
> * (most normal use), this basically shouldn't matter.
> */
> -static int kswapd(void *p)
> +int kswapd(void *p)
> {
> unsigned long order;
> int classzone_idx;
> - pg_data_t *pgdat = (pg_data_t*)p;
> + struct kswapd *kswapd_p = (struct kswapd *)p;
> + pg_data_t *pgdat = kswapd_p->kswapd_pgdat;
> + wait_queue_head_t *wait_h = &kswapd_p->kswapd_wait;
> struct task_struct *tsk = current;
>
> struct reclaim_state reclaim_state = {
> .reclaimed_slab = 0,
> };
> - const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
> + const struct cpumask *cpumask;
>
> lockdep_set_current_reclaim_state(GFP_KERNEL);
>
> + BUG_ON(pgdat->kswapd_wait != wait_h);
> + cpumask = cpumask_of_node(pgdat->node_id);
> if (!cpumask_empty(cpumask))
> set_cpus_allowed_ptr(tsk, cpumask);
> current->reclaim_state = &reclaim_state;
> @@ -2679,7 +2689,7 @@ static int kswapd(void *p)
> order = new_order;
> classzone_idx = new_classzone_idx;
> } else {
> - kswapd_try_to_sleep(pgdat, order, classzone_idx);
> + kswapd_try_to_sleep(kswapd_p, order, classzone_idx);
> order = pgdat->kswapd_max_order;
> classzone_idx = pgdat->classzone_idx;
> pgdat->kswapd_max_order = 0;
> @@ -2719,13 +2729,13 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
> pgdat->kswapd_max_order = order;
> pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx);
> }
> - if (!waitqueue_active(&pgdat->kswapd_wait))
> + if (!waitqueue_active(pgdat->kswapd_wait))
> return;
> if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
> return;
>
> trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
> - wake_up_interruptible(&pgdat->kswapd_wait);
> + wake_up_interruptible(pgdat->kswapd_wait);
> }
>
> /*
> @@ -2817,12 +2827,23 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
> for_each_node_state(nid, N_HIGH_MEMORY) {
> pg_data_t *pgdat = NODE_DATA(nid);
> const struct cpumask *mask;
> + struct kswapd *kswapd_p;
> + struct task_struct *kswapd_thr;
> + wait_queue_head_t *wait;
>
> mask = cpumask_of_node(pgdat->node_id);
>
> + spin_lock(&kswapds_spinlock);
> + wait = pgdat->kswapd_wait;
> + kswapd_p = container_of(wait, struct kswapd,
> + kswapd_wait);
> + kswapd_thr = kswapd_p->kswapd_task;
> + spin_unlock(&kswapds_spinlock);
> +
> if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
> /* One of our CPUs online: restore mask */
> - set_cpus_allowed_ptr(pgdat->kswapd, mask);
> + if (kswapd_thr)
> + set_cpus_allowed_ptr(kswapd_thr, mask);
> }
> }
> return NOTIFY_OK;
> @@ -2835,18 +2856,31 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
> int kswapd_run(int nid)
> {
> pg_data_t *pgdat = NODE_DATA(nid);
> + struct task_struct *kswapd_thr;
> + struct kswapd *kswapd_p;
> int ret = 0;
>
> - if (pgdat->kswapd)
> + if (pgdat->kswapd_wait)
> return 0;
>
> - pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
> - if (IS_ERR(pgdat->kswapd)) {
> + kswapd_p = kzalloc(sizeof(struct kswapd), GFP_KERNEL);
> + if (!kswapd_p)
> + return -ENOMEM;
> +
> + init_waitqueue_head(&kswapd_p->kswapd_wait);
> + pgdat->kswapd_wait = &kswapd_p->kswapd_wait;
> + kswapd_p->kswapd_pgdat = pgdat;
> +
> + kswapd_thr = kthread_run(kswapd, kswapd_p, "kswapd%d", nid);
> + if (IS_ERR(kswapd_thr)) {
> /* failure at boot is fatal */
> BUG_ON(system_state == SYSTEM_BOOTING);
> printk("Failed to start kswapd on node %d\n",nid);
> + pgdat->kswapd_wait = NULL;
> + kfree(kswapd_p);
> ret = -1;
> - }
> + } else
> + kswapd_p->kswapd_task = kswapd_thr;
> return ret;
> }
>
> @@ -2855,10 +2889,25 @@ int kswapd_run(int nid)
> */
> void kswapd_stop(int nid)
> {
> - struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
> + struct task_struct *kswapd_thr = NULL;
> + struct kswapd *kswapd_p = NULL;
> + wait_queue_head_t *wait;
> +
> + pg_data_t *pgdat = NODE_DATA(nid);
> +
> + spin_lock(&kswapds_spinlock);
> + wait = pgdat->kswapd_wait;
> + if (wait) {
> + kswapd_p = container_of(wait, struct kswapd, kswapd_wait);
> + kswapd_thr = kswapd_p->kswapd_task;
> + kswapd_p->kswapd_task = NULL;
> + }
> + spin_unlock(&kswapds_spinlock);
> +
> + if (kswapd_thr)
> + kthread_stop(kswapd_thr);
>
> - if (kswapd)
> - kthread_stop(kswapd);
> + kfree(kswapd_p);
> }
>
> static int __init kswapd_init(void)
> --
> 1.7.3.1
>
> --