All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pankaj Gupta <pagupta@redhat.com>
To: Nitesh Narayan Lal <nilal@redhat.com>
Cc: kvm@vger.kernel.org, riel@redhat.com, mst@redhat.com,
	david@redhat.com, yang zhang wz <yang.zhang.wz@gmail.com>,
	wei w wang <wei.w.wang@intel.com>
Subject: Re: [PATCH 2/3] KVM: Guest page hinting functionality
Date: Wed, 2 Aug 2017 03:01:48 -0400 (EDT)	[thread overview]
Message-ID: <414002282.37988665.1501657308398.JavaMail.zimbra@redhat.com> (raw)
In-Reply-To: <20170801204806.23938-3-nilal@redhat.com>

Hi Nitesh,

I tried to look at the code. 
Some minor comments inline.

Thanks,
Pankaj

> 
> This patch adds the guest implementation in order to maintain the list of
> pages which are freed by the guest and are not reused. To avoid any
> reallocation it includes seqlock once the list is completely filled.
> Though it doesn't carries the hypercall related changes.
> 
> Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
> ---
>  virt/kvm/page_hinting.c | 246
>  +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 244 insertions(+), 2 deletions(-)
> 
> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
> index 39d2b1d..cfdc513 100644
> --- a/virt/kvm/page_hinting.c
> +++ b/virt/kvm/page_hinting.c
> @@ -3,7 +3,7 @@
>  #include <linux/page_ref.h>
>  #include <linux/kvm_host.h>
>  #include <linux/sort.h>
> -
> +#include <linux/kernel.h>
>  #include <trace/events/kvm.h>
>  
>  #define MAX_FGPT_ENTRIES	1000
> @@ -33,14 +33,256 @@ struct hypervisor_pages {
>  	unsigned int pages;
>  };
>  
> +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
>  DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
>  DEFINE_PER_CPU(int, kvm_pt_idx);
> -struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> +struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES - 1];
> +
> +static void empty_hyperlist(void)
> +{
> +	int i = 0;
> +
> +	while (i < MAX_FGPT_ENTRIES - 1) {

      MAX_FGPT_ENTRIES in-place of 'MAX_FGPT_ENTRIES - 1' here
      and at similar other places?

> +		hypervisor_pagelist[i].pfn = 0;
> +		hypervisor_pagelist[i].pages = 0;
> +		i++;
> +	}
> +}
> +
> +static void make_hypercall(void)
> +{
> +	/*
> +	 * Dummy function: Tobe filled later.
> +	 */
> +	empty_hyperlist();
> +}
> +
> +static int sort_pfn(const void *a1, const void *b1)
> +{
> +	const struct hypervisor_pages *a = a1;
> +	const struct hypervisor_pages *b = b1;
> +
> +	if (a->pfn > b->pfn)
> +		return 1;
> +
> +	if (a->pfn < b->pfn)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static int pack_hyperlist(void)
> +{
> +	int i = 0, j = 0;
> +
> +	while (i < MAX_FGPT_ENTRIES  - 1) {
> +		if (hypervisor_pagelist[i].pfn != 0) {

   no need to copy values at same index if non-zero                   

> +			hypervisor_pagelist[j].pfn =
> +					hypervisor_pagelist[i].pfn;
> +			hypervisor_pagelist[j].pages =
> +					hypervisor_pagelist[i].pages;
> +			j++;
> +		}
> +		i++;
> +	}
> +	i = j;
> +	while (j < MAX_FGPT_ENTRIES - 1) {
> +		hypervisor_pagelist[j].pfn = 0;
> +		hypervisor_pagelist[j].pages = 0;
> +		j++;
> +	}
> +	return i;
> +}
> +
> +int compress_hyperlist(void)
> +{
> +	int i = 0, j = 1, merge_counter = 0, ret = 0;
> +
> +	sort(hypervisor_pagelist, MAX_FGPT_ENTRIES - 1,
> +	     sizeof(struct hypervisor_pages), sort_pfn, NULL);

> +	while (i < MAX_FGPT_ENTRIES - 1 && j < MAX_FGPT_ENTRIES - 1) {
> +		unsigned long pfni = hypervisor_pagelist[i].pfn;
> +		unsigned int pagesi = hypervisor_pagelist[i].pages;
> +		unsigned long pfnj = hypervisor_pagelist[j].pfn;
> +		unsigned int pagesj = hypervisor_pagelist[j].pages;
> +
> +		if (pfnj <= pfni) {
> +			if (((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) &&
> +			    ((pfnj + pagesj - 1) >= (pfni - 1))) {
> +				hypervisor_pagelist[i].pfn = pfnj;
> +				hypervisor_pagelist[i].pages += pfni - pfnj;
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			} else if ((pfnj + pagesj - 1) > (pfni + pagesi - 1)) {
> +				hypervisor_pagelist[i].pfn = pfnj;
> +				hypervisor_pagelist[i].pages = pagesj;
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			}
> +		}
> +		if (pfnj > pfni) {
             
          else if here

> +			if ((pfnj + pagesj - 1) > (pfni + pagesi - 1) &&
> +			    (pfnj <= pfni + pagesi)) {
> +				hypervisor_pagelist[i].pages +=
> +						(pfnj + pagesj - 1) -
> +						(pfni + pagesi - 1);
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			}
> +			if ((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) {
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			}
> +		}
> +		i = j;
> +		j++;
> +	}
> +	if (merge_counter != 0)
> +		ret = pack_hyperlist() - 1;
> +	else
> +		ret = MAX_FGPT_ENTRIES - 1;
> +	return ret;
> +}
> +
> +void copy_hyperlist(int hyper_idx)
> +{
> +	int *idx = &get_cpu_var(kvm_pt_idx);
> +	struct kvm_free_pages *free_page_obj;
> +	int i = 0;
> +
> +	free_page_obj = &get_cpu_var(kvm_pt)[0];
> +	while (i < hyper_idx) {
> +		free_page_obj[*idx].pfn = hypervisor_pagelist[i].pfn;
> +		free_page_obj[*idx].pages = hypervisor_pagelist[i].pages;
> +		*idx += 1;
> +		i++;
> +	}
> +	empty_hyperlist();
> +	put_cpu_var(kvm_pt);
> +	put_cpu_var(kvm_pt_idx);
> +}
> +
> +/*
> + * arch_free_page_slowpath() - This function adds the guest free page
> entries
> + * to hypervisor_pages list and also ensures defragmentation prior to
> addition
> + * if it is present with any entry of the kvm_free_pages list.
> + */
> +void arch_free_page_slowpath(void)
> +{
> +	int idx = 0;
> +	int hyper_idx = -1;
> +	int *kvm_idx = &get_cpu_var(kvm_pt_idx);
> +	struct kvm_free_pages *free_page_obj = &get_cpu_var(kvm_pt)[0];
> +
> +	write_seqlock(&guest_page_lock);
> +	while (idx < MAX_FGPT_ENTRIES) {
> +		unsigned long pfn = free_page_obj[idx].pfn;
> +		unsigned long pfn_end = free_page_obj[idx].pfn +
> +					free_page_obj[idx].pages - 1;
> +		bool prev_free = false;
> +
> +		while (pfn <= pfn_end) {
> +			struct page *p = pfn_to_page(pfn);
> +
> +			if (PageCompound(p)) {
> +				struct page *head_page = compound_head(p);
> +				unsigned long head_pfn = page_to_pfn(head_page);
> +				unsigned int alloc_pages =
> +					1 << compound_order(head_page);
> +
> +				pfn = head_pfn + alloc_pages;
> +				prev_free = false;
> +				continue;
> +			}
> +			if (page_ref_count(p)) {
> +				pfn++;
> +				prev_free = false;
> +				continue;
> +			}
> +			/*
> +			 * The page is free so add it to the list and free the
> +			 * hypervisor_pagelist if required.
> +			 */
> +			if (!prev_free) {
> +				hyper_idx++;
> +				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
> +					hyper_idx =  compress_hyperlist();
> +					if (hyper_idx >=
> +					    HYPERLIST_THRESHOLD - 1) {
> +						make_hypercall();
> +						hyper_idx = 0;
> +					}
> +				}
> +				hypervisor_pagelist[hyper_idx].pfn = pfn;
> +				hypervisor_pagelist[hyper_idx].pages = 1;
> +				/*
> +				 * If the next contiguous page is free, it can
> +				 * be added to this same entry.
> +				 */
> +				prev_free = true;
> +			} else {
> +				/*
> +				 * Multiple adjacent free pages
> +				 */
> +				hypervisor_pagelist[hyper_idx].pages++;
> +			}
> +			pfn++;
> +		}
> +		free_page_obj[idx].pfn = 0;
> +		free_page_obj[idx].pages = 0;
> +		idx++;
> +	}
> +	*kvm_idx = 0;
> +	put_cpu_var(kvm_pt);
> +	put_cpu_var(kvm_pt_idx);
> +	write_sequnlock(&guest_page_lock);
> +}
>  
>  void arch_alloc_page(struct page *page, int order)
>  {
> +	unsigned int seq;
> +
> +	/*
> +	 * arch_free_page will acquire the lock once the list carrying guest
> +	 * free pages is full and a hypercall will be made. Until complete free
> +	 * page list is traversed no further allocaiton will be allowed.
> +	 */
> +	do {
> +		seq = read_seqbegin(&guest_page_lock);
> +	} while (read_seqretry(&guest_page_lock, seq));
>  }
>  
>  void arch_free_page(struct page *page, int order)
>  {
> +	int *free_page_idx = &get_cpu_var(kvm_pt_idx);
> +	struct kvm_free_pages *free_page_obj;
> +	unsigned long flags;
> +
> +	/*
> +	 * use of global variables may trigger a race condition between irq and
> +	 * process context causing unwanted overwrites. This will be replaced
> +	 * with a better solution to prevent such race conditions.
> +	 */
> +	local_irq_save(flags);
> +	free_page_obj = &get_cpu_var(kvm_pt)[0];
> +	free_page_obj[*free_page_idx].pfn = page_to_pfn(page);
> +	free_page_obj[*free_page_idx].pages = 1 << order;
> +	*free_page_idx += 1;
> +	if (*free_page_idx == MAX_FGPT_ENTRIES)
> +		arch_free_page_slowpath();
> +	put_cpu_var(kvm_pt);
> +	put_cpu_var(kvm_pt_idx);
> +	local_irq_restore(flags);
>  }
> --
> 2.9.4
> 
> 

  reply	other threads:[~2017-08-02  7:01 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-01 20:48 [PATCH 0/3] [RFC] KVM: Guest page hinting Nitesh Narayan Lal
2017-08-01 20:48 ` [PATCH 1/3] KVM: Support for guest " Nitesh Narayan Lal
2017-08-02  7:12   ` kbuild test robot
2017-08-01 20:48 ` [PATCH 2/3] KVM: Guest page hinting functionality Nitesh Narayan Lal
2017-08-02  7:01   ` Pankaj Gupta [this message]
2017-08-02 18:59     ` Nitesh Narayan Lal
2017-08-02 19:20       ` Rik van Riel
2017-08-02 20:37         ` Nitesh Narayan Lal
2017-08-02 12:19   ` Pankaj Gupta
2017-08-02 15:02     ` Rik van Riel
2017-08-01 20:48 ` [PATCH 3/3] KVM: Adding tracepoints for guest page hinting Nitesh Narayan Lal
2017-08-04  5:16 ` [PATCH 0/3] [RFC] KVM: Guest " Yang Zhang
2017-08-04 11:59   ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=414002282.37988665.1501657308398.JavaMail.zimbra@redhat.com \
    --to=pagupta@redhat.com \
    --cc=david@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=nilal@redhat.com \
    --cc=riel@redhat.com \
    --cc=wei.w.wang@intel.com \
    --cc=yang.zhang.wz@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.