All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gilles Chanteperdrix <gilles.chanteperdrix@xenomai.org>
To: Gilles Chanteperdrix <gilles.chanteperdrix@xenomai.org>
Cc: xenomai-core <xenomai@xenomai.org>
Subject: Re: [Xenomai-core] Nocow patch.
Date: Thu, 11 Jan 2007 16:18:37 +0100	[thread overview]
Message-ID: <45A6554D.2060900@domain.hid> (raw)
In-Reply-To: <45A52B04.1010706@domain.hid>

[-- Attachment #1: Type: text/plain, Size: 915 bytes --]

Gilles Chanteperdrix wrote:
> This was run on x86, but needs further testing before inclusion.

Here is a new version, after testing. It appears to run fine. I tested
forking in real-time applications both before and after calling
rt_task_shadow, as well as vmallocing areas of 256 MB and memsetting them
from both a non-real-time and a real-time context, and it works.

The next step is to clean up the patch, but I have to admit that I need
some help: should I keep the functions in the files where I put them?
In which headers should I declare them? Should I define an empty
ipipe_update_nofault_mms when CONFIG_IPIPE is not set, in order to avoid
a few #ifdefs?

Note that in order to use the patch, you have to call
ipipe_disable_task_faults(current) in xnshadow_map instead of simply
setting the VM_NOCOW flag.

I will now test the patch on ARM.

-- 
                                                 Gilles Chanteperdrix

[-- Attachment #2: vm-nocow-2.6.19.3.patch --]
[-- Type: text/x-patch, Size: 12690 bytes --]

diff -Naurdp -x '*~' ipipe-2.6.19/arch/i386/mm/fault.c ipipe-2.6.19-nocow/arch/i386/mm/fault.c
--- ipipe-2.6.19/arch/i386/mm/fault.c	2007-01-10 09:44:52.000000000 +0100
+++ ipipe-2.6.19-nocow/arch/i386/mm/fault.c	2007-01-11 09:58:49.000000000 +0100
@@ -654,3 +654,19 @@ void vmalloc_sync_all(void)
 	}
 }
 #endif
+
+#ifdef CONFIG_IPIPE
+int ipipe_arch_map_vm_area_to_mm(struct mm_struct *mm,
+				 unsigned long start,
+				 unsigned long end)
+{
+	unsigned long next, addr = start;
+
+	do {
+		next = pgd_addr_end(addr, end);
+		vmalloc_sync_one(mm->pgd, addr);
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif /* CONFIG_IPIPE */
diff -Naurdp -x '*~' ipipe-2.6.19/include/asm-i386/pgalloc.h ipipe-2.6.19-nocow/include/asm-i386/pgalloc.h
--- ipipe-2.6.19/include/asm-i386/pgalloc.h	2007-01-10 09:44:53.000000000 +0100
+++ ipipe-2.6.19-nocow/include/asm-i386/pgalloc.h	2007-01-11 09:58:49.000000000 +0100
@@ -46,27 +46,4 @@ static inline void pte_free(struct page 
 
 #define check_pgt_cache()	do { } while (0)
 
-static inline void set_pgdir(unsigned long address, pgd_t entry)
-{
-#ifdef CONFIG_IPIPE
-	struct task_struct * p;
-	struct page *page;
-	pgd_t *pgd;
-
-	read_lock(&tasklist_lock);
-
-	for_each_process(p) {
-		if(p->mm)
-		    *pgd_offset(p->mm,address) = entry;
-	}
-
-	read_unlock(&tasklist_lock);
-
-	for (page = pgd_list; page; page = (struct page *)page->index) {
-		pgd = (pgd_t *)page_address(page);
-		pgd[address >> PGDIR_SHIFT] = entry;
-	}
-#endif /* CONFIG_IPIPE */
-}
-
 #endif /* _I386_PGALLOC_H */
diff -Naurdp -x '*~' ipipe-2.6.19/include/linux/mm.h ipipe-2.6.19-nocow/include/linux/mm.h
--- ipipe-2.6.19/include/linux/mm.h	2007-01-04 10:10:33.000000000 +0100
+++ ipipe-2.6.19-nocow/include/linux/mm.h	2007-01-11 09:58:49.000000000 +0100
@@ -166,6 +166,7 @@ extern unsigned int kobjsize(const void 
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
+#define VM_NOFAULT	0x10000000	/* Disable faults for the vma */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
diff -Naurdp -x '*~' ipipe-2.6.19/include/linux/sched.h ipipe-2.6.19-nocow/include/linux/sched.h
--- ipipe-2.6.19/include/linux/sched.h	2007-01-10 09:44:53.000000000 +0100
+++ ipipe-2.6.19-nocow/include/linux/sched.h	2007-01-11 09:58:49.000000000 +0100
@@ -363,6 +363,10 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
+
+#if CONFIG_IPIPE
+	struct list_head nofault;
+#endif /* CONFIG_IPIPE */
 };
 
 struct sighand_struct {
diff -Naurdp -x '*~' ipipe-2.6.19/kernel/fork.c ipipe-2.6.19-nocow/kernel/fork.c
--- ipipe-2.6.19/kernel/fork.c	2007-01-10 09:44:53.000000000 +0100
+++ ipipe-2.6.19-nocow/kernel/fork.c	2007-01-11 15:32:25.000000000 +0100
@@ -385,6 +385,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		ipipe_cleanup_notify(mm);
+		ipipe_destroy_nofault_mm(mm);
 		exit_aio(mm);
 		exit_mmap(mm);
 		if (!list_empty(&mm->mmlist)) {
diff -Naurdp -x '*~' ipipe-2.6.19/lib/ioremap.c ipipe-2.6.19-nocow/lib/ioremap.c
--- ipipe-2.6.19/lib/ioremap.c	2007-01-10 09:44:53.000000000 +0100
+++ ipipe-2.6.19-nocow/lib/ioremap.c	2007-01-11 09:58:49.000000000 +0100
@@ -85,9 +85,10 @@ int ioremap_page_range(unsigned long add
 		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
 		if (err)
 			break;
-		set_pgdir(addr, *pgd);
 	} while (pgd++, addr = next, addr != end);
-
+#ifdef CONFIG_IPIPE
+	ipipe_update_nofault_mms(start, end);
+#endif /* CONFIG_IPIPE */
 	flush_cache_vmap(start, end);
 
 	return err;
diff -Naurdp -x '*~' ipipe-2.6.19/mm/memory.c ipipe-2.6.19-nocow/mm/memory.c
--- ipipe-2.6.19/mm/memory.c	2007-01-04 10:10:35.000000000 +0100
+++ ipipe-2.6.19-nocow/mm/memory.c	2007-01-11 15:50:37.000000000 +0100
@@ -50,6 +50,9 @@
 #include <linux/delayacct.h>
 #include <linux/init.h>
 #include <linux/writeback.h>
+#ifdef CONFIG_IPIPE
+#include <linux/vmalloc.h>	/* For vmlist */
+#endif /* CONFIG_IPIPE */
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -418,13 +421,41 @@ struct page *vm_normal_page(struct vm_ar
 	return pfn_to_page(pfn);
 }
 
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
+{
+	/*
+	 * If the source page was a PFN mapping, we don't have
+	 * a "struct page" for it. We do a best-effort copy by
+	 * just copying from the original user address. If that
+	 * fails, we just zero-fill it. Live with it.
+	 */
+	if (unlikely(!src)) {
+		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+			memset(kaddr, 0, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(dst);
+		return;
+		
+	}
+	copy_user_highpage(dst, src, va);
+}
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
  */
 
-static inline void
+static inline int
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
 		unsigned long addr, int *rss)
@@ -466,6 +497,25 @@ copy_one_pte(struct mm_struct *dst_mm, s
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags)) {
+#ifdef CONFIG_IPIPE
+		if (((vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_NOFAULT)) == (VM_LOCKED|VM_NOFAULT)) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+			if (!page)
+				return -ENOMEM;
+
+			cow_user_page(page, old_page, addr);
+			pte = mk_pte(page, vma->vm_page_prot);
+			
+			if (vm_flags & VM_SHARED)
+				pte = pte_mkclean(pte);
+			pte = pte_mkold(pte);
+
+			page_dup_rmap(page);
+			rss[!!PageAnon(page)]++;
+			goto out_set_pte;
+		}
+#endif /* CONFIG_IPIPE */
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -487,6 +537,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
 
 out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
+	return 0;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -524,7 +575,9 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		if (copy_one_pte(dst_mm, src_mm, dst_pte,
+				 src_pte, vma, addr, rss))
+			return -ENOMEM;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
@@ -1431,34 +1484,6 @@ static inline pte_t maybe_mkwrite(pte_t 
 	return pte;
 }
 
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
-{
-	/*
-	 * If the source page was a PFN mapping, we don't have
-	 * a "struct page" for it. We do a best-effort copy by
-	 * just copying from the original user address. If that
-	 * fails, we just zero-fill it. Live with it.
-	 */
-	if (unlikely(!src)) {
-		void *kaddr = kmap_atomic(dst, KM_USER0);
-		void __user *uaddr = (void __user *)(va & PAGE_MASK);
-
-		/*
-		 * This really shouldn't fail, because the page is there
-		 * in the page tables. But it might just be unreadable,
-		 * in which case we just give up and fill the result with
-		 * zeroes.
-		 */
-		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
-			memset(kaddr, 0, PAGE_SIZE);
-		kunmap_atomic(kaddr, KM_USER0);
-		flush_dcache_page(dst);
-		return;
-		
-	}
-	copy_user_highpage(dst, src, va);
-}
-
 /*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
@@ -2676,3 +2701,150 @@ int access_process_vm(struct task_struct
 
 	return buf - old_buf;
 }
+
+#ifdef CONFIG_IPIPE
+static LIST_HEAD(nofault_mms);
+static DEFINE_RWLOCK(nofault_mms_lock);
+
+static int ipipe_fault_pte_range(struct mm_struct *mm, pmd_t *pmd,
+				 struct vm_area_struct *vma,
+				 unsigned long addr, unsigned long end)
+{
+	spinlock_t *ptl;
+	pte_t *pte;
+	
+	do {
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+
+		if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM)
+			return -ENOMEM;
+	} while (addr += PAGE_SIZE, addr != end);
+	return 0;
+}
+
+static int ipipe_fault_pmd_range(struct mm_struct *mm, pud_t *pud,
+				 struct vm_area_struct *vma,
+				 unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (ipipe_fault_pte_range(mm, pmd, vma, addr, end))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static int ipipe_fault_pud_range(struct mm_struct *mm, pgd_t *pgd,
+				 struct vm_area_struct *vma,
+				 unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pud_t *pud;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (ipipe_fault_pmd_range(mm, pud, vma, addr, end))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+int ipipe_disable_task_faults(struct task_struct *tsk)
+{
+	unsigned long addr, next, end;
+	struct vm_area_struct *vma;
+	struct vm_struct *area;
+	struct mm_struct *mm;
+	int result = 0;
+	pgd_t *pgd;
+	
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	down_write(&mm->mmap_sem);
+	if (mm->def_flags & VM_NOFAULT)
+		goto up_mmap_sem_done;
+	
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!is_cow_mapping(vma->vm_flags))
+			continue;
+
+		addr = vma->vm_start;
+		end = vma->vm_end;
+		
+		pgd = pgd_offset(mm, addr);
+		do {
+			next = pgd_addr_end(addr, end);
+			if (ipipe_fault_pud_range(mm, pgd, vma, addr, next)) {
+				result = -ENOMEM;
+			  up_mmap_sem_done:
+				up_write(&mm->mmap_sem);
+				goto done_mm;
+			}
+		} while (pgd++, addr = next, addr != end);
+	}
+	mm->def_flags |= VM_NOFAULT;
+	up_write(&mm->mmap_sem);
+
+	read_lock(&vmlist_lock);
+	down_write(&mm->mmap_sem);
+	for (area = vmlist; area; area = area->next) {
+		result = ipipe_arch_map_vm_area_to_mm(mm,
+						      (unsigned long) area->addr,
+						      (unsigned long) area->addr
+						      + area->size);
+		if (result) {
+			mm->def_flags &= ~VM_NOFAULT;
+			up_write(&mm->mmap_sem);
+			goto done_vmlist;
+		}
+	}
+	up_write(&mm->mmap_sem);
+
+	write_lock(&nofault_mms_lock);
+	list_add(&mm->nofault, &nofault_mms);
+	write_unlock(&nofault_mms_lock);
+
+  done_vmlist:
+	read_unlock(&vmlist_lock);	
+  done_mm:
+	mmput(mm);
+	return result;
+}
+
+EXPORT_SYMBOL(ipipe_disable_task_faults);
+
+int ipipe_update_nofault_mms(unsigned long start, unsigned long end)
+{
+	struct mm_struct *mm;
+	int result = 0;
+
+	read_lock(&nofault_mms_lock);
+	list_for_each_entry(mm, &nofault_mms, nofault) {
+		down_write(&mm->mmap_sem);
+		result = ipipe_arch_map_vm_area_to_mm(mm, start, end);
+		up_write(&mm->mmap_sem);
+
+		if (result)
+			break;
+	}
+	read_unlock(&nofault_mms_lock);
+
+	return result;
+}
+
+void ipipe_destroy_nofault_mm(struct mm_struct *mm)
+{
+	if (mm->def_flags & VM_NOFAULT) {
+		write_lock(&nofault_mms_lock);
+		list_del(&mm->nofault);
+		write_unlock(&nofault_mms_lock);
+	}
+}
+#endif
diff -Naurdp -x '*~' ipipe-2.6.19/mm/mlock.c ipipe-2.6.19-nocow/mm/mlock.c
--- ipipe-2.6.19/mm/mlock.c	2007-01-02 10:59:48.000000000 +0100
+++ ipipe-2.6.19-nocow/mm/mlock.c	2007-01-11 15:32:09.000000000 +0100
@@ -166,7 +166,7 @@ static int do_mlockall(int flags)
 
 	if (flags & MCL_FUTURE)
 		def_flags = VM_LOCKED;
-	current->mm->def_flags = def_flags;
+	current->mm->def_flags |= def_flags;
 	if (flags == MCL_FUTURE)
 		goto out;
 
diff -Naurdp -x '*~' ipipe-2.6.19/mm/vmalloc.c ipipe-2.6.19-nocow/mm/vmalloc.c
--- ipipe-2.6.19/mm/vmalloc.c	2007-01-10 11:22:05.000000000 +0100
+++ ipipe-2.6.19-nocow/mm/vmalloc.c	2007-01-11 09:58:49.000000000 +0100
@@ -152,15 +152,14 @@ int map_vm_area(struct vm_struct *area, 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset_k(addr);
 	do {
-		pgd_t oldpgd;
-		memcpy(&oldpgd,pgd,sizeof(pgd_t));
 		next = pgd_addr_end(addr, end);
 		err = vmap_pud_range(pgd, addr, next, prot, pages);
 		if (err)
 			break;
-		if (pgd_val(oldpgd) != pgd_val(*pgd))
-			set_pgdir(addr, *pgd);
 	} while (pgd++, addr = next, addr != end);
+#ifdef CONFIG_IPIPE
+	ipipe_update_nofault_mms((unsigned long) area->addr, end);
+#endif /* CONFIG_IPIPE */
 	flush_cache_vmap((unsigned long) area->addr, end);
 	return err;
 }

  parent reply	other threads:[~2007-01-11 15:18 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-01-10 18:05 [Xenomai-core] Nocow patch Gilles Chanteperdrix
2007-01-11  7:38 ` Niklaus Giger
2007-01-11  8:43   ` Gilles Chanteperdrix
2007-01-11  8:46     ` Gilles Chanteperdrix
2007-01-11 15:18 ` Gilles Chanteperdrix [this message]
2007-01-13 18:57   ` Philippe Gerum
2007-01-15 10:48     ` Gilles Chanteperdrix
2007-01-19  9:22       ` Gilles Chanteperdrix
2007-01-19  9:58         ` Philippe Gerum
2007-01-19 10:10           ` Gilles Chanteperdrix
  -- strict thread matches above, loose matches on Subject: below --
2007-01-31  8:37 Gilles Chanteperdrix

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=45A6554D.2060900@domain.hid \
    --to=gilles.chanteperdrix@xenomai.org \
    --cc=xenomai@xenomai.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.