From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Linux Memory Management <linux-mm@kvack.org>
Subject: Re: [PATCH 0/7] abstract pagetable locking and pte updates
Date: Fri, 29 Oct 2004 21:45:57 +1000 [thread overview]
Message-ID: <41822D75.3090802@yahoo.com.au> (raw)
In-Reply-To: <4181EF2D.5000407@yahoo.com.au>
[-- Attachment #1: Type: text/plain, Size: 710 bytes --]
Nick Piggin wrote:
> Hello,
>
> Following are patches that abstract page table operations to
> allow lockless implementations by using cmpxchg or per-pte locks.
>
One more patch - this provides a generic framework for pte
locks, and a basic i386 reference implementation (which just
ifdefs out the cmpxchg version). Boots, runs, and has taken
some stressing.
I should have sorted this out before sending the patches for
RFC. The generic code actually did need a few lines of changes,
but not much, as you can see. It still needs some tidying up, though;
I only just wrote it in a few minutes.
And now, before anyone gets a chance to shoot down the whole thing,
I just have to say:
"look ma, no page_table_lock!"
[-- Attachment #2: vm-i386-locked-pte.patch --]
[-- Type: text/x-patch, Size: 11031 bytes --]
---
linux-2.6-npiggin/include/asm-generic/pgtable.h | 128 +++++++++++++++++++++++-
linux-2.6-npiggin/include/asm-i386/pgtable.h | 33 ++++++
linux-2.6-npiggin/include/linux/mm.h | 7 -
linux-2.6-npiggin/kernel/futex.c | 5
linux-2.6-npiggin/mm/memory.c | 13 +-
5 files changed, 174 insertions(+), 12 deletions(-)
diff -puN include/asm-i386/pgtable.h~vm-i386-locked-pte include/asm-i386/pgtable.h
--- linux-2.6/include/asm-i386/pgtable.h~vm-i386-locked-pte 2004-10-29 19:12:15.000000000 +1000
+++ linux-2.6-npiggin/include/asm-i386/pgtable.h 2004-10-29 20:38:38.000000000 +1000
@@ -106,6 +106,8 @@ void paging_init(void);
#define _PAGE_BIT_UNUSED3 11
#define _PAGE_BIT_NX 63
+#define _PAGE_BIT_LOCKED 9
+
#define _PAGE_PRESENT 0x001
#define _PAGE_RW 0x002
#define _PAGE_USER 0x004
@@ -119,6 +121,8 @@ void paging_init(void);
#define _PAGE_UNUSED2 0x400
#define _PAGE_UNUSED3 0x800
+#define _PAGE_LOCKED 0x200
+
#define _PAGE_FILE 0x040 /* set:pagecache unset:swap */
#define _PAGE_PROTNONE 0x080 /* If not present */
#ifdef CONFIG_X86_PAE
@@ -231,11 +235,13 @@ static inline pte_t pte_exprotect(pte_t
static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_mkunlocked(pte_t pte) { (pte).pte_low &= ~_PAGE_LOCKED; return pte; }
static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
+static inline pte_t pte_mklocked(pte_t pte) { (pte).pte_low |= _PAGE_LOCKED; return pte; }
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
@@ -398,7 +404,32 @@ extern pte_t *lookup_address(unsigned lo
} \
} while (0)
-#define __HAVE_ARCH_PTEP_CMPXCHG
+#define __HAVE_ARCH_PTEP_LOCK
+#define ptep_xchg(__ptep, __newval) \
+({ \
+ pte_t ret; \
+ /* Just need to make sure we keep the _PAGE_BIT_LOCKED bit */ \
+ ret.pte_low = xchg(&(__ptep)->pte_low, (__newval).pte_low); \
+ ret.pte_high = (__ptep)->pte_high; \
+ (__ptep)->pte_high = (__newval).pte_high; \
+ ret; \
+})
+
+#define ptep_lock(__ptep) \
+do { \
+ preempt_disable(); \
+ while (unlikely(test_and_set_bit(_PAGE_BIT_LOCKED, &(__ptep)->pte_low))) \
+ cpu_relax(); \
+} while (0)
+
+#define ptep_unlock(__ptep) \
+do { \
+ if (unlikely(!test_and_clear_bit(_PAGE_BIT_LOCKED, &(__ptep)->pte_low))) \
+ BUG(); \
+ preempt_enable(); \
+} while (0)
+
+//#define __HAVE_ARCH_PTEP_CMPXCHG
#ifdef CONFIG_X86_PAE
#define __HAVE_ARCH_PTEP_ATOMIC_READ
diff -puN include/asm-generic/pgtable.h~vm-i386-locked-pte include/asm-generic/pgtable.h
--- linux-2.6/include/asm-generic/pgtable.h~vm-i386-locked-pte 2004-10-29 19:35:14.000000000 +1000
+++ linux-2.6-npiggin/include/asm-generic/pgtable.h 2004-10-29 20:54:56.000000000 +1000
@@ -135,7 +135,7 @@ static inline void ptep_mkdirty(pte_t *p
#endif
#ifndef __ASSEMBLY__
-#ifdef __HAVE_ARCH_PTEP_CMPXCHG
+#if defined(__HAVE_ARCH_PTEP_CMPXCHG)
#define mm_lock_page_table(__mm) \
do { \
} while (0);
@@ -254,7 +254,130 @@ do {} while (0)
#define ptep_verify_finish(__pmod, __mm, __ptep) \
ptep_verify(__pmod, __mm, __ptep)
-#else /* __HAVE_ARCH_PTEP_CMPXCHG */ /* GENERIC_PTEP_LOCKING follows */
+#elif defined(__HAVE_ARCH_PTEP_LOCK)
+
+#define mm_lock_page_table(__mm) \
+do { \
+} while (0);
+
+#define mm_unlock_page_table(__mm) \
+do { \
+} while (0);
+
+#define mm_pin_pages(__mm) \
+do { \
+} while (0)
+
+#define mm_unpin_pages(__mm) \
+do { \
+} while (0)
+
+#define ptep_pin_pages(__mm, __ptep) \
+do { \
+ ptep_lock(__ptep); \
+} while (0)
+
+#define ptep_unpin_pages(__mm, __ptep) \
+do { \
+ ptep_unlock(__ptep); \
+} while (0)
+
+/* mm_lock_page_table doesn't actually take a lock, so this can be 0 */
+#define MM_RELOCK_CHECK 0
+
+struct pte_modify {
+};
+
+#ifndef __HAVE_ARCH_PTEP_ATOMIC_READ
+#define ptep_atomic_read(__ptep) \
+({ \
+ *__ptep; \
+})
+#endif
+
+#define ptep_begin_modify(__pmod, __mm, __ptep) \
+({ \
+ (void)__pmod; \
+ (void)__mm; \
+ ptep_lock(__ptep); \
+ pte_mkunlocked(*(__ptep)); \
+})
+
+#define ptep_abort(__pmod, __mm, __ptep) \
+do { ptep_unlock(__ptep); } while (0)
+
+#define ptep_commit(__pmod, __mm, __ptep, __newval) \
+({ \
+ *(__ptep) = pte_mklocked(__newval); \
+ ptep_unlock(__ptep); \
+ 0; \
+})
+
+#define ptep_commit_flush(__pmod, __mm, __vma, __address, __ptep, __newval) \
+({ \
+ ptep_commit(__pmod, __mm, __ptep, __newval); \
+ flush_tlb_page(__vma, __address); \
+ 0; \
+})
+
+#define ptep_commit_access_flush(__pmod, __mm, __vma, __address, __ptep, __newval, __dirty) \
+({ \
+ ptep_set_access_flags(__vma, __address, __ptep, \
+ pte_mklocked(__newval), __dirty); \
+ ptep_unlock(__ptep); \
+ flush_tlb_page(__vma, __address); \
+ 0; \
+})
+
+#define ptep_commit_establish_flush(__pmod, __mm, __vma, __address, __ptep, __newval) \
+({ \
+ ptep_establish(__vma, __address, __ptep, pte_mklocked(__newval)); \
+ ptep_unlock(__ptep); \
+ flush_tlb_page(__vma, __address); \
+ 0; \
+})
+
+#define ptep_commit_clear(__pmod, __mm, __ptep, __newval, __oldval) \
+({ \
+ __oldval = ptep_xchg(__ptep, pte_mklocked(__newval)); \
+ __oldval = pte_mkunlocked(__oldval); \
+ ptep_unlock(__ptep); \
+ 0; \
+})
+
+#define ptep_commit_clear_flush(__pmod, __mm, __vma, __address, __ptep, __newval, __oldval) \
+({ \
+ ptep_commit_clear(__pmod, __mm, __ptep, __newval, __oldval); \
+ flush_tlb_page(__vma, __address); \
+ 0; \
+})
+
+#define ptep_commit_clear_flush_young(__pmod, __mm, __vma, __address, __ptep, __young) \
+({ \
+ *__young = ptep_clear_flush_young(__vma, __address, __ptep); \
+ ptep_unlock(__ptep); \
+ 0; \
+})
+
+#define ptep_commit_clear_flush_dirty(__pmod, __mm, __vma, __address, __ptep, __dirty) \
+({ \
+ *__dirty = ptep_clear_flush_dirty(__vma, __address, __ptep); \
+ ptep_unlock(__ptep); \
+ 0; \
+})
+
+#define ptep_verify(__pmod, __mm, __ptep) \
+({ \
+ 0; \
+})
+
+#define ptep_verify_finish(__pmod, __mm, __ptep) \
+({ \
+ ptep_unlock(__ptep); \
+ 0; \
+})
+
+#else /* __HAVE_ARCH_PTEP_LOCK */ /* GENERIC_PTEP_LOCKING follows */
/* Use the generic mm->page_table_lock serialised scheme */
/*
* XXX: can we make use of this?
@@ -339,6 +462,7 @@ struct pte_modify {
({ \
(void)__pmod; \
(void)__mm; \
+ /* XXX: needn't be atomic? */ \
ptep_atomic_read(__ptep); \
})
diff -puN mm/memory.c~vm-i386-locked-pte mm/memory.c
--- linux-2.6/mm/memory.c~vm-i386-locked-pte 2004-10-29 20:01:32.000000000 +1000
+++ linux-2.6-npiggin/mm/memory.c 2004-10-29 21:18:31.000000000 +1000
@@ -689,8 +689,9 @@ void zap_page_range(struct vm_area_struc
unmap_vmas(mm, vma, address, end, &nr_accounted, details);
}
-void follow_page_finish(struct mm_struct *mm, unsigned long address)
+void follow_page_finish(struct mm_struct *mm, pte_t *p, unsigned long address)
{
+ ptep_unpin_pages(mm, p);
mm_unpin_pages(mm);
mm_unlock_page_table(mm);
}
@@ -699,7 +700,7 @@ void follow_page_finish(struct mm_struct
* Do a quick page-table lookup for a single page.
*/
struct page *
-follow_page(struct mm_struct *mm, unsigned long address, int write)
+follow_page(struct mm_struct *mm, pte_t **p, unsigned long address, int write)
{
pgd_t *pgd;
pmd_t *pmd;
@@ -732,6 +733,7 @@ follow_page(struct mm_struct *mm, unsign
* page with get_page?
*/
mm_pin_pages(mm);
+ ptep_pin_pages(mm, ptep);
pte = ptep_atomic_read(ptep);
pte_unmap(ptep);
@@ -744,11 +746,13 @@ follow_page(struct mm_struct *mm, unsign
if (write && !pte_dirty(pte) && !PageDirty(page))
set_page_dirty(page);
mark_page_accessed(page);
+ *p = ptep;
return page;
}
}
out_unpin:
+ ptep_unpin_pages(mm, ptep);
mm_unpin_pages(mm);
out:
mm_unlock_page_table(mm);
@@ -850,9 +854,10 @@ int get_user_pages(struct task_struct *t
continue;
}
do {
+ pte_t *p;
struct page *page;
int lookup_write = write;
- while (!(page = follow_page(mm, start, lookup_write))) {
+ while (!(page = follow_page(mm, &p, start, lookup_write))) {
/*
* Shortcut for anonymous pages. We don't want
* to force the creation of pages tables for
@@ -896,7 +901,7 @@ int get_user_pages(struct task_struct *t
page_cache_get(page);
}
if (page)
- follow_page_finish(mm, start);
+ follow_page_finish(mm, p, start);
set_vmas:
if (vmas)
vmas[i] = vma;
diff -puN kernel/futex.c~vm-i386-locked-pte kernel/futex.c
--- linux-2.6/kernel/futex.c~vm-i386-locked-pte 2004-10-29 21:13:50.000000000 +1000
+++ linux-2.6-npiggin/kernel/futex.c 2004-10-29 21:18:11.000000000 +1000
@@ -144,6 +144,7 @@ static int get_futex_key(unsigned long u
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct page *page;
+ pte_t *p;
int err;
/*
@@ -204,11 +205,11 @@ static int get_futex_key(unsigned long u
/*
* Do a quick atomic lookup first - this is the fastpath.
*/
- page = follow_page(mm, uaddr, 0);
+ page = follow_page(mm, &p, uaddr, 0);
if (likely(page != NULL)) {
key->shared.pgoff =
page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
- follow_page_finish(mm, uaddr);
+ follow_page_finish(mm, p, uaddr);
return 0;
}
diff -puN include/linux/mm.h~vm-i386-locked-pte include/linux/mm.h
--- linux-2.6/include/linux/mm.h~vm-i386-locked-pte 2004-10-29 21:14:05.000000000 +1000
+++ linux-2.6-npiggin/include/linux/mm.h 2004-10-29 21:17:48.000000000 +1000
@@ -756,9 +756,10 @@ static inline unsigned long vma_pages(st
extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
extern struct page * vmalloc_to_page(void *addr);
-extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
- int write);
-extern void follow_page_finish(struct mm_struct *mm, unsigned long address);
+extern struct page * follow_page(struct mm_struct *mm, pte_t **p,
+ unsigned long address, int write);
+extern void follow_page_finish(struct mm_struct *mm, pte_t *p,
+ unsigned long address);
int remap_pfn_range(struct vm_area_struct *, unsigned long,
unsigned long, unsigned long, pgprot_t);
_
next prev parent reply other threads:[~2004-10-29 11:45 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-10-29 7:20 [PATCH 0/7] abstract pagetable locking and pte updates Nick Piggin
2004-10-29 7:20 ` [PATCH 1/7] " Nick Piggin
2004-10-29 7:21 ` [PATCH 2/7] " Nick Piggin
2004-10-29 7:21 ` [PATCH 3/7] " Nick Piggin
2004-10-29 7:21 ` [PATCH 4/7] " Nick Piggin
2004-10-29 7:22 ` [PATCH 5/7] " Nick Piggin
2004-10-29 7:23 ` [PATCH 6/7] " Nick Piggin
2004-10-29 7:23 ` [PATCH 7/7] " Nick Piggin
2004-10-29 7:46 ` [PATCH 0/7] " William Lee Irwin III
2004-11-02 0:15 ` Christoph Lameter
2004-11-02 0:54 ` William Lee Irwin III
2004-11-02 1:34 ` Nick Piggin
2004-11-02 1:55 ` William Lee Irwin III
2004-11-02 2:38 ` Nick Piggin
2004-11-02 6:57 ` William Lee Irwin III
2004-11-02 17:55 ` Christoph Lameter
2004-10-29 11:45 ` Nick Piggin [this message]
2004-10-29 20:52 ` William Lee Irwin III
2004-10-30 2:46 ` Nick Piggin
2004-11-02 0:19 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41822D75.3090802@yahoo.com.au \
--to=nickpiggin@yahoo.com.au \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.