All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org
Subject: [RESEND][PATCH 4/6] Add page becoming writable notification
Date: Thu, 14 Oct 2004 20:05:01 +0100	[thread overview]
Message-ID: <24449.1097780701@redhat.com> (raw)

Because Christoph asked so nicely, I'm resending this patch with a wider
distribution. The patch currently resides in the -mm kernels. It is being used
by the in-kernel AFS filesystem and by the NFS caching patches that haven't
yet been passed to akpm.

---

The attached patch creates a facility by which a filesystem, character device
or block device can detect a page mapped to an inode is about to become
writable. This provides the facility at two levels: a vma operation and an
address space operation. This can be used by a netfs to synchronise with a
cache writing the page to disc.

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 include/linux/fs.h |    3 +
 include/linux/mm.h |    4 ++
 mm/filemap.c       |   19 +++++++++-
 mm/memory.c        |   96 ++++++++++++++++++++++++++++++++++++++++-------------
 4 files changed, 99 insertions(+), 23 deletions(-)

diff -uNrp linux-2.6.9-rc1-mm2/include/linux/fs.h linux-2.6.9-rc1-mm2-cachefs/include/linux/fs.h
--- linux-2.6.9-rc1-mm2/include/linux/fs.h	2004-08-31 16:52:38.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/include/linux/fs.h	2004-09-03 16:42:33.229296834 +0100
@@ -328,6 +328,9 @@ struct address_space_operations {
 	int (*releasepage) (struct page *, int);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
+
+	/* notification that a page is about to become writable */
+	int (*page_mkwrite)(struct page *page);
 };
 
 struct backing_dev_info;
diff -uNrp linux-2.6.9-rc1-mm2/include/linux/mm.h linux-2.6.9-rc1-mm2-cachefs/include/linux/mm.h
--- linux-2.6.9-rc1-mm2/include/linux/mm.h	2004-08-31 16:52:38.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/include/linux/mm.h	2004-09-03 16:47:46.047671396 +0100
@@ -175,6 +175,10 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+
+	/* notification that a previously read-only page is about to become
+	 * writable, if an error is returned it will cause a SIGBUS */
+	int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
 #ifdef CONFIG_NUMA
 	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
diff -uNrp linux-2.6.9-rc1-mm2/mm/filemap.c linux-2.6.9-rc1-mm2-cachefs/mm/filemap.c
--- linux-2.6.9-rc1-mm2/mm/filemap.c	2004-08-31 16:52:40.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/mm/filemap.c	2004-09-03 16:46:58.780545691 +0100
@@ -1445,11 +1445,25 @@ repeat:
 	return 0;
 }
 
+/*
+ * pass notification that a page is becoming writable up to the filesystem
+ */
+static int filemap_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	return page->mapping->a_ops->page_mkwrite(page);
+}
+
 struct vm_operations_struct generic_file_vm_ops = {
 	.nopage		= filemap_nopage,
 	.populate	= filemap_populate,
 };
 
+struct vm_operations_struct generic_file_vm_mkwr_ops = {
+	.nopage		= filemap_nopage,
+	.populate	= filemap_populate,
+	.page_mkwrite	= filemap_page_mkwrite,
+};
+
 /* This is used for a general mmap of a disk file */
 
 int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
@@ -1459,7 +1473,10 @@ int generic_file_mmap(struct file * file
 	if (!mapping->a_ops->readpage)
 		return -ENOEXEC;
 	file_accessed(file);
-	vma->vm_ops = &generic_file_vm_ops;
+	if (!mapping->a_ops->page_mkwrite)
+		vma->vm_ops = &generic_file_vm_ops;
+	else
+		vma->vm_ops = &generic_file_vm_mkwr_ops;
 	return 0;
 }
 
diff -uNrp linux-2.6.9-rc1-mm2/mm/memory.c linux-2.6.9-rc1-mm2-cachefs/mm/memory.c
--- linux-2.6.9-rc1-mm2/mm/memory.c	2004-08-31 16:52:40.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/mm/memory.c	2004-09-02 15:40:26.000000000 +0100
@@ -1030,6 +1030,54 @@ static inline void break_cow(struct vm_a
 }
 
 /*
+ * Make a PTE writeable for do_wp_page() on a shared-writable page
+ */
+static inline int do_wp_page_mk_pte_writable(struct mm_struct *mm,
+					     struct vm_area_struct *vma,
+					     unsigned long address,
+					     pte_t *page_table,
+					     struct page *old_page,
+					     pte_t pte)
+{
+	pte_t entry;
+
+	/* See if the VMA's owner wants to know that the page is about to
+	 * become writable */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+		/* Notify the page owner without the lock held so they can
+		 * sleep if they want to */
+		spin_unlock(&mm->page_table_lock);
+
+		if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
+			goto bus_error;
+
+		spin_lock(&mm->page_table_lock);
+
+		/* Since we dropped the lock we need to revalidate the PTE as
+		 * someone else may have changed it. If they did, we just
+		 * return, as we can count on the MMU to tell us if they didn't
+		 * also make it writable
+		 */
+		if (!pte_same(*page_table, pte))
+			goto minor_fault;
+	}
+
+	flush_cache_page(vma, address);
+	entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
+			      vma);
+	ptep_set_access_flags(vma, address, page_table, entry, 1);
+	update_mmu_cache(vma, address, entry);
+	pte_unmap(page_table);
+
+ minor_fault:
+	spin_unlock(&mm->page_table_lock);
+	return VM_FAULT_MINOR;
+
+ bus_error:
+	return VM_FAULT_SIGBUS;
+}
+
+/*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
  * and decrementing the shared-page counter for the old page.
@@ -1054,7 +1102,6 @@ static int do_wp_page(struct mm_struct *
 {
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
-	pte_t entry;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1073,16 +1120,11 @@ static int do_wp_page(struct mm_struct *
 	if (!TestSetPageLocked(old_page)) {
 		int reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
-		if (reuse) {
-			flush_cache_page(vma, address);
-			entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
-					      vma);
-			ptep_set_access_flags(vma, address, page_table, entry, 1);
-			update_mmu_cache(vma, address, entry);
-			pte_unmap(page_table);
-			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
-		}
+		if (reuse)
+			/* We can just make the PTE writable */
+			return do_wp_page_mk_pte_writable(mm, vma, address,
+							  page_table, old_page,
+							  pte);
 	}
 	pte_unmap(page_table);
 
@@ -1521,18 +1563,28 @@ retry:
 	/*
 	 * Should we do an early C-O-W break?
 	 */
-	if (write_access && !(vma->vm_flags & VM_SHARED)) {
-		struct page *page;
+	if (write_access) {
+		if (!(vma->vm_flags & VM_SHARED)) {
+			struct page *page;
 
-		if (unlikely(anon_vma_prepare(vma)))
-			goto oom;
-		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
-		if (!page)
-			goto oom;
-		copy_user_highpage(page, new_page, address);
-		page_cache_release(new_page);
-		new_page = page;
-		anon = 1;
+			if (unlikely(anon_vma_prepare(vma)))
+				goto oom;
+			page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+			if (!page)
+				goto oom;
+			copy_user_highpage(page, new_page, address);
+			page_cache_release(new_page);
+			new_page = page;
+			anon = 1;
+
+		} else {
+			/* if the page will be shareable, see if the backing
+			 * address space wants to know that the page is about
+			 * to become writable */
+			if (vma->vm_ops->page_mkwrite &&
+			    vma->vm_ops->page_mkwrite(vma, new_page) < 0)
+				return VM_FAULT_SIGBUS;
+		}
 	}
 
 	spin_lock(&mm->page_table_lock);

WARNING: multiple messages have this Message-ID (diff)
From: David Howells <dhowells@redhat.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org
Subject: [RESEND][PATCH 4/6] Add page becoming writable notification
Date: Thu, 14 Oct 2004 20:05:01 +0100	[thread overview]
Message-ID: <24449.1097780701@redhat.com> (raw)

Because Christoph asked so nicely, I'm resending this patch with a wider
distribution. The patch currently resides in the -mm kernels. It is being used
by the in-kernel AFS filesystem and by the NFS caching patches that haven't
yet been passed to akpm.

---

The attached patch creates a facility by which a filesystem, character device
or block device can detect a page mapped to an inode is about to become
writable. This provides the facility at two levels: a vma operation and an
address space operation. This can be used by a netfs to synchronise with a
cache writing the page to disc.

Signed-Off-By: David Howells <dhowells@redhat.com>
---

 include/linux/fs.h |    3 +
 include/linux/mm.h |    4 ++
 mm/filemap.c       |   19 +++++++++-
 mm/memory.c        |   96 ++++++++++++++++++++++++++++++++++++++++-------------
 4 files changed, 99 insertions(+), 23 deletions(-)

diff -uNrp linux-2.6.9-rc1-mm2/include/linux/fs.h linux-2.6.9-rc1-mm2-cachefs/include/linux/fs.h
--- linux-2.6.9-rc1-mm2/include/linux/fs.h	2004-08-31 16:52:38.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/include/linux/fs.h	2004-09-03 16:42:33.229296834 +0100
@@ -328,6 +328,9 @@ struct address_space_operations {
 	int (*releasepage) (struct page *, int);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
+
+	/* notification that a page is about to become writable */
+	int (*page_mkwrite)(struct page *page);
 };
 
 struct backing_dev_info;
diff -uNrp linux-2.6.9-rc1-mm2/include/linux/mm.h linux-2.6.9-rc1-mm2-cachefs/include/linux/mm.h
--- linux-2.6.9-rc1-mm2/include/linux/mm.h	2004-08-31 16:52:38.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/include/linux/mm.h	2004-09-03 16:47:46.047671396 +0100
@@ -175,6 +175,10 @@ struct vm_operations_struct {
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+
+	/* notification that a previously read-only page is about to become
+	 * writable, if an error is returned it will cause a SIGBUS */
+	int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page);
 #ifdef CONFIG_NUMA
 	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
diff -uNrp linux-2.6.9-rc1-mm2/mm/filemap.c linux-2.6.9-rc1-mm2-cachefs/mm/filemap.c
--- linux-2.6.9-rc1-mm2/mm/filemap.c	2004-08-31 16:52:40.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/mm/filemap.c	2004-09-03 16:46:58.780545691 +0100
@@ -1445,11 +1445,25 @@ repeat:
 	return 0;
 }
 
+/*
+ * pass notification that a page is becoming writable up to the filesystem
+ */
+static int filemap_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	return page->mapping->a_ops->page_mkwrite(page);
+}
+
 struct vm_operations_struct generic_file_vm_ops = {
 	.nopage		= filemap_nopage,
 	.populate	= filemap_populate,
 };
 
+struct vm_operations_struct generic_file_vm_mkwr_ops = {
+	.nopage		= filemap_nopage,
+	.populate	= filemap_populate,
+	.page_mkwrite	= filemap_page_mkwrite,
+};
+
 /* This is used for a general mmap of a disk file */
 
 int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
@@ -1459,7 +1473,10 @@ int generic_file_mmap(struct file * file
 	if (!mapping->a_ops->readpage)
 		return -ENOEXEC;
 	file_accessed(file);
-	vma->vm_ops = &generic_file_vm_ops;
+	if (!mapping->a_ops->page_mkwrite)
+		vma->vm_ops = &generic_file_vm_ops;
+	else
+		vma->vm_ops = &generic_file_vm_mkwr_ops;
 	return 0;
 }
 
diff -uNrp linux-2.6.9-rc1-mm2/mm/memory.c linux-2.6.9-rc1-mm2-cachefs/mm/memory.c
--- linux-2.6.9-rc1-mm2/mm/memory.c	2004-08-31 16:52:40.000000000 +0100
+++ linux-2.6.9-rc1-mm2-cachefs/mm/memory.c	2004-09-02 15:40:26.000000000 +0100
@@ -1030,6 +1030,54 @@ static inline void break_cow(struct vm_a
 }
 
 /*
+ * Make a PTE writeable for do_wp_page() on a shared-writable page
+ */
+static inline int do_wp_page_mk_pte_writable(struct mm_struct *mm,
+					     struct vm_area_struct *vma,
+					     unsigned long address,
+					     pte_t *page_table,
+					     struct page *old_page,
+					     pte_t pte)
+{
+	pte_t entry;
+
+	/* See if the VMA's owner wants to know that the page is about to
+	 * become writable */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+		/* Notify the page owner without the lock held so they can
+		 * sleep if they want to */
+		spin_unlock(&mm->page_table_lock);
+
+		if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
+			goto bus_error;
+
+		spin_lock(&mm->page_table_lock);
+
+		/* Since we dropped the lock we need to revalidate the PTE as
+		 * someone else may have changed it. If they did, we just
+		 * return, as we can count on the MMU to tell us if they didn't
+		 * also make it writable
+		 */
+		if (!pte_same(*page_table, pte))
+			goto minor_fault;
+	}
+
+	flush_cache_page(vma, address);
+	entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
+			      vma);
+	ptep_set_access_flags(vma, address, page_table, entry, 1);
+	update_mmu_cache(vma, address, entry);
+	pte_unmap(page_table);
+
+ minor_fault:
+	spin_unlock(&mm->page_table_lock);
+	return VM_FAULT_MINOR;
+
+ bus_error:
+	return VM_FAULT_SIGBUS;
+}
+
+/*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
  * and decrementing the shared-page counter for the old page.
@@ -1054,7 +1102,6 @@ static int do_wp_page(struct mm_struct *
 {
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
-	pte_t entry;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
@@ -1073,16 +1120,11 @@ static int do_wp_page(struct mm_struct *
 	if (!TestSetPageLocked(old_page)) {
 		int reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
-		if (reuse) {
-			flush_cache_page(vma, address);
-			entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
-					      vma);
-			ptep_set_access_flags(vma, address, page_table, entry, 1);
-			update_mmu_cache(vma, address, entry);
-			pte_unmap(page_table);
-			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
-		}
+		if (reuse)
+			/* We can just make the PTE writable */
+			return do_wp_page_mk_pte_writable(mm, vma, address,
+							  page_table, old_page,
+							  pte);
 	}
 	pte_unmap(page_table);
 
@@ -1521,18 +1563,28 @@ retry:
 	/*
 	 * Should we do an early C-O-W break?
 	 */
-	if (write_access && !(vma->vm_flags & VM_SHARED)) {
-		struct page *page;
+	if (write_access) {
+		if (!(vma->vm_flags & VM_SHARED)) {
+			struct page *page;
 
-		if (unlikely(anon_vma_prepare(vma)))
-			goto oom;
-		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
-		if (!page)
-			goto oom;
-		copy_user_highpage(page, new_page, address);
-		page_cache_release(new_page);
-		new_page = page;
-		anon = 1;
+			if (unlikely(anon_vma_prepare(vma)))
+				goto oom;
+			page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+			if (!page)
+				goto oom;
+			copy_user_highpage(page, new_page, address);
+			page_cache_release(new_page);
+			new_page = page;
+			anon = 1;
+
+		} else {
+			/* if the page will be shareable, see if the backing
+			 * address space wants to know that the page is about
+			 * to become writable */
+			if (vma->vm_ops->page_mkwrite &&
+			    vma->vm_ops->page_mkwrite(vma, new_page) < 0)
+				return VM_FAULT_SIGBUS;
+		}
 	}
 
 	spin_lock(&mm->page_table_lock);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

             reply	other threads:[~2004-10-14 19:12 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-10-14 19:05 David Howells [this message]
2004-10-14 19:05 ` [RESEND][PATCH 4/6] Add page becoming writable notification David Howells
2004-10-14 19:29 ` Matthew Wilcox
2004-10-14 19:29   ` Matthew Wilcox
2004-10-14 20:35 ` Christoph Hellwig
2004-10-14 20:35   ` Christoph Hellwig
2004-10-15 15:05   ` David Howells
2004-10-15 15:05     ` David Howells
2004-10-15 15:34     ` Christoph Hellwig
2004-10-15 15:34       ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=24449.1097780701@redhat.com \
    --to=dhowells@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.