Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [patch 0/5] mm: improve remapping of vmalloc regions
From: Nick Piggin @ 2006-04-20 17:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel, Nick Piggin, Linux Memory Management, Hugh Dickins

Hi,

I'd like some feedback about this patchset -- whether it is the right
design, and the implementation (e.g. people might dislike patch 4).

vm_insert_page and remap_pfn_range loops are really clever, but
probably asking a bit too much of most drivers. I was able to get
rid of most of them without too much trouble.

Not tested, because I don't have any of the hardware, but it seems
to compile OK.

Nick

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [patch 1/5] mm: remap_vmalloc_range
From: Nick Piggin @ 2006-04-20 17:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel, Nick Piggin, Linux Memory Management, Hugh Dickins
In-Reply-To: <20060228202202.14172.60409.sendpatchset@linux.site>

Add a remap_vmalloc_range and get rid of as many remap_pfn_range and
vm_insert_page loops as possible.

remap_vmalloc_range can do a whole lot of nice range checking even
if the caller gets it wrong (which it looks like one or two do).

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/drivers/media/video/cpia.c
===================================================================
--- linux-2.6.orig/drivers/media/video/cpia.c
+++ linux-2.6/drivers/media/video/cpia.c
@@ -3750,9 +3750,7 @@ static int cpia_ioctl(struct inode *inod
 static int cpia_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct video_device *dev = file->private_data;
-	unsigned long start = vma->vm_start;
 	unsigned long size  = vma->vm_end - vma->vm_start;
-	unsigned long page, pos;
 	struct cam_data *cam = dev->priv;
 	int retval;
 
@@ -3778,19 +3776,9 @@ static int cpia_mmap(struct file *file, 
 		}
 	}
 
-	pos = (unsigned long)(cam->frame_buf);
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) {
-			mutex_unlock(&cam->busy_lock);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
+	if (remap_vmalloc_range(vma, cam->frame_buf, 0)) {
+		mutex_unlock(&cam->busy_lock);
+		return -EAGAIN;
 	}
 
 	DBG("cpia_mmap: %ld\n", size);
Index: linux-2.6/drivers/media/video/meye.c
===================================================================
--- linux-2.6.orig/drivers/media/video/meye.c
+++ linux-2.6/drivers/media/video/meye.c
@@ -1699,13 +1699,10 @@ static struct vm_operations_struct meye_
 
 static int meye_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	unsigned long start = vma->vm_start;
 	unsigned long size = vma->vm_end - vma->vm_start;
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long page, pos;
 
 	mutex_lock(&meye.lock);
-	if (size > gbuffers * gbufsize) {
+	if (size > gbuffers * gbufsize) { /* XXX: should be size + vm_pgoff? */
 		mutex_unlock(&meye.lock);
 		return -EINVAL;
 	}
@@ -1722,20 +1719,10 @@ static int meye_mmap(struct file *file, 
 		for (i = 0; i < gbuffers; i++)
 			meye.vma_use_count[i] = 0;
 	}
-	pos = (unsigned long)meye.grab_fbuffer + offset;
 
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) {
-			mutex_unlock(&meye.lock);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
+	if (remap_vmalloc_range(vma, meye.grab_fbuffer, vma->vm_pgoff)) {
+		mutex_unlock(&meye.lock);
+		return -EAGAIN;
 	}
 
 	vma->vm_ops = &meye_vm_ops;
Index: linux-2.6/drivers/media/video/ov511.c
===================================================================
--- linux-2.6.orig/drivers/media/video/ov511.c
+++ linux-2.6/drivers/media/video/ov511.c
@@ -4616,10 +4616,8 @@ static int
 ov51x_v4l1_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct video_device *vdev = file->private_data;
-	unsigned long start = vma->vm_start;
 	unsigned long size  = vma->vm_end - vma->vm_start;
 	struct usb_ov511 *ov = video_get_drvdata(vdev);
-	unsigned long page, pos;
 
 	if (ov->dev == NULL)
 		return -EIO;
@@ -4634,19 +4632,9 @@ ov51x_v4l1_mmap(struct file *file, struc
 	if (mutex_lock_interruptible(&ov->lock))
 		return -EINTR;
 
-	pos = (unsigned long)ov->fbuf;
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) {
-			mutex_unlock(&ov->lock);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
+	if (remap_vmalloc_range(vma, ov->fbuf, 0)) {
+		mutex_unlock(&ov->lock);
+		return -EAGAIN;
 	}
 
 	mutex_unlock(&ov->lock);
Index: linux-2.6/drivers/media/video/pwc/pwc-if.c
===================================================================
--- linux-2.6.orig/drivers/media/video/pwc/pwc-if.c
+++ linux-2.6/drivers/media/video/pwc/pwc-if.c
@@ -1602,28 +1602,16 @@ static int pwc_video_mmap(struct file *f
 {
 	struct video_device *vdev = file->private_data;
 	struct pwc_device *pdev;
-	unsigned long start = vma->vm_start;
-	unsigned long size  = vma->vm_end-vma->vm_start;
-	unsigned long page, pos;
 
-	Trace(TRACE_MEMORY, "mmap(0x%p, 0x%lx, %lu) called.\n", vdev, start, size);
+	/* XXX: should check ranges */
+	Trace(TRACE_MEMORY, "mmap(0x%p, 0x%lx, %lu) called.\n", vdev,
+				vma->vm_start, vma->vm_end - vma->vm_start);
 	pdev = vdev->priv;
 
 	vma->vm_flags |= VM_IO;
 
-	pos = (unsigned long)pdev->image_data;
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED))
-			return -EAGAIN;
-
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
-	}
+	if (remap_vmalloc_range(vma, pdev->image_data, 0))
+		return -EAGAIN;
 
 	return 0;
 }
Index: linux-2.6/drivers/media/video/se401.c
===================================================================
--- linux-2.6.orig/drivers/media/video/se401.c
+++ linux-2.6/drivers/media/video/se401.c
@@ -1153,9 +1153,7 @@ static int se401_mmap(struct file *file,
 {
 	struct video_device *dev = file->private_data;
 	struct usb_se401 *se401 = (struct usb_se401 *)dev;
-	unsigned long start = vma->vm_start;
 	unsigned long size  = vma->vm_end-vma->vm_start;
-	unsigned long page, pos;
 
 	mutex_lock(&se401->lock);
 
@@ -1167,19 +1165,9 @@ static int se401_mmap(struct file *file,
 		mutex_unlock(&se401->lock);
 		return -EINVAL;
 	}
-	pos = (unsigned long)se401->fbuf;
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) {
-			mutex_unlock(&se401->lock);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
+	if (remap_vmalloc_range(vma, se401->fbuf, 0)) {
+		mutex_unlock(&se401->lock);
+		return -EAGAIN;
 	}
 	mutex_unlock(&se401->lock);
 
Index: linux-2.6/drivers/media/video/stv680.c
===================================================================
--- linux-2.6.orig/drivers/media/video/stv680.c
+++ linux-2.6/drivers/media/video/stv680.c
@@ -1254,9 +1254,7 @@ static int stv680_mmap (struct file *fil
 {
 	struct video_device *dev = file->private_data;
 	struct usb_stv *stv680 = video_get_drvdata(dev);
-	unsigned long start = vma->vm_start;
 	unsigned long size  = vma->vm_end-vma->vm_start;
-	unsigned long page, pos;
 
 	mutex_lock(&stv680->lock);
 
@@ -1269,19 +1267,9 @@ static int stv680_mmap (struct file *fil
 		mutex_unlock(&stv680->lock);
 		return -EINVAL;
 	}
-	pos = (unsigned long) stv680->fbuf;
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) {
-			mutex_unlock(&stv680->lock);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
+	if (remap_vmalloc_range(vma, stv680->fbuf, 0)) {
+		mutex_unlock(&stv680->lock);
+		return -EAGAIN;
 	}
 	mutex_unlock(&stv680->lock);
 
Index: linux-2.6/drivers/media/video/usbvideo/usbvideo.c
===================================================================
--- linux-2.6.orig/drivers/media/video/usbvideo/usbvideo.c
+++ linux-2.6/drivers/media/video/usbvideo/usbvideo.c
@@ -1068,9 +1068,7 @@ EXPORT_SYMBOL(usbvideo_RegisterVideoDevi
 static int usbvideo_v4l_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct uvd *uvd = file->private_data;
-	unsigned long start = vma->vm_start;
 	unsigned long size  = vma->vm_end-vma->vm_start;
-	unsigned long page, pos;
 
 	if (!CAMERA_IS_OPERATIONAL(uvd))
 		return -EFAULT;
@@ -1078,19 +1076,8 @@ static int usbvideo_v4l_mmap(struct file
 	if (size > (((USBVIDEO_NUMFRAMES * uvd->max_frame_size) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)))
 		return -EINVAL;
 
-	pos = (unsigned long) uvd->fbuf;
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED))
-			return -EAGAIN;
-
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
-	}
+	if (remap_vmalloc_range(vma, uvd->fbuf, 0))
+		return -EAGAIN;
 
 	return 0;
 }
Index: linux-2.6/drivers/media/video/usbvideo/vicam.c
===================================================================
--- linux-2.6.orig/drivers/media/video/usbvideo/vicam.c
+++ linux-2.6/drivers/media/video/usbvideo/vicam.c
@@ -1029,8 +1029,6 @@ static int
 vicam_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	// TODO: allocate the raw frame buffer if necessary
-	unsigned long page, pos;
-	unsigned long start = vma->vm_start;
 	unsigned long size  = vma->vm_end-vma->vm_start;
 	struct vicam_camera *cam = file->private_data;
 
@@ -1039,25 +1037,16 @@ vicam_mmap(struct file *file, struct vm_
 
 	DBG("vicam_mmap: %ld\n", size);
 
-	/* We let mmap allocate as much as it wants because Linux was adding 2048 bytes
-	 * to the size the application requested for mmap and it was screwing apps up.
-	 if (size > VICAM_FRAMES*VICAM_MAX_FRAME_SIZE)
-	 return -EINVAL;
+	/* We let mmap allocate as much as it wants because Linux was adding
+	 * 2048 bytes to the size the application requested for mmap and it was
+	 * screwing apps up.
+	 *
+	 * It shouldn't have been, so let's try this check again -np
 	 */
+	 if (size > VICAM_FRAMES*VICAM_MAX_FRAME_SIZE)
 
-	pos = (unsigned long)cam->framebuf;
-	while (size > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED))
-			return -EAGAIN;
-
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		if (size > PAGE_SIZE)
-			size -= PAGE_SIZE;
-		else
-			size = 0;
-	}
+	if (remap_vmalloc_range(vma, cam->framebuf, 0))
+		return -EAGAIN;
 
 	return 0;
 }
Index: linux-2.6/drivers/media/video/w9968cf.c
===================================================================
--- linux-2.6.orig/drivers/media/video/w9968cf.c
+++ linux-2.6/drivers/media/video/w9968cf.c
@@ -2861,10 +2861,7 @@ static int w9968cf_mmap(struct file* fil
 	struct w9968cf_device* cam = (struct w9968cf_device*)
 				     video_get_drvdata(video_devdata(filp));
 	unsigned long vsize = vma->vm_end - vma->vm_start,
-		      psize = cam->nbuffers * cam->frame[0].size,
-		      start = vma->vm_start,
-		      pos = (unsigned long)cam->frame[0].buffer,
-		      page;
+		      psize = cam->nbuffers * cam->frame[0].size;
 
 	if (cam->disconnected) {
 		DBG(2, "Device not present")
@@ -2881,15 +2878,8 @@ static int w9968cf_mmap(struct file* fil
 	if (vsize > psize - (vma->vm_pgoff << PAGE_SHIFT))
 		return -EINVAL;
 
-	while (vsize > 0) {
-		page = vmalloc_to_pfn((void *)pos);
-		if (remap_pfn_range(vma, start, page + vma->vm_pgoff,
-						PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		vsize -= PAGE_SIZE;
-	}
+	if (remap_vmalloc_range(vma, cam->frame[0].buffer, vma->vm_pgoff))
+		return -EAGAIN;
 
 	DBG(5, "mmap method successfully called")
 	return 0;
Index: linux-2.6/arch/ia64/kernel/perfmon.c
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/perfmon.c
+++ linux-2.6/arch/ia64/kernel/perfmon.c
@@ -2237,25 +2237,6 @@ pfm_free_fd(int fd, struct file *file)
 	put_unused_fd(fd);
 }
 
-static int
-pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
-{
-	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
-
-	while (size > 0) {
-		unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT;
-
-
-		if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY))
-			return -ENOMEM;
-
-		addr  += PAGE_SIZE;
-		buf   += PAGE_SIZE;
-		size  -= PAGE_SIZE;
-	}
-	return 0;
-}
-
 /*
  * allocate a sampling buffer and remaps it into the user address space of the task
  */
@@ -2343,7 +2324,7 @@ pfm_smpl_buffer_alloc(struct task_struct
 	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
 
 	/* can only be applied to current task, need to have the mm semaphore held when called */
-	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
+	if (remap_vmalloc_range(vma, smpl_buf, 0)) {
 		DPRINT(("Can't remap buffer\n"));
 		up_write(&task->mm->mmap_sem);
 		goto error;
Index: linux-2.6/include/linux/vmalloc.h
===================================================================
--- linux-2.6.orig/include/linux/vmalloc.h
+++ linux-2.6/include/linux/vmalloc.h
@@ -45,6 +45,9 @@ extern void vfree(void *addr);
 extern void *vmap(struct page **pages, unsigned int count,
 			unsigned long flags, pgprot_t prot);
 extern void vunmap(void *addr);
+
+extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
+							unsigned long pgoff);
  
 /*
  *	Lowlevel-APIs (not for driver use!)
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -256,6 +256,20 @@ struct vm_struct *get_vm_area_node(unsig
 	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
 }
 
+static struct vm_struct *find_vm_area(void *addr)
+{
+	struct vm_struct *tmp;
+
+	write_lock(&vmlist_lock);
+	for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
+		 if (tmp->addr == addr)
+			break;
+	}
+	write_unlock(&vmlist_lock);
+
+	return tmp;
+}
+
 /* Caller must hold vmlist_lock */
 struct vm_struct *__remove_vm_area(void *addr)
 {
@@ -630,3 +644,55 @@ finished:
 	read_unlock(&vmlist_lock);
 	return buf - buf_start;
 }
+
+/**
+ *	remap_vmalloc_range  -  map vmalloc pages to userspace
+ *
+ *	@vma:		vma to cover (map full range of vma)
+ *	@addr:		vmalloc memory
+ *	@pgoff:		number of pages into addr before first page to map
+ *	@returns:	0 for success, -Exxx on failure
+ *
+ *	This function checks that addr is a valid vmalloc'ed area, and
+ *	that it is big enough to cover the vma. Will return failure if
+ *	that criteria isn't met.
+ *
+ *	Similar to remap_pfn_range (see mm/memory.c)
+ */
+int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
+						unsigned long pgoff)
+{
+	struct vm_struct *area;
+	unsigned long uaddr = vma->vm_start;
+	unsigned long usize = vma->vm_end - vma->vm_start;
+	int ret;
+
+	if ((PAGE_SIZE-1) & (unsigned long)addr)
+		return -EINVAL;
+
+	area = find_vm_area(addr);
+	if (!area)
+		return -EINVAL;
+
+	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
+		return -EINVAL;
+
+	addr = (void *)((unsigned long)addr + (pgoff << PAGE_SHIFT));
+	do {
+		struct page *page = vmalloc_to_page(addr);
+		ret = vm_insert_page(vma, uaddr, page);
+		if (ret)
+			return ret;
+
+		uaddr += PAGE_SIZE;
+		addr = (void *)((unsigned long)addr+PAGE_SIZE);
+		usize -= PAGE_SIZE;
+	} while (usize > 0);
+
+	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
+	vma->vm_flags |= VM_RESERVED;
+
+	return ret;
+}
+EXPORT_SYMBOL(remap_vmalloc_range);
+
Index: linux-2.6/drivers/media/video/em28xx/em28xx-video.c
===================================================================
--- linux-2.6.orig/drivers/media/video/em28xx/em28xx-video.c
+++ linux-2.6/drivers/media/video/em28xx/em28xx-video.c
@@ -34,6 +34,7 @@
 #include <linux/version.h>
 #include <linux/video_decoder.h>
 #include <linux/mutex.h>
+#include <linux/vmalloc.h>
 
 #include "em28xx.h"
 #include <media/tuner.h>
@@ -582,9 +583,7 @@ static struct vm_operations_struct em28x
  */
 static int em28xx_v4l2_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-	unsigned long size = vma->vm_end - vma->vm_start,
-	    start = vma->vm_start;
-	void *pos;
+	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
 
 	struct em28xx *dev = filp->private_data;
@@ -625,16 +624,10 @@ static int em28xx_v4l2_mmap(struct file 
 	vma->vm_flags |= VM_IO;
 	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 
-	pos = dev->frame[i].bufmem;
-	while (size > 0) {	/* size is page-aligned */
-		if (vm_insert_page(vma, start, vmalloc_to_page(pos))) {
-			em28xx_videodbg("mmap: vm_insert_page failed\n");
-			mutex_unlock(&dev->fileop_lock);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	if (remap_vmalloc_range(vma, dev->frame[i].bufmem, 0)) {
+		em28xx_videodbg("mmap: remap_vmalloc_range failed\n");
+		mutex_unlock(&dev->fileop_lock);
+		return -EAGAIN;
 	}
 
 	vma->vm_ops = &em28xx_vm_ops;
Index: linux-2.6/drivers/media/video/et61x251/et61x251_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/et61x251/et61x251_core.c
+++ linux-2.6/drivers/media/video/et61x251/et61x251_core.c
@@ -1464,9 +1464,7 @@ static struct vm_operations_struct et61x
 static int et61x251_mmap(struct file* filp, struct vm_area_struct *vma)
 {
 	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
-	unsigned long size = vma->vm_end - vma->vm_start,
-		      start = vma->vm_start;
-	void *pos;
+	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
 
 	if (mutex_lock_interruptible(&cam->fileop_mutex))
@@ -1503,15 +1501,9 @@ static int et61x251_mmap(struct file* fi
 	vma->vm_flags |= VM_IO;
 	vma->vm_flags |= VM_RESERVED;
 
-	pos = cam->frame[i].bufmem;
-	while (size > 0) { /* size is page-aligned */
-		if (vm_insert_page(vma, start, vmalloc_to_page(pos))) {
-			mutex_unlock(&cam->fileop_mutex);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
+		mutex_unlock(&cam->fileop_mutex);
+		return -EAGAIN;
 	}
 
 	vma->vm_ops = &et61x251_vm_ops;
Index: linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/sn9c102/sn9c102_core.c
+++ linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
@@ -1728,9 +1728,7 @@ static struct vm_operations_struct sn9c1
 static int sn9c102_mmap(struct file* filp, struct vm_area_struct *vma)
 {
 	struct sn9c102_device* cam = video_get_drvdata(video_devdata(filp));
-	unsigned long size = vma->vm_end - vma->vm_start,
-		      start = vma->vm_start;
-	void *pos;
+	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
 
 	if (mutex_lock_interruptible(&cam->fileop_mutex))
@@ -1767,15 +1765,9 @@ static int sn9c102_mmap(struct file* fil
 	vma->vm_flags |= VM_IO;
 	vma->vm_flags |= VM_RESERVED;
 
-	pos = cam->frame[i].bufmem;
-	while (size > 0) { /* size is page-aligned */
-		if (vm_insert_page(vma, start, vmalloc_to_page(pos))) {
-			mutex_unlock(&cam->fileop_mutex);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
+		mutex_unlock(&cam->fileop_mutex);
+		return -EAGAIN;
 	}
 
 	vma->vm_ops = &sn9c102_vm_ops;
Index: linux-2.6/drivers/media/video/zc0301/zc0301_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/zc0301/zc0301_core.c
+++ linux-2.6/drivers/media/video/zc0301/zc0301_core.c
@@ -929,9 +929,7 @@ static struct vm_operations_struct zc030
 static int zc0301_mmap(struct file* filp, struct vm_area_struct *vma)
 {
 	struct zc0301_device* cam = video_get_drvdata(video_devdata(filp));
-	unsigned long size = vma->vm_end - vma->vm_start,
-		      start = vma->vm_start;
-	void *pos;
+	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
 
 	if (mutex_lock_interruptible(&cam->fileop_mutex))
@@ -968,15 +966,9 @@ static int zc0301_mmap(struct file* filp
 	vma->vm_flags |= VM_IO;
 	vma->vm_flags |= VM_RESERVED;
 
-	pos = cam->frame[i].bufmem;
-	while (size > 0) { /* size is page-aligned */
-		if (vm_insert_page(vma, start, vmalloc_to_page(pos))) {
-			mutex_unlock(&cam->fileop_mutex);
-			return -EAGAIN;
-		}
-		start += PAGE_SIZE;
-		pos += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
+		mutex_unlock(&cam->fileop_mutex);
+		return -EAGAIN;
 	}
 
 	vma->vm_ops = &zc0301_vm_ops;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [patch 2/5] mm: deprecate vmalloc_to_pfn
From: Nick Piggin @ 2006-04-20 17:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel, Nick Piggin, Linux Memory Management, Hugh Dickins
In-Reply-To: <20060228202202.14172.60409.sendpatchset@linux.site>

Deprecate vmalloc_to_pfn.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/Documentation/feature-removal-schedule.txt
===================================================================
--- linux-2.6.orig/Documentation/feature-removal-schedule.txt
+++ linux-2.6/Documentation/feature-removal-schedule.txt
@@ -238,3 +238,12 @@ Why:	The interface no longer has any cal
 Who:	Nick Piggin <npiggin@suse.de>
 
 ---------------------------
+
+What:	vmalloc_to_pfn
+When:	April 2007
+Why:	The interface no longer has any callers left in the kernel. It
+	was previously used so remap_pfn_range can be used on vmalloc memory,
+	but is deprecated with the introduction of remap_vmalloc_range.
+Who:	Nick Piggin <npiggin@suse.de>
+
+---------------------------
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -1002,7 +1002,7 @@ static inline unsigned long vma_pages(st
 
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 struct page *vmalloc_to_page(void *addr);
-unsigned long vmalloc_to_pfn(void *addr);
+__deprecated_for_modules unsigned long vmalloc_to_pfn(void *addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [patch 3/5] mm: remove rvmalloc
From: Nick Piggin @ 2006-04-20 17:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel, Nick Piggin, Linux Memory Management, Hugh Dickins
In-Reply-To: <20060228202202.14172.60409.sendpatchset@linux.site>

Some distros will be using 2.6.16ish kernels, which gives us a good
chance to start getting rid of the SetPageReserved which has stuck
around until now for its ability to catch paper bag type reference
counting bugs.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/drivers/ieee1394/dv1394-private.h
===================================================================
--- linux-2.6.orig/drivers/ieee1394/dv1394-private.h
+++ linux-2.6/drivers/ieee1394/dv1394-private.h
@@ -478,7 +478,7 @@ struct video_card {
 	/* support asynchronous I/O signals (SIGIO) */
 	struct fasync_struct *fasync;
 
-	/* the large, non-contiguous (rvmalloc()) ringbuffer for DV
+	/* the large, non-contiguous (vmalloc()) ringbuffer for DV
            data, exposed to user-space via mmap() */
 	unsigned long      dv_buf_size;
 	struct dma_region  dv_buf;
Index: linux-2.6/drivers/media/video/cpia.c
===================================================================
--- linux-2.6.orig/drivers/media/video/cpia.c
+++ linux-2.6/drivers/media/video/cpia.c
@@ -212,48 +212,6 @@ static void set_flicker(struct cam_param
 
 /**********************************************************************
  *
- * Memory management
- *
- **********************************************************************/
-static void *rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32(size);
-	if (!mem)
-		return NULL;
-
-	memset(mem, 0, size); /* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-	while (size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	return mem;
-}
-
-static void rvfree(void *mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	adr = (unsigned long) mem;
-	while ((long) size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree(mem);
-}
-
-/**********************************************************************
- *
  * /proc interface
  *
  **********************************************************************/
@@ -1610,7 +1568,7 @@ static int allocate_frame_buf(struct cam
 {
 	int i;
 
-	cam->frame_buf = rvmalloc(FRAME_NUM * CPIA_MAX_FRAME_SIZE);
+	cam->frame_buf = vmalloc_32_user(FRAME_NUM * CPIA_MAX_FRAME_SIZE);
 	if (!cam->frame_buf)
 		return -ENOBUFS;
 
@@ -1624,7 +1582,7 @@ static int free_frame_buf(struct cam_dat
 {
 	int i;
 
-	rvfree(cam->frame_buf, FRAME_NUM*CPIA_MAX_FRAME_SIZE);
+	vfree(cam->frame_buf);
 	cam->frame_buf = NULL;
 	for (i=0; i < FRAME_NUM; i++)
 		cam->frame[i].data = NULL;
@@ -3188,13 +3146,13 @@ static int cpia_open(struct inode *inode
 	mutex_lock(&cam->busy_lock);
 	err = -ENOMEM;
 	if (!cam->raw_image) {
-		cam->raw_image = rvmalloc(CPIA_MAX_IMAGE_SIZE);
+		cam->raw_image = vmalloc_32_user(CPIA_MAX_IMAGE_SIZE);
 		if (!cam->raw_image)
 			goto oops;
 	}
 
 	if (!cam->decompressed_frame.data) {
-		cam->decompressed_frame.data = rvmalloc(CPIA_MAX_FRAME_SIZE);
+		cam->decompressed_frame.data = vmalloc_32_user(CPIA_MAX_FRAME_SIZE);
 		if (!cam->decompressed_frame.data)
 			goto oops;
 	}
@@ -3231,11 +3189,11 @@ static int cpia_open(struct inode *inode
 
  oops:
 	if (cam->decompressed_frame.data) {
-		rvfree(cam->decompressed_frame.data, CPIA_MAX_FRAME_SIZE);
+		vfree(cam->decompressed_frame.data);
 		cam->decompressed_frame.data = NULL;
 	}
 	if (cam->raw_image) {
-		rvfree(cam->raw_image, CPIA_MAX_IMAGE_SIZE);
+		vfree(cam->raw_image);
 		cam->raw_image = NULL;
 	}
 	mutex_unlock(&cam->busy_lock);
@@ -3274,12 +3232,12 @@ static int cpia_close(struct inode *inod
 	if (--cam->open_count == 0) {
 		/* clean up capture-buffers */
 		if (cam->raw_image) {
-			rvfree(cam->raw_image, CPIA_MAX_IMAGE_SIZE);
+			vfree(cam->raw_image);
 			cam->raw_image = NULL;
 		}
 
 		if (cam->decompressed_frame.data) {
-			rvfree(cam->decompressed_frame.data, CPIA_MAX_FRAME_SIZE);
+			vfree(cam->decompressed_frame.data);
 			cam->decompressed_frame.data = NULL;
 		}
 
Index: linux-2.6/drivers/media/video/cpia2/cpia2_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/cpia2/cpia2_core.c
+++ linux-2.6/drivers/media/video/cpia2/cpia2_core.c
@@ -86,47 +86,6 @@ static inline unsigned long kvirt_to_pa(
 	return ret;
 }
 
-static void *rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	/* Round it off to PAGE_SIZE */
-	size = PAGE_ALIGN(size);
-
-	mem = vmalloc_32(size);
-	if (!mem)
-		return NULL;
-
-	memset(mem, 0, size);	/* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-
-	while ((long)size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	return mem;
-}
-
-static void rvfree(void *mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	size = PAGE_ALIGN(size);
-
-	adr = (unsigned long) mem;
-	while ((long)size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree(mem);
-}
-
 /******************************************************************************
  *
  *  cpia2_do_command
@@ -2300,7 +2259,8 @@ int cpia2_allocate_buffers(struct camera
 	}
 
 	if(!cam->frame_buffer) {
-		cam->frame_buffer = rvmalloc(cam->frame_size*cam->num_frames);
+		cam->frame_buffer = vmalloc_32_user(cam->frame_size *
+							cam->num_frames);
 		if (!cam->frame_buffer) {
 			ERR("couldn't vmalloc frame buffer data area\n");
 			kfree(cam->buffers);
@@ -2342,7 +2302,7 @@ void cpia2_free_buffers(struct camera_da
 		cam->buffers = NULL;
 	}
 	if(cam->frame_buffer) {
-		rvfree(cam->frame_buffer, cam->frame_size*cam->num_frames);
+		vfree(cam->frame_buffer);
 		cam->frame_buffer = NULL;
 	}
 }
Index: linux-2.6/drivers/media/video/meye.c
===================================================================
--- linux-2.6.orig/drivers/media/video/meye.c
+++ linux-2.6/drivers/media/video/meye.c
@@ -71,43 +71,6 @@ MODULE_PARM_DESC(video_nr, "video device
 /* driver structure - only one possible */
 static struct meye meye;
 
-/****************************************************************************/
-/* Memory allocation routines (stolen from bttv-driver.c)                   */
-/****************************************************************************/
-static void *rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32(size);
-	if (mem) {
-		memset(mem, 0, size);
-		adr = (unsigned long) mem;
-		while (size > 0) {
-			SetPageReserved(vmalloc_to_page((void *)adr));
-			adr += PAGE_SIZE;
-			size -= PAGE_SIZE;
-		}
-	}
-	return mem;
-}
-
-static void rvfree(void * mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (mem) {
-		adr = (unsigned long) mem;
-		while ((long) size > 0) {
-			ClearPageReserved(vmalloc_to_page((void *)adr));
-			adr += PAGE_SIZE;
-			size -= PAGE_SIZE;
-		}
-		vfree(mem);
-	}
-}
-
 /*
  * return a page table pointing to N pages of locked memory
  *
@@ -1516,12 +1479,12 @@ static int meye_do_ioctl(struct inode *i
 					mutex_unlock(&meye.lock);
 					return -EINVAL;
 				}
-			rvfree(meye.grab_fbuffer, gbuffers * gbufsize);
+			vfree(meye.grab_fbuffer);
 			meye.grab_fbuffer = NULL;
 		}
 		gbuffers = max(2, min((int)req->count, MEYE_MAX_BUFNBRS));
 		req->count = gbuffers;
-		meye.grab_fbuffer = rvmalloc(gbuffers * gbufsize);
+		meye.grab_fbuffer = vmalloc_32_user(gbuffers * gbufsize);
 		if (!meye.grab_fbuffer) {
 			printk(KERN_ERR "meye: v4l framebuffer allocation"
 					" failed\n");
@@ -1710,7 +1673,7 @@ static int meye_mmap(struct file *file, 
 		int i;
 
 		/* lazy allocation */
-		meye.grab_fbuffer = rvmalloc(gbuffers*gbufsize);
+		meye.grab_fbuffer = vmalloc_32_user(gbuffers*gbufsize);
 		if (!meye.grab_fbuffer) {
 			printk(KERN_ERR "meye: v4l framebuffer allocation failed\n");
 			mutex_unlock(&meye.lock);
@@ -1982,7 +1945,7 @@ static void __devexit meye_remove(struct
 	vfree(meye.grab_temp);
 
 	if (meye.grab_fbuffer) {
-		rvfree(meye.grab_fbuffer, gbuffers*gbufsize);
+		vfree(meye.grab_fbuffer);
 		meye.grab_fbuffer = NULL;
 	}
 
Index: linux-2.6/drivers/media/video/ov511.c
===================================================================
--- linux-2.6.orig/drivers/media/video/ov511.c
+++ linux-2.6/drivers/media/video/ov511.c
@@ -11,7 +11,6 @@
  * Original SAA7111A code by Dave Perks <dperks@ibm.net>
  * URB error messages from pwc driver by Nemosoft
  * generic_ioctl() code from videodev.c by Gerd Knorr and Alan Cox
- * Memory management (rvmalloc) code from bttv driver, by Gerd Knorr and others
  *
  * Based on the Linux CPiA driver written by Peter Pregler,
  * Scott J. Bertin and Johannes Erdfelt.
@@ -310,48 +309,6 @@ static struct symbolic_list urb_errlist[
 };
 
 /**********************************************************************
- * Memory management
- **********************************************************************/
-static void *
-rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32(size);
-	if (!mem)
-		return NULL;
-
-	memset(mem, 0, size); /* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-	while (size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	return mem;
-}
-
-static void
-rvfree(void *mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	adr = (unsigned long) mem;
-	while ((long) size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree(mem);
-}
-
-/**********************************************************************
  *
  * Register I/O
  *
@@ -3791,8 +3748,7 @@ ov51x_do_dealloc(struct usb_ov511 *ov)
 	PDEBUG(4, "entered");
 
 	if (ov->fbuf) {
-		rvfree(ov->fbuf, OV511_NUMFRAMES
-		       * MAX_DATA_SIZE(ov->maxwidth, ov->maxheight));
+		vfree(ov->fbuf);
 		ov->fbuf = NULL;
 	}
 
@@ -3837,7 +3793,7 @@ ov51x_alloc(struct usb_ov511 *ov)
 	if (ov->buf_state == BUF_ALLOCATED)
 		goto out;
 
-	ov->fbuf = rvmalloc(data_bufsize);
+	ov->fbuf = vmalloc_32_user(data_bufsize);
 	if (!ov->fbuf)
 		goto error;
 
Index: linux-2.6/drivers/media/video/pwc/pwc-if.c
===================================================================
--- linux-2.6.orig/drivers/media/video/pwc/pwc-if.c
+++ linux-2.6/drivers/media/video/pwc/pwc-if.c
@@ -213,47 +213,6 @@ static inline unsigned long kvirt_to_pa(
 	return ret;
 }
 
-static void * rvmalloc(unsigned long size)
-{
-	void * mem;
-	unsigned long adr;
-
-	size=PAGE_ALIGN(size);
-	mem=vmalloc_32(size);
-	if (mem)
-	{
-		memset(mem, 0, size); /* Clear the ram out, no junk to the user */
-		adr=(unsigned long) mem;
-		while (size > 0)
-		{
-			SetPageReserved(vmalloc_to_page((void *)adr));
-			adr+=PAGE_SIZE;
-			size-=PAGE_SIZE;
-		}
-	}
-	return mem;
-}
-
-static void rvfree(void * mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (mem)
-	{
-		adr=(unsigned long) mem;
-		while ((long) size > 0)
-		{
-			ClearPageReserved(vmalloc_to_page((void *)adr));
-			adr+=PAGE_SIZE;
-			size-=PAGE_SIZE;
-		}
-		vfree(mem);
-	}
-}
-
-
-
-
 static int pwc_allocate_buffers(struct pwc_device *pdev)
 {
 	int i;
@@ -335,7 +294,7 @@ static int pwc_allocate_buffers(struct p
 	pdev->decompress_data = kbuf;
 
 	/* Allocate image buffer; double buffer for mmap() */
-	kbuf = rvmalloc(default_mbufs * pdev->len_per_image);
+	kbuf = vmalloc_32_user(default_mbufs * pdev->len_per_image);
 	if (kbuf == NULL) {
 		Err("Failed to allocate image buffer(s). needed (%d)\n",default_mbufs * pdev->len_per_image);
 		return -ENOMEM;
@@ -400,7 +359,7 @@ static void pwc_free_buffers(struct pwc_
 	/* Release image buffers */
 	if (pdev->image_data != NULL) {
 		Trace(TRACE_MEMORY, "Freeing image buffer at %p.\n", pdev->image_data);
-		rvfree(pdev->image_data, default_mbufs * pdev->len_per_image);
+		vfree(pdev->image_data);
 	}
 	pdev->image_data = NULL;
 
Index: linux-2.6/drivers/media/video/se401.c
===================================================================
--- linux-2.6.orig/drivers/media/video/se401.c
+++ linux-2.6/drivers/media/video/se401.c
@@ -60,50 +60,6 @@ module_param(video_nr, int, 0);
 static struct usb_driver se401_driver;
 
 
-/**********************************************************************
- *
- * Memory management
- *
- **********************************************************************/
-static void *rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32(size);
-	if (!mem)
-		return NULL;
-
-	memset(mem, 0, size); /* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-	while (size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	return mem;
-}
-
-static void rvfree(void *mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	adr = (unsigned long) mem;
-	while ((long) size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree(mem);
-}
-
-
-
 /****************************************************************************
  *
  * se401 register read/write functions
@@ -907,7 +863,7 @@ static int se401_open(struct inode *inod
 
 	if (se401->user)
 		return -EBUSY;
-	se401->fbuf = rvmalloc(se401->maxframesize * SE401_NUMFRAMES);
+	se401->fbuf = vmalloc_32_user(se401->maxframesize * SE401_NUMFRAMES);
 	if (se401->fbuf)
 		file->private_data = dev;
 	else
@@ -923,7 +879,7 @@ static int se401_close(struct inode *ino
 	struct usb_se401 *se401 = (struct usb_se401 *)dev;
 	int i;
 
-	rvfree(se401->fbuf, se401->maxframesize * SE401_NUMFRAMES);
+	vfree(se401->fbuf);
 	if (se401->removed) {
 		usb_se401_remove_disconnected(se401);
 		info("device unregistered");
Index: linux-2.6/drivers/media/video/stv680.c
===================================================================
--- linux-2.6.orig/drivers/media/video/stv680.c
+++ linux-2.6/drivers/media/video/stv680.c
@@ -100,62 +100,6 @@ module_param(swapRGB_on, int, 0);
 MODULE_PARM_DESC (swapRGB_on, "Red/blue swap: 1=always, 0=auto, -1=never");
 module_param(video_nr, int, 0);
 
-/********************************************************************
- *
- * Memory management
- *
- * This is a shameless copy from the USB-cpia driver (linux kernel
- * version 2.3.29 or so, I have no idea what this code actually does ;).
- * Actually it seems to be a copy of a shameless copy of the bttv-driver.
- * Or that is a copy of a shameless copy of ... (To the powers: is there
- * no generic kernel-function to do this sort of stuff?)
- *
- * Yes, it was a shameless copy from the bttv-driver. IIRC, Alan says
- * there will be one, but apparentely not yet -jerdfelt
- *
- * So I copied it again for the ov511 driver -claudio
- *
- * Same for the se401 driver -Jeroen
- *
- * And the STV0680 driver - Kevin
- ********************************************************************/
-static void *rvmalloc (unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32 (size);
-	if (!mem)
-		return NULL;
-
-	memset (mem, 0, size);	/* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-	while (size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	return mem;
-}
-
-static void rvfree (void *mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	adr = (unsigned long) mem;
-	while ((long) size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree (mem);
-}
-
-
 /*********************************************************************
  * pencam read/write functions
  ********************************************************************/
@@ -1037,9 +981,10 @@ static int stv_open (struct inode *inode
 	err = stv_init (stv680);	/* main initialization routine for camera */
 
 	if (err >= 0) {
-		stv680->fbuf = rvmalloc (stv680->maxframesize * STV680_NUMFRAMES);
+		stv680->fbuf = vmalloc_32_user(stv680->maxframesize *
+							STV680_NUMFRAMES);
 		if (!stv680->fbuf) {
-			PDEBUG (0, "STV(e): Could not rvmalloc frame bufer");
+			PDEBUG (0, "STV(e): Could not vmalloc frame bufer");
 			err = -ENOMEM;
 		}
 		file->private_data = dev;
@@ -1064,7 +1009,7 @@ static int stv_close (struct inode *inod
 	if ((i = stv_stop_video (stv680)) < 0)
 		PDEBUG (1, "STV(e): stop_video failed in stv_close");
 
-	rvfree (stv680->fbuf, stv680->maxframesize * STV680_NUMFRAMES);
+	vfree(stv680->fbuf);
 	stv680->user = 0;
 
 	if (stv680->removed) {
Index: linux-2.6/drivers/media/video/usbvideo/usbvideo.c
===================================================================
--- linux-2.6.orig/drivers/media/video/usbvideo/usbvideo.c
+++ linux-2.6/drivers/media/video/usbvideo/usbvideo.c
@@ -57,46 +57,6 @@ static int usbvideo_NewFrame(struct uvd 
 static void usbvideo_SoftwareContrastAdjustment(struct uvd *uvd,
 						struct usbvideo_frame *frame);
 
-/*******************************/
-/* Memory management functions */
-/*******************************/
-static void *usbvideo_rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32(size);
-	if (!mem)
-		return NULL;
-
-	memset(mem, 0, size); /* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-	while (size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	return mem;
-}
-
-static void usbvideo_rvfree(void *mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	adr = (unsigned long) mem;
-	while ((long) size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree(mem);
-}
-
 static void RingQueue_Initialize(struct RingQueue *rq)
 {
 	assert(rq != NULL);
@@ -120,7 +80,7 @@ static void RingQueue_Allocate(struct Ri
 
 	rq->length = rqLen;
 	rq->ri = rq->wi = 0;
-	rq->queue = usbvideo_rvmalloc(rq->length);
+	rq->queue = vmalloc_32_user(rq->length);
 	assert(rq->queue != NULL);
 }
 
@@ -135,7 +95,7 @@ static void RingQueue_Free(struct RingQu
 {
 	assert(rq != NULL);
 	if (RingQueue_IsAllocated(rq)) {
-		usbvideo_rvfree(rq->queue, rq->length);
+		vfree(rq->queue);
 		rq->queue = NULL;
 		rq->length = 0;
 	}
@@ -1122,7 +1082,7 @@ static int usbvideo_v4l_open(struct inod
 
 		/* Allocate memory for the frame buffers */
 		uvd->fbuf_size = USBVIDEO_NUMFRAMES * uvd->max_frame_size;
-		uvd->fbuf = usbvideo_rvmalloc(uvd->fbuf_size);
+		uvd->fbuf = vmalloc_32_user(uvd->fbuf_size);
 		RingQueue_Allocate(&uvd->dp, RING_QUEUE_SIZE);
 		if ((uvd->fbuf == NULL) ||
 		    (!RingQueue_IsAllocated(&uvd->dp))) {
@@ -1151,7 +1111,7 @@ static int usbvideo_v4l_open(struct inod
 		if (errCode != 0) {
 			/* Have to free all that memory */
 			if (uvd->fbuf != NULL) {
-				usbvideo_rvfree(uvd->fbuf, uvd->fbuf_size);
+				vfree(uvd->fbuf);
 				uvd->fbuf = NULL;
 			}
 			RingQueue_Free(&uvd->dp);
@@ -1219,7 +1179,7 @@ static int usbvideo_v4l_close(struct ino
 
 	mutex_lock(&uvd->lock);
 	GET_CALLBACK(uvd, stopDataPump)(uvd);
-	usbvideo_rvfree(uvd->fbuf, uvd->fbuf_size);
+	vfree(uvd->fbuf);
 	uvd->fbuf = NULL;
 	RingQueue_Free(&uvd->dp);
 
Index: linux-2.6/drivers/media/video/usbvideo/vicam.c
===================================================================
--- linux-2.6.orig/drivers/media/video/usbvideo/vicam.c
+++ linux-2.6/drivers/media/video/usbvideo/vicam.c
@@ -352,50 +352,6 @@ static unsigned char setup5[] = {
 	0x46, 0x05, 0x6C, 0x05, 0x00, 0x00
 };
 
-/* rvmalloc / rvfree copied from usbvideo.c
- *
- * Not sure why these are not yet non-statics which I can reference through
- * usbvideo.h the same as it is in 2.4.20.  I bet this will get fixed sometime
- * in the future.
- *
-*/
-static void *rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32(size);
-	if (!mem)
-		return NULL;
-
-	memset(mem, 0, size); /* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-	while (size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	return mem;
-}
-
-static void rvfree(void *mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	adr = (unsigned long) mem;
-	while ((long) size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree(mem);
-}
-
 struct vicam_camera {
 	u16 shutter_speed;	// capture shutter speed
 	u16 gain;		// capture gain
@@ -783,7 +739,7 @@ vicam_open(struct inode *inode, struct f
 		return -ENOMEM;
 	}
 
-	cam->framebuf = rvmalloc(VICAM_MAX_FRAME_SIZE * VICAM_FRAMES);
+	cam->framebuf = vmalloc_32_user(VICAM_MAX_FRAME_SIZE * VICAM_FRAMES);
 	if (!cam->framebuf) {
 		kfree(cam->raw_image);
 		return -ENOMEM;
@@ -792,7 +748,7 @@ vicam_open(struct inode *inode, struct f
 	cam->cntrlbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
 	if (!cam->cntrlbuf) {
 		kfree(cam->raw_image);
-		rvfree(cam->framebuf, VICAM_MAX_FRAME_SIZE * VICAM_FRAMES);
+		vfree(cam->framebuf);
 		return -ENOMEM;
 	}
 
@@ -830,7 +786,7 @@ vicam_close(struct inode *inode, struct 
 	set_camera_power(cam, 0);
 
 	kfree(cam->raw_image);
-	rvfree(cam->framebuf, VICAM_MAX_FRAME_SIZE * VICAM_FRAMES);
+	vfree(cam->framebuf);
 	kfree(cam->cntrlbuf);
 
 	mutex_lock(&cam->cam_lock);
Index: linux-2.6/drivers/media/video/w9968cf.c
===================================================================
--- linux-2.6.orig/drivers/media/video/w9968cf.c
+++ linux-2.6/drivers/media/video/w9968cf.c
@@ -449,8 +449,6 @@ static int w9968cf_i2c_control(struct i2
 			       unsigned long arg);
 
 /* Memory management */
-static void* rvmalloc(unsigned long size);
-static void rvfree(void *mem, unsigned long size);
 static void w9968cf_deallocate_memory(struct w9968cf_device*);
 static int  w9968cf_allocate_memory(struct w9968cf_device*);
 
@@ -593,50 +591,6 @@ static struct w9968cf_symbolic_list urb_
 	{ -1, NULL }
 };
 
-
-
-/****************************************************************************
- * Memory management functions                                              *
- ****************************************************************************/
-static void* rvmalloc(unsigned long size)
-{
-	void* mem;
-	unsigned long adr;
-
-	size = PAGE_ALIGN(size);
-	mem = vmalloc_32(size);
-	if (!mem)
-		return NULL;
-
-	memset(mem, 0, size); /* Clear the ram out, no junk to the user */
-	adr = (unsigned long) mem;
-	while (size > 0) {
-		SetPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	return mem;
-}
-
-
-static void rvfree(void* mem, unsigned long size)
-{
-	unsigned long adr;
-
-	if (!mem)
-		return;
-
-	adr = (unsigned long) mem;
-	while ((long) size > 0) {
-		ClearPageReserved(vmalloc_to_page((void *)adr));
-		adr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-	vfree(mem);
-}
-
-
 /*--------------------------------------------------------------------------
   Deallocate previously allocated memory.
   --------------------------------------------------------------------------*/
@@ -652,19 +606,19 @@ static void w9968cf_deallocate_memory(st
 
 	/* Free temporary frame buffer */
 	if (cam->frame_tmp.buffer) {
-		rvfree(cam->frame_tmp.buffer, cam->frame_tmp.size);
+		vfree(cam->frame_tmp.buffer);
 		cam->frame_tmp.buffer = NULL;
 	}
 
 	/* Free helper buffer */
 	if (cam->frame_vpp.buffer) {
-		rvfree(cam->frame_vpp.buffer, cam->frame_vpp.size);
+		vfree(cam->frame_vpp.buffer);
 		cam->frame_vpp.buffer = NULL;
 	}
 
 	/* Free video frame buffers */
 	if (cam->frame[0].buffer) {
-		rvfree(cam->frame[0].buffer, cam->nbuffers*cam->frame[0].size);
+		vfree(cam->frame[0].buffer);
 		cam->frame[0].buffer = NULL;
 	}
 
@@ -711,7 +665,7 @@ static int w9968cf_allocate_memory(struc
 	}
 
 	/* Allocate memory for the temporary frame buffer */
-	if (!(cam->frame_tmp.buffer = rvmalloc(hw_bufsize))) {
+	if (!(cam->frame_tmp.buffer = vmalloc_32_user(hw_bufsize))) {
 		DBG(1, "Couldn't allocate memory for the temporary "
 		       "video frame buffer (%lu bytes)", hw_bufsize)
 		return -ENOMEM;
@@ -721,7 +675,7 @@ static int w9968cf_allocate_memory(struc
 
 	/* Allocate memory for the helper buffer */
 	if (w9968cf_vpp) {
-		if (!(cam->frame_vpp.buffer = rvmalloc(vpp_bufsize))) {
+		if (!(cam->frame_vpp.buffer = vmalloc_32_user(vpp_bufsize))) {
 			DBG(1, "Couldn't allocate memory for the helper buffer"
 			       " (%lu bytes)", vpp_bufsize)
 			return -ENOMEM;
@@ -733,7 +687,7 @@ static int w9968cf_allocate_memory(struc
 	/* Allocate memory for video frame buffers */
 	cam->nbuffers = cam->max_buffers;
 	while (cam->nbuffers >= 2) {
-		if ((buff = rvmalloc(cam->nbuffers * vpp_bufsize)))
+		if ((buff = vmalloc_32_user(cam->nbuffers * vpp_bufsize)))
 			break;
 		else
 			cam->nbuffers--;
Index: linux-2.6/include/linux/vmalloc.h
===================================================================
--- linux-2.6.orig/include/linux/vmalloc.h
+++ linux-2.6/include/linux/vmalloc.h
@@ -32,9 +32,11 @@ struct vm_struct {
  *	Highlevel APIs for driver use
  */
 extern void *vmalloc(unsigned long size);
+extern void *vmalloc_user(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
 extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
+extern void *vmalloc_32_user(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
 				pgprot_t prot);
Index: linux-2.6/kernel/power/snapshot.c
===================================================================
--- linux-2.6.orig/kernel/power/snapshot.c
+++ linux-2.6/kernel/power/snapshot.c
@@ -89,10 +89,9 @@ static int save_highmem_zone(struct zone
 			continue;
 		page = pfn_to_page(pfn);
 		/*
-		 * This condition results from rvmalloc() sans vmalloc_32()
-		 * and architectural memory reservations. This should be
-		 * corrected eventually when the cases giving rise to this
-		 * are better understood.
+		 * This condition results from architectural memory
+		 * reservations. This should be corrected eventually when the
+		 * cases giving rise to this are better understood.
 		 */
 		if (PageReserved(page))
 			continue;
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -512,11 +512,24 @@ EXPORT_SYMBOL(__vmalloc);
  */
 void *vmalloc(unsigned long size)
 {
-       return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
+	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 }
 EXPORT_SYMBOL(vmalloc);
 
 /**
+ *	vmalloc_user  -  allocate virtually contiguous memory which has
+ *			   been zeroed so it can be mapped to userspace without
+ *			   leaking data.
+ *
+ *	@size:		allocation size
+ */
+void *vmalloc_user(unsigned long size)
+{
+	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(vmalloc_user);
+
+/**
  *	vmalloc_node  -  allocate memory on a specific node
  *
  *	@size:		allocation size
@@ -530,7 +543,7 @@ EXPORT_SYMBOL(vmalloc);
  */
 void *vmalloc_node(unsigned long size, int node)
 {
-       return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, node);
+	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, node);
 }
 EXPORT_SYMBOL(vmalloc_node);
 
@@ -570,6 +583,19 @@ void *vmalloc_32(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc_32);
 
+/**
+ *	vmalloc_32_user  -  allocate virtually contiguous memory (32bit
+ *			      addressable) which is zeroed so it can be
+ *			      mapped to userspace without leaking data.
+ *
+ *	@size:		allocation size
+ */
+void *vmalloc_32_user(unsigned long size)
+{
+	return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+}
+EXPORT_SYMBOL(vmalloc_32_user);
+
 long vread(char *buf, char *addr, unsigned long count)
 {
 	struct vm_struct *tmp;
Index: linux-2.6/arch/ia64/kernel/perfmon.c
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/perfmon.c
+++ linux-2.6/arch/ia64/kernel/perfmon.c
@@ -558,17 +558,6 @@ pfm_clear_task_notify(void)
 	clear_thread_flag(TIF_NOTIFY_RESUME);
 }
 
-static inline void
-pfm_reserve_page(unsigned long a)
-{
-	SetPageReserved(vmalloc_to_page((void *)a));
-}
-static inline void
-pfm_unreserve_page(unsigned long a)
-{
-	ClearPageReserved(vmalloc_to_page((void*)a));
-}
-
 static inline unsigned long
 pfm_protect_ctx_ctxsw(pfm_context_t *x)
 {
@@ -799,45 +788,6 @@ pfm_reset_msgq(pfm_context_t *ctx)
 	DPRINT(("ctx=%p msgq reset\n", ctx));
 }
 
-static void *
-pfm_rvmalloc(unsigned long size)
-{
-	void *mem;
-	unsigned long addr;
-
-	size = PAGE_ALIGN(size);
-	mem  = vmalloc(size);
-	if (mem) {
-		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
-		memset(mem, 0, size);
-		addr = (unsigned long)mem;
-		while (size > 0) {
-			pfm_reserve_page(addr);
-			addr+=PAGE_SIZE;
-			size-=PAGE_SIZE;
-		}
-	}
-	return mem;
-}
-
-static void
-pfm_rvfree(void *mem, unsigned long size)
-{
-	unsigned long addr;
-
-	if (mem) {
-		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
-		addr = (unsigned long) mem;
-		while ((long) size > 0) {
-			pfm_unreserve_page(addr);
-			addr+=PAGE_SIZE;
-			size-=PAGE_SIZE;
-		}
-		vfree(mem);
-	}
-	return;
-}
-
 static pfm_context_t *
 pfm_context_alloc(void)
 {
@@ -1455,7 +1405,7 @@ pfm_free_smpl_buffer(pfm_context_t *ctx)
 	/*
 	 * free the buffer
 	 */
-	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
+	vfree(ctx->ctx_smpl_hdr);
 
 	ctx->ctx_smpl_hdr  = NULL;
 	ctx->ctx_smpl_size = 0UL;
@@ -2106,12 +2056,14 @@ doit:
 	 * All memory free operations (especially for vmalloc'ed memory)
 	 * MUST be done with interrupts ENABLED.
 	 */
-	if (smpl_buf_addr)  pfm_rvfree(smpl_buf_addr, smpl_buf_size);
+	if (smpl_buf_addr)
+		vfree(smpl_buf_addr);
 
 	/*
 	 * return the memory used by the context
 	 */
-	if (free_possible) pfm_context_free(ctx);
+	if (free_possible)
+		pfm_context_free(ctx);
 
 	return 0;
 }
@@ -2270,9 +2222,9 @@ pfm_smpl_buffer_alloc(struct task_struct
 	/*
 	 * We do the easy to undo allocations first.
  	 *
-	 * pfm_rvmalloc(), clears the buffer, so there is no leak
+	 * vmalloc_user(), clears the buffer, so there is no leak
 	 */
-	smpl_buf = pfm_rvmalloc(size);
+	smpl_buf = vmalloc_user(size);
 	if (smpl_buf == NULL) {
 		DPRINT(("Can't allocate sampling buffer\n"));
 		return -ENOMEM;
@@ -2352,7 +2304,7 @@ pfm_smpl_buffer_alloc(struct task_struct
 error:
 	kmem_cache_free(vm_area_cachep, vma);
 error_kmem:
-	pfm_rvfree(smpl_buf, size);
+	vfree(smpl_buf);
 
 	return -ENOMEM;
 }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [patch 4/5] mm: extra remap_vmalloc_range check
From: Nick Piggin @ 2006-04-20 17:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel, Nick Piggin, Linux Memory Management, Hugh Dickins
In-Reply-To: <20060228202202.14172.60409.sendpatchset@linux.site>

Add a flag to ensure all remap_vmalloc_range memory has been allocated
with the vmalloc _user variants, so data does not get leaked.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/vmalloc.h
===================================================================
--- linux-2.6.orig/include/linux/vmalloc.h
+++ linux-2.6/include/linux/vmalloc.h
@@ -8,6 +8,7 @@
 #define VM_IOREMAP	0x00000001	/* ioremap() and friends */
 #define VM_ALLOC	0x00000002	/* vmalloc() */
 #define VM_MAP		0x00000004	/* vmap()ed pages */
+#define VM_USERMAP	0x00000008	/* suitable for remap_vmalloc_range */
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
Index: linux-2.6/drivers/media/video/et61x251/et61x251_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/et61x251/et61x251_core.c
+++ linux-2.6/drivers/media/video/et61x251/et61x251_core.c
@@ -133,7 +133,8 @@ et61x251_request_buffers(struct et61x251
 
 	cam->nbuffers = count;
 	while (cam->nbuffers > 0) {
-		if ((buff = vmalloc_32(cam->nbuffers * PAGE_ALIGN(imagesize))))
+		if ((buff = vmalloc_32_user(cam->nbuffers *
+						PAGE_ALIGN(imagesize))))
 			break;
 		cam->nbuffers--;
 	}
Index: linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/sn9c102/sn9c102_core.c
+++ linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
@@ -149,7 +149,7 @@ sn9c102_request_buffers(struct sn9c102_d
 
 	cam->nbuffers = count;
 	while (cam->nbuffers > 0) {
-		if ((buff = vmalloc_32(cam->nbuffers * PAGE_ALIGN(imagesize))))
+		if ((buff = vmalloc_32_user(cam->nbuffers * PAGE_ALIGN(imagesize))))
 			break;
 		cam->nbuffers--;
 	}
Index: linux-2.6/drivers/media/video/zc0301/zc0301_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/zc0301/zc0301_core.c
+++ linux-2.6/drivers/media/video/zc0301/zc0301_core.c
@@ -136,7 +136,7 @@ zc0301_request_buffers(struct zc0301_dev
 
 	cam->nbuffers = count;
 	while (cam->nbuffers > 0) {
-		if ((buff = vmalloc_32(cam->nbuffers * PAGE_ALIGN(imagesize))))
+		if ((buff = vmalloc_32_user(cam->nbuffers * PAGE_ALIGN(imagesize))))
 			break;
 		cam->nbuffers--;
 	}
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -525,7 +525,15 @@ EXPORT_SYMBOL(vmalloc);
  */
 void *vmalloc_user(unsigned long size)
 {
-	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+	struct vm_struct *area;
+	void *ret;
+
+	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+	area = find_vm_area(ret);
+	BUG_ON(!area);
+	area->flags |= VM_USERMAP;
+
+	return ret;
 }
 EXPORT_SYMBOL(vmalloc_user);
 
@@ -592,7 +600,15 @@ EXPORT_SYMBOL(vmalloc_32);
  */
 void *vmalloc_32_user(unsigned long size)
 {
-	return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+	struct vm_struct *area;
+	void *ret;
+
+	ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+	area = find_vm_area(ret);
+	BUG_ON(!area);
+	area->flags |= VM_USERMAP;
+
+	return ret;
 }
 EXPORT_SYMBOL(vmalloc_32_user);
 
@@ -700,6 +716,9 @@ int remap_vmalloc_range(struct vm_area_s
 	if (!area)
 		return -EINVAL;
 
+	if (!(area->flags & VM_USERMAP))
+		return -EINVAL;
+
 	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
 		return -EINVAL;
 
Index: linux-2.6/drivers/media/video/em28xx/em28xx-core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/em28xx/em28xx-core.c
+++ linux-2.6/drivers/media/video/em28xx/em28xx-core.c
@@ -79,10 +79,8 @@ u32 em28xx_request_buffers(struct em28xx
 
 	dev->num_frames = count;
 	while (dev->num_frames > 0) {
-		if ((buff = vmalloc_32(dev->num_frames * imagesize))) {
-			memset(buff, 0, dev->num_frames * imagesize);
+		if ((buff = vmalloc_32_user(dev->num_frames * imagesize)))
 			break;
-		}
 		dev->num_frames--;
 	}
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [patch 5/5] drivers: leave vm_flags alone
From: Nick Piggin @ 2006-04-20 17:07 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel, Nick Piggin, Linux Memory Management, Hugh Dickins
In-Reply-To: <20060228202202.14172.60409.sendpatchset@linux.site>

Get rid of some vm_flags twiddling from driver code. The net result of
this + the last 4 patches is that all converted remap_vmalloc_range
memory can support get_user_pages - do we want that? Can't hurt, can it?

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/drivers/media/video/em28xx/em28xx-video.c
===================================================================
--- linux-2.6.orig/drivers/media/video/em28xx/em28xx-video.c
+++ linux-2.6/drivers/media/video/em28xx/em28xx-video.c
@@ -620,10 +620,6 @@ static int em28xx_v4l2_mmap(struct file 
 		return -EINVAL;
 	}
 
-	/* VM_IO is eventually going to replace PageReserved altogether */
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
-
 	if (remap_vmalloc_range(vma, dev->frame[i].bufmem, 0)) {
 		em28xx_videodbg("mmap: remap_vmalloc_range failed\n");
 		mutex_unlock(&dev->fileop_lock);
Index: linux-2.6/drivers/media/video/et61x251/et61x251_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/et61x251/et61x251_core.c
+++ linux-2.6/drivers/media/video/et61x251/et61x251_core.c
@@ -1499,9 +1499,6 @@ static int et61x251_mmap(struct file* fi
 		return -EINVAL;
 	}
 
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;
-
 	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
 		mutex_unlock(&cam->fileop_mutex);
 		return -EAGAIN;
Index: linux-2.6/drivers/media/video/meye.c
===================================================================
--- linux-2.6.orig/drivers/media/video/meye.c
+++ linux-2.6/drivers/media/video/meye.c
@@ -1689,8 +1689,6 @@ static int meye_mmap(struct file *file, 
 	}
 
 	vma->vm_ops = &meye_vm_ops;
-	vma->vm_flags &= ~VM_IO;	/* not I/O memory */
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 	vma->vm_private_data = (void *) (offset / gbufsize);
 	meye_vm_open(vma);
 
Index: linux-2.6/drivers/media/video/pwc/pwc-if.c
===================================================================
--- linux-2.6.orig/drivers/media/video/pwc/pwc-if.c
+++ linux-2.6/drivers/media/video/pwc/pwc-if.c
@@ -1567,8 +1567,6 @@ static int pwc_video_mmap(struct file *f
 				vma->vm_start, vma->vm_end - vma->vm_start);
 	pdev = vdev->priv;
 
-	vma->vm_flags |= VM_IO;
-
 	if (remap_vmalloc_range(vma, pdev->image_data, 0))
 		return -EAGAIN;
 
Index: linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/sn9c102/sn9c102_core.c
+++ linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
@@ -1762,9 +1762,6 @@ static int sn9c102_mmap(struct file* fil
 		return -EINVAL;
 	}
 
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;
-
 	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
 		mutex_unlock(&cam->fileop_mutex);
 		return -EAGAIN;
Index: linux-2.6/drivers/media/video/zc0301/zc0301_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/zc0301/zc0301_core.c
+++ linux-2.6/drivers/media/video/zc0301/zc0301_core.c
@@ -963,9 +963,6 @@ static int zc0301_mmap(struct file* filp
 		return -EINVAL;
 	}
 
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;
-
 	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
 		mutex_unlock(&cam->fileop_mutex);
 		return -EAGAIN;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [patch 1/5] mm: remap_vmalloc_range
From: Christoph Hellwig @ 2006-04-20 17:22 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, Linux Kernel, Linux Memory Management,
	Hugh Dickins
In-Reply-To: <20060228202212.14172.59536.sendpatchset@linux.site>

On Thu, Apr 20, 2006 at 07:06:18PM +0200, Nick Piggin wrote:
> Add a remap_vmalloc_range and get rid of as many remap_pfn_range and
> vm_insert_page loops as possible.
> 
> remap_vmalloc_range can do a whole lot of nice range checking even
> if the caller gets it wrong (which it looks like one or two do).

This looks very nice, thanks!  Although it might be better to split it
into one patch to introduce remap_vmalloc_range and various patches to
switch over one subsystem for merging purposes.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [patch 2/5] mm: deprecate vmalloc_to_pfn
From: Christoph Hellwig @ 2006-04-20 17:22 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, Linux Kernel, Linux Memory Management,
	Hugh Dickins
In-Reply-To: <20060228202223.14172.21110.sendpatchset@linux.site>

On Thu, Apr 20, 2006 at 07:06:30PM +0200, Nick Piggin wrote:
> Deprecate vmalloc_to_pfn.

I don't think there's any point to even keep it.  There's a trivial replacement.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [patch 6/5] mm: find_vm_area locking fixes
From: Nick Piggin @ 2006-04-20 17:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Kernel, Linux Memory Management, Hugh Dickins
In-Reply-To: <20060228202202.14172.60409.sendpatchset@linux.site>

Bite the bullet and try to get the locking correct the first^Wsecond time.

(subtle bugs like area->flags modification not having the right memory
consistency could be a nightmare to track down)

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -256,16 +256,15 @@ struct vm_struct *get_vm_area_node(unsig
 	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
 }
 
-static struct vm_struct *find_vm_area(void *addr)
+/* Caller must hold vmlist_lock */
+static struct vm_struct *__find_vm_area(void *addr)
 {
 	struct vm_struct *tmp;
 
-	write_lock(&vmlist_lock);
 	for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
 		 if (tmp->addr == addr)
 			break;
 	}
-	write_unlock(&vmlist_lock);
 
 	return tmp;
 }
@@ -529,9 +528,10 @@ void *vmalloc_user(unsigned long size)
 	void *ret;
 
 	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
-	area = find_vm_area(ret);
-	BUG_ON(!area);
+	write_lock(&vmlist_lock);
+	area = __find_vm_area(ret);
 	area->flags |= VM_USERMAP;
+	write_unlock(&vmlist_lock);
 
 	return ret;
 }
@@ -604,9 +604,10 @@ void *vmalloc_32_user(unsigned long size
 	void *ret;
 
 	ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
-	area = find_vm_area(ret);
-	BUG_ON(!area);
+	write_lock(&vmlist_lock);
+	area = __find_vm_area(ret);
 	area->flags |= VM_USERMAP;
+	write_unlock(&vmlist_lock);
 
 	return ret;
 }
@@ -712,15 +713,17 @@ int remap_vmalloc_range(struct vm_area_s
 	if ((PAGE_SIZE-1) & (unsigned long)addr)
 		return -EINVAL;
 
-	area = find_vm_area(addr);
+	read_lock(&vmlist_lock);
+	area = __find_vm_area(addr);
 	if (!area)
-		return -EINVAL;
+		goto out_einval_locked;
 
 	if (!(area->flags & VM_USERMAP))
-		return -EINVAL;
+		goto out_einval_locked;
 
 	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
-		return -EINVAL;
+		goto out_einval_locked;
+	read_unlock(&vmlist_lock);
 
 	addr = (void *)((unsigned long)addr + (pgoff << PAGE_SHIFT));
 	do {
@@ -738,6 +741,10 @@ int remap_vmalloc_range(struct vm_area_s
 	vma->vm_flags |= VM_RESERVED;
 
 	return ret;
+
+out_einval_locked:
+	read_unlock(&vmlist_lock);
+	return -EINVAL;
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [patch 1/5] mm: remap_vmalloc_range
From: Nick Piggin @ 2006-04-20 17:33 UTC (permalink / raw)
  To: Christoph Hellwig, Nick Piggin, Andrew Morton, Linux Kernel,
	Linux Memory Management, Hugh Dickins
In-Reply-To: <20060420172205.GC21659@infradead.org>

On Thu, Apr 20, 2006 at 06:22:05PM +0100, Christoph Hellwig wrote:
> On Thu, Apr 20, 2006 at 07:06:18PM +0200, Nick Piggin wrote:
> > Add a remap_vmalloc_range and get rid of as many remap_pfn_range and
> > vm_insert_page loops as possible.
> > 
> > remap_vmalloc_range can do a whole lot of nice range checking even
> > if the caller gets it wrong (which it looks like one or two do).
> 
> This looks very nice, thanks!

Thank you

> Although it might be better to split it
> into one patch to introduce remap_vmalloc_range and various patches to
> switch over one subsystem for merging purposes.

Sure, if anyone insists ;)

I tend to agree. I would tend to do it in just 2 patches
(1 for implementation, 1 for conversion) to make administrative
overheads smaller -- the conversions are small and very well
contained. Is there a good reason to split further?

Nick

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [patch 2/5] mm: deprecate vmalloc_to_pfn
From: Nick Piggin @ 2006-04-20 17:36 UTC (permalink / raw)
  To: Christoph Hellwig, Nick Piggin, Andrew Morton, Linux Kernel,
	Linux Memory Management, Hugh Dickins
In-Reply-To: <20060420172240.GD21659@infradead.org>

On Thu, Apr 20, 2006 at 06:22:40PM +0100, Christoph Hellwig wrote:
> On Thu, Apr 20, 2006 at 07:06:30PM +0200, Nick Piggin wrote:
> > Deprecate vmalloc_to_pfn.
> 
> I don't think there's any point to even keep it.  There's a trivial replacement.

It is exported, is the only thing. I tend to stick my head in the sand
with these matters, and try to go with whatever I think will help Andrew
merge it.

If nobody cares, I'd just as soon remove it completely.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* RE: [RFC] - Kernel text replication on IA64
From: Chen, Kenneth W @ 2006-04-20 17:48 UTC (permalink / raw)
  To: Luck, Tony, Jack Steiner; +Cc: linux-ia64, lee.schermerhorn, clameter, linux-mm
In-Reply-To: <20060420164111.GA18770@agluck-lia64.sc.intel.com>

Luck, Tony wrote on Thursday, April 20, 2006 9:41 AM
> On Thu, Apr 20, 2006 at 08:53:16AM -0500, Jack Steiner wrote:
> > Enabling replication reserves 1 additional DTLB entry for kernel code.
> > This reduces the number of DTLB entries that is available for user code.
> > There is the potential that this could impact some applications.
> > Additional measurements are still needed.
> 
> Ken's recent patch to free up the DTLB that is currently used for per-cpu
> data would mitigate this (though I'm sure he'll be unamused if I blow the
> 1.6% gain he saw on his transaction processing benchmark on this :-)

How much benefit is there to have readonly section replicated?  Do you really
have to use two DTRs - one to map the readonly and one to map rw?

What about just replicate text so we don't need to burn an extra DTR?

- Ken

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [patch 1/5] mm: remap_vmalloc_range
From: Nick Piggin @ 2006-04-20 18:09 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Kernel, Linux Memory Management, Hugh Dickins
In-Reply-To: <20060228202212.14172.59536.sendpatchset@linux.site>

Hotfix #1


Index: linux-2.6/drivers/media/video/usbvideo/vicam.c
===================================================================
--- linux-2.6.orig/drivers/media/video/usbvideo/vicam.c
+++ linux-2.6/drivers/media/video/usbvideo/vicam.c
@@ -1000,6 +1000,7 @@ vicam_mmap(struct file *file, struct vm_
 	 * It shouldn't have been, so let's try this check again -np
 	 */
 	 if (size > VICAM_FRAMES*VICAM_MAX_FRAME_SIZE)
+		return -EINVAL;
 
 	if (remap_vmalloc_range(vma, cam->framebuf, 0))
 		return -EAGAIN;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [patch][rfc] improve remap_vmalloc_range callers' return values
From: Nick Piggin @ 2006-04-20 18:14 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Linux Kernel, Linux Memory Management, Hugh Dickins,
	Stephane Eranian
In-Reply-To: <20060228202202.14172.60409.sendpatchset@linux.site>

Not directly related to the current patchset... but does anyone
see a reason why we shouldn't try to return saner values from
remap_vmalloc_range callers?

(This patch is slightly more involved for perfmon, so Stephane
CCed. It catches insert_vm_struct errors, and moves
remap_vmalloc_range below it so we needn't clean up
by unmapping stuff).

--
Index: linux-2.6/arch/ia64/kernel/perfmon.c
===================================================================
--- linux-2.6.orig/arch/ia64/kernel/perfmon.c
+++ linux-2.6/arch/ia64/kernel/perfmon.c
@@ -2199,6 +2199,7 @@ pfm_smpl_buffer_alloc(struct task_struct
 	struct vm_area_struct *vma = NULL;
 	unsigned long size;
 	void *smpl_buf;
+	int ret = -ENOMEM;
 
 
 	/*
@@ -2217,7 +2218,7 @@ pfm_smpl_buffer_alloc(struct task_struct
 	 * 	return -ENOMEM;
 	 */
 	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
-		return -ENOMEM;
+		goto out;
 
 	/*
 	 * We do the easy to undo allocations first.
@@ -2227,7 +2228,7 @@ pfm_smpl_buffer_alloc(struct task_struct
 	smpl_buf = vmalloc_user(size);
 	if (smpl_buf == NULL) {
 		DPRINT(("Can't allocate sampling buffer\n"));
-		return -ENOMEM;
+		goto out;
 	}
 
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
@@ -2267,7 +2268,6 @@ pfm_smpl_buffer_alloc(struct task_struct
 	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
 	if (vma->vm_start == 0UL) {
 		DPRINT(("Cannot find unmapped area for size %ld\n", size));
-		up_write(&task->mm->mmap_sem);
 		goto error;
 	}
 	vma->vm_end = vma->vm_start + size;
@@ -2275,23 +2275,24 @@ pfm_smpl_buffer_alloc(struct task_struct
 
 	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
 
-	/* can only be applied to current task, need to have the mm semaphore held when called */
-	if (remap_vmalloc_range(vma, smpl_buf, 0)) {
-		DPRINT(("Can't remap buffer\n"));
-		up_write(&task->mm->mmap_sem);
-		goto error;
-	}
-
 	/*
 	 * now insert the vma in the vm list for the process, must be
 	 * done with mmap lock held
 	 */
-	insert_vm_struct(mm, vma);
+	if ((ret = insert_vm_struct(mm, vma)) {
+		DPRINT(("Can't insert vma\n"));
+		goto error;
+	}
+
+	/* can only be applied to current task, need to have the mm semaphore held when called */
+	if ((ret = remap_vmalloc_range(vma, smpl_buf, 0))) {
+		DPRINT(("Can't remap buffer\n"));
+		goto error;
+	}
 
 	mm->total_vm  += size >> PAGE_SHIFT;
 	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
 							vma_pages(vma));
-	up_write(&task->mm->mmap_sem);
 
 	/*
 	 * keep track of user level virtual address
@@ -2299,14 +2300,17 @@ pfm_smpl_buffer_alloc(struct task_struct
 	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
 	*(unsigned long *)user_vaddr = vma->vm_start;
 
+	up_write(&task->mm->mmap_sem);
+
 	return 0;
 
 error:
+	up_write(&task->mm->mmap_sem);
 	kmem_cache_free(vm_area_cachep, vma);
 error_kmem:
 	vfree(smpl_buf);
-
-	return -ENOMEM;
+out:
+	return ret;
 }
 
 /*
Index: linux-2.6/drivers/media/video/cpia.c
===================================================================
--- linux-2.6.orig/drivers/media/video/cpia.c
+++ linux-2.6/drivers/media/video/cpia.c
@@ -3734,9 +3734,9 @@ static int cpia_mmap(struct file *file, 
 		}
 	}
 
-	if (remap_vmalloc_range(vma, cam->frame_buf, 0)) {
+	if ((retval = remap_vmalloc_range(vma, cam->frame_buf, 0))) {
 		mutex_unlock(&cam->busy_lock);
-		return -EAGAIN;
+		return retval;
 	}
 
 	DBG("cpia_mmap: %ld\n", size);
Index: linux-2.6/drivers/media/video/em28xx/em28xx-video.c
===================================================================
--- linux-2.6.orig/drivers/media/video/em28xx/em28xx-video.c
+++ linux-2.6/drivers/media/video/em28xx/em28xx-video.c
@@ -585,6 +585,7 @@ static int em28xx_v4l2_mmap(struct file 
 {
 	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
+	int ret;
 
 	struct em28xx *dev = filp->private_data;
 
@@ -593,21 +594,21 @@ static int em28xx_v4l2_mmap(struct file 
 
 	if (dev->state & DEV_DISCONNECTED) {
 		em28xx_videodbg("mmap: device not present\n");
-		mutex_unlock(&dev->fileop_lock);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out;
 	}
 
 	if (dev->state & DEV_MISCONFIGURED) {
 		em28xx_videodbg ("mmap: Device is misconfigured; close and "
 						"open it again\n");
-		mutex_unlock(&dev->fileop_lock);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	if (dev->io != IO_MMAP || !(vma->vm_flags & VM_WRITE) ||
 	    size != PAGE_ALIGN(dev->frame[0].buf.length)) {
-		mutex_unlock(&dev->fileop_lock);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	for (i = 0; i < dev->num_frames; i++) {
@@ -616,22 +617,23 @@ static int em28xx_v4l2_mmap(struct file 
 	}
 	if (i == dev->num_frames) {
 		em28xx_videodbg("mmap: user supplied mapping address is out of range\n");
-		mutex_unlock(&dev->fileop_lock);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (remap_vmalloc_range(vma, dev->frame[i].bufmem, 0)) {
+	if ((ret = remap_vmalloc_range(vma, dev->frame[i].bufmem, 0))) {
 		em28xx_videodbg("mmap: remap_vmalloc_range failed\n");
-		mutex_unlock(&dev->fileop_lock);
-		return -EAGAIN;
+		goto out;
 	}
 
 	vma->vm_ops = &em28xx_vm_ops;
 	vma->vm_private_data = &dev->frame[i];
 
 	em28xx_vm_open(vma);
+
+out:
 	mutex_unlock(&dev->fileop_lock);
-	return 0;
+	return ret;
 }
 
 /*
Index: linux-2.6/drivers/media/video/meye.c
===================================================================
--- linux-2.6.orig/drivers/media/video/meye.c
+++ linux-2.6/drivers/media/video/meye.c
@@ -1663,11 +1663,12 @@ static struct vm_operations_struct meye_
 static int meye_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	unsigned long size = vma->vm_end - vma->vm_start;
+	int ret;
 
 	mutex_lock(&meye.lock);
 	if (size > gbuffers * gbufsize) { /* XXX: should be size + vm_pgoff? */
-		mutex_unlock(&meye.lock);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 	if (!meye.grab_fbuffer) {
 		int i;
@@ -1676,24 +1677,23 @@ static int meye_mmap(struct file *file, 
 		meye.grab_fbuffer = vmalloc_32_user(gbuffers*gbufsize);
 		if (!meye.grab_fbuffer) {
 			printk(KERN_ERR "meye: v4l framebuffer allocation failed\n");
-			mutex_unlock(&meye.lock);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out;
 		}
 		for (i = 0; i < gbuffers; i++)
 			meye.vma_use_count[i] = 0;
 	}
 
-	if (remap_vmalloc_range(vma, meye.grab_fbuffer, vma->vm_pgoff)) {
-		mutex_unlock(&meye.lock);
-		return -EAGAIN;
-	}
+	if ((ret = remap_vmalloc_range(vma, meye.grab_fbuffer, vma->vm_pgoff)))
+		goto out;
 
 	vma->vm_ops = &meye_vm_ops;
 	vma->vm_private_data = (void *) (offset / gbufsize);
 	meye_vm_open(vma);
 
+out:
 	mutex_unlock(&meye.lock);
-	return 0;
+	return ret;
 }
 
 static struct file_operations meye_fops = {
Index: linux-2.6/drivers/media/video/ov511.c
===================================================================
--- linux-2.6.orig/drivers/media/video/ov511.c
+++ linux-2.6/drivers/media/video/ov511.c
@@ -4574,6 +4574,7 @@ ov51x_v4l1_mmap(struct file *file, struc
 	struct video_device *vdev = file->private_data;
 	unsigned long size  = vma->vm_end - vma->vm_start;
 	struct usb_ov511 *ov = video_get_drvdata(vdev);
+	int ret;
 
 	if (ov->dev == NULL)
 		return -EIO;
@@ -4588,13 +4589,10 @@ ov51x_v4l1_mmap(struct file *file, struc
 	if (mutex_lock_interruptible(&ov->lock))
 		return -EINTR;
 
-	if (remap_vmalloc_range(vma, ov->fbuf, 0)) {
-		mutex_unlock(&ov->lock);
-		return -EAGAIN;
-	}
+	ret = remap_vmalloc_range(vma, ov->fbuf, 0);
 
 	mutex_unlock(&ov->lock);
-	return 0;
+	return ret;
 }
 
 static struct file_operations ov511_fops = {
Index: linux-2.6/drivers/media/video/pwc/pwc-if.c
===================================================================
--- linux-2.6.orig/drivers/media/video/pwc/pwc-if.c
+++ linux-2.6/drivers/media/video/pwc/pwc-if.c
@@ -1567,10 +1567,7 @@ static int pwc_video_mmap(struct file *f
 				vma->vm_start, vma->vm_end - vma->vm_start);
 	pdev = vdev->priv;
 
-	if (remap_vmalloc_range(vma, pdev->image_data, 0))
-		return -EAGAIN;
-
-	return 0;
+	return remap_vmalloc_range(vma, pdev->image_data, 0);
 }
 
 /***************************************************************************/
Index: linux-2.6/drivers/media/video/se401.c
===================================================================
--- linux-2.6.orig/drivers/media/video/se401.c
+++ linux-2.6/drivers/media/video/se401.c
@@ -1110,24 +1110,24 @@ static int se401_mmap(struct file *file,
 	struct video_device *dev = file->private_data;
 	struct usb_se401 *se401 = (struct usb_se401 *)dev;
 	unsigned long size  = vma->vm_end-vma->vm_start;
+	int ret;
 
 	mutex_lock(&se401->lock);
 
 	if (se401->dev == NULL) {
-		mutex_unlock(&se401->lock);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 	if (size > (((SE401_NUMFRAMES * se401->maxframesize) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))) {
-		mutex_unlock(&se401->lock);
-		return -EINVAL;
-	}
-	if (remap_vmalloc_range(vma, se401->fbuf, 0)) {
-		mutex_unlock(&se401->lock);
-		return -EAGAIN;
+		ret = -EINVAL;
+		goto out;
 	}
-	mutex_unlock(&se401->lock);
 
-	return 0;
+	ret = remap_vmalloc_range(vma, se401->fbuf, 0);
+
+out:
+	mutex_unlock(&se401->lock);
+	return ret;
 }
 
 static struct file_operations se401_fops = {
Index: linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/sn9c102/sn9c102_core.c
+++ linux-2.6/drivers/media/video/sn9c102/sn9c102_core.c
@@ -1730,27 +1730,28 @@ static int sn9c102_mmap(struct file* fil
 	struct sn9c102_device* cam = video_get_drvdata(video_devdata(filp));
 	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
+	int ret;
 
 	if (mutex_lock_interruptible(&cam->fileop_mutex))
 		return -ERESTARTSYS;
 
 	if (cam->state & DEV_DISCONNECTED) {
 		DBG(1, "Device not present");
-		mutex_unlock(&cam->fileop_mutex);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out;
 	}
 
 	if (cam->state & DEV_MISCONFIGURED) {
 		DBG(1, "The camera is misconfigured. Close and open it "
 		       "again.");
-		mutex_unlock(&cam->fileop_mutex);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	if (cam->io != IO_MMAP || !(vma->vm_flags & VM_WRITE) ||
 	    size != PAGE_ALIGN(cam->frame[0].buf.length)) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	for (i = 0; i < cam->nbuffers; i++) {
@@ -1758,23 +1759,21 @@ static int sn9c102_mmap(struct file* fil
 			break;
 	}
 	if (i == cam->nbuffers) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EAGAIN;
-	}
+	if ((ret = remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)))
+		goto out;
 
 	vma->vm_ops = &sn9c102_vm_ops;
 	vma->vm_private_data = &cam->frame[i];
 
 	sn9c102_vm_open(vma);
 
+out:
 	mutex_unlock(&cam->fileop_mutex);
-
-	return 0;
+	return ret;
 }
 
 /*****************************************************************************/
Index: linux-2.6/drivers/media/video/stv680.c
===================================================================
--- linux-2.6.orig/drivers/media/video/stv680.c
+++ linux-2.6/drivers/media/video/stv680.c
@@ -1200,25 +1200,25 @@ static int stv680_mmap (struct file *fil
 	struct video_device *dev = file->private_data;
 	struct usb_stv *stv680 = video_get_drvdata(dev);
 	unsigned long size  = vma->vm_end-vma->vm_start;
+	int ret;
 
 	mutex_lock(&stv680->lock);
 
 	if (stv680->udev == NULL) {
-		mutex_unlock(&stv680->lock);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 	if (size > (((STV680_NUMFRAMES * stv680->maxframesize) + PAGE_SIZE - 1)
 		    & ~(PAGE_SIZE - 1))) {
-		mutex_unlock(&stv680->lock);
-		return -EINVAL;
-	}
-	if (remap_vmalloc_range(vma, stv680->fbuf, 0)) {
-		mutex_unlock(&stv680->lock);
-		return -EAGAIN;
+		ret = -EINVAL;
+		goto out;
 	}
-	mutex_unlock(&stv680->lock);
 
-	return 0;
+	ret = remap_vmalloc_range(vma, stv680->fbuf, 0);
+
+out:
+	mutex_unlock(&stv680->lock);
+	return ret;
 }
 
 static ssize_t stv680_read (struct file *file, char __user *buf,
Index: linux-2.6/drivers/media/video/usbvideo/usbvideo.c
===================================================================
--- linux-2.6.orig/drivers/media/video/usbvideo/usbvideo.c
+++ linux-2.6/drivers/media/video/usbvideo/usbvideo.c
@@ -1036,10 +1036,7 @@ static int usbvideo_v4l_mmap(struct file
 	if (size > (((USBVIDEO_NUMFRAMES * uvd->max_frame_size) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)))
 		return -EINVAL;
 
-	if (remap_vmalloc_range(vma, uvd->fbuf, 0))
-		return -EAGAIN;
-
-	return 0;
+	return remap_vmalloc_range(vma, uvd->fbuf, 0);
 }
 
 /*
Index: linux-2.6/drivers/media/video/usbvideo/vicam.c
===================================================================
--- linux-2.6.orig/drivers/media/video/usbvideo/vicam.c
+++ linux-2.6/drivers/media/video/usbvideo/vicam.c
@@ -1002,10 +1002,7 @@ vicam_mmap(struct file *file, struct vm_
 	 if (size > VICAM_FRAMES*VICAM_MAX_FRAME_SIZE)
 		return -EINVAL;
 
-	if (remap_vmalloc_range(vma, cam->framebuf, 0))
-		return -EAGAIN;
-
-	return 0;
+	return remap_vmalloc_range(vma, cam->framebuf, 0);
 }
 
 #if defined(CONFIG_VIDEO_PROC_FS)
Index: linux-2.6/drivers/media/video/w9968cf.c
===================================================================
--- linux-2.6.orig/drivers/media/video/w9968cf.c
+++ linux-2.6/drivers/media/video/w9968cf.c
@@ -2816,6 +2816,7 @@ static int w9968cf_mmap(struct file* fil
 				     video_get_drvdata(video_devdata(filp));
 	unsigned long vsize = vma->vm_end - vma->vm_start,
 		      psize = cam->nbuffers * cam->frame[0].size;
+	int ret;
 
 	if (cam->disconnected) {
 		DBG(2, "Device not present")
@@ -2832,11 +2833,10 @@ static int w9968cf_mmap(struct file* fil
 	if (vsize > psize - (vma->vm_pgoff << PAGE_SHIFT))
 		return -EINVAL;
 
-	if (remap_vmalloc_range(vma, cam->frame[0].buffer, vma->vm_pgoff))
-		return -EAGAIN;
+	ret = remap_vmalloc_range(vma, cam->frame[0].buffer, vma->vm_pgoff);
 
 	DBG(5, "mmap method successfully called")
-	return 0;
+	return ret;
 }
 
 
Index: linux-2.6/drivers/media/video/zc0301/zc0301_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/zc0301/zc0301_core.c
+++ linux-2.6/drivers/media/video/zc0301/zc0301_core.c
@@ -931,27 +931,28 @@ static int zc0301_mmap(struct file* filp
 	struct zc0301_device* cam = video_get_drvdata(video_devdata(filp));
 	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
+	int ret;
 
 	if (mutex_lock_interruptible(&cam->fileop_mutex))
 		return -ERESTARTSYS;
 
 	if (cam->state & DEV_DISCONNECTED) {
 		DBG(1, "Device not present");
-		mutex_unlock(&cam->fileop_mutex);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out;
 	}
 
 	if (cam->state & DEV_MISCONFIGURED) {
 		DBG(1, "The camera is misconfigured. Close and open it "
 		       "again.");
-		mutex_unlock(&cam->fileop_mutex);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	if (cam->io != IO_MMAP || !(vma->vm_flags & VM_WRITE) ||
 	    size != PAGE_ALIGN(cam->frame[0].buf.length)) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	for (i = 0; i < cam->nbuffers; i++) {
@@ -959,23 +960,21 @@ static int zc0301_mmap(struct file* filp
 			break;
 	}
 	if (i == cam->nbuffers) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EAGAIN;
-	}
+	if ((ret = remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)))
+		goto out;
 
 	vma->vm_ops = &zc0301_vm_ops;
 	vma->vm_private_data = &cam->frame[i];
 
 	zc0301_vm_open(vma);
 
+out:
 	mutex_unlock(&cam->fileop_mutex);
-
-	return 0;
+	return ret;
 }
 
 /*****************************************************************************/
Index: linux-2.6/drivers/media/video/et61x251/et61x251_core.c
===================================================================
--- linux-2.6.orig/drivers/media/video/et61x251/et61x251_core.c
+++ linux-2.6/drivers/media/video/et61x251/et61x251_core.c
@@ -1467,27 +1467,28 @@ static int et61x251_mmap(struct file* fi
 	struct et61x251_device* cam = video_get_drvdata(video_devdata(filp));
 	unsigned long size = vma->vm_end - vma->vm_start;
 	u32 i;
+	int ret;
 
 	if (mutex_lock_interruptible(&cam->fileop_mutex))
 		return -ERESTARTSYS;
 
 	if (cam->state & DEV_DISCONNECTED) {
 		DBG(1, "Device not present");
-		mutex_unlock(&cam->fileop_mutex);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out;
 	}
 
 	if (cam->state & DEV_MISCONFIGURED) {
 		DBG(1, "The camera is misconfigured. Close and open it "
 		       "again.");
-		mutex_unlock(&cam->fileop_mutex);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	if (cam->io != IO_MMAP || !(vma->vm_flags & VM_WRITE) ||
 	    size != PAGE_ALIGN(cam->frame[0].buf.length)) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	for (i = 0; i < cam->nbuffers; i++) {
@@ -1495,23 +1496,21 @@ static int et61x251_mmap(struct file* fi
 			break;
 	}
 	if (i == cam->nbuffers) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)) {
-		mutex_unlock(&cam->fileop_mutex);
-		return -EAGAIN;
-	}
+	if ((ret = remap_vmalloc_range(vma, cam->frame[i].bufmem, 0)))
+		goto out;
 
 	vma->vm_ops = &et61x251_vm_ops;
 	vma->vm_private_data = &cam->frame[i];
 
 	et61x251_vm_open(vma);
 
+out:
 	mutex_unlock(&cam->fileop_mutex);
-
-	return 0;
+	return ret;
 }
 
 /*****************************************************************************/

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [patch 2/5] mm: deprecate vmalloc_to_pfn
From: Andrew Morton @ 2006-04-20 20:03 UTC (permalink / raw)
  To: Nick Piggin; +Cc: hch, linux-kernel, linux-mm, hugh
In-Reply-To: <20060420173616.GE21660@wotan.suse.de>

Nick Piggin <npiggin@suse.de> wrote:
>
> On Thu, Apr 20, 2006 at 06:22:40PM +0100, Christoph Hellwig wrote:
> > On Thu, Apr 20, 2006 at 07:06:30PM +0200, Nick Piggin wrote:
> > > Deprecate vmalloc_to_pfn.
> > 
> > I don't think there's any point to even keep it.  There's a trivial replacement.
> 
> It is exported, is the only thing. I tend to stick my head in the sand
> with these matters, and try to go with whatever I think will help Andrew
> merge it.
> 
> If nobody cares, I'd just as soon remove it completely.

It's been in there for a long time.  Theoretically we should mark it
deprecated, kill it in six months or so.

But vmalloc_to_page() is EXPORT_SYMBOLed, so fixing up downstream breakage
will be so trivial it's hardly worth bothering.  So let's zap vmalloc_to_pfn()
in 2.6.18.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: Read/Write migration entries: Implement correct behavior in copy_one_pte
From: Christoph Lameter @ 2006-04-20 20:17 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: hugh, linux-kernel, linux-mm, akpm
In-Reply-To: <20060419123911.3bd22ab3.kamezawa.hiroyu@jp.fujitsu.com>

On Wed, 19 Apr 2006, KAMEZAWA Hiroyuki wrote:

> BTW, do we manage page table under move_vma() in right way ?

I had a look at it and it seems to be done the right way. The ptl locks
are taken and the vma information is setup before the move. 
remove_migration_ptes() will find the page both in the old and the new 
vma.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Read/Write migration entries: Make mprotect() convert write migration entries to read
From: Christoph Lameter @ 2006-04-20 20:18 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki; +Cc: hugh, linux-kernel, linux-mm, akpm
In-Reply-To: <20060419123911.3bd22ab3.kamezawa.hiroyu@jp.fujitsu.com>

1. Introduce a new function make_migration_entry_read() to
   isolate common code between copy_pte_range and change_pte_range.

2. Modify change_pte_range() to check for a migration entry.
   If a write migration entry is found and there is a request for
   READ permissions, then convert it to a read migration entry.

I am a bit concerned about the check of newprot. Are there other
values than PAGE_READONLY that indicate read only access?

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.17-rc1-mm3/mm/memory.c
===================================================================
--- linux-2.6.17-rc1-mm3.orig/mm/memory.c	2006-04-18 11:09:23.252982000 -0700
+++ linux-2.6.17-rc1-mm3/mm/memory.c	2006-04-20 12:22:50.626800376 -0700
@@ -447,14 +447,11 @@
 			}
 			if (is_migration_entry(entry) &&
 					is_cow_mapping(vm_flags)) {
-				page = migration_entry_to_page(entry);
-
 				/*
 				 * COW mappings require pages in both parent
-				*  and child to be set to read.
+				 * and child to be set to read.
 				 */
-				entry = make_migration_entry(page,
-						SWP_MIGRATION_READ);
+				make_migration_entry_read(&entry);
 				pte = swp_entry_to_pte(entry);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
Index: linux-2.6.17-rc1-mm3/mm/mprotect.c
===================================================================
--- linux-2.6.17-rc1-mm3.orig/mm/mprotect.c	2006-04-18 11:12:30.614603000 -0700
+++ linux-2.6.17-rc1-mm3/mm/mprotect.c	2006-04-20 12:17:03.771210036 -0700
@@ -19,6 +19,8 @@
 #include <linux/mempolicy.h>
 #include <linux/personality.h>
 #include <linux/syscalls.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -28,22 +30,35 @@
 static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot)
 {
-	pte_t *pte;
+	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	do {
-		if (pte_present(*pte)) {
+		oldpte = *pte;
+		if (pte_present(oldpte)) {
 			pte_t ptent;
 
 			/* Avoid an SMP race with hardware updated dirty/clean
 			 * bits by wiping the pte and then setting the new pte
 			 * into place.
 			 */
-			ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot);
+			ptent = pte_modify(ptep_get_and_clear(mm, addr, pte),
+								newprot);
 			set_pte_at(mm, addr, pte, ptent);
 			lazy_mmu_prot_update(ptent);
+		} else
+		if (!pte_file(oldpte) && pgprot_val(newprot) ==
+						 pgprot_val(PAGE_READONLY)) {
+			swp_entry_t entry = pte_to_swp_entry(oldpte);
+
+			if (is_write_migration_entry(entry)) {
+				make_migration_entry_read(&entry);
+				set_pte_at(mm, addr, pte,
+					swp_entry_to_pte(entry));
+			}
 		}
+
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(pte - 1, ptl);
 }
Index: linux-2.6.17-rc1-mm3/include/linux/swapops.h
===================================================================
--- linux-2.6.17-rc1-mm3.orig/include/linux/swapops.h	2006-04-18 10:58:33.675573000 -0700
+++ linux-2.6.17-rc1-mm3/include/linux/swapops.h	2006-04-20 12:00:29.279539838 -0700
@@ -98,6 +98,11 @@
 	return p;
 }
 
+static inline void make_migration_entry_read(swp_entry_t *entry)
+{
+	*entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry));
+}
+
 extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					unsigned long address);
 #else
@@ -105,6 +110,7 @@
 #define make_migration_entry(page, write) swp_entry(0, 0)
 #define is_migration_entry(swp) 0
 #define migration_entry_to_page(swp) NULL
+static inline void make_migration_entry_read(entryp) { }
 static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 					 unsigned long address) { }
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [PATCH/RFC] Page Cache Policy V0.0 0/5 Overview
From: Lee Schermerhorn @ 2006-04-20 20:39 UTC (permalink / raw)
  To: linux-mm; +Cc: Christoph Lameter, Andi Kleen, Eric Whitney

Resend with subject!

Page Cache Policy V0.0 0/5 Overview

Work in progress -- for comment.  Christoph wanted to see
this addressed before migrate-on-fault goes any farther.
So, here's a cut.  Series to follow...

Note:  tested atop recently posted add-shmem-migratepage-a_op
patch on 2.6.17-rc1-mm2
----------------------

Basic "problem":  currently [2.6.17-rc1], files mmap()ed SHARED
do not follow the policy applied to the mapped regions.  Instead,
shared file backed pages are allocated using the allocating
task's task policy.  This is inconsistent with the way that anon
and shmem pages are handled.

One reason for this is that down where pages are allocated for
file backed pages, the faulting (mm, vma, address) are not 
available to compute the policy.  However, we do have the inode
[via the address space] and file index/offset available.  If the
applicable policy could be determined from just this info, the
vma and address would not be required.

The following series of patches against 2.6.17-rc1-mm2 implements
numa memory policy for shared, mmap()ed files.   Because files
mmap()ed SHARED are shared between tasks just like shared memory
regions, I've used the shared_policy infrastructure from shmem.
This infrastructure applies policies directly to ranges of a file
using a prio tree.

The patches break out as follows:

1 - add-offset-arg-to-migrate_pages_to

	A minor preparatory patch:  adds the page offset/index
	arg to migrate_pages_to() for properly computing nodes
	for interleaved policies.  Used by subsequent patch.

2 - move-shared-policy-to-inode

	This patch generalizes the shared_policy infrastructure
	for use by generic files.   First, it adds a shared_policy
	pointer to the struct address_space.  This pointer is
	initialized to NULL on inode allocation, indicating the
	default policy.  The shared memory subsystem is then
	modified to use the shared policy struct out of the
	address_space [a.k.a. mapping] instead of explicitly
	using one embedded in the shmem inode info struct.

	Note, however, at this point we still use the embedded
	shared_policy.  We just point the mapping spolicy pointer
	at the embedded struct at init time.

	One BIG side-effect of this patch:  we no longer split
	vm areas to apply sub-range policies if the vma has
	a set_policy vm_op.  Only shmem currently has a set_policy
	op, and it knows how to handle subranges via the prio tree.
	So, I'm proposing to adopt this semantic:  if a vma has a
	set_policy() op, it must know how to handle subranges and
	must have a get_policy() op that also knows how to handle
	subranges.

	Tested to ensure shared policies still work for shmem.

	TODO:  check effects on numa maps of not splitting vmas.

3 - alloc-shared-policies

	This patch removes the shared_policy structs embedded in
	the shmem and hugetlbfs inode info structs, and dynamically
	allocates them, from a new kmem cache, when needed.

	Shmem will allocate a shared policy at segment init if
	the superblock [mount] specifies non-default policy.
	Otherwise, the shared_policy struct will only be allocated
	if a task mbind()s a range of the segment.

	Hugetlbfs just leaves the spolicy pointer NULL [default].
	It will be allocated by the shmem set_policy() vm_op if
	a task mbinds a range of the hugetlb segment.

4 - generic-file-policy-vm-ops

	This patch clones the shmem set/get_policy vm_ops for use
	by generic mmap()ed files.  The functions are added to the
	generic_file_vm_ops struct. These functions operate on the
	shared_policy prio tree associated with the inode, allocating
	one if necessary.

	Note:   these turned out to be identical in all but name to
	the shmem '_policy ops.  Maybe eliminate one copy and share?

5 - use-file-policy-for-page-cache

	This patch enhances page_cache_alloc[_cold]() to take an
	offset/index argument.  It uses this to lookup the policy
	using a new function get_file_policy() which is just a
	wrapper around mpol_shared_policy_lookup().  If the inode's
	[mapping's] shared_policy pointer is NULL, just returns the
	default policy.

	Then page_cache_alloc[_cold]() calls a new function,
	alloc_page_pol() to evaluate the policy [at a specified
	offset] and allocate an appropriate page.  alloc_page_pol()
	shares some code with alloc_page_vma(), so this area is
	reworked to minimize duplication.  

	All callers of page_cache_alloc[_cold]() are modified to
	pass the file index/offset for which a page is requested.
	The index/offset is available at all call sites as it will
	be used to insert the page into the mapping's radix tree.

Cursory testing with memtoy for shm segments, shared and privately
mapped files; single task and 2 tasks mmap()ing same file.  When
the file is mmap()ed shared, either task's policy changes are seen
by both tasks.  When one maps shared and the other private, the
private mapper's policies apply only to its mapping.

Lots more testing needed.

Lee Schermerhorn




--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [PATCH/RFC] Page Cache Policy V0.0 1/5 add offset arg to migrate_pages_to()
From: Lee Schermerhorn @ 2006-04-20 20:41 UTC (permalink / raw)
  To: linux-mm; +Cc: Christoph Lameter, Andi Kleen, Eric Whitney

Page Cache Policy V0.0 1/5 add offset arg to migrate_pages_to()

This patch adds a page offset arg to migrate_pages_to() for
use in selecting nodes from which to allocate for regions with
interleave policy.   This is needed to calculate the correct
node for shmem and generic mmap()ed files using the shared
policy infrastructure [subsequent patches].

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>

Index: linux-2.6.17-rc1-mm2/include/linux/migrate.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/migrate.h	2006-04-20 12:04:21.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/migrate.h	2006-04-20 12:04:51.000000000 -0400
@@ -12,8 +12,8 @@ extern void migrate_page_copy(struct pag
 extern int migrate_page_remove_references(struct page *, struct page *, int);
 extern int migrate_pages(struct list_head *l, struct list_head *t,
 		struct list_head *moved, struct list_head *failed);
-extern int migrate_pages_to(struct list_head *pagelist,
-			struct vm_area_struct *vma, int dest);
+extern int migrate_pages_to(struct list_head *, struct vm_area_struct *,
+		int, unsigned long);
 extern int fail_migrate_page(struct page *, struct page *);
 
 extern int migrate_prep(void);
Index: linux-2.6.17-rc1-mm2/mm/mempolicy.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/mempolicy.c	2006-04-20 12:04:21.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/mempolicy.c	2006-04-20 12:05:35.000000000 -0400
@@ -604,7 +604,7 @@ int migrate_to_node(struct mm_struct *mm
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
-		err = migrate_pages_to(&pagelist, NULL, dest);
+		err = migrate_pages_to(&pagelist, NULL, dest, 0L);
 		if (!list_empty(&pagelist))
 			putback_lru_pages(&pagelist);
 	}
@@ -767,7 +767,8 @@ long do_mbind(unsigned long start, unsig
 		err = mbind_range(vma, start, end, new);
 
 		if (!list_empty(&pagelist))
-			nr_failed = migrate_pages_to(&pagelist, vma, -1);
+			nr_failed = migrate_pages_to(&pagelist, vma, -1,
+					start - vma->vm_start);
 
 		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
Index: linux-2.6.17-rc1-mm2/mm/migrate.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/migrate.c	2006-04-20 12:04:21.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/migrate.c	2006-04-20 12:04:51.000000000 -0400
@@ -584,14 +584,13 @@ EXPORT_SYMBOL(buffer_migrate_page);
  * Specify destination with either non-NULL vma or dest_node >= 0
  * Return the number of pages not migrated or error code
  */
-int migrate_pages_to(struct list_head *pagelist,
-			struct vm_area_struct *vma, int dest)
+int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma,
+			int dest, unsigned long offset)
 {
 	LIST_HEAD(newlist);
 	LIST_HEAD(moved);
 	LIST_HEAD(failed);
 	int err = 0;
-	unsigned long offset = 0;
 	int nr_pages;
 	struct page *page;
 	struct list_head *p;


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [PATCH/RFC] Page Cache Policy V0.0 2/5 move shared policy to inode
From: Lee Schermerhorn @ 2006-04-20 20:46 UTC (permalink / raw)
  To: linux-mm; +Cc: Christoph Lameter, Andi Kleen, Eric Whitney

Page Cache Policy V0.0 2/5 move shared policy to inode

This patch starts the process of generalizing the shmem shared
[mem]policy infrastructure for use with generic mmap()ed files.

1) add a struct shared_policy pointer to the generic inode
   structure--actually to the address_space in i_data.
   We'll locate this via vma->vm_file->f_mapping->spolicy.

2) create a shared_policy.h header in anticipation of not
   needing all of mempolicy.h in some places that we'll
   use shared policies. 
TODO:  may not turn out to be the case.  but might be nice
       to have shared policy stuff in a separate header?

3) add [byte] start, end args to set_policy vma operation in
   anticipation of allowing multiple policies per vma for
   file/shmem mappings.  Get file/shmem policies in terms
   of start,end instead of entire vma.

4) modify mbind_range() to allow set_policy() vma ops, if
   any, to handle policies on subranges of vma.  I.e., don't
   split the vma at this level if the mapping has a set_policy()
   vma op.  The set_policy() op could choose to do that, but it
   doesn't need to for generic "shared policies".

   N.B. this breaks any assumptions about one policy per
   vma.

TODO:  fix up display of numamaps for vma with multiple
policy ranges.

5) modify shmem, the only existing user of shared policy
   infrastructure, to work with changes above.  At this
   point, just use the shared_policy embedded in the shmem
   inode info struct.  A later patch will dynamically
   allocate the struct when needed.

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>

Index: linux-2.6.17-rc1-mm2/include/linux/fs.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/fs.h	2006-04-20 12:04:21.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/fs.h	2006-04-20 12:05:51.000000000 -0400
@@ -391,6 +391,9 @@ struct address_space {
 	struct address_space_operations *a_ops;	/* methods */
 	unsigned long		flags;		/* error bits/gfp mask */
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
+//TODO:  #ifdef CONFIG_NUMA ???
+	struct shared_policy	*spolicy;
+
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
 	struct address_space	*assoc_mapping;	/* ditto */
Index: linux-2.6.17-rc1-mm2/include/linux/mempolicy.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/mempolicy.h	2006-04-20 12:04:21.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/mempolicy.h	2006-04-20 13:05:27.000000000 -0400
@@ -31,11 +31,11 @@
 #include <linux/config.h>
 #include <linux/mmzone.h>
 #include <linux/slab.h>
-#include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/nodemask.h>
 
 struct vm_area_struct;
+#include <linux/shared_policy.h>
 
 #ifdef CONFIG_NUMA
 
@@ -113,34 +113,6 @@ static inline int mpol_equal(struct memp
 
 #define mpol_set_vma_default(vma) ((vma)->vm_policy = NULL)
 
-/*
- * Tree of shared policies for a shared memory region.
- * Maintain the policies in a pseudo mm that contains vmas. The vmas
- * carry the policy. As a special twist the pseudo mm is indexed in pages, not
- * bytes, so that we can work with shared memory segments bigger than
- * unsigned long.
- */
-
-struct sp_node {
-	struct rb_node nd;
-	unsigned long start, end;
-	struct mempolicy *policy;
-};
-
-struct shared_policy {
-	struct rb_root root;
-	spinlock_t lock;
-};
-
-void mpol_shared_policy_init(struct shared_policy *info, int policy,
-				nodemask_t *nodes);
-int mpol_set_shared_policy(struct shared_policy *info,
-				struct vm_area_struct *vma,
-				struct mempolicy *new);
-void mpol_free_shared_policy(struct shared_policy *p);
-struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
-					    unsigned long idx);
-
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
 extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
@@ -200,30 +172,6 @@ static inline struct mempolicy *mpol_cop
 	return NULL;
 }
 
-struct shared_policy {};
-
-static inline int mpol_set_shared_policy(struct shared_policy *info,
-					struct vm_area_struct *vma,
-					struct mempolicy *new)
-{
-	return -EINVAL;
-}
-
-static inline void mpol_shared_policy_init(struct shared_policy *info,
-					int policy, nodemask_t *nodes)
-{
-}
-
-static inline void mpol_free_shared_policy(struct shared_policy *p)
-{
-}
-
-static inline struct mempolicy *
-mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
-{
-	return NULL;
-}
-
 #define vma_policy(vma) NULL
 #define vma_set_policy(vma, pol) do {} while(0)
 
Index: linux-2.6.17-rc1-mm2/include/linux/shared_policy.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc1-mm2/include/linux/shared_policy.h	2006-04-20 14:13:48.000000000 -0400
@@ -0,0 +1,65 @@
+#ifndef _LINUX_SHARED_POLICY_H
+#define _LINUX_SHARED_POLICY_H 1
+
+#include <linux/rbtree.h>
+
+/*
+ * Tree of shared policies for a shared memory regions and memory
+ * mapped files.
+TODO:  wean the low level shared policies from the notion of vmas.
+       just use inode, offset, length
+ * Maintain the policies in a pseudo mm that contains vmas. The vmas
+ * carry the policy. As a special twist the pseudo mm is indexed in pages, not
+ * bytes, so that we can work with shared memory segments bigger than
+ * unsigned long.
+ */
+
+#ifdef CONFIG_NUMA
+
+struct sp_node {
+	struct rb_node nd;
+	unsigned long start, end;
+	struct mempolicy *policy;
+};
+
+struct shared_policy {
+	struct rb_root root;
+	spinlock_t lock;
+};
+
+void mpol_shared_policy_init(struct shared_policy *, int, nodemask_t *);
+int mpol_set_shared_policy(struct shared_policy *,
+				struct vm_area_struct *,
+				unsigned long, unsigned long,
+				struct mempolicy *);
+void mpol_free_shared_policy(struct shared_policy *);
+struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *,
+					    unsigned long);
+
+#else /* !NUMA */
+
+struct shared_policy {};
+
+static inline int mpol_set_shared_policy(struct shared_policy *info,
+					struct vm_area_struct *vma,
+					struct mempolicy *new)
+{
+	return -EINVAL;
+}
+static inline void mpol_shared_policy_init(struct shared_policy *info,
+					int policy, nodemask_t *nodes)
+{
+}
+
+static inline void mpol_free_shared_policy(struct shared_policy *p)
+{
+}
+
+static inline struct mempolicy *
+mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
+{
+	return NULL;
+}
+#endif
+
+#endif /* _LINUX_SHARED_POLICY_H */
Index: linux-2.6.17-rc1-mm2/include/linux/mm.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/mm.h	2006-04-20 12:04:21.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/mm.h	2006-04-20 14:18:46.000000000 -0400
@@ -201,9 +201,10 @@ struct vm_operations_struct {
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
 #ifdef CONFIG_NUMA
-	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
-	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
-					unsigned long addr);
+	int (*set_policy)(struct vm_area_struct *, unsigned long,
+				unsigned long, struct mempolicy *);
+	struct mempolicy *(*get_policy)(struct vm_area_struct *,
+					unsigned long);
 #endif
 };
 
@@ -648,7 +649,8 @@ extern void show_free_areas(void);
 #ifdef CONFIG_SHMEM
 struct page *shmem_nopage(struct vm_area_struct *vma,
 			unsigned long address, int *type);
-int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
+int shmem_set_policy(struct vm_area_struct *, unsigned long, unsigned long,
+			 struct mempolicy *);
 struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 					unsigned long addr);
 int shmem_lock(struct file *file, int lock, struct user_struct *user);
@@ -1051,6 +1053,12 @@ static inline unsigned long vma_pages(st
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+static inline pgoff_t vma_addr_to_pgoff(struct vm_area_struct *vma,
+		unsigned long addr, int shift)
+{
+	return ((addr - vma->vm_start) >> shift) + vma->vm_pgoff;
+}
+
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 struct page *vmalloc_to_page(void *addr);
 unsigned long vmalloc_to_pfn(void *addr);
Index: linux-2.6.17-rc1-mm2/mm/mempolicy.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/mempolicy.c	2006-04-20 12:05:35.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/mempolicy.c	2006-04-20 14:13:48.000000000 -0400
@@ -368,20 +368,28 @@ check_range(struct mm_struct *mm, unsign
 	return first;
 }
 
-/* Apply policy to a single VMA */
-static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
+/*
+ * Apply policy to a single VMA, or a subrange thereof
+ */
+static int policy_vma(struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, struct mempolicy *new)
 {
 	int err = 0;
-	struct mempolicy *old = vma->vm_policy;
 
 	PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
-		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
+		 start, end,
+		 vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT),
 		 vma->vm_ops, vma->vm_file,
 		 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
 
+	/*
+	 * set_policy op, if exists, is responsible for policy
+	 * reference counts.
+	 */
 	if (vma->vm_ops && vma->vm_ops->set_policy)
-		err = vma->vm_ops->set_policy(vma, new);
-	if (!err) {
+		err = vma->vm_ops->set_policy(vma, start, end, new);
+	else {
+		struct mempolicy *old = vma->vm_policy;
 		mpol_get(new);
 		vma->vm_policy = new;
 		mpol_free(old);
@@ -398,13 +406,24 @@ static int mbind_range(struct vm_area_st
 
 	err = 0;
 	for (; vma && vma->vm_start < end; vma = next) {
+		unsigned long eend = min(end, vma->vm_end);
 		next = vma->vm_next;
+		if (vma->vm_ops && vma->vm_ops->set_policy) {
+			/*
+			 * set_policy op handles policies on
+			 * sub-range of vma
+			 */
+			err = policy_vma(vma, start, eend, new);
+			if (err)
+				break;
+			continue;
+		}
 		if (vma->vm_start < start)
 			err = split_vma(vma->vm_mm, vma, start, 1);
 		if (!err && vma->vm_end > end)
-			err = split_vma(vma->vm_mm, vma, end, 0);
+			err = split_vma(vma->vm_mm, vma, eend, 0);
 		if (!err)
-			err = policy_vma(vma, new);
+			err = policy_vma(vma, start, eend, new);
 		if (err)
 			break;
 	}
@@ -1410,7 +1429,7 @@ mpol_shared_policy_lookup(struct shared_
 	struct mempolicy *pol = NULL;
 	struct sp_node *sn;
 
-	if (!sp->root.rb_node)
+	if (!sp || !sp->root.rb_node)
 		return NULL;
 	spin_lock(&sp->lock);
 	sn = sp_lookup(sp, idx, idx+1);
@@ -1492,11 +1511,12 @@ restart:
 	return 0;
 }
 
-void mpol_shared_policy_init(struct shared_policy *info, int policy,
+void mpol_shared_policy_init(struct shared_policy *sp, int policy,
 				nodemask_t *policy_nodes)
 {
-	info->root = RB_ROOT;
-	spin_lock_init(&info->lock);
+
+	sp->root = RB_ROOT;
+	spin_lock_init(&sp->lock);
 
 	if (policy != MPOL_DEFAULT) {
 		struct mempolicy *newpol;
@@ -1510,53 +1530,58 @@ void mpol_shared_policy_init(struct shar
 			memset(&pvma, 0, sizeof(struct vm_area_struct));
 			/* Policy covers entire file */
 			pvma.vm_end = TASK_SIZE;
-			mpol_set_shared_policy(info, &pvma, newpol);
+			mpol_set_shared_policy(sp, &pvma, 0UL, pvma.vm_end,
+						newpol);
 			mpol_free(newpol);
 		}
 	}
 }
 
-int mpol_set_shared_policy(struct shared_policy *info,
-			struct vm_area_struct *vma, struct mempolicy *npol)
+int mpol_set_shared_policy(struct shared_policy *sp,
+			struct vm_area_struct *vma,
+			unsigned long start, unsigned long end,
+			struct mempolicy *npol)
 {
 	int err;
 	struct sp_node *new = NULL;
-	unsigned long sz = vma_pages(vma);
+	unsigned long sz = (end - start) >> PAGE_SHIFT;
+	pgoff_t pgoff = vma->vm_pgoff;
+	pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
 
 	PDprintk("set_shared_policy %lx sz %lu %d %lx\n",
-		 vma->vm_pgoff,
+		 pgoff,
 		 sz, npol? npol->policy : -1,
 		npol ? nodes_addr(npol->v.nodes)[0] : -1);
 
 	if (npol) {
-		new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol);
+		new = sp_alloc(pgoff, pgoff + sz, npol);
 		if (!new)
 			return -ENOMEM;
 	}
-	err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
+	err = shared_policy_replace(sp, pgoff, pgoff+sz, new);
 	if (err && new)
 		kmem_cache_free(sn_cache, new);
 	return err;
 }
 
 /* Free a backing policy store on inode delete. */
-void mpol_free_shared_policy(struct shared_policy *p)
+void mpol_free_shared_policy(struct shared_policy *sp)
 {
 	struct sp_node *n;
 	struct rb_node *next;
 
-	if (!p->root.rb_node)
+	if (!sp->root.rb_node)
 		return;
-	spin_lock(&p->lock);
-	next = rb_first(&p->root);
+	spin_lock(&sp->lock);
+	next = rb_first(&sp->root);
 	while (next) {
 		n = rb_entry(next, struct sp_node, nd);
 		next = rb_next(&n->nd);
-		rb_erase(&n->nd, &p->root);
+		rb_erase(&n->nd, &sp->root);
 		mpol_free(n->policy);
 		kmem_cache_free(sn_cache, n);
 	}
-	spin_unlock(&p->lock);
+	spin_unlock(&sp->lock);
 }
 
 /* assumes fs == KERNEL_DS */
Index: linux-2.6.17-rc1-mm2/mm/shmem.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/shmem.c	2006-04-20 12:04:21.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/shmem.c	2006-04-20 14:18:12.000000000 -0400
@@ -922,7 +922,7 @@ out:
 	return err;
 }
 
-static struct page *shmem_swapin_async(struct shared_policy *p,
+static struct page *shmem_swapin_async(struct shared_policy *sp,
 				       swp_entry_t entry, unsigned long idx)
 {
 	struct page *page;
@@ -932,41 +932,40 @@ static struct page *shmem_swapin_async(s
 	memset(&pvma, 0, sizeof(struct vm_area_struct));
 	pvma.vm_end = PAGE_SIZE;
 	pvma.vm_pgoff = idx;
-	pvma.vm_policy = mpol_shared_policy_lookup(p, idx);
+	pvma.vm_policy = mpol_shared_policy_lookup(sp, idx);
 	page = read_swap_cache_async(entry, &pvma, 0);
 	mpol_free(pvma.vm_policy);
 	return page;
 }
 
-struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
-			  unsigned long idx)
+struct page *shmem_swapin(struct shared_policy *sp,
+				swp_entry_t entry, unsigned long idx)
 {
-	struct shared_policy *p = &info->policy;
 	int i, num;
 	struct page *page;
 	unsigned long offset;
 
 	num = valid_swaphandles(entry, &offset);
 	for (i = 0; i < num; offset++, i++) {
-		page = shmem_swapin_async(p,
+		page = shmem_swapin_async(sp,
 				swp_entry(swp_type(entry), offset), idx);
 		if (!page)
 			break;
 		page_cache_release(page);
 	}
 	lru_add_drain();	/* Push any new pages onto the LRU now */
-	return shmem_swapin_async(p, entry, idx);
+	return shmem_swapin_async(sp, entry, idx);
 }
 
 static struct page *
-shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
+shmem_alloc_page(gfp_t gfp, struct shared_policy *sp,
 		 unsigned long idx)
 {
 	struct vm_area_struct pvma;
 	struct page *page;
 
 	memset(&pvma, 0, sizeof(struct vm_area_struct));
-	pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
+	pvma.vm_policy = mpol_shared_policy_lookup(sp, idx);
 	pvma.vm_pgoff = idx;
 	pvma.vm_end = PAGE_SIZE;
 	page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0);
@@ -980,14 +979,14 @@ static inline int shmem_parse_mpol(char 
 }
 
 static inline struct page *
-shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
+shmem_swapin(void *sp,swp_entry_t entry,unsigned long idx)
 {
 	swapin_readahead(entry, 0, NULL);
 	return read_swap_cache_async(entry, NULL, 0);
 }
 
 static inline struct page *
-shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx)
+shmem_alloc_page(gfp_t gfp,void *sp, unsigned long idx)
 {
 	return alloc_page(gfp | __GFP_ZERO);
 }
@@ -1052,7 +1051,7 @@ repeat:
 				inc_page_state(pgmajfault);
 				*type = VM_FAULT_MAJOR;
 			}
-			swappage = shmem_swapin(info, swap, idx);
+			swappage = shmem_swapin(mapping->spolicy, swap, idx);
 			if (!swappage) {
 				spin_lock(&info->lock);
 				entry = shmem_swp_alloc(info, idx, sgp);
@@ -1173,7 +1172,7 @@ repeat:
 		if (!filepage) {
 			spin_unlock(&info->lock);
 			filepage = shmem_alloc_page(mapping_gfp_mask(mapping),
-						    info,
+						    mapping->spolicy,
 						    idx);
 			if (!filepage) {
 				shmem_unacct_blocks(info->flags, 1);
@@ -1292,20 +1291,18 @@ static int shmem_populate(struct vm_area
 }
 
 #ifdef CONFIG_NUMA
-int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+int shmem_set_policy(struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, struct mempolicy *new)
 {
-	struct inode *i = vma->vm_file->f_dentry->d_inode;
-	return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
+	return mpol_set_shared_policy(vma->vm_file->f_mapping->spolicy,
+					 vma, start, end, new);
 }
 
 struct mempolicy *
 shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
 {
-	struct inode *i = vma->vm_file->f_dentry->d_inode;
-	unsigned long idx;
-
-	idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-	return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx);
+	return mpol_shared_policy_lookup(vma->vm_file->f_mapping->spolicy,
+		 vma_addr_to_pgoff(vma, addr, PAGE_SHIFT));
 }
 #endif
 
@@ -1377,8 +1374,9 @@ shmem_get_inode(struct super_block *sb, 
 		case S_IFREG:
 			inode->i_op = &shmem_inode_operations;
 			inode->i_fop = &shmem_file_operations;
-			mpol_shared_policy_init(&info->policy, sbinfo->policy,
-							&sbinfo->policy_nodes);
+			inode->i_mapping->spolicy = &info->policy;
+			mpol_shared_policy_init(inode->i_mapping->spolicy,
+					 sbinfo->policy, &sbinfo->policy_nodes);
 			break;
 		case S_IFDIR:
 			inode->i_nlink++;
@@ -1392,8 +1390,9 @@ shmem_get_inode(struct super_block *sb, 
 			 * Must not load anything in the rbtree,
 			 * mpol_free_shared_policy will not be called.
 			 */
-			mpol_shared_policy_init(&info->policy, MPOL_DEFAULT,
-						NULL);
+			inode->i_mapping->spolicy = &info->policy;
+			mpol_shared_policy_init(inode->i_mapping->spolicy,
+					 MPOL_DEFAULT, NULL);
 			break;
 		}
 	} else if (sbinfo->max_inodes) {
@@ -2136,7 +2135,7 @@ static void shmem_destroy_inode(struct i
 {
 	if ((inode->i_mode & S_IFMT) == S_IFREG) {
 		/* only struct inode is valid if it's an inline symlink */
-		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+		mpol_free_shared_policy(inode->i_mapping->spolicy);
 	}
 	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [PATCH/RFC]  Page Cache Policy V0.0 3/5 alloc shared policies
From: Lee Schermerhorn @ 2006-04-20 20:47 UTC (permalink / raw)
  To: linux-mm; +Cc: Christoph Lameter, Andi Kleen, Eric Whitney

Page Cache Policy V0.0 3/5 alloc shared policies

Dynamically allocate shared policy structs to inode, as needed.

Initialize shmem and hugetlbfs inode/address_space spolicy
pointer to null, unless superblock [mount] specifies a 
non-default policy.  Make mpol_shared_policy_lookup()
just return NULL if spolicy ptr is NULL.  This will be
treated as default policy [or fallback to task policy?].

set_policy() ops must create shared_policy struct from new
cache when a new policy is installed and no spolicy exists.
mpol_free_shared_policy() must free the spolicy when inode
is destroyed.

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>

Index: linux-2.6.17-rc1-mm2/include/linux/shared_policy.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/shared_policy.h	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/shared_policy.h	2006-04-20 14:19:14.000000000 -0400
@@ -27,12 +27,12 @@ struct shared_policy {
 	spinlock_t lock;
 };
 
-void mpol_shared_policy_init(struct shared_policy *, int, nodemask_t *);
-int mpol_set_shared_policy(struct shared_policy *,
+extern struct shared_policy *mpol_shared_policy_new(int, nodemask_t *);
+extern int mpol_set_shared_policy(struct shared_policy *,
 				struct vm_area_struct *,
 				unsigned long, unsigned long,
 				struct mempolicy *);
-void mpol_free_shared_policy(struct shared_policy *);
+extern void mpol_free_shared_policy(struct shared_policy **);
 struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *,
 					    unsigned long);
 
@@ -51,7 +51,7 @@ static inline void mpol_shared_policy_in
 {
 }
 
-static inline void mpol_free_shared_policy(struct shared_policy *p)
+static inline void mpol_free_shared_policy(struct shared_policy **p)
 {
 }
 
Index: linux-2.6.17-rc1-mm2/mm/mempolicy.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/mempolicy.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/mempolicy.c	2006-04-20 14:19:14.000000000 -0400
@@ -97,6 +97,7 @@
 #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2)		/* Gather statistics */
 
 static struct kmem_cache *policy_cache;
+static struct kmem_cache *sp_cache;
 static struct kmem_cache *sn_cache;
 
 #define PDprintk(fmt...)
@@ -1511,10 +1512,14 @@ restart:
 	return 0;
 }
 
-void mpol_shared_policy_init(struct shared_policy *sp, int policy,
+struct shared_policy *mpol_shared_policy_new(int policy,
 				nodemask_t *policy_nodes)
 {
+	struct shared_policy *sp;
 
+	sp = kmem_cache_alloc(sp_cache, GFP_KERNEL);
+	if (!sp)
+		return NULL;
 	sp->root = RB_ROOT;
 	spin_lock_init(&sp->lock);
 
@@ -1535,6 +1540,7 @@ void mpol_shared_policy_init(struct shar
 			mpol_free(newpol);
 		}
 	}
+	return sp;
 }
 
 int mpol_set_shared_policy(struct shared_policy *sp,
@@ -1565,13 +1571,17 @@ int mpol_set_shared_policy(struct shared
 }
 
 /* Free a backing policy store on inode delete. */
-void mpol_free_shared_policy(struct shared_policy *sp)
+void mpol_free_shared_policy(struct shared_policy **spp)
 {
+	struct shared_policy *sp = *spp;
 	struct sp_node *n;
 	struct rb_node *next;
 
-	if (!sp->root.rb_node)
+	if (!sp || !sp->root.rb_node)
 		return;
+
+//TODO:   locking should be unnecessary as we're only called when
+//        destroying the inode
 	spin_lock(&sp->lock);
 	next = rb_first(&sp->root);
 	while (next) {
@@ -1582,6 +1592,8 @@ void mpol_free_shared_policy(struct shar
 		kmem_cache_free(sn_cache, n);
 	}
 	spin_unlock(&sp->lock);
+	kmem_cache_free(sp_cache, sp);
+	*spp = NULL;
 }
 
 /* assumes fs == KERNEL_DS */
@@ -1591,6 +1603,10 @@ void __init numa_policy_init(void)
 					 sizeof(struct mempolicy),
 					 0, SLAB_PANIC, NULL, NULL);
 
+	sp_cache = kmem_cache_create("shared_policy",
+				     sizeof(struct shared_policy),
+				     0, SLAB_PANIC, NULL, NULL);
+
 	sn_cache = kmem_cache_create("shared_policy_node",
 				     sizeof(struct sp_node),
 				     0, SLAB_PANIC, NULL, NULL);
Index: linux-2.6.17-rc1-mm2/mm/shmem.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/shmem.c	2006-04-20 14:18:12.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/shmem.c	2006-04-20 14:20:20.000000000 -0400
@@ -877,7 +877,8 @@ redirty:
 }
 
 #ifdef CONFIG_NUMA
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+static inline int shmem_parse_mpol(char *value, int *policy,
+				 nodemask_t *policy_nodes)
 {
 	char *nodelist = strchr(value, ':');
 	int err = 1;
@@ -1294,15 +1295,24 @@ static int shmem_populate(struct vm_area
 int shmem_set_policy(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end, struct mempolicy *new)
 {
-	return mpol_set_shared_policy(vma->vm_file->f_mapping->spolicy,
-					 vma, start, end, new);
+	struct shared_policy *sp = vma->vm_file->f_mapping->spolicy;
+
+	if (!sp) {
+		sp = mpol_shared_policy_new(MPOL_DEFAULT, NULL);
+		vma->vm_file->f_mapping->spolicy = sp;
+	}
+	return mpol_set_shared_policy(sp, vma, start, end, new);
 }
 
 struct mempolicy *
 shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
 {
-	return mpol_shared_policy_lookup(vma->vm_file->f_mapping->spolicy,
-		 vma_addr_to_pgoff(vma, addr, PAGE_SHIFT));
+	struct shared_policy *sp = vma->vm_file->f_mapping->spolicy;
+	if (!sp)
+		return NULL;
+
+	return mpol_shared_policy_lookup(sp,
+			 vma_addr_to_pgoff(vma, addr, PAGE_SHIFT));
 }
 #endif
 
@@ -1374,9 +1384,10 @@ shmem_get_inode(struct super_block *sb, 
 		case S_IFREG:
 			inode->i_op = &shmem_inode_operations;
 			inode->i_fop = &shmem_file_operations;
-			inode->i_mapping->spolicy = &info->policy;
-			mpol_shared_policy_init(inode->i_mapping->spolicy,
-					 sbinfo->policy, &sbinfo->policy_nodes);
+			if (sbinfo->policy != MPOL_DEFAULT)
+				inode->i_mapping->spolicy =
+					mpol_shared_policy_new(sbinfo->policy,
+							 &sbinfo->policy_nodes);
 			break;
 		case S_IFDIR:
 			inode->i_nlink++;
@@ -1385,15 +1396,6 @@ shmem_get_inode(struct super_block *sb, 
 			inode->i_op = &shmem_dir_inode_operations;
 			inode->i_fop = &simple_dir_operations;
 			break;
-		case S_IFLNK:
-			/*
-			 * Must not load anything in the rbtree,
-			 * mpol_free_shared_policy will not be called.
-			 */
-			inode->i_mapping->spolicy = &info->policy;
-			mpol_shared_policy_init(inode->i_mapping->spolicy,
-					 MPOL_DEFAULT, NULL);
-			break;
 		}
 	} else if (sbinfo->max_inodes) {
 		spin_lock(&sbinfo->stat_lock);
@@ -2135,7 +2137,7 @@ static void shmem_destroy_inode(struct i
 {
 	if ((inode->i_mode & S_IFMT) == S_IFREG) {
 		/* only struct inode is valid if it's an inline symlink */
-		mpol_free_shared_policy(inode->i_mapping->spolicy);
+		mpol_free_shared_policy(&inode->i_mapping->spolicy);
 	}
 	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
Index: linux-2.6.17-rc1-mm2/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/fs/hugetlbfs/inode.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/fs/hugetlbfs/inode.c	2006-04-20 14:19:14.000000000 -0400
@@ -357,7 +357,6 @@ static struct inode *hugetlbfs_get_inode
 
 	inode = new_inode(sb);
 	if (inode) {
-		struct hugetlbfs_inode_info *info;
 		inode->i_mode = mode;
 		inode->i_uid = uid;
 		inode->i_gid = gid;
@@ -366,8 +365,6 @@ static struct inode *hugetlbfs_get_inode
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		info = HUGETLBFS_I(inode);
-		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
@@ -380,7 +377,10 @@ static struct inode *hugetlbfs_get_inode
 			inode->i_op = &hugetlbfs_dir_inode_operations;
 			inode->i_fop = &simple_dir_operations;
 
-			/* directory inodes start off with i_nlink == 2 (for "." entry) */
+			/*
+			 * directory inodes start off with i_nlink == 2
+			 * (for "." entry)
+			 */
 			inode->i_nlink++;
 			break;
 		case S_IFLNK:
@@ -545,7 +545,7 @@ static struct inode *hugetlbfs_alloc_ino
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
 	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
-	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
+	mpol_free_shared_policy(&inode->i_mapping->spolicy);
 	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
 
Index: linux-2.6.17-rc1-mm2/include/linux/hugetlb.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/hugetlb.h	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/hugetlb.h	2006-04-20 14:19:14.000000000 -0400
@@ -138,7 +138,6 @@ struct hugetlbfs_sb_info {
 
 
 struct hugetlbfs_inode_info {
-	struct shared_policy policy;
 	/* Protected by the (global) hugetlb_lock */
 	unsigned long prereserved_hpages;
 	struct inode vfs_inode;
Index: linux-2.6.17-rc1-mm2/include/linux/shmem_fs.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/shmem_fs.h	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/shmem_fs.h	2006-04-20 14:19:14.000000000 -0400
@@ -14,7 +14,6 @@ struct shmem_inode_info {
 	unsigned long		alloced;	/* data pages alloced to file */
 	unsigned long		swapped;	/* subtotal assigned to swap */
 	unsigned long		next_index;	/* highest alloced index + 1 */
-	struct shared_policy	policy;		/* NUMA memory alloc policy */
 	struct page		*i_indirect;	/* top indirect blocks page */
 	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
 	struct list_head	swaplist;	/* chain of maybes on swap */
Index: linux-2.6.17-rc1-mm2/fs/inode.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/fs/inode.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/fs/inode.c	2006-04-20 14:19:14.000000000 -0400
@@ -165,6 +165,8 @@ static struct inode *alloc_inode(struct 
 			mapping->backing_dev_info = bdi;
 		}
 		memset(&inode->u, 0, sizeof(inode->u));
+
+		mapping->spolicy = NULL;
 		inode->i_mapping = mapping;
 	}
 	return inode;


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [PATCH/RFC] Page Cache Policy V0.0 4/5 add generic file set/get policy vm ops
From: Lee Schermerhorn @ 2006-04-20 20:49 UTC (permalink / raw)
  To: linux-mm; +Cc: Christoph Lameter, Andi Kleen, Eric Whitney

Page Cache Policy V0.0 4/5 add generic file set/get policy vm ops

Add set/get policy vm ops to generic_file_vm_ops in support of
mmap()ed file memory policies.

Note that these ops are identical in all but name to the shmem
policy vm ops as modified by this series.  Perhaps shmem could
use the "generic" ones if this series is eventually accepted.

Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>

Index: linux-2.6.17-rc1-mm2/mm/filemap.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/filemap.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/filemap.c	2006-04-20 14:27:12.000000000 -0400
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
+#include <linux/mempolicy.h>
 #include "filemap.h"
 #include "internal.h"
 
@@ -457,6 +458,29 @@ struct page *page_cache_alloc_cold(struc
 	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
 }
 EXPORT_SYMBOL(page_cache_alloc_cold);
+
+int generic_file_set_policy(struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, struct mempolicy *new)
+{
+	struct shared_policy *sp = vma->vm_file->f_mapping->spolicy;
+
+	if (!sp) {
+		sp = mpol_shared_policy_new(MPOL_DEFAULT, NULL);
+		vma->vm_file->f_mapping->spolicy = sp;
+	}
+	return mpol_set_shared_policy(sp, vma, start, end, new);
+}
+
+struct mempolicy *
+generic_file_get_policy(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct shared_policy *sp = vma->vm_file->f_mapping->spolicy;
+	if (!sp)
+		return NULL;
+
+	return mpol_shared_policy_lookup(sp,
+				 vma_addr_to_pgoff(vma, addr, PAGE_SHIFT));
+}
 #endif
 
 /*
@@ -1614,6 +1638,10 @@ EXPORT_SYMBOL(filemap_populate);
 struct vm_operations_struct generic_file_vm_ops = {
 	.nopage		= filemap_nopage,
 	.populate	= filemap_populate,
+#ifdef CONFIG_NUMA
+	.set_policy     = generic_file_set_policy,
+	.get_policy     = generic_file_get_policy,
+#endif
 };
 
 /* This is used for a general mmap of a disk file */


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* [PATCH/RFC] Page Cache Policy V0.0 5/5 - use file policy for page cache
From: Lee Schermerhorn @ 2006-04-20 20:50 UTC (permalink / raw)
  To: linux-mm; +Cc: Christoph Lameter, Andi Kleen, Eric Whitney

Page Cache Policy V0.0 5/5 - use file policy for page cache

This patch implements a "get_file_policy()" function, analogous
to get_vma_policy(), but for a given file[inode/mapping] at a
specified offset, using the shared_policy, if any, in the
file's address_space.  If no shared policy, returns the default
policy.

Implement alloc_page_pol() to allocate a page given a policy and
an offset.  No vma,addr needed.   alloc_page_pol() duplicated some
of the code in alloc_page_vma(), so this patch revises alloc_page_vma()
to just call alloc_page_pol() after looking up the policy.  This
change rippled into the interleaving functions.  Was able to
eliminate interleave_nid() by computing the offset at the call sites
and calling [modified] offset_il_node() directly.

	#if out interleave_nid() for now.  If no one complains,
	we can remove it.

Enhance page_cache_alloc[_cold]() to use get_file_policy() and
alloc_page_pol().  Because this would have duplicated a fair
bit of code, the patch extracts the common bits into
__page_cache_alloc() and passes the __GFP_COLD flag when called
from page_cache_alloc_cold(), 0 otherwise.

page_cache_alloc[_cold]() now take an additional offset/index
argument, available at all call sites, to lookup the appropriate
policy.  The patch fixes all in-kernel users of the modified
interfaces.


Signed-off-by:  Lee Schermerhorn <lee.schermerhorn@hp.com>

Index: linux-2.6.17-rc1-mm2/mm/filemap.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/filemap.c	2006-04-20 14:27:12.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/filemap.c	2006-04-20 15:24:53.000000000 -0400
@@ -439,23 +439,48 @@ int add_to_page_cache_lru(struct page *p
 }
 
 #ifdef CONFIG_NUMA
-struct page *page_cache_alloc(struct address_space *x)
+/*
+ * Return effective policy for file [address_space] at pgoff
+ */
+static struct mempolicy *get_file_policy(struct address_space *x, pgoff_t pgoff)
+{
+	struct shared_policy *sp = x->spolicy;
+	struct mempolicy *pol = NULL;
+
+	if (sp)
+		pol = mpol_shared_policy_lookup(sp, pgoff);
+	if (pol)
+		return pol;
+
+	return &default_policy;
+}
+
+static struct page *__page_cache_alloc(struct address_space *x, pgoff_t pgoff,
+					int cold)
 {
-	if (cpuset_do_page_mem_spread()) {
+	struct mempolicy *pol = get_file_policy(x, pgoff);
+	gfp_t gfp = mapping_gfp_mask(x) | cold;
+
+	/*
+	 * Only spread if default policy
+	 */
+	if (pol->policy == MPOL_DEFAULT && cpuset_do_page_mem_spread()) {
 		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, mapping_gfp_mask(x), 0);
+		return alloc_pages_node(n, gfp, 0);
 	}
-	return alloc_pages(mapping_gfp_mask(x), 0);
+
+	return alloc_page_pol(gfp, pol, pgoff);
+}
+
+struct page *page_cache_alloc(struct address_space *x, pgoff_t pgoff)
+{
+	return __page_cache_alloc(x, pgoff, 0);
 }
 EXPORT_SYMBOL(page_cache_alloc);
 
-struct page *page_cache_alloc_cold(struct address_space *x)
+struct page *page_cache_alloc_cold(struct address_space *x, pgoff_t pgoff)
 {
-	if (cpuset_do_page_mem_spread()) {
-		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, mapping_gfp_mask(x)|__GFP_COLD, 0);
-	}
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
+	return __page_cache_alloc(x, pgoff, __GFP_COLD);
 }
 EXPORT_SYMBOL(page_cache_alloc_cold);
 
@@ -973,7 +998,7 @@ no_cached_page:
 		 * page..
 		 */
 		if (!cached_page) {
-			cached_page = page_cache_alloc_cold(mapping);
+			cached_page = page_cache_alloc_cold(mapping, index);
 			if (!cached_page) {
 				desc->error = -ENOMEM;
 				goto out;
@@ -1237,7 +1262,7 @@ static int fastcall page_cache_read(stru
 	int ret;
 
 	do {
-		page = page_cache_alloc_cold(mapping);
+		page = page_cache_alloc_cold(mapping, offset);
 		if (!page)
 			return -ENOMEM;
 
@@ -1691,7 +1716,7 @@ repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
 		if (!cached_page) {
-			cached_page = page_cache_alloc_cold(mapping);
+			cached_page = page_cache_alloc_cold(mapping, index);
 			if (!cached_page)
 				return ERR_PTR(-ENOMEM);
 		}
@@ -1773,7 +1798,7 @@ repeat:
 	page = find_lock_page(mapping, index);
 	if (!page) {
 		if (!*cached_page) {
-			*cached_page = page_cache_alloc(mapping);
+			*cached_page = page_cache_alloc(mapping, index);
 			if (!*cached_page)
 				return NULL;
 		}
Index: linux-2.6.17-rc1-mm2/include/linux/gfp.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/gfp.h	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/gfp.h	2006-04-20 14:27:24.000000000 -0400
@@ -133,10 +133,13 @@ alloc_pages(gfp_t gfp_mask, unsigned int
 }
 extern struct page *alloc_page_vma(gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr);
+struct mempolicy;
+extern struct page *alloc_page_pol(gfp_t, struct mempolicy *, pgoff_t);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
 #define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#define alloc_page_pol(gfp_mask, pol, off)  alloc_pages(gfp_mask, 0)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
Index: linux-2.6.17-rc1-mm2/mm/mempolicy.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/mempolicy.c	2006-04-20 14:19:14.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/mempolicy.c	2006-04-20 14:30:28.000000000 -0400
@@ -1151,9 +1151,8 @@ unsigned slab_node(struct mempolicy *pol
 	}
 }
 
-/* Do static interleaving for a VMA with known offset. */
-static unsigned offset_il_node(struct mempolicy *pol,
-		struct vm_area_struct *vma, unsigned long off)
+/* Do static interleaving for a policy with known offset. */
+static unsigned offset_il_node(struct mempolicy *pol, pgoff_t off)
 {
 	unsigned nnodes = nodes_weight(pol->v.nodes);
 	unsigned target = (unsigned)off % nnodes;
@@ -1168,19 +1167,24 @@ static unsigned offset_il_node(struct me
 	return nid;
 }
 
+#if 0
+//TODO:  looks like this is unused after switching to explicit
+//       offsets for interleaving and calling offset_il_node()
+//       directly.  If no-one misses it, we can delete...
 /* Determine a node number for interleave */
 static inline unsigned interleave_nid(struct mempolicy *pol,
-		 struct vm_area_struct *vma, unsigned long addr, int shift)
+		 struct vm_area_struct *vma, pgoff_t off)
 {
 	if (vma) {
 		unsigned long off;
 
 		off = vma->vm_pgoff;
 		off += (addr - vma->vm_start) >> shift;
-		return offset_il_node(pol, vma, off);
+		return offset_il_node(pol, off);
 	} else
 		return interleave_nodes(pol);
 }
+#endif
 
 #ifdef CONFIG_HUGETLBFS
 /* Return a zonelist suitable for a huge page allocation. */
@@ -1191,7 +1195,8 @@ struct zonelist *huge_zonelist(struct vm
 	if (pol->policy == MPOL_INTERLEAVE) {
 		unsigned nid;
 
-		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+		nid = offset_il_node(pol,
+				vma_addr_to_pgoff(vma, addr, HPAGE_SHIFT));
 		return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
 	}
 	return zonelist_policy(GFP_HIGHUSER, pol);
@@ -1215,6 +1220,23 @@ static struct page *alloc_page_interleav
 	return page;
 }
 
+/*
+ * alloc_page_pol() -- allocate a page based on policy,offset.
+ * Used for mmap()ed file policy allocations where policy is based
+ * on file offset rather than a vma,addr pair
+ */
+struct page *alloc_page_pol(gfp_t gfp, struct mempolicy *pol, pgoff_t pgoff)
+{
+	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
+		unsigned nid;
+
+		nid = offset_il_node(pol, pgoff);
+		return alloc_page_interleave(gfp, 0, nid);
+	}
+	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+}
+EXPORT_SYMBOL(alloc_page_pol);
+
 /**
  * 	alloc_page_vma	- Allocate a page for a VMA.
  *
@@ -1244,13 +1266,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area
 
 	cpuset_update_task_memory_state();
 
-	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
-		unsigned nid;
-
-		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
-		return alloc_page_interleave(gfp, 0, nid);
-	}
-	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+	return alloc_page_pol(gfp, pol,
+				vma_addr_to_pgoff(vma, addr, PAGE_SHIFT));
 }
 
 /**
Index: linux-2.6.17-rc1-mm2/include/linux/pagemap.h
===================================================================
--- linux-2.6.17-rc1-mm2.orig/include/linux/pagemap.h	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/include/linux/pagemap.h	2006-04-20 14:27:24.000000000 -0400
@@ -52,15 +52,17 @@ static inline void mapping_set_gfp_mask(
 void release_pages(struct page **pages, int nr, int cold);
 
 #ifdef CONFIG_NUMA
-extern struct page *page_cache_alloc(struct address_space *x);
-extern struct page *page_cache_alloc_cold(struct address_space *x);
+extern struct page *page_cache_alloc(struct address_space *, pgoff_t);
+extern struct page *page_cache_alloc_cold(struct address_space *, pgoff_t);
 #else
-static inline struct page *page_cache_alloc(struct address_space *x)
+static inline struct page *page_cache_alloc(struct address_space *x,
+						pgoff_t off)
 {
 	return alloc_pages(mapping_gfp_mask(x), 0);
 }
 
-static inline struct page *page_cache_alloc_cold(struct address_space *x)
+static inline struct page *page_cache_alloc_cold(struct address_space *x,
+						pgoff_t off)
 {
 	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
 }
Index: linux-2.6.17-rc1-mm2/drivers/mtd/devices/block2mtd.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/drivers/mtd/devices/block2mtd.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/drivers/mtd/devices/block2mtd.c	2006-04-20 14:27:24.000000000 -0400
@@ -72,7 +72,7 @@ static void cache_readahead(struct addre
 		if (page)
 			continue;
 		read_unlock_irq(&mapping->tree_lock);
-		page = page_cache_alloc_cold(mapping);
+		page = page_cache_alloc_cold(mapping, pagei);
 		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
Index: linux-2.6.17-rc1-mm2/fs/splice.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/fs/splice.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/fs/splice.c	2006-04-20 14:27:24.000000000 -0400
@@ -278,7 +278,7 @@ find_page:
 			/*
 			 * page didn't exist, allocate one
 			 */
-			page = page_cache_alloc_cold(mapping);
+			page = page_cache_alloc_cold(mapping, index);
 			if (!page)
 				break;
 
Index: linux-2.6.17-rc1-mm2/mm/readahead.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/mm/readahead.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/mm/readahead.c	2006-04-20 14:27:24.000000000 -0400
@@ -298,7 +298,7 @@ __do_page_cache_readahead(struct address
 			continue;
 
 		read_unlock_irq(&mapping->tree_lock);
-		page = page_cache_alloc_cold(mapping);
+		page = page_cache_alloc_cold(mapping, page_offset);
 		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
Index: linux-2.6.17-rc1-mm2/fs/ntfs/file.c
===================================================================
--- linux-2.6.17-rc1-mm2.orig/fs/ntfs/file.c	2006-04-20 14:13:48.000000000 -0400
+++ linux-2.6.17-rc1-mm2/fs/ntfs/file.c	2006-04-20 14:27:24.000000000 -0400
@@ -425,7 +425,7 @@ static inline int __ntfs_grab_cache_page
 		pages[nr] = find_lock_page(mapping, index);
 		if (!pages[nr]) {
 			if (!*cached_page) {
-				*cached_page = page_cache_alloc(mapping);
+				*cached_page = page_cache_alloc(mapping, index);
 				if (unlikely(!*cached_page)) {
 					err = -ENOMEM;
 					goto err_out;


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [Patch: 001/006] pgdat allocation for new node add (specify node id)
From: Andrew Morton @ 2006-04-20 22:49 UTC (permalink / raw)
  To: Yasunori Goto; +Cc: linux-kernel, linux-mm
In-Reply-To: <20060420190338.EE4A.Y-GOTO@jp.fujitsu.com>

Yasunori Goto <y-goto@jp.fujitsu.com> wrote:
>
> +int add_memory(int nid, u64 start, u64 size)
>  +{
>  +	int ret;
>  +
>  +	/* call arch's memory hotadd */
>  +	ret = arch_add_memory(nid, start, size;
>  +
>  +	return ret;
>  +}

So this patch is missing a ), but your later patch which touches this code
actually has the ).  Which tells me that this isn't the correct version of
this patch.

I'll fix that all up, but I would ask you to carefully verify that the
patches which I merged are the ones which you meant to send, thanks.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [Patch: 003/006] pgdat allocation for new node add (generic alloc node_data)
From: Andrew Morton @ 2006-04-20 23:01 UTC (permalink / raw)
  To: Yasunori Goto; +Cc: linux-kernel, linux-mm
In-Reply-To: <20060420190547.EE4E.Y-GOTO@jp.fujitsu.com>

Yasunori Goto <y-goto@jp.fujitsu.com> wrote:
>
>  +#define generic_alloc_nodedata(nid)				\
>  +({								\
>  +	(pg_data_t *)kzalloc(sizeof(pg_data_t), GFP_KERNEL);	\
>  +})

In general, library functions which perform memory allocation should not
make assumptions about which gfp_t they are allowed to use.

So this really should be `generic_alloc_nodedata(nid, gfp_mask)'.

However, it's very desirable that memory allocations use GFP_KERNEL rather
than, say, GFP_ATOMIC.  So your interface here _forces_ callers to be in a
state where GFP_KERNEL is legal, which is good discipline.

Although if that turns out to be a problem, we can expect to see a sad
little patch from someone which tries to change this to GFP_ATOMIC, which
makes everything worse - even those callers who _can_ use GFP_KERNEL.

(In practice, NUMA developers seem to never test with sufficient
CONFIG_DEBUG_* flags enabled, and with CONFIG_PREEMPT, so they happily
don't get to discover their sleep-in-spinlock bugs anyway).

Anyway, on balance, I think it'd be best to convert this API to take a
gfp_t as well.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox