All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Rohland <cr@sap.com>
To: Linus Torvalds <torvalds@transmeta.com>,
	Stephen Tweedie <sct@redhat.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	MM mailing list <linux-mm@kvack.org>
Subject: [Patch] deadlock on write in tmpfs
Date: 01 May 2001 15:39:47 +0200	[thread overview]
Message-ID: <m3hez5ci6p.fsf@linux.local> (raw)

Hi Linus and Stephen,

tmpfs deadlocks when writing into a file from a mapping of the same
file. 

The problem is the following:

- shmem_file_write may call shmem_no_page and calls
  shmem_getpage_locked later,
- shmem_no_page calls shmem_getpage_locked
- shmem_getpage_locked may call shmem_writepage on page allocation

- shmem_file_write holds the inode semaphore
- shmem_getpage_locked prevent races against shmem_writepage with the
  shmem spinlock
- shmem_getpage_locked needs serialization against itself and
  shmem_truncate

The last was done with the inode semaphore, which deadlocks with
shmem_write

So I see two choices: 

1) Do not serialise the whole of shmem_getpage_locked but protect
   critical pathes with the spinlock and do retries after sleeps
2) Add another semaphore to serialize shmem_getpage_locked and
   shmem_truncate

I tried some time to get 1) done but the retry logic became way too
complicated. So the attached patch implements 2)

I still think it's ugly to add another semaphore, but it works.

Greetings
		Christoph

diff -uNr 2.4.4/include/linux/shmem_fs.h c/include/linux/shmem_fs.h
--- 2.4.4/include/linux/shmem_fs.h	Sun Apr 29 20:33:00 2001
+++ c/include/linux/shmem_fs.h	Sun Apr 29 22:43:56 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
 	spinlock_t	lock;
+	struct semaphore sem;
 	unsigned long	max_index;
 	swp_entry_t	i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
 	swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 2.4.4/mm/shmem.c c/mm/shmem.c
--- 2.4.4/mm/shmem.c	Mon Apr 30 09:45:39 2001
+++ c/mm/shmem.c	Tue May  1 15:15:38 2001
@@ -161,6 +161,7 @@
 	swp_entry_t **base, **ptr, **last;
 	struct shmem_inode_info * info = &inode->u.shmem_i;
 
+	down(&info->sem);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	spin_lock (&info->lock);
 	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -197,6 +198,7 @@
 	info->swapped -= freed;
 	shmem_recalc_inode(inode);
 	spin_unlock (&info->lock);
+	up(&info->sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +283,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode * inode, unsigned long idx)
 {
 	struct address_space * mapping = inode->i_mapping;
-	struct shmem_inode_info *info;
 	struct page * page;
 	swp_entry_t *entry;
 
-	info = &inode->u.shmem_i;
-
 repeat:
 	page = find_lock_page(mapping, idx);
 	if (page)
@@ -393,6 +392,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+	struct shmem_inode_info *info;
 	struct address_space * mapping = inode->i_mapping;
 	int error;
 
@@ -407,27 +407,28 @@
 		page_cache_release(*ptr);
 	}
 
-	down (&inode->i_sem);
-	/* retest we may have slept */
+	info = &inode->u.shmem_i;
+	down (&info->sem);
+	/* retest we may have slept */  	
+
+	*ptr = ERR_PTR(-EFAULT);
 	if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
-		goto sigbus;
-	*ptr = shmem_getpage_locked(inode, idx);
+		goto failed;
+
+	*ptr = shmem_getpage_locked(&inode->u.shmem_i, inode, idx);
 	if (IS_ERR (*ptr))
 		goto failed;
+
 	UnlockPage(*ptr);
-	up (&inode->i_sem);
+	up (&info->sem);
 	return 0;
 failed:
-	up (&inode->i_sem);
+	up (&info->sem);
 	error = PTR_ERR(*ptr);
-	*ptr = NOPAGE_OOM;
-	if (error != -EFBIG)
-		*ptr = NOPAGE_SIGBUS;
-	return error;
-sigbus:
-	up (&inode->i_sem);
 	*ptr = NOPAGE_SIGBUS;
-	return -EFAULT;
+	if (error == -ENOMEM)
+		*ptr = NOPAGE_OOM;
+	return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
@@ -500,6 +501,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
 	struct inode * inode;
+	struct shmem_inode_info *info;
 
 	spin_lock (&sb->u.shmem_sb.stat_lock);
 	if (!sb->u.shmem_sb.free_inodes) {
@@ -519,7 +521,9 @@
 		inode->i_rdev = to_kdev_t(dev);
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		spin_lock_init (&inode->u.shmem_i.lock);
+		info = &inode->u.shmem_i;
+		spin_lock_init (&info->lock);
+		sema_init (&info->sem, 1);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
@@ -549,6 +553,7 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
 	struct inode	*inode = file->f_dentry->d_inode; 
+	struct shmem_inode_info *info;
 	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
 	loff_t		pos;
 	struct page	*page;
@@ -624,7 +629,11 @@
 			__get_user(dummy, buf+bytes-1);
 		}
 
-		page = shmem_getpage_locked(inode, index);
+		info = &inode->u.shmem_i;
+		down (&info->sem);
+		page = shmem_getpage_locked(info, inode, index);
+		up (&info->sem);
+
 		status = PTR_ERR(page);
 		if (IS_ERR(page))
 			break;
@@ -635,7 +644,6 @@
 		}
 
 		kaddr = kmap(page);
-// can this do a truncated write? cr
 		status = copy_from_user(kaddr+offset, buf, bytes);
 		kunmap(page);
 		if (status)
@@ -932,7 +940,7 @@
 		
 	inode = dentry->d_inode;
 	down(&inode->i_sem);
-	page = shmem_getpage_locked(inode, 0);
+	page = shmem_getpage_locked(&inode->u.shmem_i, inode, 0);
 	if (IS_ERR(page))
 		goto fail;
 	kaddr = kmap(page);


WARNING: multiple messages have this Message-ID (diff)
From: Christoph Rohland <cr@sap.com>
To: Linus Torvalds <torvalds@transmeta.com>,
	Stephen Tweedie <sct@redhat.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	MM mailing list <linux-mm@kvack.org>
Subject: [Patch] deadlock on write in tmpfs
Date: 01 May 2001 15:39:47 +0200	[thread overview]
Message-ID: <m3hez5ci6p.fsf@linux.local> (raw)

Hi Linus and Stephen,

tmpfs deadlocks when writing into a file from a mapping of the same
file. 

The problem is the following:

- shmem_file_write may call shmem_no_page and calls
  shmem_getpage_locked later,
- shmem_no_page calls shmem_getpage_locked
- shmem_getpage_locked may call shmem_writepage on page allocation

- shmem_file_write holds the inode semaphore
- shmem_getpage_locked prevent races against shmem_writepage with the
  shmem spinlock
- shmem_getpage_locked needs serialization against itself and
  shmem_truncate

The last was done with the inode semaphore, which deadlocks with
shmem_write

So I see two choices: 

1) Do not serialise the whole of shmem_getpage_locked but protect
   critical pathes with the spinlock and do retries after sleeps
2) Add another semaphore to serialize shmem_getpage_locked and
   shmem_truncate

I tried some time to get 1) done but the retry logic became way too
complicated. So the attached patch implements 2)

I still think it's ugly to add another semaphore, but it works.

Greetings
		Christoph

diff -uNr 2.4.4/include/linux/shmem_fs.h c/include/linux/shmem_fs.h
--- 2.4.4/include/linux/shmem_fs.h	Sun Apr 29 20:33:00 2001
+++ c/include/linux/shmem_fs.h	Sun Apr 29 22:43:56 2001
@@ -19,6 +19,7 @@
 
 struct shmem_inode_info {
 	spinlock_t	lock;
+	struct semaphore sem;
 	unsigned long	max_index;
 	swp_entry_t	i_direct[SHMEM_NR_DIRECT]; /* for the first blocks */
 	swp_entry_t   **i_indirect; /* doubly indirect blocks */
diff -uNr 2.4.4/mm/shmem.c c/mm/shmem.c
--- 2.4.4/mm/shmem.c	Mon Apr 30 09:45:39 2001
+++ c/mm/shmem.c	Tue May  1 15:15:38 2001
@@ -161,6 +161,7 @@
 	swp_entry_t **base, **ptr, **last;
 	struct shmem_inode_info * info = &inode->u.shmem_i;
 
+	down(&info->sem);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	spin_lock (&info->lock);
 	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -197,6 +198,7 @@
 	info->swapped -= freed;
 	shmem_recalc_inode(inode);
 	spin_unlock (&info->lock);
+	up(&info->sem);
 }
 
 static void shmem_delete_inode(struct inode * inode)
@@ -281,15 +283,12 @@
  * still need to guard against racing with shm_writepage(), which might
  * be trying to move the page to the swap cache as we run.
  */
-static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
+static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode * inode, unsigned long idx)
 {
 	struct address_space * mapping = inode->i_mapping;
-	struct shmem_inode_info *info;
 	struct page * page;
 	swp_entry_t *entry;
 
-	info = &inode->u.shmem_i;
-
 repeat:
 	page = find_lock_page(mapping, idx);
 	if (page)
@@ -393,6 +392,7 @@
 
 static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
 {
+	struct shmem_inode_info *info;
 	struct address_space * mapping = inode->i_mapping;
 	int error;
 
@@ -407,27 +407,28 @@
 		page_cache_release(*ptr);
 	}
 
-	down (&inode->i_sem);
-	/* retest we may have slept */
+	info = &inode->u.shmem_i;
+	down (&info->sem);
+	/* retest we may have slept */  	
+
+	*ptr = ERR_PTR(-EFAULT);
 	if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
-		goto sigbus;
-	*ptr = shmem_getpage_locked(inode, idx);
+		goto failed;
+
+	*ptr = shmem_getpage_locked(&inode->u.shmem_i, inode, idx);
 	if (IS_ERR (*ptr))
 		goto failed;
+
 	UnlockPage(*ptr);
-	up (&inode->i_sem);
+	up (&info->sem);
 	return 0;
 failed:
-	up (&inode->i_sem);
+	up (&info->sem);
 	error = PTR_ERR(*ptr);
-	*ptr = NOPAGE_OOM;
-	if (error != -EFBIG)
-		*ptr = NOPAGE_SIGBUS;
-	return error;
-sigbus:
-	up (&inode->i_sem);
 	*ptr = NOPAGE_SIGBUS;
-	return -EFAULT;
+	if (error == -ENOMEM)
+		*ptr = NOPAGE_OOM;
+	return error;
 }
 
 struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
@@ -500,6 +501,7 @@
 struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
 {
 	struct inode * inode;
+	struct shmem_inode_info *info;
 
 	spin_lock (&sb->u.shmem_sb.stat_lock);
 	if (!sb->u.shmem_sb.free_inodes) {
@@ -519,7 +521,9 @@
 		inode->i_rdev = to_kdev_t(dev);
 		inode->i_mapping->a_ops = &shmem_aops;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		spin_lock_init (&inode->u.shmem_i.lock);
+		info = &inode->u.shmem_i;
+		spin_lock_init (&info->lock);
+		sema_init (&info->sem, 1);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
@@ -549,6 +553,7 @@
 shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
 {
 	struct inode	*inode = file->f_dentry->d_inode; 
+	struct shmem_inode_info *info;
 	unsigned long	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
 	loff_t		pos;
 	struct page	*page;
@@ -624,7 +629,11 @@
 			__get_user(dummy, buf+bytes-1);
 		}
 
-		page = shmem_getpage_locked(inode, index);
+		info = &inode->u.shmem_i;
+		down (&info->sem);
+		page = shmem_getpage_locked(info, inode, index);
+		up (&info->sem);
+
 		status = PTR_ERR(page);
 		if (IS_ERR(page))
 			break;
@@ -635,7 +644,6 @@
 		}
 
 		kaddr = kmap(page);
-// can this do a truncated write? cr
 		status = copy_from_user(kaddr+offset, buf, bytes);
 		kunmap(page);
 		if (status)
@@ -932,7 +940,7 @@
 		
 	inode = dentry->d_inode;
 	down(&inode->i_sem);
-	page = shmem_getpage_locked(inode, 0);
+	page = shmem_getpage_locked(&inode->u.shmem_i, inode, 0);
 	if (IS_ERR(page))
 		goto fail;
 	kaddr = kmap(page);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

             reply	other threads:[~2001-05-01 13:48 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-05-01 13:39 Christoph Rohland [this message]
2001-05-01 13:39 ` [Patch] deadlock on write in tmpfs Christoph Rohland
2001-05-01 16:32 ` Stephen C. Tweedie
2001-05-01 16:32   ` Stephen C. Tweedie
2001-05-02 12:00   ` Christoph Rohland
2001-05-02 12:00     ` Christoph Rohland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m3hez5ci6p.fsf@linux.local \
    --to=cr@sap.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=sct@redhat.com \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.