* [01/25] ALSA: mixart: range checking proc file
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
@ 2010-05-25 18:08 ` Greg KH
2010-05-25 18:09 ` [02/25] ext4: invalidate pages if delalloc block allocation fails Greg KH
` (23 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:08 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Dan Carpenter, Takashi Iwai
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Dan Carpenter <error27@gmail.com>
commit b0cc58a25d04160d39a80e436847eaa2fbc5aa09 upstream.
The original code doesn't take into consideration that the value of
MIXART_BA0_SIZE - pos can be less than zero which would lead to a large
unsigned value for "count".
Also I moved the check that read size is a multiple of 4 bytes below
the code that adjusts "count".
Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
sound/pci/mixart/mixart.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
--- a/sound/pci/mixart/mixart.c
+++ b/sound/pci/mixart/mixart.c
@@ -1160,13 +1160,15 @@ static long snd_mixart_BA0_read(struct s
unsigned long count, unsigned long pos)
{
struct mixart_mgr *mgr = entry->private_data;
+ unsigned long maxsize;
- count = count & ~3; /* make sure the read size is a multiple of 4 bytes */
- if(count <= 0)
+ if (pos >= MIXART_BA0_SIZE)
return 0;
- if(pos + count > MIXART_BA0_SIZE)
- count = (long)(MIXART_BA0_SIZE - pos);
- if(copy_to_user_fromio(buf, MIXART_MEM( mgr, pos ), count))
+ maxsize = MIXART_BA0_SIZE - pos;
+ if (count > maxsize)
+ count = maxsize;
+ count = count & ~3; /* make sure the read size is a multiple of 4 bytes */
+ if (copy_to_user_fromio(buf, MIXART_MEM(mgr, pos), count))
return -EFAULT;
return count;
}
@@ -1179,13 +1181,15 @@ static long snd_mixart_BA1_read(struct s
unsigned long count, unsigned long pos)
{
struct mixart_mgr *mgr = entry->private_data;
+ unsigned long maxsize;
- count = count & ~3; /* make sure the read size is a multiple of 4 bytes */
- if(count <= 0)
+ if (pos > MIXART_BA1_SIZE)
return 0;
- if(pos + count > MIXART_BA1_SIZE)
- count = (long)(MIXART_BA1_SIZE - pos);
- if(copy_to_user_fromio(buf, MIXART_REG( mgr, pos ), count))
+ maxsize = MIXART_BA1_SIZE - pos;
+ if (count > maxsize)
+ count = maxsize;
+ count = count & ~3; /* make sure the read size is a multiple of 4 bytes */
+ if (copy_to_user_fromio(buf, MIXART_REG(mgr, pos), count))
return -EFAULT;
return count;
}
^ permalink raw reply [flat|nested] 26+ messages in thread* [02/25] ext4: invalidate pages if delalloc block allocation fails.
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
2010-05-25 18:08 ` [01/25] ALSA: mixart: range checking proc file Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [03/25] percpu counter: clean up percpu_counter_sum_and_set() Greg KH
` (22 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Ext4 Developers List,
Theodore Tso, Jayson R. King, Aneesh Kumar K.V
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
commit c4a0c46ec92c194c873232b88debce4e1a448483 upstream.
We are a bit agressive in invalidating all the pages. But
it is ok because we really don't know why the block allocation
failed and it is better to come of the writeback path
so that user can look for more info.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/ext4/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 73 insertions(+), 12 deletions(-)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1821,6 +1821,39 @@ static inline void __unmap_underlying_bl
unmap_underlying_metadata(bdev, bh->b_blocknr + i);
}
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
+ sector_t logical, long blk_cnt)
+{
+ int nr_pages, i;
+ pgoff_t index, end;
+ struct pagevec pvec;
+ struct inode *inode = mpd->inode;
+ struct address_space *mapping = inode->i_mapping;
+
+ index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ end = (logical + blk_cnt - 1) >>
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ while (index <= end) {
+ nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ if (nr_pages == 0)
+ break;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+ index = page->index;
+ if (index > end)
+ break;
+ index++;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+ block_invalidatepage(page, 0);
+ ClearPageUptodate(page);
+ unlock_page(page);
+ }
+ }
+ return;
+}
+
/*
* mpage_da_map_blocks - go through given space
*
@@ -1830,7 +1863,7 @@ static inline void __unmap_underlying_bl
* The function skips space we know is already mapped to disk blocks.
*
*/
-static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+static int mpage_da_map_blocks(struct mpage_da_data *mpd)
{
int err = 0;
struct buffer_head *lbh = &mpd->lbh;
@@ -1841,7 +1874,7 @@ static void mpage_da_map_blocks(struct m
* We consider only non-mapped and non-allocated blocks
*/
if (buffer_mapped(lbh) && !buffer_delay(lbh))
- return;
+ return 0;
new.b_state = lbh->b_state;
new.b_blocknr = 0;
@@ -1852,10 +1885,38 @@ static void mpage_da_map_blocks(struct m
* to write simply return
*/
if (!new.b_size)
- return;
+ return 0;
err = mpd->get_block(mpd->inode, next, &new, 1);
- if (err)
- return;
+ if (err) {
+
+ /* If get block returns with error
+ * we simply return. Later writepage
+ * will redirty the page and writepages
+ * will find the dirty page again
+ */
+ if (err == -EAGAIN)
+ return 0;
+ /*
+ * get block failure will cause us
+ * to loop in writepages. Because
+ * a_ops->writepage won't be able to
+ * make progress. The page will be redirtied
+ * by writepage and writepages will again
+ * try to write the same.
+ */
+ printk(KERN_EMERG "%s block allocation failed for inode %lu "
+ "at logical offset %llu with max blocks "
+ "%zd with error %d\n",
+ __func__, mpd->inode->i_ino,
+ (unsigned long long)next,
+ lbh->b_size >> mpd->inode->i_blkbits, err);
+ printk(KERN_EMERG "This should not happen.!! "
+ "Data will be lost\n");
+ /* invlaidate all the pages */
+ ext4_da_block_invalidatepages(mpd, next,
+ lbh->b_size >> mpd->inode->i_blkbits);
+ return err;
+ }
BUG_ON(new.b_size == 0);
if (buffer_new(&new))
@@ -1868,7 +1929,7 @@ static void mpage_da_map_blocks(struct m
if (buffer_delay(lbh) || buffer_unwritten(lbh))
mpage_put_bnr_to_bhs(mpd, next, &new);
- return;
+ return 0;
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1937,8 +1998,8 @@ flush_it:
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
- mpage_da_map_blocks(mpd);
- mpage_da_submit_io(mpd);
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
mpd->io_done = 1;
return;
}
@@ -1980,8 +2041,8 @@ static int __mpage_da_writepage(struct p
* and start IO on them using writepage()
*/
if (mpd->next_page != mpd->first_page) {
- mpage_da_map_blocks(mpd);
- mpage_da_submit_io(mpd);
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
/*
* skip rest of the page in the page_vec
*/
@@ -2102,8 +2163,8 @@ static int mpage_da_writepages(struct ad
* Handle last extent of pages
*/
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- mpage_da_map_blocks(&mpd);
- mpage_da_submit_io(&mpd);
+ if (mpage_da_map_blocks(&mpd) == 0)
+ mpage_da_submit_io(&mpd);
}
wbc->nr_to_write = to_write - mpd.pages_written;
^ permalink raw reply [flat|nested] 26+ messages in thread* [03/25] percpu counter: clean up percpu_counter_sum_and_set()
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
2010-05-25 18:08 ` [01/25] ALSA: mixart: range checking proc file Greg KH
2010-05-25 18:09 ` [02/25] ext4: invalidate pages if delalloc block allocation fails Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [04/25] ext4: Make sure all the block allocation paths reserve blocks Greg KH
` (21 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Theodore Tso,
Ext4 Developers List, Mingming Cao, Jayson R. King,
Peter Zijlstra
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Mingming Cao <cmm@us.ibm.com>
commit 1f7c14c62ce63805f9574664a6c6de3633d4a354 upstream.
percpu_counter_sum_and_set() and percpu_counter_sum() is the same except
the former updates the global counter after accounting. Since we are
taking the fbc->lock to calculate the precise value of the counter in
percpu_counter_sum() anyway, it should simply set fbc->count too, as the
percpu_counter_sum_and_set() does.
This patch merges these two interfaces into one.
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
fs/ext4/balloc.c | 2 +-
include/linux/percpu_counter.h | 12 +++---------
lib/percpu_counter.c | 8 +++-----
3 files changed, 7 insertions(+), 15 deletions(-)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1778,7 +1778,7 @@ ext4_fsblk_t ext4_has_free_blocks(struct
#ifdef CONFIG_SMP
if (free_blocks - root_blocks < FBC_BATCH)
free_blocks =
- percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
+ percpu_counter_sum(&sbi->s_freeblocks_counter);
#endif
if (free_blocks <= root_blocks)
/* we don't have free space */
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percp
void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
+s64 __percpu_counter_sum(struct percpu_counter *fbc);
static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
@@ -44,19 +44,13 @@ static inline void percpu_counter_add(st
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
{
- s64 ret = __percpu_counter_sum(fbc, 0);
+ s64 ret = __percpu_counter_sum(fbc);
return ret < 0 ? 0 : ret;
}
-static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
-{
- return __percpu_counter_sum(fbc, 1);
-}
-
-
static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
{
- return __percpu_counter_sum(fbc, 0);
+ return __percpu_counter_sum(fbc);
}
static inline s64 percpu_counter_read(struct percpu_counter *fbc)
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
* Add up all the per-cpu counts, return the result. This is a more accurate
* but much slower version of percpu_counter_read_positive()
*/
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
{
s64 ret;
int cpu;
@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_c
for_each_online_cpu(cpu) {
s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
ret += *pcount;
- if (set)
- *pcount = 0;
+ *pcount = 0;
}
- if (set)
- fbc->count = ret;
+ fbc->count = ret;
spin_unlock(&fbc->lock);
return ret;
^ permalink raw reply [flat|nested] 26+ messages in thread* [04/25] ext4: Make sure all the block allocation paths reserve blocks
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (2 preceding siblings ...)
2010-05-25 18:09 ` [03/25] percpu counter: clean up percpu_counter_sum_and_set() Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [05/25] ext4: Add percpu dirty block accounting Greg KH
` (20 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Ext4 Developers List,
Theodore Tso, Jayson R. King, Aneesh Kumar K.V
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
commit a30d542a0035b886ffaafd0057ced0a2b28c3a4f upstream.
With delayed allocation we need to make sure block are reserved before
we attempt to allocate them. Otherwise we get block allocation failure
(ENOSPC) during writepages which cannot be handled. This would mean
silent data loss (We do a printk stating data will be lost). This patch
updates the DIO and fallocate code path to do block reservation before
block allocation. This is needed to make sure parallel DIO and fallocate
request doesn't take block out of delayed reserve space.
When free blocks count go below a threshold we switch to a slow patch
which looks at other CPU's accumulated percpu counter values.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/ext4/balloc.c | 58 +++++++++++++++++++++++++++++++++++++++---------------
fs/ext4/ext4.h | 13 ++++++++++++
fs/ext4/inode.c | 5 ----
fs/ext4/mballoc.c | 23 ++++++++++++---------
4 files changed, 69 insertions(+), 30 deletions(-)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1754,6 +1754,32 @@ out:
return ret;
}
+int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+ ext4_fsblk_t nblocks)
+{
+ s64 free_blocks;
+ ext4_fsblk_t root_blocks = 0;
+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+
+ free_blocks = percpu_counter_read(fbc);
+
+ if (!capable(CAP_SYS_RESOURCE) &&
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+ root_blocks = ext4_r_blocks_count(sbi->s_es);
+
+ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
+ free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
+
+ if (free_blocks < (root_blocks + nblocks))
+ /* we don't have free space */
+ return -ENOSPC;
+
+ /* reduce fs free blocks counter */
+ percpu_counter_sub(fbc, nblocks);
+ return 0;
+}
+
/**
* ext4_has_free_blocks()
* @sbi: in-core super block structure.
@@ -1775,18 +1801,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct
sbi->s_resuid != current->fsuid &&
(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
root_blocks = ext4_r_blocks_count(sbi->s_es);
-#ifdef CONFIG_SMP
- if (free_blocks - root_blocks < FBC_BATCH)
- free_blocks =
- percpu_counter_sum(&sbi->s_freeblocks_counter);
-#endif
+
+ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
+ free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
+
if (free_blocks <= root_blocks)
/* we don't have free space */
return 0;
if (free_blocks - root_blocks < nblocks)
return free_blocks - root_blocks;
return nblocks;
- }
+}
/**
@@ -1865,14 +1890,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_
/*
* With delalloc we already reserved the blocks
*/
- *count = ext4_has_free_blocks(sbi, *count);
- }
- if (*count == 0) {
- *errp = -ENOSPC;
- return 0; /*return with ENOSPC error */
+ if (ext4_claim_free_blocks(sbi, *count)) {
+ *errp = -ENOSPC;
+ return 0; /*return with ENOSPC error */
+ }
}
- num = *count;
-
/*
* Check quota for allocation of this block.
*/
@@ -2067,9 +2089,13 @@ allocated:
le16_add_cpu(&gdp->bg_free_blocks_count, -num);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
- if (!EXT4_I(inode)->i_delalloc_reserved_flag)
- percpu_counter_sub(&sbi->s_freeblocks_counter, num);
-
+ if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
+ /*
+ * we allocated less blocks than we
+ * claimed. Add the difference back.
+ */
+ percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
+ }
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
spin_lock(sb_bgl_lock(sbi, flex_group));
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1015,6 +1015,8 @@ extern ext4_fsblk_t ext4_new_blocks(hand
unsigned long *count, int *errp);
extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+ ext4_fsblk_t nblocks);
extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
ext4_fsblk_t nblocks);
extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
@@ -1245,6 +1247,17 @@ do { \
__ext4_std_error((sb), __func__, (errno)); \
} while (0)
+#ifdef CONFIG_SMP
+/* Each CPU can accumulate FBC_BATCH blocks in their local
+ * counters. So we need to make sure we have free blocks more
+ * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
+ */
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#else
+#define EXT4_FREEBLOCKS_WATERMARK 0
+#endif
+
+
/*
* Inodes and files operations
*/
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1564,13 +1564,10 @@ static int ext4_da_reserve_space(struct
md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
total = md_needed + nrblocks;
- if (ext4_has_free_blocks(sbi, total) < total) {
+ if (ext4_claim_free_blocks(sbi, total)) {
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return -ENOSPC;
}
- /* reduce fs free blocks counter */
- percpu_counter_sub(&sbi->s_freeblocks_counter, total);
-
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3194,9 +3194,15 @@ ext4_mb_mark_diskspace_used(struct ext4_
* at write_begin() time for delayed allocation
* do not double accounting
*/
- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
- percpu_counter_sub(&sbi->s_freeblocks_counter,
- ac->ac_b_ex.fe_len);
+ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
+ ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
+ /*
+ * we allocated less blocks than we calimed
+ * Add the difference back
+ */
+ percpu_counter_add(&sbi->s_freeblocks_counter,
+ ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
+ }
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4649,14 +4655,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
/*
* With delalloc we already reserved the blocks
*/
- ar->len = ext4_has_free_blocks(sbi, ar->len);
- }
-
- if (ar->len == 0) {
- *errp = -ENOSPC;
- return 0;
+ if (ext4_claim_free_blocks(sbi, ar->len)) {
+ *errp = -ENOSPC;
+ return 0;
+ }
}
-
while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
ar->len--;
^ permalink raw reply [flat|nested] 26+ messages in thread* [05/25] ext4: Add percpu dirty block accounting.
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (3 preceding siblings ...)
2010-05-25 18:09 ` [04/25] ext4: Make sure all the block allocation paths reserve blocks Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [06/25] ext4: Retry block reservation Greg KH
` (19 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Theodore Tso,
Ext4 Developers List, Mingming Cao, Jayson R. King,
Aneesh Kumar K.V
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
commit 6bc6e63fcd7dac9e633ea29f1fddd9580ab28f3f upstream.
This patch adds dirty block accounting using percpu_counters. Delayed
allocation block reservation is now done by updating dirty block
counter. In a later patch we switch to non delalloc mode if the
filesystem free blocks is greater than 150% of total filesystem dirty
blocks
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao<cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/ext4/balloc.c | 62 ++++++++++++++++++++++++++++++++++--------------------
fs/ext4/ext4_sb.h | 1
fs/ext4/inode.c | 22 +++++++++----------
fs/ext4/mballoc.c | 31 ++++++++++++---------------
fs/ext4/super.c | 8 ++++++
5 files changed, 73 insertions(+), 51 deletions(-)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1757,26 +1757,38 @@ out:
int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
ext4_fsblk_t nblocks)
{
- s64 free_blocks;
+ s64 free_blocks, dirty_blocks;
ext4_fsblk_t root_blocks = 0;
struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
- free_blocks = percpu_counter_read(fbc);
+ free_blocks = percpu_counter_read_positive(fbc);
+ dirty_blocks = percpu_counter_read_positive(dbc);
if (!capable(CAP_SYS_RESOURCE) &&
sbi->s_resuid != current->fsuid &&
(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
root_blocks = ext4_r_blocks_count(sbi->s_es);
- if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
- free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
-
- if (free_blocks < (root_blocks + nblocks))
+ if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+ EXT4_FREEBLOCKS_WATERMARK) {
+ free_blocks = percpu_counter_sum(fbc);
+ dirty_blocks = percpu_counter_sum(dbc);
+ if (dirty_blocks < 0) {
+ printk(KERN_CRIT "Dirty block accounting "
+ "went wrong %lld\n",
+ dirty_blocks);
+ }
+ }
+ /* Check whether we have space after
+ * accounting for current dirty blocks
+ */
+ if (free_blocks < ((s64)(root_blocks + nblocks) + dirty_blocks))
/* we don't have free space */
return -ENOSPC;
- /* reduce fs free blocks counter */
- percpu_counter_sub(fbc, nblocks);
+ /* Add the blocks to nblocks */
+ percpu_counter_add(dbc, nblocks);
return 0;
}
@@ -1792,23 +1804,28 @@ int ext4_claim_free_blocks(struct ext4_s
ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
ext4_fsblk_t nblocks)
{
- ext4_fsblk_t free_blocks;
+ ext4_fsblk_t free_blocks, dirty_blocks;
ext4_fsblk_t root_blocks = 0;
+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
+ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
- free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+ free_blocks = percpu_counter_read_positive(fbc);
+ dirty_blocks = percpu_counter_read_positive(dbc);
if (!capable(CAP_SYS_RESOURCE) &&
sbi->s_resuid != current->fsuid &&
(sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
root_blocks = ext4_r_blocks_count(sbi->s_es);
- if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
- free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
-
- if (free_blocks <= root_blocks)
+ if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
+ EXT4_FREEBLOCKS_WATERMARK) {
+ free_blocks = percpu_counter_sum_positive(fbc);
+ dirty_blocks = percpu_counter_sum_positive(dbc);
+ }
+ if (free_blocks <= (root_blocks + dirty_blocks))
/* we don't have free space */
return 0;
- if (free_blocks - root_blocks < nblocks)
+ if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
return free_blocks - root_blocks;
return nblocks;
}
@@ -2089,13 +2106,14 @@ allocated:
le16_add_cpu(&gdp->bg_free_blocks_count, -num);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
- if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
- /*
- * we allocated less blocks than we
- * claimed. Add the difference back.
- */
- percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
- }
+ percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+ /*
+ * Now reduce the dirty block count also. Should not go negative
+ */
+ if (!EXT4_I(inode)->i_delalloc_reserved_flag)
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, *count);
+ else
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, num);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
spin_lock(sb_bgl_lock(sbi, flex_group));
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -60,6 +60,7 @@ struct ext4_sb_info {
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
+ struct percpu_counter s_dirtyblocks_counter;
struct blockgroup_lock s_blockgroup_lock;
/* root of the per fs reservation window tree */
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1032,19 +1032,20 @@ static void ext4_da_update_reserve_space
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
- /* update fs free blocks counter for truncate case */
- percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+ if (mdb_free) {
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+ /* update fs dirty blocks counter */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ }
/* update per-inode reservations */
BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
EXT4_I(inode)->i_reserved_data_blocks -= used;
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
/*
@@ -1609,8 +1610,8 @@ static void ext4_da_release_space(struct
release = to_free + mdb_free;
- /* update fs free blocks counter for truncate case */
- percpu_counter_add(&sbi->s_freeblocks_counter, release);
+ /* update fs dirty blocks counter for truncate case */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
/* update per-inode reservations */
BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
@@ -2546,7 +2547,6 @@ static int ext4_da_write_begin(struct fi
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
-
retry:
/*
* With delayed allocation, we don't log the i_disksize update
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3100,7 +3100,7 @@ void exit_ext4_mballoc(void)
*/
static noinline_for_stack int
ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
- handle_t *handle)
+ handle_t *handle, unsigned long reserv_blks)
{
struct buffer_head *bitmap_bh = NULL;
struct ext4_super_block *es;
@@ -3188,21 +3188,16 @@ ext4_mb_mark_diskspace_used(struct ext4_
le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-
+ percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
/*
- * free blocks account has already be reduced/reserved
- * at write_begin() time for delayed allocation
- * do not double accounting
+ * Now reduce the dirty block count also. Should not go negative
*/
- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
- ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
- /*
- * we allocated less blocks than we calimed
- * Add the difference back
- */
- percpu_counter_add(&sbi->s_freeblocks_counter,
- ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
- }
+ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+ /* release all the reserved blocks if non delalloc */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
+ else
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+ ac->ac_b_ex.fe_len);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4636,12 +4631,13 @@ static int ext4_mb_discard_preallocation
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
struct ext4_allocation_request *ar, int *errp)
{
+ int freed;
struct ext4_allocation_context *ac = NULL;
struct ext4_sb_info *sbi;
struct super_block *sb;
ext4_fsblk_t block = 0;
- int freed;
- int inquota;
+ unsigned long inquota;
+ unsigned long reserv_blks = 0;
sb = ar->inode->i_sb;
sbi = EXT4_SB(sb);
@@ -4659,6 +4655,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
*errp = -ENOSPC;
return 0;
}
+ reserv_blks = ar->len;
}
while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
@@ -4704,7 +4701,7 @@ repeat:
ext4_mb_new_preallocation(ac);
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
- *errp = ext4_mb_mark_diskspace_used(ac, handle);
+ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
if (*errp == -EAGAIN) {
/*
* drop the reference that we took
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -521,6 +521,7 @@ static void ext4_put_super(struct super_
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
@@ -2280,6 +2281,9 @@ static int ext4_fill_super(struct super_
err = percpu_counter_init(&sbi->s_dirs_counter,
ext4_count_dirs(sb));
}
+ if (!err) {
+ err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+ }
if (err) {
printk(KERN_ERR "EXT4-fs: insufficient memory\n");
goto failed_mount3;
@@ -2517,6 +2521,7 @@ failed_mount3:
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
@@ -3208,7 +3213,8 @@ static int ext4_statfs(struct dentry *de
buf->f_type = EXT4_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
- buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
+ buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+ percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
ext4_free_blocks_count_set(es, buf->f_bfree);
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
if (buf->f_bfree < ext4_r_blocks_count(es))
^ permalink raw reply [flat|nested] 26+ messages in thread* [06/25] ext4: Retry block reservation
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (4 preceding siblings ...)
2010-05-25 18:09 ` [05/25] ext4: Add percpu dirty block accounting Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [07/25] ext4: Retry block allocation if we have free blocks left Greg KH
` (18 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Theodore Tso,
Ext4 Developers List, Mingming Cao, Jayson R. King,
Aneesh Kumar K.V
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
commit 030ba6bc67b4f2bc5cd174f57785a1745c929abe upstream.
During block reservation if we don't have enough blocks left, retry
block reservation with smaller block counts. This makes sure we try
fallocate and DIO with smaller request size and don't fail early. The
delayed allocation reservation cannot try with smaller block count. So
retry block reservation to handle temporary disk full conditions. Also
print free blocks details if we fail block allocation during writepages.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/ext4/balloc.c | 8 +++++++-
fs/ext4/inode.c | 14 +++++++++++---
fs/ext4/mballoc.c | 7 ++++++-
3 files changed, 24 insertions(+), 5 deletions(-)
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1907,10 +1907,16 @@ ext4_fsblk_t ext4_old_new_blocks(handle_
/*
* With delalloc we already reserved the blocks
*/
- if (ext4_claim_free_blocks(sbi, *count)) {
+ while (*count && ext4_claim_free_blocks(sbi, *count)) {
+ /* let others to free the space */
+ yield();
+ *count = *count >> 1;
+ }
+ if (!*count) {
*errp = -ENOSPC;
return 0; /*return with ENOSPC error */
}
+ num = *count;
}
/*
* Check quota for allocation of this block.
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1549,6 +1549,7 @@ static int ext4_journalled_write_end(str
static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{
+ int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned long md_needed, mdblocks, total = 0;
@@ -1557,6 +1558,7 @@ static int ext4_da_reserve_space(struct
* in order to allocate nrblocks
* worse case is one extent per block
*/
+repeat:
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
mdblocks = ext4_calc_metadata_amount(inode, total);
@@ -1567,6 +1569,10 @@ static int ext4_da_reserve_space(struct
if (ext4_claim_free_blocks(sbi, total)) {
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ yield();
+ goto repeat;
+ }
return -ENOSPC;
}
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
@@ -1864,20 +1870,18 @@ static void ext4_da_block_invalidatepage
static int mpage_da_map_blocks(struct mpage_da_data *mpd)
{
int err = 0;
+ struct buffer_head new;
struct buffer_head *lbh = &mpd->lbh;
sector_t next = lbh->b_blocknr;
- struct buffer_head new;
/*
* We consider only non-mapped and non-allocated blocks
*/
if (buffer_mapped(lbh) && !buffer_delay(lbh))
return 0;
-
new.b_state = lbh->b_state;
new.b_blocknr = 0;
new.b_size = lbh->b_size;
-
/*
* If we didn't accumulate anything
* to write simply return
@@ -1910,6 +1914,10 @@ static int mpage_da_map_blocks(struct m
lbh->b_size >> mpd->inode->i_blkbits, err);
printk(KERN_EMERG "This should not happen.!! "
"Data will be lost\n");
+ if (err == -ENOSPC) {
+ printk(KERN_CRIT "Total free blocks count %lld\n",
+ ext4_count_free_blocks(mpd->inode->i_sb));
+ }
/* invlaidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
lbh->b_size >> mpd->inode->i_blkbits);
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4651,7 +4651,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
/*
* With delalloc we already reserved the blocks
*/
- if (ext4_claim_free_blocks(sbi, ar->len)) {
+ while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
+ /* let others to free the space */
+ yield();
+ ar->len = ar->len >> 1;
+ }
+ if (!ar->len) {
*errp = -ENOSPC;
return 0;
}
^ permalink raw reply [flat|nested] 26+ messages in thread* [07/25] ext4: Retry block allocation if we have free blocks left
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (5 preceding siblings ...)
2010-05-25 18:09 ` [06/25] ext4: Retry block reservation Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [08/25] ext4: Use tag dirty lookup during mpage_da_submit_io Greg KH
` (17 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Theodore Tso,
Ext4 Developers List, Mingming Cao, Jayson R. King,
Aneesh Kumar K.V
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
commit df22291ff0fde0d350cf15dac3e5cc33ac528875 upstream.
When we truncate files, the meta-data blocks released are not reused
untill we commit the truncate transaction. That means delayed get_block
request will return ENOSPC even if we have free blocks left. Force a
journal commit and retry block allocation if we get ENOSPC with free
blocks left.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/ext4/inode.c | 81 +++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 57 insertions(+), 24 deletions(-)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1661,6 +1661,7 @@ struct mpage_da_data {
struct writeback_control *wbc;
int io_done;
long pages_written;
+ int retval;
};
/*
@@ -1858,6 +1859,24 @@ static void ext4_da_block_invalidatepage
return;
}
+static void ext4_print_free_blocks(struct inode *inode)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ printk(KERN_EMERG "Total free blocks count %lld\n",
+ ext4_count_free_blocks(inode->i_sb));
+ printk(KERN_EMERG "Free/Dirty block details\n");
+ printk(KERN_EMERG "free_blocks=%lld\n",
+ percpu_counter_sum(&sbi->s_freeblocks_counter));
+ printk(KERN_EMERG "dirty_blocks=%lld\n",
+ percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+ printk(KERN_EMERG "Block reservation details\n");
+ printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
+ EXT4_I(inode)->i_reserved_data_blocks);
+ printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
+ EXT4_I(inode)->i_reserved_meta_blocks);
+ return;
+}
+
/*
* mpage_da_map_blocks - go through given space
*
@@ -1872,7 +1891,7 @@ static int mpage_da_map_blocks(struct m
int err = 0;
struct buffer_head new;
struct buffer_head *lbh = &mpd->lbh;
- sector_t next = lbh->b_blocknr;
+ sector_t next;
/*
* We consider only non-mapped and non-allocated blocks
@@ -1882,6 +1901,7 @@ static int mpage_da_map_blocks(struct m
new.b_state = lbh->b_state;
new.b_blocknr = 0;
new.b_size = lbh->b_size;
+ next = lbh->b_blocknr;
/*
* If we didn't accumulate anything
* to write simply return
@@ -1898,6 +1918,13 @@ static int mpage_da_map_blocks(struct m
*/
if (err == -EAGAIN)
return 0;
+
+ if (err == -ENOSPC &&
+ ext4_count_free_blocks(mpd->inode->i_sb)) {
+ mpd->retval = err;
+ return 0;
+ }
+
/*
* get block failure will cause us
* to loop in writepages. Because
@@ -1915,8 +1942,7 @@ static int mpage_da_map_blocks(struct m
printk(KERN_EMERG "This should not happen.!! "
"Data will be lost\n");
if (err == -ENOSPC) {
- printk(KERN_CRIT "Total free blocks count %lld\n",
- ext4_count_free_blocks(mpd->inode->i_sb));
+ ext4_print_free_blocks(mpd->inode);
}
/* invlaidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
@@ -2141,39 +2167,36 @@ static int __mpage_da_writepage(struct p
*/
static int mpage_da_writepages(struct address_space *mapping,
struct writeback_control *wbc,
- get_block_t get_block)
+ struct mpage_da_data *mpd)
{
- struct mpage_da_data mpd;
long to_write;
int ret;
- if (!get_block)
+ if (!mpd->get_block)
return generic_writepages(mapping, wbc);
- mpd.wbc = wbc;
- mpd.inode = mapping->host;
- mpd.lbh.b_size = 0;
- mpd.lbh.b_state = 0;
- mpd.lbh.b_blocknr = 0;
- mpd.first_page = 0;
- mpd.next_page = 0;
- mpd.get_block = get_block;
- mpd.io_done = 0;
- mpd.pages_written = 0;
+ mpd->lbh.b_size = 0;
+ mpd->lbh.b_state = 0;
+ mpd->lbh.b_blocknr = 0;
+ mpd->first_page = 0;
+ mpd->next_page = 0;
+ mpd->io_done = 0;
+ mpd->pages_written = 0;
+ mpd->retval = 0;
to_write = wbc->nr_to_write;
- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+ ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
/*
* Handle last extent of pages
*/
- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- if (mpage_da_map_blocks(&mpd) == 0)
- mpage_da_submit_io(&mpd);
+ if (!mpd->io_done && mpd->next_page != mpd->first_page) {
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
}
- wbc->nr_to_write = to_write - mpd.pages_written;
+ wbc->nr_to_write = to_write - mpd->pages_written;
return ret;
}
@@ -2420,6 +2443,7 @@ static int ext4_da_writepages(struct add
{
handle_t *handle = NULL;
loff_t range_start = 0;
+ struct mpage_da_data mpd;
struct inode *inode = mapping->host;
int needed_blocks, ret = 0, nr_to_writebump = 0;
long to_write, pages_skipped = 0;
@@ -2467,6 +2491,9 @@ static int ext4_da_writepages(struct add
range_start = wbc->range_start;
pages_skipped = wbc->pages_skipped;
+ mpd.wbc = wbc;
+ mpd.inode = mapping->host;
+
restart_loop:
to_write = wbc->nr_to_write;
while (!ret && to_write > 0) {
@@ -2502,11 +2529,17 @@ restart_loop:
goto out_writepages;
}
}
-
to_write -= wbc->nr_to_write;
- ret = mpage_da_writepages(mapping, wbc,
- ext4_da_get_block_write);
+
+ mpd.get_block = ext4_da_get_block_write;
+ ret = mpage_da_writepages(mapping, wbc, &mpd);
+
ext4_journal_stop(handle);
+
+ if (mpd.retval == -ENOSPC)
+ jbd2_journal_force_commit_nested(sbi->s_journal);
+
+ /* reset the retry count */
if (ret == MPAGE_DA_EXTENT_TAIL) {
/*
* got one extent now try with
^ permalink raw reply [flat|nested] 26+ messages in thread* [08/25] ext4: Use tag dirty lookup during mpage_da_submit_io
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (6 preceding siblings ...)
2010-05-25 18:09 ` [07/25] ext4: Retry block allocation if we have free blocks left Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [09/25] vfs: Remove the range_cont writeback mode Greg KH
` (16 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Ext4 Developers List,
Theodore Tso, Jayson R. King, Aneesh Kumar K.V
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
commit af6f029d3836eb7264cd3fbb13a6baf0e5fdb5ea upstream.
This enables us to drop the range_cont writeback mode
use from ext4_da_writepages.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/ext4/inode.c | 30 +++++++++++++-----------------
1 file changed, 13 insertions(+), 17 deletions(-)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1699,17 +1699,23 @@ static int mpage_da_submit_io(struct mpa
pagevec_init(&pvec, 0);
while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ /*
+ * We can use PAGECACHE_TAG_DIRTY lookup here because
+ * even though we have cleared the dirty flag on the page
+ * We still keep the page in the radix tree with tag
+ * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
+ * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
+ * which is called via the below writepage callback.
+ */
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index,
+ (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- index = page->index;
- if (index > end)
- break;
- index++;
-
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
@@ -2442,7 +2448,6 @@ static int ext4_da_writepages(struct add
struct writeback_control *wbc)
{
handle_t *handle = NULL;
- loff_t range_start = 0;
struct mpage_da_data mpd;
struct inode *inode = mapping->host;
int needed_blocks, ret = 0, nr_to_writebump = 0;
@@ -2481,14 +2486,7 @@ static int ext4_da_writepages(struct add
wbc->nr_to_write = sbi->s_mb_stream_request;
}
- if (!wbc->range_cyclic)
- /*
- * If range_cyclic is not set force range_cont
- * and save the old writeback_index
- */
- wbc->range_cont = 1;
- range_start = wbc->range_start;
pages_skipped = wbc->pages_skipped;
mpd.wbc = wbc;
@@ -2559,9 +2557,8 @@ restart_loop:
wbc->nr_to_write = to_write;
}
- if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
+ if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
/* We skipped pages in this loop */
- wbc->range_start = range_start;
wbc->nr_to_write = to_write +
wbc->pages_skipped - pages_skipped;
wbc->pages_skipped = pages_skipped;
@@ -2570,7 +2567,6 @@ restart_loop:
out_writepages:
wbc->nr_to_write = to_write - nr_to_writebump;
- wbc->range_start = range_start;
return ret;
}
^ permalink raw reply [flat|nested] 26+ messages in thread* [09/25] vfs: Remove the range_cont writeback mode.
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (7 preceding siblings ...)
2010-05-25 18:09 ` [08/25] ext4: Use tag dirty lookup during mpage_da_submit_io Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [10/25] tty: release_one_tty() forgets to put pids Greg KH
` (15 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, linux-fsdevel,
Ext4 Developers List, Theodore Tso, Jayson R. King,
Aneesh Kumar K.V
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
commit 74baaaaec8b4f22e1ae279f5ecca4ff705b28912 upstream.
Ext4 was the only user of range_cont writeback mode and ext4 switched
to a different method. So remove the range_cont mode which is not used
in the kernel.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
CC: linux-fsdevel@vger.kernel.org
Signed-off-by: Jayson R. King <dev@jaysonking.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
include/linux/writeback.h | 1 -
mm/page-writeback.c | 2 --
2 files changed, 3 deletions(-)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -62,7 +62,6 @@ struct writeback_control {
unsigned for_writepages:1; /* This is a writepages() call */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned more_io:1; /* more io to be dispatched */
- unsigned range_cont:1;
};
/*
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1027,8 +1027,6 @@ continue_unlock:
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
- if (wbc->range_cont)
- wbc->range_start = index << PAGE_CACHE_SHIFT;
return ret;
}
EXPORT_SYMBOL(write_cache_pages);
^ permalink raw reply [flat|nested] 26+ messages in thread* [10/25] tty: release_one_tty() forgets to put pids
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (8 preceding siblings ...)
2010-05-25 18:09 ` [09/25] vfs: Remove the range_cont writeback mode Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [11/25] [SCSI] megaraid_sas: fix for 32bit apps Greg KH
` (14 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Oleg Nesterov,
Eric W. Biederman
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Oleg Nesterov <oleg@redhat.com>
commit 6da8d866d0d39e9509ff826660f6a86a6757c966 upstream.
release_one_tty(tty) can be called when tty still has a reference
to pgrp/session. In this case we leak the pid.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-and-tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/char/tty_io.c | 2 ++
1 file changed, 2 insertions(+)
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1893,6 +1893,8 @@ static void release_one_tty(struct tty_s
list_del_init(&tty->tty_files);
file_list_unlock();
+ put_pid(tty->pgrp);
+ put_pid(tty->session);
free_tty_struct(tty);
}
^ permalink raw reply [flat|nested] 26+ messages in thread* [11/25] [SCSI] megaraid_sas: fix for 32bit apps
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (9 preceding siblings ...)
2010-05-25 18:09 ` [10/25] tty: release_one_tty() forgets to put pids Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [12/25] trace: Fix inappropriate substraction on tracing_pages_allocated in trace_free_page() Greg KH
` (13 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Tomas Henzl, Bo Yang,
James Bottomley
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Tomas Henzl <thenzl@redhat.com>
commit b3dc1a212e5167984616445990c76056034f8eeb upstream.
It looks like this patch -
commit 7b2519afa1abd1b9f63aa1e90879307842422dae
Author: Yang, Bo <Bo.Yang@lsi.com>
Date: Tue Oct 6 14:52:20 2009 -0600
[SCSI] megaraid_sas: fix 64 bit sense pointer truncation
has caused a problem for 32bit programs with 64bit os -
http://bugzilla.kernel.org/show_bug.cgi?id=15001
fix by converting the user space 32bit pointer to a 64 bit one when
needed.
[jejb: fix up some 64 bit warnings]
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Cc: Bo Yang <Bo.Yang@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/scsi/megaraid/megaraid_sas.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -3292,6 +3292,7 @@ static int megasas_mgmt_compat_ioctl_fw(
compat_alloc_user_space(sizeof(struct megasas_iocpacket));
int i;
int error = 0;
+ compat_uptr_t ptr;
if (clear_user(ioc, sizeof(*ioc)))
return -EFAULT;
@@ -3304,9 +3305,22 @@ static int megasas_mgmt_compat_ioctl_fw(
copy_in_user(&ioc->sge_count, &cioc->sge_count, sizeof(u32)))
return -EFAULT;
- for (i = 0; i < MAX_IOCTL_SGE; i++) {
- compat_uptr_t ptr;
+ /*
+ * The sense_ptr is used in megasas_mgmt_fw_ioctl only when
+ * sense_len is not null, so prepare the 64bit value under
+ * the same condition.
+ */
+ if (ioc->sense_len) {
+ void __user **sense_ioc_ptr =
+ (void __user **)(ioc->frame.raw + ioc->sense_off);
+ compat_uptr_t *sense_cioc_ptr =
+ (compat_uptr_t *)(cioc->frame.raw + cioc->sense_off);
+ if (get_user(ptr, sense_cioc_ptr) ||
+ put_user(compat_ptr(ptr), sense_ioc_ptr))
+ return -EFAULT;
+ }
+ for (i = 0; i < MAX_IOCTL_SGE; i++) {
if (get_user(ptr, &cioc->sgl[i].iov_base) ||
put_user(compat_ptr(ptr), &ioc->sgl[i].iov_base) ||
copy_in_user(&ioc->sgl[i].iov_len,
^ permalink raw reply [flat|nested] 26+ messages in thread* [12/25] trace: Fix inappropriate substraction on tracing_pages_allocated in trace_free_page()
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (10 preceding siblings ...)
2010-05-25 18:09 ` [11/25] [SCSI] megaraid_sas: fix for 32bit apps Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [13/25] clockevent: Prevent dead lock on clockevents_lock Greg KH
` (12 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable, Greg KH
Cc: stable-review, torvalds, akpm, alan, Steven Rostedt,
Frederic Weisbecker, Ingo Molnar, Li Zefan, Wang Sheng-Hui
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 1547 bytes --]
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Wang Sheng-Hui <crosslonelyover@gmail.com>
[No matching upstream git commit id as it was fixed differently due to a
rewrite of the tracing code there.]
For normal case, the code in trace_free_page() do once more substraction
on tracing_pages_allocated, but for CONFIG_TRACER_MAX_TRACE it doesn't
take the freed page into account. That's not consistent with
trace_alloc_page(). Well, for there are no message related with this,
so we cannot observe its incorrect state when the kernel doesn't define
"CONFIG_TRACER_MAX_TRACE". If you add some pr_info() as
trace_alloc_page(), you may notice it.
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Wang Sheng-Hui <crosslonelyover@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
kernel/trace/trace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3018,7 +3018,6 @@ static int trace_free_page(void)
ClearPageLRU(page);
list_del(&page->lru);
tracing_pages_allocated--;
- tracing_pages_allocated--;
__free_page(page);
tracing_reset(data);
@@ -3036,6 +3035,7 @@ static int trace_free_page(void)
page = list_entry(p, struct page, lru);
ClearPageLRU(page);
list_del(&page->lru);
+ tracing_pages_allocated--;
__free_page(page);
tracing_reset(data);
^ permalink raw reply [flat|nested] 26+ messages in thread* [13/25] clockevent: Prevent dead lock on clockevents_lock
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (11 preceding siblings ...)
2010-05-25 18:09 ` [12/25] trace: Fix inappropriate substraction on tracing_pages_allocated in trace_free_page() Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [14/25] nfsd4: bug in read_buf Greg KH
` (11 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, dsterba, seto.hidetoshi,
suresh.b.siddha, Nagananda.Chumbalkar, jdelvare, tglx,
Thomas Renninger
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Suresh Siddha <suresh.b.siddha@intel.com>
This is a merge of two mainline commits, intended
for stable@kernel.org submission for 2.6.27 kernel.
commit f833bab87fca5c3ce13778421b1365845843b976
and
commit 918aae42aa9b611a3663b16ae849fdedc67c2292
Changelog of both:
Currently clockevents_notify() is called with interrupts enabled at
some places and interrupts disabled at some other places.
This results in a deadlock in this scenario.
cpu A holds clockevents_lock in clockevents_notify() with irqs enabled
cpu B waits for clockevents_lock in clockevents_notify() with irqs disabled
cpu C doing set_mtrr() which will try to rendezvous of all the cpus.
This will result in C and A come to the rendezvous point and waiting
for B. B is stuck forever waiting for the spinlock and thus not
reaching the rendezvous point.
Fix the clockevents code so that clockevents_lock is taken with
interrupts disabled and thus avoid the above deadlock.
Also call lapic_timer_propagate_broadcast() on the destination cpu so
that we avoid calling smp_call_function() in the clockevents notifier
chain.
This issue left us wondering if we need to change the MTRR rendezvous
logic to use stop machine logic (instead of smp_call_function) or add
a check in spinlock debug code to see if there are other spinlocks
which gets taken under both interrupts enabled/disabled conditions.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Brown Len" <len.brown@intel.com>
Cc: stable@kernel.org
LKML-Reference: <1250544899.2709.210.camel@sbs-t61.sc.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
I got following warning on ia64 box:
In function 'acpi_processor_power_verify':
642: warning: passing argument 2 of 'smp_call_function_single' from
incompatible pointer type
This smp_call_function_single() was introduced by a commit
f833bab87fca5c3ce13778421b1365845843b976:
The problem is that the lapic_timer_propagate_broadcast() has 2 versions:
One is real code that modified in the above commit, and the other is NOP
code that used when !ARCH_APICTIMER_STOPS_ON_C3:
static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
So I got warning because of !ARCH_APICTIMER_STOPS_ON_C3.
We really want to do nothing here on !ARCH_APICTIMER_STOPS_ON_C3, so
modify lapic_timer_propagate_broadcast() of real version to use
smp_call_function_single() in it.
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
arch/x86/kernel/process.c | 6 +-----
drivers/acpi/processor_idle.c | 13 ++++++++++---
kernel/time/clockevents.c | 16 ++++++++++------
kernel/time/tick-broadcast.c | 7 +++----
4 files changed, 24 insertions(+), 18 deletions(-)
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -283,16 +283,12 @@ static void c1e_idle(void)
if (!cpu_isset(cpu, c1e_mask)) {
cpu_set(cpu, c1e_mask);
/*
- * Force broadcast so ACPI can not interfere. Needs
- * to run with interrupts enabled as it uses
- * smp_function_call.
+ * Force broadcast so ACPI can not interfere.
*/
- local_irq_enable();
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
&cpu);
printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
cpu);
- local_irq_disable();
}
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -317,8 +317,9 @@ static void acpi_timer_check_state(int s
pr->power.timer_broadcast_on_state = state;
}
-static void acpi_propagate_timer_broadcast(struct acpi_processor *pr)
+static void __lapic_timer_propagate_broadcast(void *arg)
{
+ struct acpi_processor *pr = (struct acpi_processor *) arg;
unsigned long reason;
reason = pr->power.timer_broadcast_on_state < INT_MAX ?
@@ -327,6 +328,12 @@ static void acpi_propagate_timer_broadca
clockevents_notify(reason, &pr->id);
}
+static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
+{
+ smp_call_function_single(pr->id, __lapic_timer_propagate_broadcast,
+ (void *)pr, 1);
+}
+
/* Power(C) State timer broadcast control */
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
struct acpi_processor_cx *cx,
@@ -347,7 +354,7 @@ static void acpi_state_timer_broadcast(s
static void acpi_timer_check_state(int state, struct acpi_processor *pr,
struct acpi_processor_cx *cstate) { }
-static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) { }
+static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
static void acpi_state_timer_broadcast(struct acpi_processor *pr,
struct acpi_processor_cx *cx,
int broadcast)
@@ -1177,7 +1184,7 @@ static int acpi_processor_power_verify(s
working++;
}
- acpi_propagate_timer_broadcast(pr);
+ lapic_timer_propagate_broadcast(pr);
return (working);
}
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -124,11 +124,12 @@ int clockevents_program_event(struct clo
*/
int clockevents_register_notifier(struct notifier_block *nb)
{
+ unsigned long flags;
int ret;
- spin_lock(&clockevents_lock);
+ spin_lock_irqsave(&clockevents_lock, flags);
ret = raw_notifier_chain_register(&clockevents_chain, nb);
- spin_unlock(&clockevents_lock);
+ spin_unlock_irqrestore(&clockevents_lock, flags);
return ret;
}
@@ -165,6 +166,8 @@ static void clockevents_notify_released(
*/
void clockevents_register_device(struct clock_event_device *dev)
{
+ unsigned long flags;
+
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
@@ -175,13 +178,13 @@ void clockevents_register_device(struct
WARN_ON(1);
}
- spin_lock(&clockevents_lock);
+ spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
clockevents_notify_released();
- spin_unlock(&clockevents_lock);
+ spin_unlock_irqrestore(&clockevents_lock, flags);
}
/*
@@ -228,8 +231,9 @@ void clockevents_exchange_device(struct
void clockevents_notify(unsigned long reason, void *arg)
{
struct list_head *node, *tmp;
+ unsigned long flags;
- spin_lock(&clockevents_lock);
+ spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
switch (reason) {
@@ -244,7 +248,7 @@ void clockevents_notify(unsigned long re
default:
break;
}
- spin_unlock(&clockevents_lock);
+ spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
#endif
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -205,11 +205,11 @@ static void tick_handle_periodic_broadca
* Powerstate information: The system enters/leaves a state, where
* affected devices might stop
*/
-static void tick_do_broadcast_on_off(void *why)
+static void tick_do_broadcast_on_off(unsigned long *reason)
{
struct clock_event_device *bc, *dev;
struct tick_device *td;
- unsigned long flags, *reason = why;
+ unsigned long flags;
int cpu, bc_stopped;
spin_lock_irqsave(&tick_broadcast_lock, flags);
@@ -276,8 +276,7 @@ void tick_broadcast_on_off(unsigned long
printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
"offline CPU #%d\n", *oncpu);
else
- smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
- &reason, 1);
+ tick_do_broadcast_on_off(&reason);
}
/*
^ permalink raw reply [flat|nested] 26+ messages in thread* [14/25] nfsd4: bug in read_buf
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (12 preceding siblings ...)
2010-05-25 18:09 ` [13/25] clockevent: Prevent dead lock on clockevents_lock Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [15/25] USB: fix testing the wrong variable in fs_create_by_name() Greg KH
` (10 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable; +Cc: stable-review, torvalds, akpm, alan, J. Bruce Fields
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Neil Brown <neilb@suse.de>
commit 2bc3c1179c781b359d4f2f3439cb3df72afc17fc upstream.
When read_buf is called to move over to the next page in the pagelist
of an NFSv4 request, it sets argp->end to essentially a random
number, certainly not an address within the page which argp->p now
points to. So subsequent calls to READ_BUF will think there is much
more than a page of spare space (the cast to u32 ensures an unsigned
comparison) so we can expect to fall off the end of the second
page.
We never encountered thsi in testing because typically the only
operations which use more than two pages are write-like operations,
which have their own decoding logic. Something like a getattr after a
write may cross a page boundary, but it would be very unusual for it to
cross another boundary after that.
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/nfsd/nfs4xdr.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -179,10 +179,10 @@ static __be32 *read_buf(struct nfsd4_com
argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
- argp->end = p + (argp->pagelen>>2);
+ argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
} else {
- argp->end = p + (PAGE_SIZE>>2);
+ argp->end = argp->p + (PAGE_SIZE>>2);
argp->pagelen -= PAGE_SIZE;
}
memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
@@ -1115,10 +1115,10 @@ nfsd4_decode_compound(struct nfsd4_compo
argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
- argp->end = p + (argp->pagelen>>2);
+ argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
} else {
- argp->end = p + (PAGE_SIZE>>2);
+ argp->end = argp->p + (PAGE_SIZE>>2);
argp->pagelen -= PAGE_SIZE;
}
}
^ permalink raw reply [flat|nested] 26+ messages in thread* [15/25] USB: fix testing the wrong variable in fs_create_by_name()
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (13 preceding siblings ...)
2010-05-25 18:09 ` [14/25] nfsd4: bug in read_buf Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [16/25] nfs d_revalidate() is too trigger-happy with d_drop() Greg KH
` (9 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable; +Cc: stable-review, torvalds, akpm, alan, Dan Carpenter
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Dan Carpenter <error27@gmail.com>
commit fa7fe7af146a7b613e36a311eefbbfb5555325d1 upstream.
There is a typo here. We should be testing "*dentry" which was just
assigned instead of "dentry". This could result in dereferencing an
ERR_PTR inside either usbfs_mkdir() or usbfs_create().
Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/usb/core/inode.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -510,13 +510,13 @@ static int fs_create_by_name (const char
*dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
- if (!IS_ERR(dentry)) {
+ if (!IS_ERR(*dentry)) {
if ((mode & S_IFMT) == S_IFDIR)
error = usbfs_mkdir (parent->d_inode, *dentry, mode);
else
error = usbfs_create (parent->d_inode, *dentry, mode);
} else
- error = PTR_ERR(dentry);
+ error = PTR_ERR(*dentry);
mutex_unlock(&parent->d_inode->i_mutex);
return error;
^ permalink raw reply [flat|nested] 26+ messages in thread* [16/25] nfs d_revalidate() is too trigger-happy with d_drop()
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (14 preceding siblings ...)
2010-05-25 18:09 ` [15/25] USB: fix testing the wrong variable in fs_create_by_name() Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [17/25] NFS: rsize and wsize settings ignored on v4 mounts Greg KH
` (8 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable; +Cc: stable-review, torvalds, akpm, alan, Al Viro
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Al Viro <viro@ZenIV.linux.org.uk>
commit d9e80b7de91db05c1c4d2e5ebbfd70b3b3ba0e0f upstream.
If dentry found stale happens to be a root of disconnected tree, we
can't d_drop() it; its d_hash is actually part of s_anon and d_drop()
would simply hide it from shrink_dcache_for_umount(), leading to
all sorts of fun, including busy inodes on umount and oopsen after
that.
Bug had been there since at least 2006 (commit c636eb already has it),
so it's definitely -stable fodder.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/nfs/dir.c | 2 ++
1 file changed, 2 insertions(+)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -826,6 +826,8 @@ out_zap_parent:
/* If we have submounts, don't unhash ! */
if (have_submounts(dentry))
goto out_valid;
+ if (dentry->d_flags & DCACHE_DISCONNECTED)
+ goto out_valid;
shrink_dcache_parent(dentry);
}
d_drop(dentry);
^ permalink raw reply [flat|nested] 26+ messages in thread* [17/25] NFS: rsize and wsize settings ignored on v4 mounts
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (15 preceding siblings ...)
2010-05-25 18:09 ` [16/25] nfs d_revalidate() is too trigger-happy with d_drop() Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [18/25] i2c: Fix probing of FSC hardware monitoring chips Greg KH
` (7 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Chuck Lever, Trond Myklebust
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Chuck Lever <chuck.lever@oracle.com>
commit 356e76b855bdbfd8d1c5e75bcf0c6bf0dfe83496 upstream.
NFSv4 mounts ignore the rsize and wsize mount options, and always use
the default transfer size for both. This seems to be because all
NFSv4 mounts are now cloned, and the cloning logic doesn't copy the
rsize and wsize settings from the parent nfs_server.
I tested Fedora's 2.6.32.11-99 and it seems to have this problem as
well, so I'm guessing that .33, .32, and perhaps older kernels have
this issue as well.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/nfs/client.c | 2 ++
1 file changed, 2 insertions(+)
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -826,6 +826,8 @@ out_error:
static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
{
target->flags = source->flags;
+ target->rsize = source->rsize;
+ target->wsize = source->wsize;
target->acregmin = source->acregmin;
target->acregmax = source->acregmax;
target->acdirmin = source->acdirmin;
^ permalink raw reply [flat|nested] 26+ messages in thread* [18/25] i2c: Fix probing of FSC hardware monitoring chips
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (16 preceding siblings ...)
2010-05-25 18:09 ` [17/25] NFS: rsize and wsize settings ignored on v4 mounts Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [19/25] libata: ensure NCQ error result taskfile is fully initialized before returning it via qc->result_tf Greg KH
` (6 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Jean Delvare, Hans de Goede
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Jean Delvare <khali@linux-fr.org>
commit b1d4b390ea4bb480e65974ce522a04022608a8df upstream.
Some FSC hardware monitoring chips (Syleus at least) doesn't like
quick writes we typically use to probe for I2C chips. Use a regular
byte read instead for the address they live at (0x73). These are the
only known chips living at this address on PC systems.
For clarity, this fix should not be needed for kernels 2.6.30 and
later, as we started instantiating the hwmon devices explicitly based
on DMI data. Still, this fix is valuable in the following two cases:
* Support for recent FSC chips on older kernels. The DMI-based device
instantiation is more difficult to backport than the device support
itself.
* Case where the DMI-based device instantiation fails, whatever the
reason. We fall back to probing in that case, so it should work.
This fixes kernel bug #15634:
https://bugzilla.kernel.org/show_bug.cgi?id=15634
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/i2c/i2c-core.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1269,14 +1269,24 @@ static int i2c_detect_address(struct i2c
/* Make sure there is something at this address, unless forced */
if (kind < 0) {
- if (i2c_smbus_xfer(adapter, addr, 0, 0, 0,
- I2C_SMBUS_QUICK, NULL) < 0)
- return 0;
+ if (addr == 0x73 && (adapter->class & I2C_CLASS_HWMON)) {
+ /* Special probe for FSC hwmon chips */
+ union i2c_smbus_data dummy;
- /* prevent 24RF08 corruption */
- if ((addr & ~0x0f) == 0x50)
- i2c_smbus_xfer(adapter, addr, 0, 0, 0,
- I2C_SMBUS_QUICK, NULL);
+ if (i2c_smbus_xfer(adapter, addr, 0, I2C_SMBUS_READ, 0,
+ I2C_SMBUS_BYTE_DATA, &dummy) < 0)
+ return 0;
+ } else {
+ if (i2c_smbus_xfer(adapter, addr, 0, I2C_SMBUS_WRITE, 0,
+ I2C_SMBUS_QUICK, NULL) < 0)
+ return 0;
+
+ /* Prevent 24RF08 corruption */
+ if ((addr & ~0x0f) == 0x50)
+ i2c_smbus_xfer(adapter, addr, 0,
+ I2C_SMBUS_WRITE, 0,
+ I2C_SMBUS_QUICK, NULL);
+ }
}
/* Finally call the custom detection function */
^ permalink raw reply [flat|nested] 26+ messages in thread* [19/25] libata: ensure NCQ error result taskfile is fully initialized before returning it via qc->result_tf.
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (17 preceding siblings ...)
2010-05-25 18:09 ` [18/25] i2c: Fix probing of FSC hardware monitoring chips Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [20/25] libata: retry FS IOs even if it has failed with AC_ERR_INVALID Greg KH
` (5 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable; +Cc: stable-review, torvalds, akpm, alan, Jeff Garzik
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Jeff Garzik <jeff@garzik.org>
commit a09bf4cd53b8ab000197ef81f15d50f29ecf973c upstream.
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/ata/libata-eh.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1497,6 +1497,7 @@ void ata_eh_analyze_ncq_error(struct ata
}
/* okay, this error is ours */
+ memset(&tf, 0, sizeof(tf));
rc = ata_eh_read_log_10h(dev, &tag, &tf);
if (rc) {
ata_link_printk(link, KERN_ERR, "failed to read log page 10h "
^ permalink raw reply [flat|nested] 26+ messages in thread* [20/25] libata: retry FS IOs even if it has failed with AC_ERR_INVALID
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (18 preceding siblings ...)
2010-05-25 18:09 ` [19/25] libata: ensure NCQ error result taskfile is fully initialized before returning it via qc->result_tf Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [21/25] svc: Clean up deferred requests on transport destruction Greg KH
` (4 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Tejun Heo, Jeff Garzik
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Tejun Heo <tj@kernel.org>
commit 534ead709235b967b659947c55d9130873a432c4 upstream.
libata currently doesn't retry if a command fails with AC_ERR_INVALID
assuming that retrying won't get it any further even if retried.
However, a failure may be classified as invalid through hardware
glitch (incorrect reading of the error register or firmware bug) and
there isn't whole lot to gain by not retrying as actually invalid
commands will be failed immediately. Also, commands serving FS IOs
are extremely unlikely to be invalid. Retry FS IOs even if it's
marked invalid.
Transient and incorrect invalid failure was seen while debugging
firmware related issue on Samsung n130 on bko#14314.
http://bugzilla.kernel.org/show_bug.cgi?id=14314
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Johannes Stezenbach <js@sig21.net>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/ata/libata-eh.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1901,8 +1901,9 @@ static void ata_eh_link_autopsy(struct a
qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
/* determine whether the command is worth retrying */
- if (!(qc->err_mask & AC_ERR_INVALID) &&
- ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV))
+ if (qc->flags & ATA_QCFLAG_IO ||
+ (!(qc->err_mask & AC_ERR_INVALID) &&
+ qc->err_mask != AC_ERR_DEV))
qc->flags |= ATA_QCFLAG_RETRY;
/* accumulate error info */
^ permalink raw reply [flat|nested] 26+ messages in thread* [21/25] svc: Clean up deferred requests on transport destruction
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (19 preceding siblings ...)
2010-05-25 18:09 ` [20/25] libata: retry FS IOs even if it has failed with AC_ERR_INVALID Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [22/25] hwmon: (w83781d) Request I/O ports individually for probing Greg KH
` (3 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Tom Tucker, J. Bruce Fields,
roma1390
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Tom Tucker <tom@opengridcomputing.com>
commit 22945e4a1c7454c97f5d8aee1ef526c83fef3223 upstream.
A race between svc_revisit and svc_delete_xprt can result in
deferred requests holding references on a transport that can never be
recovered because dead transports are not enqueued for subsequent
processing.
Check for XPT_DEAD in revisit to clean up completing deferrals on a dead
transport and sweep a transport's deferred queue to do the same for queued
but unprocessed deferrals.
Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: roma1390 <roma1390@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
net/sunrpc/svc_xprt.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -810,6 +810,11 @@ static void svc_age_temp_xprts(unsigned
void svc_delete_xprt(struct svc_xprt *xprt)
{
struct svc_serv *serv = xprt->xpt_server;
+ struct svc_deferred_req *dr;
+
+ /* Only do this once */
+ if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
+ return;
dprintk("svc: svc_delete_xprt(%p)\n", xprt);
xprt->xpt_ops->xpo_detach(xprt);
@@ -824,12 +829,16 @@ void svc_delete_xprt(struct svc_xprt *xp
* while still attached to a queue, the queue itself
* is about to be destroyed (in svc_destroy).
*/
- if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) {
- BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2);
- if (test_bit(XPT_TEMP, &xprt->xpt_flags))
- serv->sv_tmpcnt--;
+ if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+ serv->sv_tmpcnt--;
+
+ for (dr = svc_deferred_dequeue(xprt); dr;
+ dr = svc_deferred_dequeue(xprt)) {
svc_xprt_put(xprt);
+ kfree(dr);
}
+
+ svc_xprt_put(xprt);
spin_unlock_bh(&serv->sv_lock);
}
@@ -875,17 +884,19 @@ static void svc_revisit(struct cache_def
container_of(dreq, struct svc_deferred_req, handle);
struct svc_xprt *xprt = dr->xprt;
- if (too_many) {
+ spin_lock(&xprt->xpt_lock);
+ set_bit(XPT_DEFERRED, &xprt->xpt_flags);
+ if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
+ spin_unlock(&xprt->xpt_lock);
+ dprintk("revisit canceled\n");
svc_xprt_put(xprt);
kfree(dr);
return;
}
dprintk("revisit queued\n");
dr->xprt = NULL;
- spin_lock(&xprt->xpt_lock);
list_add(&dr->handle.recent, &xprt->xpt_deferred);
spin_unlock(&xprt->xpt_lock);
- set_bit(XPT_DEFERRED, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
^ permalink raw reply [flat|nested] 26+ messages in thread* [22/25] hwmon: (w83781d) Request I/O ports individually for probing
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (20 preceding siblings ...)
2010-05-25 18:09 ` [21/25] svc: Clean up deferred requests on transport destruction Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [23/25] i2c-i801: Dont use the block buffer for I2C block writes Greg KH
` (2 subsequent siblings)
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable; +Cc: stable-review, torvalds, akpm, alan, Jean Delvare
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Jean Delvare <khali@linux-fr.org>
commit b0bcdd3cd0adb85a7686b396ba50493871b1135c upstream.
Different motherboards have different PNP declarations for
W83781D/W83782D chips. Some declare the whole range of I/O ports (8
ports), some declare only the useful ports (2 ports at offset 5) and
some declare fancy ranges, for example 4 ports at offset 4. To
properly handle all cases, request all ports individually for probing.
After we have determined that we really have a W83781D or W83782D
chip, the useful port range will be requested again, as a single
block.
I did not see a board which needs this yet, but I know of one for lm78
driver and I'd like to keep the logic of these two drivers in sync.
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/hwmon/w83781d.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -1797,17 +1797,17 @@ static int __init
w83781d_isa_found(unsigned short address)
{
int val, save, found = 0;
+ int port;
- /* We have to request the region in two parts because some
- boards declare base+4 to base+7 as a PNP device */
- if (!request_region(address, 4, "w83781d")) {
- pr_debug("w83781d: Failed to request low part of region\n");
- return 0;
- }
- if (!request_region(address + 4, 4, "w83781d")) {
- pr_debug("w83781d: Failed to request high part of region\n");
- release_region(address, 4);
- return 0;
+ /* Some boards declare base+0 to base+7 as a PNP device, some base+4
+ * to base+7 and some base+5 to base+6. So we better request each port
+ * individually for the probing phase. */
+ for (port = address; port < address + W83781D_EXTENT; port++) {
+ if (!request_region(port, 1, "w83781d")) {
+ pr_debug("w83781d: Failed to request port 0x%x\n",
+ port);
+ goto release;
+ }
}
#define REALLY_SLOW_IO
@@ -1881,8 +1881,8 @@ w83781d_isa_found(unsigned short address
val == 0x30 ? "W83782D" : "W83781D", (int)address);
release:
- release_region(address + 4, 4);
- release_region(address, 4);
+ for (port--; port >= address; port--)
+ release_region(port, 1);
return found;
}
^ permalink raw reply [flat|nested] 26+ messages in thread* [23/25] i2c-i801: Dont use the block buffer for I2C block writes
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (21 preceding siblings ...)
2010-05-25 18:09 ` [22/25] hwmon: (w83781d) Request I/O ports individually for probing Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [24/25] i2c-tiny-usb: Fix on big-endian systems Greg KH
2010-05-25 18:09 ` [25/25] nfsd: fix vm overcommit crash Greg KH
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Jean Delvare, Oleg Ryjkov
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Jean Delvare <khali@linux-fr.org>
commit c074c39d62306efa5ba7c69c1a1531bc7333d252 upstream.
Experience has shown that the block buffer can only be used for SMBus
(not I2C) block transactions, even though the datasheet doesn't
mention this limitation.
Reported-by: Felix Rubinstein <felixru@gmail.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Oleg Ryjkov <oryjkov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/i2c/busses/i2c-i801.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -413,9 +413,11 @@ static int i801_block_transaction(union
data->block[0] = 32; /* max for SMBus block reads */
}
+ /* Experience has shown that the block buffer can only be used for
+ SMBus (not I2C) block transactions, even though the datasheet
+ doesn't mention this limitation. */
if ((i801_features & FEATURE_BLOCK_BUFFER)
- && !(command == I2C_SMBUS_I2C_BLOCK_DATA
- && read_write == I2C_SMBUS_READ)
+ && command != I2C_SMBUS_I2C_BLOCK_DATA
&& i801_set_block_buffer_mode() == 0)
result = i801_block_transaction_by_block(data, read_write,
hwpec);
^ permalink raw reply [flat|nested] 26+ messages in thread* [24/25] i2c-tiny-usb: Fix on big-endian systems
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (22 preceding siblings ...)
2010-05-25 18:09 ` [23/25] i2c-i801: Dont use the block buffer for I2C block writes Greg KH
@ 2010-05-25 18:09 ` Greg KH
2010-05-25 18:09 ` [25/25] nfsd: fix vm overcommit crash Greg KH
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, Jean Delvare, Till Harbaum
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Jean Delvare <khali@linux-fr.org>
commit 1c010ff8912cbc08d80e865aab9c32b6b00c527d upstream.
The functionality bit vector is always returned as a little-endian
32-bit number by the device, so it must be byte-swapped to the host
endianness.
On the other hand, the delay value is handled by the USB stack, so no
byte swapping is needed on our side.
This fixes bug #15105:
http://bugzilla.kernel.org/show_bug.cgi?id=15105
Reported-by: Jens Richter <jens@richter-stutensee.de>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Tested-by: Jens Richter <jens@richter-stutensee.de>
Cc: Till Harbaum <till@harbaum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/i2c/busses/i2c-tiny-usb.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/drivers/i2c/busses/i2c-tiny-usb.c
+++ b/drivers/i2c/busses/i2c-tiny-usb.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/module.h>
+#include <linux/types.h>
/* include interfaces to usb layer */
#include <linux/usb.h>
@@ -31,8 +32,8 @@
#define CMD_I2C_IO_END (1<<1)
/* i2c bit delay, default is 10us -> 100kHz */
-static int delay = 10;
-module_param(delay, int, 0);
+static unsigned short delay = 10;
+module_param(delay, ushort, 0);
MODULE_PARM_DESC(delay, "bit delay in microseconds, "
"e.g. 10 for 100kHz (default is 100kHz)");
@@ -109,7 +110,7 @@ static int usb_xfer(struct i2c_adapter *
static u32 usb_func(struct i2c_adapter *adapter)
{
- u32 func;
+ __le32 func;
/* get functionality from adapter */
if (usb_read(adapter, CMD_GET_FUNC, 0, 0, &func, sizeof(func)) !=
@@ -118,7 +119,7 @@ static u32 usb_func(struct i2c_adapter *
return 0;
}
- return func;
+ return le32_to_cpu(func);
}
/* This is the actual algorithm we define */
@@ -216,8 +217,7 @@ static int i2c_tiny_usb_probe(struct usb
"i2c-tiny-usb at bus %03d device %03d",
dev->usb_dev->bus->busnum, dev->usb_dev->devnum);
- if (usb_write(&dev->adapter, CMD_SET_DELAY,
- cpu_to_le16(delay), 0, NULL, 0) != 0) {
+ if (usb_write(&dev->adapter, CMD_SET_DELAY, delay, 0, NULL, 0) != 0) {
dev_err(&dev->adapter.dev,
"failure setting delay to %dus\n", delay);
retval = -EIO;
^ permalink raw reply [flat|nested] 26+ messages in thread* [25/25] nfsd: fix vm overcommit crash
2010-05-25 18:12 [00/25] 2.6.27.47-stable review, take 2 Greg KH
` (23 preceding siblings ...)
2010-05-25 18:09 ` [24/25] i2c-tiny-usb: Fix on big-endian systems Greg KH
@ 2010-05-25 18:09 ` Greg KH
24 siblings, 0 replies; 26+ messages in thread
From: Greg KH @ 2010-05-25 18:09 UTC (permalink / raw)
To: linux-kernel, stable
Cc: stable-review, torvalds, akpm, alan, James Morris, Alan Cox
2.6.27-stable review patch. If anyone has any objections, please let us know.
------------------
From: Alan Cox <alan@redhat.com>
commit 731572d39fcd3498702eda4600db4c43d51e0b26 upstream.
Junjiro R. Okajima reported a problem where knfsd crashes if you are
using it to export shmemfs objects and run strict overcommit. In this
situation the current->mm based modifier to the overcommit goes through a
NULL pointer.
We could simply check for NULL and skip the modifier but we've caught
other real bugs in the past from mm being NULL here - cases where we did
need a valid mm set up (eg the exec bug about a year ago).
To preserve the checks and get the logic we want shuffle the checking
around and add a new helper to the vm_ security wrappers
Also fix a current->mm reference in nommu that should use the passed mm
[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix build]
Reported-by: Junjiro R. Okajima <hooanon05@yahoo.co.jp>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
include/linux/security.h | 6 ++++++
mm/mmap.c | 3 ++-
mm/nommu.c | 3 ++-
mm/shmem.c | 8 ++++----
security/security.c | 9 +++++++++
5 files changed, 23 insertions(+), 6 deletions(-)
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1590,6 +1590,7 @@ int security_syslog(int type);
int security_settime(struct timespec *ts, struct timezone *tz);
int security_vm_enough_memory(long pages);
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
+int security_vm_enough_memory_kern(long pages);
int security_bprm_alloc(struct linux_binprm *bprm);
void security_bprm_free(struct linux_binprm *bprm);
void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe);
@@ -1824,6 +1825,11 @@ static inline int security_vm_enough_mem
{
return cap_vm_enough_memory(current->mm, pages);
}
+
+static inline int security_vm_enough_memory_kern(long pages)
+{
+ return cap_vm_enough_memory(current->mm, pages);
+}
static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -178,7 +178,8 @@ int __vm_enough_memory(struct mm_struct
/* Don't let a single process grow too big:
leave 3% of the size of this process for other processes */
- allowed -= mm->total_vm / 32;
+ if (mm)
+ allowed -= mm->total_vm / 32;
/*
* cast `allowed' as a signed long because vm_committed_space
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1432,7 +1432,8 @@ int __vm_enough_memory(struct mm_struct
/* Don't let a single process grow too big:
leave 3% of the size of this process for other processes */
- allowed -= current->mm->total_vm / 32;
+ if (mm)
+ allowed -= mm->total_vm / 32;
/*
* cast `allowed' as a signed long because vm_committed_space
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -163,8 +163,8 @@ static inline struct shmem_sb_info *SHME
*/
static inline int shmem_acct_size(unsigned long flags, loff_t size)
{
- return (flags & VM_ACCOUNT)?
- security_vm_enough_memory(VM_ACCT(size)): 0;
+ return (flags & VM_ACCOUNT) ?
+ security_vm_enough_memory_kern(VM_ACCT(size)) : 0;
}
static inline void shmem_unacct_size(unsigned long flags, loff_t size)
@@ -181,8 +181,8 @@ static inline void shmem_unacct_size(uns
*/
static inline int shmem_acct_block(unsigned long flags)
{
- return (flags & VM_ACCOUNT)?
- 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
+ return (flags & VM_ACCOUNT) ?
+ 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE));
}
static inline void shmem_unacct_blocks(unsigned long flags, long pages)
--- a/security/security.c
+++ b/security/security.c
@@ -195,14 +195,23 @@ int security_settime(struct timespec *ts
int security_vm_enough_memory(long pages)
{
+ WARN_ON(current->mm == NULL);
return security_ops->vm_enough_memory(current->mm, pages);
}
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
{
+ WARN_ON(mm == NULL);
return security_ops->vm_enough_memory(mm, pages);
}
+int security_vm_enough_memory_kern(long pages)
+{
+ /* If current->mm is a kernel thread then we will pass NULL,
+ for this specific case that is fine */
+ return security_ops->vm_enough_memory(current->mm, pages);
+}
+
int security_bprm_alloc(struct linux_binprm *bprm)
{
return security_ops->bprm_alloc_security(bprm);
^ permalink raw reply [flat|nested] 26+ messages in thread