From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org,
akpm@linux-foundation.org, torvalds@linux-foundation.org,
stable-review@kernel.org
Cc: Dmitry Monakhov <dmonakhov@openvz.org>, Jan Kara <jack@suse.cz>
Subject: [36/39] quota: decouple fs reserved space from quota reservation
Date: Tue, 05 Jan 2010 12:02:32 -0800 [thread overview]
Message-ID: <20100105200303.666265338@mini.kroah.org> (raw)
In-Reply-To: <20100105195007.GA23952@kroah.com>
2.6.31-stable review patch. If anyone has any objections, please let us know.
------------------
From: Dmitry Monakhov <dmonakhov@openvz.org>
commit fd8fbfc1709822bd94247c5b2ab15a5f5041e103 upstream.
Currently inode_reservation is managed by fs itself and this
reservation is transfered on dquot_transfer(). This means what
inode_reservation must always be in sync with
dquot->dq_dqb.dqb_rsvspace. Otherwise dquot_transfer() will result
in incorrect quota(WARN_ON in dquot_claim_reserved_space() will be
triggered)
This is not easy because of complex locking order issues
for example http://bugzilla.kernel.org/show_bug.cgi?id=14739
The patch introduce quota reservation field for each fs-inode
(fs specific inode is used in order to prevent bloating generic
vfs inode). This reservation is managed by quota code internally
similar to i_blocks/i_bytes and may not be always in sync with
internal fs reservation.
Also perform some code rearrangement:
- Unify dquot_reserve_space() and dquot_reserve_space()
- Unify dquot_release_reserved_space() and dquot_free_space()
- Also this patch add missing warning update to release_rsv()
dquot_release_reserved_space() must call flush_warnings() as
dquot_free_space() does.
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/quota/dquot.c | 213 +++++++++++++++++++++++++++-----------------------
include/linux/quota.h | 5 -
2 files changed, 122 insertions(+), 96 deletions(-)
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1388,6 +1388,67 @@ void vfs_dq_drop(struct inode *inode)
EXPORT_SYMBOL(vfs_dq_drop);
/*
+ * inode_reserved_space is managed internally by quota, and protected by
+ * i_lock similar to i_blocks+i_bytes.
+ */
+static qsize_t *inode_reserved_space(struct inode * inode)
+{
+ /* Filesystem must explicitly define it's own method in order to use
+ * quota reservation interface */
+ BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
+ return inode->i_sb->dq_op->get_reserved_space(inode);
+}
+
+static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+ spin_unlock(&inode->i_lock);
+}
+
+
+static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ __inode_add_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
+}
+
+static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ spin_unlock(&inode->i_lock);
+}
+
+static qsize_t inode_get_rsv_space(struct inode *inode)
+{
+ qsize_t ret;
+ spin_lock(&inode->i_lock);
+ ret = *inode_reserved_space(inode);
+ spin_unlock(&inode->i_lock);
+ return ret;
+}
+
+static void inode_incr_space(struct inode *inode, qsize_t number,
+ int reserve)
+{
+ if (reserve)
+ inode_add_rsv_space(inode, number);
+ else
+ inode_add_bytes(inode, number);
+}
+
+static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
+{
+ if (reserve)
+ inode_sub_rsv_space(inode, number);
+ else
+ inode_sub_bytes(inode, number);
+}
+
+/*
* Following four functions update i_blocks+i_bytes fields and
* quota information (together with appropriate checks)
* NOTE: We absolutely rely on the fact that caller dirties
@@ -1405,6 +1466,21 @@ int __dquot_alloc_space(struct inode *in
int cnt, ret = QUOTA_OK;
char warntype[MAXQUOTAS];
+ /*
+ * First test before acquiring mutex - solves deadlocks when we
+ * re-enter the quota code and are already holding the mutex
+ */
+ if (IS_NOQUOTA(inode)) {
+ inode_incr_space(inode, number, reserve);
+ goto out;
+ }
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode)) {
+ inode_incr_space(inode, number, reserve);
+ goto out_unlock;
+ }
+
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
@@ -1415,7 +1491,8 @@ int __dquot_alloc_space(struct inode *in
if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
== NO_QUOTA) {
ret = NO_QUOTA;
- goto out_unlock;
+ spin_unlock(&dq_data_lock);
+ goto out_flush_warn;
}
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1426,64 +1503,32 @@ int __dquot_alloc_space(struct inode *in
else
dquot_incr_space(inode->i_dquot[cnt], number);
}
- if (!reserve)
- inode_add_bytes(inode, number);
-out_unlock:
+ inode_incr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
- flush_warnings(inode->i_dquot, warntype);
- return ret;
-}
-
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
-{
- int cnt, ret = QUOTA_OK;
-
- /*
- * First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex
- */
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out;
- }
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out_unlock;
- }
-
- ret = __dquot_alloc_space(inode, number, warn, 0);
- if (ret == NO_QUOTA)
- goto out_unlock;
+ if (reserve)
+ goto out_flush_warn;
/* Dirtify all the dquots - this can block when journalling */
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+out_flush_warn:
+ flush_warnings(inode->i_dquot, warntype);
out_unlock:
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
return ret;
}
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+{
+ return __dquot_alloc_space(inode, number, warn, 0);
+}
EXPORT_SYMBOL(dquot_alloc_space);
int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
{
- int ret = QUOTA_OK;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return ret;
+ return __dquot_alloc_space(inode, number, warn, 1);
}
EXPORT_SYMBOL(dquot_reserve_space);
@@ -1540,14 +1585,14 @@ int dquot_claim_space(struct inode *inod
int ret = QUOTA_OK;
if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
goto out;
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
if (IS_NOQUOTA(inode)) {
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
goto out;
}
@@ -1559,7 +1604,7 @@ int dquot_claim_space(struct inode *inod
number);
}
/* Update inode bytes */
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
/* Dirtify all the dquots - this can block when journalling */
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
@@ -1572,38 +1617,9 @@ out:
EXPORT_SYMBOL(dquot_claim_space);
/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
- int cnt;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- spin_lock(&dq_data_lock);
- /* Release reserved dquots */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt])
- dquot_free_reserved_space(inode->i_dquot[cnt], number);
- }
- spin_unlock(&dq_data_lock);
-
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return;
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
-
-/*
* This operation can block, but only after everything is updated
*/
-int dquot_free_space(struct inode *inode, qsize_t number)
+int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
@@ -1612,7 +1628,7 @@ int dquot_free_space(struct inode *inode
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode)) {
out_sub:
- inode_sub_bytes(inode, number);
+ inode_decr_space(inode, number, reserve);
return QUOTA_OK;
}
@@ -1627,21 +1643,43 @@ out_sub:
if (!inode->i_dquot[cnt])
continue;
warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
- dquot_decr_space(inode->i_dquot[cnt], number);
+ if (reserve)
+ dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ else
+ dquot_decr_space(inode->i_dquot[cnt], number);
}
- inode_sub_bytes(inode, number);
+ inode_decr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
+
+ if (reserve)
+ goto out_unlock;
/* Dirtify all the dquots - this can block when journalling */
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+out_unlock:
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
+
+int dquot_free_space(struct inode *inode, qsize_t number)
+{
+ return __dquot_free_space(inode, number, 0);
+}
EXPORT_SYMBOL(dquot_free_space);
/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+ __dquot_free_space(inode, number, 1);
+
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
+/*
* This operation can block, but only after everything is updated
*/
int dquot_free_inode(const struct inode *inode, qsize_t number)
@@ -1679,19 +1717,6 @@ int dquot_free_inode(const struct inode
EXPORT_SYMBOL(dquot_free_inode);
/*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
- qsize_t reserved_space = 0;
-
- if (sb_any_quota_active(inode->i_sb) &&
- inode->i_sb->dq_op->get_reserved_space)
- reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
- return reserved_space;
-}
-
-/*
* Transfer the number of inode and blocks from one diskquota to an other.
*
* This operation can block, but only after everything is updated
@@ -1734,7 +1759,7 @@ int dquot_transfer(struct inode *inode,
}
spin_lock(&dq_data_lock);
cur_space = inode_get_bytes(inode);
- rsv_space = dquot_get_reserved_space(inode);
+ rsv_space = inode_get_rsv_space(inode);
space = cur_space + rsv_space;
/* Build the transfer_from list and check the limits */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -313,8 +313,9 @@ struct dquot_operations {
int (*claim_space) (struct inode *, qsize_t);
/* release rsved quota for delayed alloc */
void (*release_rsv) (struct inode *, qsize_t);
- /* get reserved quota for delayed alloc */
- qsize_t (*get_reserved_space) (struct inode *);
+ /* get reserved quota for delayed alloc, value returned is managed by
+ * quota code only */
+ qsize_t *(*get_reserved_space) (struct inode *);
};
/* Operations handling requests from userspace */
next prev parent reply other threads:[~2010-01-05 20:08 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-01-05 19:50 [00/10] 2.6.27.43 stable review Greg KH
2010-01-05 19:47 ` [01/10] Libertas: fix buffer overflow in lbs_get_essid() Greg KH
2010-01-05 19:47 ` [02/10] pata_cmd64x: fix overclocking of UDMA0-2 modes Greg KH
2010-01-05 19:47 ` [03/10] sound: sgio2audio/pdaudiocf/usb-audio: initialize PCM buffer Greg KH
2010-01-05 19:47 ` [04/10] i2c/tsl2550: Fix lux value in extended mode Greg KH
2010-01-05 19:47 ` [05/10] ipv6: reassembly: use seperate reassembly queues for conntrack and local delivery Greg KH
2010-01-05 19:47 ` [06/10] S390: dasd: support DIAG access for read-only devices Greg KH
2010-01-05 19:47 ` [07/10] x86/ptrace: make genregs[32]_get/set more robust Greg KH
2010-01-05 19:47 ` [08/10] rt2x00: Disable powersaving for rt61pci and rt2800pci Greg KH
2010-01-05 20:58 ` Gertjan van Wingerde
2010-01-05 21:21 ` Greg KH
2010-01-05 19:48 ` [09/10] generic_permission: MAY_OPEN is not write access Greg KH
2010-01-05 19:48 ` [10/10] Revert: KVM: MMU: do not free active mmu pages in free_mmu_pages() Greg KH
2010-01-05 20:01 ` [01/39] acerhdf: limit modalias matching to supported Greg KH
2010-01-05 20:01 ` [02/39] ASoC: Do not write to invalid registers on the wm9712 Greg KH
2010-01-05 20:01 ` [03/39] cifs: NULL out tcon, pSesInfo, and srvTcp pointers when chasing DFS referrals Greg KH
2010-01-05 20:02 ` [04/39] clockevents: Prevent clockevent_devices list corruption on cpu hotplug Greg KH
2010-01-05 20:02 ` [05/39] dma: at_hdmac: correct incompatible type for argument 1 of spin_lock_bh Greg KH
2010-01-05 20:02 ` [06/39] drivers/net/usb: Correct code taking the size of a pointer Greg KH
2010-01-05 20:02 ` [07/39] iwmc3200wifi: fix array out-of-boundary access Greg KH
2010-01-06 2:52 ` Zhu Yi
2010-01-06 18:01 ` Greg KH
2010-01-06 18:27 ` [stable] " Greg KH
2010-01-05 20:02 ` [08/39] Libertas: fix buffer overflow in lbs_get_essid() Greg KH
2010-01-05 20:02 ` [09/39] md: Fix unfortunate interaction with evms Greg KH
2010-01-05 20:02 ` [10/39] pata_cmd64x: fix overclocking of UDMA0-2 modes Greg KH
2010-01-05 20:02 ` [11/39] pata_hpt3x2n: fix clock turnaround Greg KH
2010-01-05 20:02 ` [12/39] SCSI: fc class: fix fc_transport_init error handling Greg KH
2010-01-05 20:02 ` [13/39] sound: sgio2audio/pdaudiocf/usb-audio: initialize PCM buffer Greg KH
2010-01-05 20:02 ` [14/39] USB: emi62: fix crash when trying to load EMI 6|2 firmware Greg KH
2010-01-05 20:02 ` [15/39] USB: Fix a bug on appledisplay.c regarding signedness Greg KH
2010-01-05 20:02 ` [16/39] USB: musb: gadget_ep0: avoid SetupEnd interrupt Greg KH
2010-01-05 20:02 ` [17/39] USB: option: support hi speed for modem Haier CE100 Greg KH
2010-01-05 20:02 ` [18/39] x86, cpuid: Add "volatile" to asm in native_cpuid() Greg KH
2010-01-05 20:02 ` [19/39] e100: Use pci pool to work around GFP_ATOMIC order 5 memory allocation failure Greg KH
2010-03-15 21:29 ` [Stable-review] " Stephen Hemminger
2010-03-15 21:32 ` David Miller
2010-03-15 21:36 ` Stephen Hemminger
2010-03-15 21:39 ` David Miller
2010-03-15 22:20 ` David Miller
2010-03-15 22:25 ` Stephen Hemminger
2010-01-05 20:02 ` [20/39] e100: Fix broken cbs accounting due to missing memset Greg KH
2010-01-05 20:02 ` [21/39] hostap: Revert a toxic part of the conversion to net_device_ops Greg KH
2010-01-05 20:02 ` [22/39] hwmon: (fschmd) Fix check on unsigned in watchdog_write() Greg KH
2010-01-05 20:02 ` [23/39] hwmon: (sht15) Off-by-one error in array index + incorrect constants Greg KH
2010-01-05 20:02 ` [24/39] i2c/tsl2550: Fix lux value in extended mode Greg KH
2010-01-05 20:02 ` [25/39] ipv6: reassembly: use seperate reassembly queues for conntrack and local delivery Greg KH
2010-01-05 20:02 ` [26/39] S390: dasd: support DIAG access for read-only devices Greg KH
2010-01-05 20:02 ` [27/39] udf: Try harder when looking for VAT inode Greg KH
2010-01-05 20:02 ` [28/39] V4L/DVB (13596): ov511.c typo: lock => unlock Greg KH
2010-01-05 20:02 ` [29/39] x86/ptrace: make genregs[32]_get/set more robust Greg KH
2010-01-05 20:02 ` [30/39] XFS bug in log recover with quota (bugzilla id 855) Greg KH
2010-01-05 20:02 ` [31/39] generic_permission: MAY_OPEN is not write access Greg KH
2010-01-05 20:02 ` [32/39] rt2x00: Disable powersaving for rt61pci and rt2800pci Greg KH
2010-01-05 20:59 ` Gertjan van Wingerde
2010-01-05 21:21 ` Greg KH
2010-01-05 20:02 ` [33/39] memcg: avoid oom-killing innocent task in case of use_hierarchy Greg KH
2010-01-05 20:02 ` Greg KH
2010-01-05 20:02 ` [34/39] Input: atkbd - add force relese key quirk for Samsung R59P/R60P/R61P Greg KH
2010-01-05 20:02 ` [35/39] Add unlocked version of inode_add_bytes() function Greg KH
2010-01-05 20:02 ` Greg KH [this message]
2010-01-05 20:02 ` [37/39] ext4: Convert to generic reserved quotas space management Greg KH
2010-01-05 20:02 ` [38/39] ext4: Fix potential quota deadlock Greg KH
2010-01-05 20:02 ` [39/39] ext4: fix sleep inside spinlock issue with quota and dealloc (#14739) Greg KH
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100105200303.666265338@mini.kroah.org \
--to=gregkh@suse.de \
--cc=akpm@linux-foundation.org \
--cc=dmonakhov@openvz.org \
--cc=jack@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=stable-review@kernel.org \
--cc=stable@kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.