* [PATCH 1/2] xfs: use xfs_get_buf_noaddr for iclogs
@ 2007-03-07 10:13 Christoph Hellwig
2007-03-07 11:59 ` Shailendra Tripathi
2007-03-12 4:41 ` David Chinner
0 siblings, 2 replies; 4+ messages in thread
From: Christoph Hellwig @ 2007-03-07 10:13 UTC (permalink / raw)
To: xfs, ecashin, akpm; +Cc: linux-kernel
Currently xlog_alloc allocates memory for the iclogs first, then
allocates a buffer using xfs_buf_get_empty and finally assigns
the memory to the buffer. We don't really want to do this, but
rather allocate a buffer with memory attached to it using
xfs_buf_get_noaddr. There's a subtile change because
xfs_buf_get_empty returns the buffer locked, but xfs_buf_get_noaddr
returns it unlocked. From my auditing and testing nothing in the
log I/O code cares about this distincition, but I'd be happy if
someone could try to prove this independently.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: linux-2.6/fs/xfs/xfs_log.c
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_log.c 2007-03-06 17:26:40.000000000 +0100
+++ linux-2.6/fs/xfs/xfs_log.c 2007-03-06 17:28:03.000000000 +0100
@@ -1199,11 +1199,16 @@
*iclogp = (xlog_in_core_t *)
kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
iclog = *iclogp;
- iclog->hic_data = (xlog_in_core_2_t *)
- kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
-
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
+
+ bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+ XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+ XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
+ XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+ iclog->ic_bp = bp;
+ iclog->hic_data = bp->b_addr;
+
log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
head = &iclog->ic_header;
@@ -1216,11 +1221,6 @@
INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
- bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
- XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
- iclog->ic_bp = bp;
iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1229,7 +1229,6 @@
iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
- ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force");
sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write");
@@ -1528,7 +1527,6 @@
}
#endif
next_iclog = iclog->ic_next;
- kmem_free(iclog->hic_data, log->l_iclog_size);
kmem_free(iclog, sizeof(xlog_in_core_t));
iclog = next_iclog;
}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] xfs: use xfs_get_buf_noaddr for iclogs
2007-03-07 10:13 [PATCH 1/2] xfs: use xfs_get_buf_noaddr for iclogs Christoph Hellwig
@ 2007-03-07 11:59 ` Shailendra Tripathi
2007-03-12 4:41 ` David Chinner
1 sibling, 0 replies; 4+ messages in thread
From: Shailendra Tripathi @ 2007-03-07 11:59 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs, ecashin, akpm, linux-kernel
Does not look to me either. Looks logical as well because these buffers
are used only in log syncing and only one thread can be ever flushing
one ICLOG and, hence, no need for protection.
Even split buffer (log->l_xbuf) is used by only ICLOG at a time,
should not matter. I don't see protection for this even today as no
locking is done in split sync path.
-shailendra
Christoph Hellwig wrote:
> Currently xlog_alloc allocates memory for the iclogs first, then
> allocates a buffer using xfs_buf_get_empty and finally assigns
> the memory to the buffer. We don't really want to do this, but
> rather allocate a buffer with memory attached to it using
> xfs_buf_get_noaddr. There's a subtile change because
> xfs_buf_get_empty returns the buffer locked, but xfs_buf_get_noaddr
> returns it unlocked. From my auditing and testing nothing in the
> log I/O code cares about this distincition, but I'd be happy if
> someone could try to prove this independently.
>
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> Index: linux-2.6/fs/xfs/xfs_log.c
> ===================================================================
> --- linux-2.6.orig/fs/xfs/xfs_log.c 2007-03-06 17:26:40.000000000 +0100
> +++ linux-2.6/fs/xfs/xfs_log.c 2007-03-06 17:28:03.000000000 +0100
> @@ -1199,11 +1199,16 @@
> *iclogp = (xlog_in_core_t *)
> kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
> iclog = *iclogp;
> - iclog->hic_data = (xlog_in_core_2_t *)
> - kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
> -
> iclog->ic_prev = prev_iclog;
> prev_iclog = iclog;
> +
> + bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
> + XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
> + XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
> + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
> + iclog->ic_bp = bp;
> + iclog->hic_data = bp->b_addr;
> +
> log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
>
> head = &iclog->ic_header;
> @@ -1216,11 +1221,6 @@
> INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
> memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
>
> - bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
> - XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
> - XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
> - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
> - iclog->ic_bp = bp;
>
> iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
> iclog->ic_state = XLOG_STATE_ACTIVE;
> @@ -1229,7 +1229,6 @@
> iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
>
> ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
> - ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
> sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force");
> sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write");
>
> @@ -1528,7 +1527,6 @@
> }
> #endif
> next_iclog = iclog->ic_next;
> - kmem_free(iclog->hic_data, log->l_iclog_size);
> kmem_free(iclog, sizeof(xlog_in_core_t));
> iclog = next_iclog;
> }
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [PATCH 1/2] xfs: use xfs_get_buf_noaddr for iclogs
2007-03-07 10:13 [PATCH 1/2] xfs: use xfs_get_buf_noaddr for iclogs Christoph Hellwig
2007-03-07 11:59 ` Shailendra Tripathi
@ 2007-03-12 4:41 ` David Chinner
2007-03-17 0:35 ` Christoph Hellwig
1 sibling, 1 reply; 4+ messages in thread
From: David Chinner @ 2007-03-12 4:41 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs, ecashin, akpm, linux-kernel
On Wed, Mar 07, 2007 at 11:13:14AM +0100, Christoph Hellwig wrote:
> xfs_buf_get_noaddr. There's a subtile change because
> xfs_buf_get_empty returns the buffer locked, but xfs_buf_get_noaddr
> returns it unlocked. From my auditing and testing nothing in the
> log I/O code cares about this distincition, but I'd be happy if
> someone could try to prove this independently.
Looks safe to me - we initialise all the fields in the xfs_buf_t
when we allocate out of the slab, so it doesn't really matter what
state the buffer is in when we free it.
OTOH, all other buffers are supposed to be locked when under I/O.
This change makes a special case for the log buffers, and I'd prefer
not to have to remember that this behaviour changed fo log buffers
at some point in time.
I suggest that adding:
> - iclog->hic_data = (xlog_in_core_2_t *)
> - kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
> -
> iclog->ic_prev = prev_iclog;
> prev_iclog = iclog;
> +
> + bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
> + XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
> + XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
> + XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+ XFS_BUF_PSEMA(bp, PRIBIO);
> + iclog->ic_bp = bp;
> + iclog->hic_data = bp->b_addr;
> +
> log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
>
> head = &iclog->ic_header;
To lock the buffer should be added here. That way we don't change
any semantics of the code at all.
> @@ -1216,11 +1221,6 @@
> INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
> memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
>
> - bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
> - XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
> - XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
> - XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
> - iclog->ic_bp = bp;
>
> iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
> iclog->ic_state = XLOG_STATE_ACTIVE;
> @@ -1229,7 +1229,6 @@
> iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
>
> ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
> - ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
And this assert can then stay...
Cheers,
Dave.
--
Dave Chinner
Principal Engineer
SGI Australian Software Group
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] xfs: use xfs_get_buf_noaddr for iclogs
2007-03-12 4:41 ` David Chinner
@ 2007-03-17 0:35 ` Christoph Hellwig
0 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2007-03-17 0:35 UTC (permalink / raw)
To: David Chinner; +Cc: Christoph Hellwig, xfs, ecashin, akpm, linux-kernel
On Mon, Mar 12, 2007 at 03:41:17PM +1100, David Chinner wrote:
> OTOH, all other buffers are supposed to be locked when under I/O.
> This change makes a special case for the log buffers, and I'd prefer
> not to have to remember that this behaviour changed fo log buffers
> at some point in time.
>
> I suggest that adding:
...
> + XFS_BUF_PSEMA(bp, PRIBIO);
...
> To lock the buffer should be added here. That way we don't change
> any semantics of the code at all.
Here's a patch with your suggestion implemented. Seems to work
fine under heavy NFS load for me. Note that the log recovery has
some inconsistancies already about doing I/O both on locked and
unlocked buffers. Long-term it might be a good idea to change
xfs_get_buf_noaddr to return a locked buffer like xfs_get_buf(_flags)
does already.
Index: linux-2.6/fs/xfs/xfs_log.c
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_log.c 2007-03-16 15:21:43.000000000 +0100
+++ linux-2.6/fs/xfs/xfs_log.c 2007-03-16 15:34:15.000000000 +0100
@@ -1199,11 +1199,18 @@ xlog_alloc_log(xfs_mount_t *mp,
*iclogp = (xlog_in_core_t *)
kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
iclog = *iclogp;
- iclog->hic_data = (xlog_in_core_2_t *)
- kmem_zalloc(iclogsize, KM_SLEEP | KM_LARGE);
-
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
+
+ bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+ if (!XFS_BUF_CPSEMA(bp))
+ ASSERT(0);
+ XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
+ XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
+ XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+ iclog->ic_bp = bp;
+ iclog->hic_data = bp->b_addr;
+
log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
head = &iclog->ic_header;
@@ -1216,11 +1223,6 @@ xlog_alloc_log(xfs_mount_t *mp,
INT_SET(head->h_fmt, ARCH_CONVERT, XLOG_FMT);
memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
- bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
- XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
- iclog->ic_bp = bp;
iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
iclog->ic_state = XLOG_STATE_ACTIVE;
@@ -1528,7 +1530,6 @@ xlog_dealloc_log(xlog_t *log)
}
#endif
next_iclog = iclog->ic_next;
- kmem_free(iclog->hic_data, log->l_iclog_size);
kmem_free(iclog, sizeof(xlog_in_core_t));
iclog = next_iclog;
}
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-03-17 0:35 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-07 10:13 [PATCH 1/2] xfs: use xfs_get_buf_noaddr for iclogs Christoph Hellwig
2007-03-07 11:59 ` Shailendra Tripathi
2007-03-12 4:41 ` David Chinner
2007-03-17 0:35 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox