* [PATCH] xfs: remove filestream item xfs_inode reference
@ 2018-04-09 9:57 Christoph Hellwig
2018-04-09 17:24 ` Darrick J. Wong
0 siblings, 1 reply; 2+ messages in thread
From: Christoph Hellwig @ 2018-04-09 9:57 UTC (permalink / raw)
To: linux-xfs
The filestreams allocator stores an xfs_fstrm_item structure in the MRU to
cache inode number to agno mappings for a particular length of time. Each
xfs_fstrm_item contains the internal MRU structure, an inode pointer and
agno value.
The inode pointer stored in the xfs_fstrm_item is not referenced, however,
which means the inode itself can be removed and reclaimed before the MRU
item is freed. If this occurs, xfs_fstrm_free_func() can access freed or
unrelated memory through xfs_fstrm_item->ip and crash.
The obvious solution is to grab an inode reference for xfs_fstrm_item.
The filestream mechanism only actually uses the inode pointer as a means
to access the xfs_mount, however. Rather than add unnecessary
complexity, simplify the implementation to store an xfs_mount pointer in
struct xfs_mru_cache, and pass it to the free callback. This also
requires updates to the tracepoint class to provide the associated data
via parameters rather than the inode and a minor hack to peek at the MRU
key to establish the inode number at free time.
Based on debugging work and an earlier patch from Brian Foster, who
also wrote most of this changelog.
Reported-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_filestream.c | 19 +++++++++----------
fs/xfs/xfs_mru_cache.c | 8 +++++---
fs/xfs/xfs_mru_cache.h | 8 ++++----
fs/xfs/xfs_trace.h | 14 +++++++-------
4 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 043ca3808ea2..5131a6e25fc9 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -34,7 +34,6 @@
struct xfs_fstrm_item {
struct xfs_mru_cache_elem mru;
- struct xfs_inode *ip;
xfs_agnumber_t ag; /* AG in use for this directory */
};
@@ -122,14 +121,15 @@ xfs_filestream_put_ag(
static void
xfs_fstrm_free_func(
+ void *data,
struct xfs_mru_cache_elem *mru)
{
+ struct xfs_mount *mp = data;
struct xfs_fstrm_item *item =
container_of(mru, struct xfs_fstrm_item, mru);
- xfs_filestream_put_ag(item->ip->i_mount, item->ag);
-
- trace_xfs_filestream_free(item->ip, item->ag);
+ xfs_filestream_put_ag(mp, item->ag);
+ trace_xfs_filestream_free(mp, mru->key, item->ag);
kmem_free(item);
}
@@ -165,7 +165,7 @@ xfs_filestream_pick_ag(
trylock = XFS_ALLOC_FLAG_TRYLOCK;
for (nscan = 0; 1; nscan++) {
- trace_xfs_filestream_scan(ip, ag);
+ trace_xfs_filestream_scan(mp, ip->i_ino, ag);
pag = xfs_perag_get(mp, ag);
@@ -265,7 +265,6 @@ xfs_filestream_pick_ag(
goto out_put_ag;
item->ag = *agp;
- item->ip = ip;
err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
if (err) {
@@ -333,7 +332,7 @@ xfs_filestream_lookup_ag(
ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
xfs_mru_cache_done(mp->m_filestream);
- trace_xfs_filestream_lookup(ip, ag);
+ trace_xfs_filestream_lookup(mp, ip->i_ino, ag);
goto out;
}
@@ -399,7 +398,7 @@ xfs_filestream_new_ag(
* Only free the item here so we skip over the old AG earlier.
*/
if (mru)
- xfs_fstrm_free_func(mru);
+ xfs_fstrm_free_func(mp, mru);
IRELE(pip);
exit:
@@ -426,8 +425,8 @@ xfs_filestream_mount(
* timer tunable to within about 10 percent. This requires at least 10
* groups.
*/
- return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10,
- 10, xfs_fstrm_free_func);
+ return xfs_mru_cache_create(&mp->m_filestream, mp,
+ xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
}
void
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index f8a674d7f092..70eea7ae2876 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -112,6 +112,7 @@ struct xfs_mru_cache {
xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
struct delayed_work work; /* Workqueue data for reaping. */
unsigned int queued; /* work has been queued */
+ void *data;
};
static struct workqueue_struct *xfs_mru_reap_wq;
@@ -259,7 +260,7 @@ _xfs_mru_cache_clear_reap_list(
list_for_each_entry_safe(elem, next, &tmp, list_node) {
list_del_init(&elem->list_node);
- mru->free_func(elem);
+ mru->free_func(mru->data, elem);
}
spin_lock(&mru->lock);
@@ -326,6 +327,7 @@ xfs_mru_cache_uninit(void)
int
xfs_mru_cache_create(
struct xfs_mru_cache **mrup,
+ void *data,
unsigned int lifetime_ms,
unsigned int grp_count,
xfs_mru_cache_free_func_t free_func)
@@ -369,7 +371,7 @@ xfs_mru_cache_create(
mru->grp_time = grp_time;
mru->free_func = free_func;
-
+ mru->data = data;
*mrup = mru;
exit:
@@ -492,7 +494,7 @@ xfs_mru_cache_delete(
elem = xfs_mru_cache_remove(mru, key);
if (elem)
- mru->free_func(elem);
+ mru->free_func(mru->data, elem);
}
/*
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index fb5245ba5ff7..b3f3fbdfcc47 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -26,13 +26,13 @@ struct xfs_mru_cache_elem {
};
/* Function pointer type for callback to free a client's data pointer. */
-typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);
+typedef void (*xfs_mru_cache_free_func_t)(void *, struct xfs_mru_cache_elem *);
int xfs_mru_cache_init(void);
void xfs_mru_cache_uninit(void);
-int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
- unsigned int grp_count,
- xfs_mru_cache_free_func_t free_func);
+int xfs_mru_cache_create(struct xfs_mru_cache **mrup, void *data,
+ unsigned int lifetime_ms, unsigned int grp_count,
+ xfs_mru_cache_free_func_t free_func);
void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
struct xfs_mru_cache_elem *elem);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a982c0b623d0..8955254b900e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -506,8 +506,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
DECLARE_EVENT_CLASS(xfs_filestream_class,
- TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
- TP_ARGS(ip, agno),
+ TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno),
+ TP_ARGS(mp, ino, agno),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -515,10 +515,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
__field(int, streams)
),
TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
+ __entry->dev = mp->m_super->s_dev;
+ __entry->ino = ino;
__entry->agno = agno;
- __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
+ __entry->streams = xfs_filestream_peek_ag(mp, agno);
),
TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -528,8 +528,8 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
)
#define DEFINE_FILESTREAM_EVENT(name) \
DEFINE_EVENT(xfs_filestream_class, name, \
- TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \
- TP_ARGS(ip, agno))
+ TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \
+ TP_ARGS(mp, ino, agno))
DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
--
2.16.3
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] xfs: remove filestream item xfs_inode reference
2018-04-09 9:57 [PATCH] xfs: remove filestream item xfs_inode reference Christoph Hellwig
@ 2018-04-09 17:24 ` Darrick J. Wong
0 siblings, 0 replies; 2+ messages in thread
From: Darrick J. Wong @ 2018-04-09 17:24 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Mon, Apr 09, 2018 at 11:57:07AM +0200, Christoph Hellwig wrote:
> The filestreams allocator stores an xfs_fstrm_item structure in the MRU to
> cache inode number to agno mappings for a particular length of time. Each
> xfs_fstrm_item contains the internal MRU structure, an inode pointer and
> agno value.
>
> The inode pointer stored in the xfs_fstrm_item is not referenced, however,
> which means the inode itself can be removed and reclaimed before the MRU
> item is freed. If this occurs, xfs_fstrm_free_func() can access freed or
> unrelated memory through xfs_fstrm_item->ip and crash.
>
> The obvious solution is to grab an inode reference for xfs_fstrm_item.
> The filestream mechanism only actually uses the inode pointer as a means
> to access the xfs_mount, however. Rather than add unnecessary
> complexity, simplify the implementation to store an xfs_mount pointer in
> struct xfs_mru_cache, and pass it to the free callback. This also
> requires updates to the tracepoint class to provide the associated data
> via parameters rather than the inode and a minor hack to peek at the MRU
> key to establish the inode number at free time.
>
> Based on debugging work and an earlier patch from Brian Foster, who
> also wrote most of this changelog.
>
> Reported-by: Brian Foster <bfoster@redhat.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Brian Foster <bfoster@redhat.com>
Looks ok, doesn't seem to cause any testing regressions,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
--D
> ---
> fs/xfs/xfs_filestream.c | 19 +++++++++----------
> fs/xfs/xfs_mru_cache.c | 8 +++++---
> fs/xfs/xfs_mru_cache.h | 8 ++++----
> fs/xfs/xfs_trace.h | 14 +++++++-------
> 4 files changed, 25 insertions(+), 24 deletions(-)
>
> diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
> index 043ca3808ea2..5131a6e25fc9 100644
> --- a/fs/xfs/xfs_filestream.c
> +++ b/fs/xfs/xfs_filestream.c
> @@ -34,7 +34,6 @@
>
> struct xfs_fstrm_item {
> struct xfs_mru_cache_elem mru;
> - struct xfs_inode *ip;
> xfs_agnumber_t ag; /* AG in use for this directory */
> };
>
> @@ -122,14 +121,15 @@ xfs_filestream_put_ag(
>
> static void
> xfs_fstrm_free_func(
> + void *data,
> struct xfs_mru_cache_elem *mru)
> {
> + struct xfs_mount *mp = data;
> struct xfs_fstrm_item *item =
> container_of(mru, struct xfs_fstrm_item, mru);
>
> - xfs_filestream_put_ag(item->ip->i_mount, item->ag);
> -
> - trace_xfs_filestream_free(item->ip, item->ag);
> + xfs_filestream_put_ag(mp, item->ag);
> + trace_xfs_filestream_free(mp, mru->key, item->ag);
>
> kmem_free(item);
> }
> @@ -165,7 +165,7 @@ xfs_filestream_pick_ag(
> trylock = XFS_ALLOC_FLAG_TRYLOCK;
>
> for (nscan = 0; 1; nscan++) {
> - trace_xfs_filestream_scan(ip, ag);
> + trace_xfs_filestream_scan(mp, ip->i_ino, ag);
>
> pag = xfs_perag_get(mp, ag);
>
> @@ -265,7 +265,6 @@ xfs_filestream_pick_ag(
> goto out_put_ag;
>
> item->ag = *agp;
> - item->ip = ip;
>
> err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
> if (err) {
> @@ -333,7 +332,7 @@ xfs_filestream_lookup_ag(
> ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
> xfs_mru_cache_done(mp->m_filestream);
>
> - trace_xfs_filestream_lookup(ip, ag);
> + trace_xfs_filestream_lookup(mp, ip->i_ino, ag);
> goto out;
> }
>
> @@ -399,7 +398,7 @@ xfs_filestream_new_ag(
> * Only free the item here so we skip over the old AG earlier.
> */
> if (mru)
> - xfs_fstrm_free_func(mru);
> + xfs_fstrm_free_func(mp, mru);
>
> IRELE(pip);
> exit:
> @@ -426,8 +425,8 @@ xfs_filestream_mount(
> * timer tunable to within about 10 percent. This requires at least 10
> * groups.
> */
> - return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10,
> - 10, xfs_fstrm_free_func);
> + return xfs_mru_cache_create(&mp->m_filestream, mp,
> + xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
> }
>
> void
> diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
> index f8a674d7f092..70eea7ae2876 100644
> --- a/fs/xfs/xfs_mru_cache.c
> +++ b/fs/xfs/xfs_mru_cache.c
> @@ -112,6 +112,7 @@ struct xfs_mru_cache {
> xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
> struct delayed_work work; /* Workqueue data for reaping. */
> unsigned int queued; /* work has been queued */
> + void *data;
> };
>
> static struct workqueue_struct *xfs_mru_reap_wq;
> @@ -259,7 +260,7 @@ _xfs_mru_cache_clear_reap_list(
>
> list_for_each_entry_safe(elem, next, &tmp, list_node) {
> list_del_init(&elem->list_node);
> - mru->free_func(elem);
> + mru->free_func(mru->data, elem);
> }
>
> spin_lock(&mru->lock);
> @@ -326,6 +327,7 @@ xfs_mru_cache_uninit(void)
> int
> xfs_mru_cache_create(
> struct xfs_mru_cache **mrup,
> + void *data,
> unsigned int lifetime_ms,
> unsigned int grp_count,
> xfs_mru_cache_free_func_t free_func)
> @@ -369,7 +371,7 @@ xfs_mru_cache_create(
>
> mru->grp_time = grp_time;
> mru->free_func = free_func;
> -
> + mru->data = data;
> *mrup = mru;
>
> exit:
> @@ -492,7 +494,7 @@ xfs_mru_cache_delete(
>
> elem = xfs_mru_cache_remove(mru, key);
> if (elem)
> - mru->free_func(elem);
> + mru->free_func(mru->data, elem);
> }
>
> /*
> diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
> index fb5245ba5ff7..b3f3fbdfcc47 100644
> --- a/fs/xfs/xfs_mru_cache.h
> +++ b/fs/xfs/xfs_mru_cache.h
> @@ -26,13 +26,13 @@ struct xfs_mru_cache_elem {
> };
>
> /* Function pointer type for callback to free a client's data pointer. */
> -typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);
> +typedef void (*xfs_mru_cache_free_func_t)(void *, struct xfs_mru_cache_elem *);
>
> int xfs_mru_cache_init(void);
> void xfs_mru_cache_uninit(void);
> -int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
> - unsigned int grp_count,
> - xfs_mru_cache_free_func_t free_func);
> +int xfs_mru_cache_create(struct xfs_mru_cache **mrup, void *data,
> + unsigned int lifetime_ms, unsigned int grp_count,
> + xfs_mru_cache_free_func_t free_func);
> void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
> int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
> struct xfs_mru_cache_elem *elem);
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index a982c0b623d0..8955254b900e 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -506,8 +506,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
> DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
>
> DECLARE_EVENT_CLASS(xfs_filestream_class,
> - TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
> - TP_ARGS(ip, agno),
> + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno),
> + TP_ARGS(mp, ino, agno),
> TP_STRUCT__entry(
> __field(dev_t, dev)
> __field(xfs_ino_t, ino)
> @@ -515,10 +515,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
> __field(int, streams)
> ),
> TP_fast_assign(
> - __entry->dev = VFS_I(ip)->i_sb->s_dev;
> - __entry->ino = ip->i_ino;
> + __entry->dev = mp->m_super->s_dev;
> + __entry->ino = ino;
> __entry->agno = agno;
> - __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
> + __entry->streams = xfs_filestream_peek_ag(mp, agno);
> ),
> TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
> MAJOR(__entry->dev), MINOR(__entry->dev),
> @@ -528,8 +528,8 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
> )
> #define DEFINE_FILESTREAM_EVENT(name) \
> DEFINE_EVENT(xfs_filestream_class, name, \
> - TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \
> - TP_ARGS(ip, agno))
> + TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \
> + TP_ARGS(mp, ino, agno))
> DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
> DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
> DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
> --
> 2.16.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2018-04-09 17:24 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-04-09 9:57 [PATCH] xfs: remove filestream item xfs_inode reference Christoph Hellwig
2018-04-09 17:24 ` Darrick J. Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).