* [PATCH] exfat/balloc: using ffs instead of internal logic
@ 2023-12-07 23:47 ` John Sanpe
2023-12-12 1:15 ` Sungjong Seo
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: John Sanpe @ 2023-12-07 23:47 UTC (permalink / raw)
To: linkinjeon, sj1557.seo, willy
Cc: linux-fsdevel, Andy.Wu, Wataru.Aoyama, cpgs, John Sanpe
Replaced the internal table lookup algorithm with ffs of
the bitops library with better performance.
Use it to increase the single processing length of the
exfat_find_free_bitmap function, from single-byte search to long type.
Signed-off-by: John Sanpe <sanpeqf@gmail.com>
---
fs/exfat/balloc.c | 41 +++++++++++++++--------------------------
fs/exfat/exfat_fs.h | 3 +--
2 files changed, 16 insertions(+), 28 deletions(-)
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index 3e3e9e4cce2f..4bacbb0cf5da 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -14,29 +14,15 @@
#if BITS_PER_LONG == 32
#define __le_long __le32
#define lel_to_cpu(A) le32_to_cpu(A)
+#define cpu_to_lel(A) cpu_to_le32(A)
#elif BITS_PER_LONG == 64
#define __le_long __le64
#define lel_to_cpu(A) le64_to_cpu(A)
+#define cpu_to_lel(A) cpu_to_le64(A)
#else
#error "BITS_PER_LONG not 32 or 64"
#endif
-static const unsigned char free_bit[] = {
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ 19*/
- 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ 39*/
- 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ 59*/
- 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ 79*/
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ 99*/
- 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/
- 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/
- 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/
- 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/
- 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/
- 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ 254*/
-};
-
/*
* Allocation Bitmap Management Functions
*/
@@ -195,32 +181,35 @@ unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu)
{
unsigned int i, map_i, map_b, ent_idx;
unsigned int clu_base, clu_free;
- unsigned char k, clu_mask;
+ unsigned long clu_bits, clu_mask;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ __le_long bitval;
WARN_ON(clu < EXFAT_FIRST_CLUSTER);
- ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
- clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
+ ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
+ clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
- i += BITS_PER_BYTE) {
- k = *(sbi->vol_amap[map_i]->b_data + map_b);
+ i += BITS_PER_LONG) {
+ bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b);
if (clu_mask > 0) {
- k |= clu_mask;
+ bitval |= cpu_to_lel(clu_mask);
clu_mask = 0;
}
- if (k < 0xFF) {
- clu_free = clu_base + free_bit[k];
+ if (bitval != ULONG_MAX) {
+ clu_bits = lel_to_cpu(bitval);
+ clu_free = clu_base + ffz(clu_bits);
if (clu_free < sbi->num_clusters)
return clu_free;
}
- clu_base += BITS_PER_BYTE;
+ clu_base += BITS_PER_LONG;
+ map_b += sizeof(long);
- if (++map_b >= sb->s_blocksize ||
+ if (map_b >= sb->s_blocksize ||
clu_base >= sbi->num_clusters) {
if (++map_i >= sbi->map_sectors) {
clu_base = EXFAT_FIRST_CLUSTER;
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index a7a2c35d74fb..8030780a199b 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -135,8 +135,7 @@ enum {
#define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & BITS_PER_SECTOR_MASK(sb))
#define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \
((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1))
-#define BITS_PER_BYTE_MASK 0x7
-#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1)
+#define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) - (clu_base))) - 1)
#define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1)
/* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* RE: [PATCH] exfat/balloc: using ffs instead of internal logic
2023-12-07 23:47 ` [PATCH] exfat/balloc: using ffs instead of internal logic John Sanpe
@ 2023-12-12 1:15 ` Sungjong Seo
2023-12-12 1:53 ` Matthew Wilcox
2023-12-13 13:29 ` Namjae Jeon
2 siblings, 0 replies; 5+ messages in thread
From: Sungjong Seo @ 2023-12-12 1:15 UTC (permalink / raw)
To: 'John Sanpe', linkinjeon, willy; +Cc: linux-fsdevel
> Replaced the internal table lookup algorithm with ffs of the bitops
> library with better performance.
>
> Use it to increase the single processing length of the
> exfat_find_free_bitmap function, from single-byte search to long type.
>
> Signed-off-by: John Sanpe <sanpeqf@gmail.com>
Looks good. Thanks for your patch.
Acked-by: Sungjong Seo <sj1557.seo@samsung.com>
> ---
> fs/exfat/balloc.c | 41 +++++++++++++++--------------------------
> fs/exfat/exfat_fs.h | 3 +--
> 2 files changed, 16 insertions(+), 28 deletions(-)
>
> diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index
> 3e3e9e4cce2f..4bacbb0cf5da 100644
> --- a/fs/exfat/balloc.c
> +++ b/fs/exfat/balloc.c
> @@ -14,29 +14,15 @@
> #if BITS_PER_LONG == 32
> #define __le_long __le32
> #define lel_to_cpu(A) le32_to_cpu(A)
> +#define cpu_to_lel(A) cpu_to_le32(A)
> #elif BITS_PER_LONG == 64
> #define __le_long __le64
> #define lel_to_cpu(A) le64_to_cpu(A)
> +#define cpu_to_lel(A) cpu_to_le64(A)
> #else
> #error "BITS_PER_LONG not 32 or 64"
> #endif
>
> -static const unsigned char free_bit[] = {
> - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~
> 19*/
> - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~
> 39*/
> - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~
> 59*/
> - 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~
> 79*/
> - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~
> 99*/
> - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~
> 119*/
> - 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~
> 139*/
> - 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~
> 159*/
> - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~
> 179*/
> - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~
> 199*/
> - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~
> 219*/
> - 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~
> 239*/
> - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~
> 254*/
> -};
> -
> /*
> * Allocation Bitmap Management Functions
> */
> @@ -195,32 +181,35 @@ unsigned int exfat_find_free_bitmap(struct
> super_block *sb, unsigned int clu) {
> unsigned int i, map_i, map_b, ent_idx;
> unsigned int clu_base, clu_free;
> - unsigned char k, clu_mask;
> + unsigned long clu_bits, clu_mask;
> struct exfat_sb_info *sbi = EXFAT_SB(sb);
> + __le_long bitval;
>
> WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> - ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
> + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
> clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
>
> map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
> map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
>
> for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
> - i += BITS_PER_BYTE) {
> - k = *(sbi->vol_amap[map_i]->b_data + map_b);
> + i += BITS_PER_LONG) {
> + bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data +
> map_b);
> if (clu_mask > 0) {
> - k |= clu_mask;
> + bitval |= cpu_to_lel(clu_mask);
> clu_mask = 0;
> }
> - if (k < 0xFF) {
> - clu_free = clu_base + free_bit[k];
> + if (bitval != ULONG_MAX) {
> + clu_bits = lel_to_cpu(bitval);
> + clu_free = clu_base + ffz(clu_bits);
> if (clu_free < sbi->num_clusters)
> return clu_free;
> }
> - clu_base += BITS_PER_BYTE;
> + clu_base += BITS_PER_LONG;
> + map_b += sizeof(long);
>
> - if (++map_b >= sb->s_blocksize ||
> + if (map_b >= sb->s_blocksize ||
> clu_base >= sbi->num_clusters) {
> if (++map_i >= sbi->map_sectors) {
> clu_base = EXFAT_FIRST_CLUSTER;
> diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index
> a7a2c35d74fb..8030780a199b 100644
> --- a/fs/exfat/exfat_fs.h
> +++ b/fs/exfat/exfat_fs.h
> @@ -135,8 +135,7 @@ enum {
> #define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent &
> BITS_PER_SECTOR_MASK(sb)) #define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \
> ((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1))
> -#define BITS_PER_BYTE_MASK 0x7
> -#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base)))
> - 1)
> +#define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) -
> +(clu_base))) - 1)
>
> #define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1)
> /* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */
> --
> 2.43.0
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] exfat/balloc: using ffs instead of internal logic
2023-12-07 23:47 ` [PATCH] exfat/balloc: using ffs instead of internal logic John Sanpe
2023-12-12 1:15 ` Sungjong Seo
@ 2023-12-12 1:53 ` Matthew Wilcox
2023-12-12 16:10 ` Fredrik Anderson
2023-12-13 13:29 ` Namjae Jeon
2 siblings, 1 reply; 5+ messages in thread
From: Matthew Wilcox @ 2023-12-12 1:53 UTC (permalink / raw)
To: John Sanpe
Cc: linkinjeon, sj1557.seo, linux-fsdevel, Andy.Wu, Wataru.Aoyama,
cpgs
On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote:
> WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> - ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
> + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
> clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
>
> map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
> map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
>
> for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
> - i += BITS_PER_BYTE) {
> - k = *(sbi->vol_amap[map_i]->b_data + map_b);
> + i += BITS_PER_LONG) {
> + bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b);
Is this guaranteed to be word-aligned, or might we end up doing
misaligned loads here?
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] exfat/balloc: using ffs instead of internal logic
2023-12-12 1:53 ` Matthew Wilcox
@ 2023-12-12 16:10 ` Fredrik Anderson
0 siblings, 0 replies; 5+ messages in thread
From: Fredrik Anderson @ 2023-12-12 16:10 UTC (permalink / raw)
To: Matthew Wilcox
Cc: linkinjeon, sj1557.seo, linux-fsdevel, Andy.Wu, Wataru.Aoyama,
cpgs
On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote:
> WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> - ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
Thanks a lot for this question, the clusters are aligned here,
so the final calculated "map_b" is word-aligned.
On Tue, Dec 12, 2023 at 9:53 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote:
> > WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> > - ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> > - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> > + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
> > + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
> > clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
> >
> > map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
> > map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
> >
> > for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
> > - i += BITS_PER_BYTE) {
> > - k = *(sbi->vol_amap[map_i]->b_data + map_b);
> > + i += BITS_PER_LONG) {
> > + bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b);
>
> Is this guaranteed to be word-aligned, or might we end up doing
> misaligned loads here?
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] exfat/balloc: using ffs instead of internal logic
2023-12-07 23:47 ` [PATCH] exfat/balloc: using ffs instead of internal logic John Sanpe
2023-12-12 1:15 ` Sungjong Seo
2023-12-12 1:53 ` Matthew Wilcox
@ 2023-12-13 13:29 ` Namjae Jeon
2 siblings, 0 replies; 5+ messages in thread
From: Namjae Jeon @ 2023-12-13 13:29 UTC (permalink / raw)
To: John Sanpe; +Cc: sj1557.seo, willy, linux-fsdevel, Andy.Wu, Wataru.Aoyama, cpgs
2023-12-08 8:47 GMT+09:00, John Sanpe <sanpeqf@gmail.com>:
> Replaced the internal table lookup algorithm with ffs of
> the bitops library with better performance.
>
> Use it to increase the single processing length of the
> exfat_find_free_bitmap function, from single-byte search to long type.
>
> Signed-off-by: John Sanpe <sanpeqf@gmail.com>
Applied it to #dev.
Thanks for your patch!
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-12-13 13:29 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <CGME20231209203819epcas1p469c0aecdc6a27fff7895a93ed5f3c4ec@epcas1p4.samsung.com>
2023-12-07 23:47 ` [PATCH] exfat/balloc: using ffs instead of internal logic John Sanpe
2023-12-12 1:15 ` Sungjong Seo
2023-12-12 1:53 ` Matthew Wilcox
2023-12-12 16:10 ` Fredrik Anderson
2023-12-13 13:29 ` Namjae Jeon
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).