From 2e1473d5195f9ce1afb143208e568a9350e08b77 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 10 Mar 2025 17:54:54 +0800 Subject: [PATCH] erofs: implement 48-bit block addressing for unencoded inodes It adapts the on-disk changes from the previous commit. It also supports EROFS_NULL_ADDR (all 1's) for EROFS_INODE_FLAT_PLAIN inodes to indicate 0-filled inodes, as it's common for composefs use cases. As a result, EROFS_INODE_CHUNK_BASED is no longer needed. Signed-off-by: Gao Xiang Acked-by: Chao Yu Link: https://lore.kernel.org/r/20250310095459.2620647-5-hsiangkao@linux.alibaba.com --- fs/erofs/data.c | 13 +++++++++---- fs/erofs/dir.c | 2 +- fs/erofs/inode.c | 33 ++++++++++++++++++++++++--------- fs/erofs/internal.h | 12 ++++++------ fs/erofs/super.c | 15 ++++++++++----- fs/erofs/sysfs.c | 2 ++ include/trace/events/erofs.h | 2 +- 7 files changed, 53 insertions(+), 26 deletions(-) diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 3c4a4eaffe8c..2409d2ab0c28 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -77,7 +77,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) unsigned int unit, blksz = sb->s_blocksize; struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode_chunk_index *idx; - erofs_blk_t startblk; + erofs_blk_t startblk, addrmask; bool tailpacking; erofs_off_t pos; u64 chunknr; @@ -91,6 +91,8 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) if (vi->datalayout != EROFS_INODE_CHUNK_BASED) { tailpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); + if (!tailpacking && vi->startblk == EROFS_NULL_ADDR) + goto out; pos = erofs_pos(sb, erofs_iblks(inode) - tailpacking); map->m_flags = EROFS_MAP_MAPPED; @@ -124,8 +126,11 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) map->m_llen = min_t(erofs_off_t, 1UL << vi->chunkbits, round_up(inode->i_size - map->m_la, blksz)); if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) { - startblk = le32_to_cpu(idx->startblk_lo); - if (startblk != EROFS_NULL_ADDR) { + addrmask = (vi->chunkformat & EROFS_CHUNK_FORMAT_48BIT) ? + BIT_ULL(48) - 1 : BIT_ULL(32) - 1; + startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) | + le32_to_cpu(idx->startblk_lo)) & addrmask; + if ((startblk ^ EROFS_NULL_ADDR) & addrmask) { map->m_deviceid = le16_to_cpu(idx->device_id) & EROFS_SB(sb)->device_id_mask; map->m_pa = erofs_pos(sb, startblk); @@ -133,7 +138,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) } } else { startblk = le32_to_cpu(*(__le32 *)idx); - if (startblk != EROFS_NULL_ADDR) { + if (startblk != (u32)EROFS_NULL_ADDR) { map->m_pa = erofs_pos(sb, startblk); map->m_flags = EROFS_MAP_MAPPED; } diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 1d3bb8746ab1..fa3c2d380cc9 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -60,7 +60,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { - erofs_err(sb, "fail to readdir of logical block %u of nid %llu", + erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index e74c0c00aa26..6de281312ea7 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -31,10 +31,10 @@ static int erofs_read_inode(struct inode *inode) unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode)); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_sb_info *sbi = EROFS_SB(sb); + erofs_blk_t addrmask = BIT_ULL(48) - 1; struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode_extended *die, copied; struct erofs_inode_compact *dic; - union erofs_inode_i_u iu; unsigned int ifmt; void *ptr; int err = 0; @@ -71,6 +71,8 @@ static int erofs_read_inode(struct inode *inode) if (ofs + vi->inode_isize <= sb->s_blocksize) { ofs += vi->inode_isize; die = (struct erofs_inode_extended *)dic; + copied.i_u = die->i_u; + copied.i_nb = die->i_nb; } else { const unsigned int gotten = sb->s_blocksize - ofs; @@ -90,7 +92,6 @@ static int erofs_read_inode(struct inode *inode) vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); inode->i_mode = le16_to_cpu(die->i_mode); - iu = die->i_u; i_uid_write(inode, le32_to_cpu(die->i_uid)); i_gid_write(inode, le32_to_cpu(die->i_gid)); set_nlink(inode, le32_to_cpu(die->i_nlink)); @@ -105,11 +106,20 @@ static int erofs_read_inode(struct inode *inode) vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); inode->i_mode = le16_to_cpu(dic->i_mode); - iu = dic->i_u; + copied.i_u = dic->i_u; i_uid_write(inode, le16_to_cpu(dic->i_uid)); i_gid_write(inode, le16_to_cpu(dic->i_gid)); - set_nlink(inode, le16_to_cpu(dic->i_nb.nlink)); - inode_set_mtime(inode, sbi->build_time, sbi->build_time_nsec); + if (!S_ISDIR(inode->i_mode) && + ((ifmt >> EROFS_I_NLINK_1_BIT) & 1)) { + set_nlink(inode, 1); + copied.i_nb = dic->i_nb; + } else { + set_nlink(inode, le16_to_cpu(dic->i_nb.nlink)); + copied.i_nb.startblk_hi = 0; + addrmask = BIT_ULL(32) - 1; + } + inode_set_mtime(inode, sbi->epoch + le32_to_cpu(dic->i_mtime), + sbi->fixed_nsec); inode->i_size = le32_to_cpu(dic->i_size); break; @@ -129,7 +139,12 @@ static int erofs_read_inode(struct inode *inode) case S_IFREG: case S_IFDIR: case S_IFLNK: - vi->startblk = le32_to_cpu(iu.startblk_lo); + vi->startblk = le32_to_cpu(copied.i_u.startblk_lo) | + ((u64)le16_to_cpu(copied.i_nb.startblk_hi) << 32); + if (vi->datalayout == EROFS_INODE_FLAT_PLAIN && + !((vi->startblk ^ EROFS_NULL_ADDR) & addrmask)) + vi->startblk = EROFS_NULL_ADDR; + if(S_ISLNK(inode->i_mode)) { err = erofs_fill_symlink(inode, ptr, ofs); if (err) @@ -138,7 +153,7 @@ static int erofs_read_inode(struct inode *inode) break; case S_IFCHR: case S_IFBLK: - inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev)); + inode->i_rdev = new_decode_dev(le32_to_cpu(copied.i_u.rdev)); break; case S_IFIFO: case S_IFSOCK: @@ -152,14 +167,14 @@ static int erofs_read_inode(struct inode *inode) } if (erofs_inode_is_data_compressed(vi->datalayout)) - inode->i_blocks = le32_to_cpu(iu.blocks_lo) << + inode->i_blocks = le32_to_cpu(copied.i_u.blocks_lo) << (sb->s_blocksize_bits - 9); else inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { /* fill chunked inode summary info */ - vi->chunkformat = le16_to_cpu(iu.c.format); + vi->chunkformat = le16_to_cpu(copied.i_u.c.format); if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { erofs_err(sb, "unsupported chunk format %x of nid %llu", vi->chunkformat, vi->nid); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 58e401131c75..07515a6f2534 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -37,8 +37,7 @@ __printf(2, 3) void _erofs_printk(struct super_block *sb, const char *fmt, ...); typedef u64 erofs_nid_t; typedef u64 erofs_off_t; -/* data type for filesystem-wide blocks number */ -typedef u32 erofs_blk_t; +typedef u64 erofs_blk_t; struct erofs_device_info { char *path; @@ -143,8 +142,8 @@ struct erofs_sb_info { unsigned char blkszbits; /* filesystem block size in bit shift */ u32 sb_size; /* total superblock size */ - u32 build_time_nsec; - u64 build_time; + u32 fixed_nsec; + s64 epoch; /* what we really care is nid, rather than ino.. */ erofs_nid_t root_nid; @@ -205,8 +204,8 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) -#define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) +#define erofs_blknr(sb, pos) ((erofs_blk_t)((pos) >> (sb)->s_blocksize_bits)) +#define erofs_blkoff(sb, pos) ((pos) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) @@ -226,6 +225,7 @@ EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES) +EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index a64f9765e95e..18445dc8597d 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -268,7 +268,7 @@ static int erofs_read_superblock(struct super_block *sb) goto out; } - sbi->blkszbits = dsb->blkszbits; + sbi->blkszbits = dsb->blkszbits; if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) { erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits); goto out; @@ -308,13 +308,18 @@ static int erofs_read_superblock(struct super_block *sb) sbi->xattr_filter_reserved = dsb->xattr_filter_reserved; #endif sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); - sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); + if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) { + sbi->root_nid = le64_to_cpu(dsb->rootnid_8b); + sbi->dif0.blocks = (sbi->dif0.blocks << 32) | + le16_to_cpu(dsb->rb.blocks_hi); + } else { + sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); + } sbi->packed_nid = le64_to_cpu(dsb->packed_nid); sbi->inos = le64_to_cpu(dsb->inos); - sbi->build_time = le64_to_cpu(dsb->epoch); - sbi->build_time_nsec = le32_to_cpu(dsb->fixed_nsec); - + sbi->epoch = (s64)le64_to_cpu(dsb->epoch); + sbi->fixed_nsec = le32_to_cpu(dsb->fixed_nsec); super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid)); /* parse on-disk compression configurations */ diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index 19d586273b70..dad4e6c6c155 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -81,6 +81,7 @@ EROFS_ATTR_FEATURE(sb_chksum); EROFS_ATTR_FEATURE(ztailpacking); EROFS_ATTR_FEATURE(fragments); EROFS_ATTR_FEATURE(dedupe); +EROFS_ATTR_FEATURE(48bit); static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(zero_padding), @@ -93,6 +94,7 @@ static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(ztailpacking), ATTR_LIST(fragments), ATTR_LIST(dedupe), + ATTR_LIST(48bit), NULL, }; ATTRIBUTE_GROUPS(erofs_feat); diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index 57df3843e650..c69c7b1e41d1 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -75,7 +75,7 @@ TRACE_EVENT(erofs_fill_inode, __entry->ofs = erofs_blkoff(inode->i_sb, erofs_iloc(inode)); ), - TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u", + TP_printk("dev = (%d,%d), nid = %llu, blkaddr %llu ofs %u", show_dev_nid(__entry), __entry->blkaddr, __entry->ofs) );