Merge tag 'ext4-for_linus-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Ext4 bug fixes and cleanups, including:

   - hardening against maliciously fuzzed file systems

   - backwards compatibility for the brief period when we attempted to
     ignore zero-width characters

   - avoid potentially BUG'ing if there is a file system corruption
     found during the file system unmount

   - fix free space reporting by statfs when project quotas are enabled
     and the free space is less than the remaining project quota

  Also improve performance when replaying a journal with a very large
  number of revoke records (applicable for Lustre volumes)"

* tag 'ext4-for_linus-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (71 commits)
  ext4: fix OOB read when checking dotdot dir
  ext4: on a remount, only log the ro or r/w state when it has changed
  ext4: correct the error handle in ext4_fallocate()
  ext4: Make sb update interval tunable
  ext4: avoid journaling sb update on error if journal is destroying
  ext4: define ext4_journal_destroy wrapper
  ext4: hash: simplify kzalloc(n * 1, ...) to kzalloc(n, ...)
  jbd2: add a missing data flush during file and fs synchronization
  ext4: don't over-report free space or inodes in statvfs
  ext4: clear DISCARD flag if device does not support discard
  jbd2: remove jbd2_journal_unfile_buffer()
  ext4: reorder capability check last
  ext4: update the comment about mb_optimize_scan
  jbd2: fix off-by-one while erasing journal
  ext4: remove references to bh->b_page
  ext4: goto right label 'out_mmap_sem' in ext4_setattr()
  ext4: fix out-of-bound read in ext4_xattr_inode_dec_ref_all()
  ext4: introduce ITAIL helper
  jbd2: remove redundant function jbd2_journal_has_csum_v2or3_feature
  ext4: remove redundant function ext4_has_metadata_csum
  ...
This commit is contained in: commit 5c2a430e85
@@ -238,11 +238,10 @@ When mounting an ext4 filesystem, the following option are accepted:
   configured using tune2fs)

 data_err=ignore(*)
-  Just print an error message if an error occurs in a file data buffer in
-  ordered mode.
+  Just print an error message if an error occurs in a file data buffer.

 data_err=abort
-  Abort the journal if an error occurs in a file data buffer in ordered
-  mode.
+  Abort the journal if an error occurs in a file data buffer.

 grpid | bsdgroups
   New objects have the group ID of their parent.
@@ -111,9 +111,7 @@ a callback function when the transaction is finally committed to disk,
 so that you can do some of your own management. You ask the journalling
 layer for calling the callback by simply setting
 ``journal->j_commit_callback`` function pointer and that function is
-called after each transaction commit. You can also use
-``transaction->t_private_list`` for attaching entries to a transaction
-that need processing when the transaction commits.
+called after each transaction commit.

 JBD2 also provides a way to block all transaction updates via
 jbd2_journal_lock_updates() /
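A minimal sketch of the registration pattern the documentation above describes, assuming only the ``j_commit_callback`` member that the doc names; the function and module names are hypothetical, not part of the jbd2 API:

	/*
	 * Illustrative only: register a per-commit callback on a jbd2
	 * journal. my_commit_callback()/my_setup() are made-up names.
	 */
	static void my_commit_callback(journal_t *journal, transaction_t *txn)
	{
		/*
		 * Runs in the journal thread after each transaction has
		 * committed; do your own bookkeeping here, but do not call
		 * back into the journalling layer.
		 */
	}

	static void my_setup(journal_t *journal)
	{
		journal->j_commit_callback = my_commit_callback;
	}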
@@ -649,8 +649,8 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
 	/* Hm, nope. Are (enough) root reserved clusters available? */
 	if (uid_eq(sbi->s_resuid, current_fsuid()) ||
 	    (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
-	    capable(CAP_SYS_RESOURCE) ||
-	    (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+	    (flags & EXT4_MB_USE_ROOT_BLOCKS) ||
+	    capable(CAP_SYS_RESOURCE)) {

 		if (free_clusters >= (nclusters + dirty_clusters +
 				      resv_clusters))
@@ -25,7 +25,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int sz;

-	if (!ext4_has_metadata_csum(sb))
+	if (!ext4_has_feature_metadata_csum(sb))
 		return 1;

 	sz = EXT4_INODES_PER_GROUP(sb) >> 3;
@@ -48,7 +48,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int sz;

-	if (!ext4_has_metadata_csum(sb))
+	if (!ext4_has_feature_metadata_csum(sb))
 		return;

 	sz = EXT4_INODES_PER_GROUP(sb) >> 3;
@@ -67,7 +67,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;

-	if (!ext4_has_metadata_csum(sb))
+	if (!ext4_has_feature_metadata_csum(sb))
 		return 1;

 	provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -89,7 +89,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb,
 	__u32 csum;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);

-	if (!ext4_has_metadata_csum(sb))
+	if (!ext4_has_feature_metadata_csum(sb))
 		return;

 	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -86,7 +86,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 						 dir->i_sb->s_blocksize);
 	const int next_offset = ((char *) de - buf) + rlen;
 	bool fake = is_fake_dir_entry(de);
-	bool has_csum = ext4_has_metadata_csum(dir->i_sb);
+	bool has_csum = ext4_has_feature_metadata_csum(dir->i_sb);

 	if (unlikely(rlen < ext4_dir_rec_len(1, fake ? NULL : dir)))
 		error_msg = "rec_len is smaller than minimal";
@@ -104,6 +104,9 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 	else if (unlikely(le32_to_cpu(de->inode) >
 			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
 		error_msg = "inode out of bounds";
+	else if (unlikely(next_offset == size && de->name_len == 1 &&
+			  de->name[0] == '.'))
+		error_msg = "'.' directory cannot be the last in data block";
 	else
 		return 0;

@@ -145,7 +148,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 		return err;

 	/* Can we just clear INDEX flag to ignore htree information? */
-	if (!ext4_has_metadata_csum(sb)) {
+	if (!ext4_has_feature_metadata_csum(sb)) {
 		/*
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it gets flushed back to the disk.
@@ -278,7 +278,10 @@ struct ext4_system_blocks {
 /*
  * Flags for ext4_io_end->flags
  */
 #define EXT4_IO_END_UNWRITTEN	0x0001
+#define EXT4_IO_END_FAILED	0x0002
+
+#define EXT4_IO_END_DEFER_COMPLETION (EXT4_IO_END_UNWRITTEN | EXT4_IO_END_FAILED)

 struct ext4_io_end_vec {
 	struct list_head list;		/* list of io_end_vec */
@@ -367,6 +370,8 @@ struct ext4_io_submit {
 #define EXT4_MAX_BLOCKS(size, offset, blkbits) \
 	((EXT4_BLOCK_ALIGN(size + offset, blkbits) >> blkbits) - (offset >> \
 							   blkbits))
+#define EXT4_B_TO_LBLK(inode, offset) \
+	(round_up((offset), i_blocksize(inode)) >> (inode)->i_blkbits)

 /* Translate a block number to a cluster number */
 #define EXT4_B2C(sbi, blk)	((blk) >> (sbi)->s_cluster_bits)
@@ -1058,7 +1063,8 @@ struct ext4_inode_info {

 	/* Number of ongoing updates on this inode */
 	atomic_t  i_fc_updates;
-	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
+
+	spinlock_t i_raw_lock;	/* protects updates to the raw inode */

 	/* Fast commit wait queue for this inode */
 	wait_queue_head_t i_fc_wait;
@@ -1097,8 +1103,6 @@ struct ext4_inode_info {
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;

-	spinlock_t i_raw_lock;	/* protects updates to the raw inode */
-
 	/*
 	 * File creation time. Its function is same as that of
 	 * struct timespec64 i_{a,c,m}time in the generic inode.
@@ -1141,6 +1145,7 @@ struct ext4_inode_info {
 	/* quota space reservation, managed internally by quota code */
 	qsize_t i_reserved_quota;
 #endif
+	spinlock_t i_block_reservation_lock;

 	/* Lock protecting lists below */
 	spinlock_t i_completed_io_lock;
@@ -1151,8 +1156,6 @@ struct ext4_inode_info {
 	struct list_head i_rsv_conversion_list;
 	struct work_struct i_rsv_conversion_work;

-	spinlock_t i_block_reservation_lock;
-
 	/*
 	 * Transactions that contain inode's metadata needed to complete
 	 * fsync and fdatasync, respectively.
@@ -1606,6 +1609,8 @@ struct ext4_sb_info {
 	unsigned int s_mb_prefetch;
 	unsigned int s_mb_prefetch_limit;
 	unsigned int s_mb_best_avail_max_trim_order;
+	unsigned int s_sb_update_sec;
+	unsigned int s_sb_update_kb;

 	/* stats for buddy allocator */
 	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
@@ -1821,7 +1826,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
  */
 enum {
 	EXT4_MF_MNTDIR_SAMPLED,
-	EXT4_MF_FC_INELIGIBLE	/* Fast commit ineligible */
+	EXT4_MF_FC_INELIGIBLE,	/* Fast commit ineligible */
+	EXT4_MF_JOURNAL_DESTROY	/* Journal is in process of destroying */
 };

 static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
@@ -2232,15 +2238,32 @@ extern int ext4_feature_set_ok(struct super_block *sb, int readonly);
 /*
  * Superblock flags
  */
-#define EXT4_FLAGS_RESIZING	0
-#define EXT4_FLAGS_SHUTDOWN	1
-#define EXT4_FLAGS_BDEV_IS_DAX	2
+enum {
+	EXT4_FLAGS_RESIZING,	/* Avoid superblock update and resize race */
+	EXT4_FLAGS_SHUTDOWN,	/* Prevent access to the file system */
+	EXT4_FLAGS_BDEV_IS_DAX,	/* Current block device support DAX */
+	EXT4_FLAGS_EMERGENCY_RO,/* Emergency read-only due to fs errors */
+};

 static inline int ext4_forced_shutdown(struct super_block *sb)
 {
 	return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
 }

+static inline int ext4_emergency_ro(struct super_block *sb)
+{
+	return test_bit(EXT4_FLAGS_EMERGENCY_RO, &EXT4_SB(sb)->s_ext4_flags);
+}
+
+static inline int ext4_emergency_state(struct super_block *sb)
+{
+	if (unlikely(ext4_forced_shutdown(sb)))
+		return -EIO;
+	if (unlikely(ext4_emergency_ro(sb)))
+		return -EROFS;
+	return 0;
+}
+
 /*
  * Default values for user and/or group using reserved blocks
  */
@@ -2277,6 +2300,13 @@ static inline int ext4_forced_shutdown(struct super_block *sb)
 #define EXT4_DEF_MIN_BATCH_TIME	0
 #define EXT4_DEF_MAX_BATCH_TIME	15000 /* 15ms */

+/*
+ * Default values for superblock update
+ */
+#define EXT4_DEF_SB_UPDATE_INTERVAL_SEC (3600)	/* seconds (1 hour) */
+#define EXT4_DEF_SB_UPDATE_INTERVAL_KB (16384)	/* kilobytes (16MB) */
+
+
 /*
  * Minimum number of groups in a flexgroup before we separate out
  * directories into the first block group of a flexgroup
@@ -2810,8 +2840,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 				   struct ext4_dir_entry_2 *dirent,
 				   struct fscrypt_str *ent_name);
 extern void ext4_htree_free_dir_info(struct dir_private_info *p);
-extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
-			     struct buffer_head *bh,
+extern int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh,
 			     void *buf, int buf_size,
 			     struct ext4_filename *fname,
 			     struct ext4_dir_entry_2 **dest_de);
@@ -3001,6 +3030,8 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
+extern int ext4_truncate_page_cache_block_range(struct inode *inode,
+						loff_t start, loff_t end);
 extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern void ext4_set_inode_flags(struct inode *, bool init);
 extern int ext4_alloc_da_blocks(struct inode *inode);
@@ -3259,14 +3290,10 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
 extern int ext4_register_li_request(struct super_block *sb,
 				    ext4_group_t first_not_zeroed);

-static inline int ext4_has_metadata_csum(struct super_block *sb)
-{
-	return ext4_has_feature_metadata_csum(sb);
-}
-
 static inline int ext4_has_group_desc_csum(struct super_block *sb)
 {
-	return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
+	return ext4_has_feature_gdt_csum(sb) ||
+	       ext4_has_feature_metadata_csum(sb);
 }

 #define ext4_read_incompat_64bit_val(es, name) \
@@ -3546,11 +3573,11 @@ extern int ext4_try_to_write_inline_data(struct address_space *mapping,
 					 struct folio **foliop);
 int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
 			       unsigned copied, struct folio *folio);
-extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
-					   struct inode *inode,
-					   loff_t pos, unsigned len,
-					   struct folio **foliop,
-					   void **fsdata);
+extern int ext4_generic_write_inline_data(struct address_space *mapping,
+					  struct inode *inode,
+					  loff_t pos, unsigned len,
+					  struct folio **foliop,
+					  void **fsdata, bool da);
 extern int ext4_try_add_inline_entry(handle_t *handle,
 				     struct ext4_filename *fname,
 				     struct inode *dir, struct inode *inode);
@@ -3785,34 +3812,19 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 	set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
 }

-/* For ioend & aio unwritten conversion wait queues */
-#define EXT4_WQ_HASH_SZ		37
-#define ext4_ioend_wq(v)	(&ext4__ioend_wq[((unsigned long)(v)) %\
-					 EXT4_WQ_HASH_SZ])
-extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-
 extern int ext4_resize_begin(struct super_block *sb);
 extern int ext4_resize_end(struct super_block *sb, bool update_backups);

-static inline void ext4_set_io_unwritten_flag(struct inode *inode,
-					      struct ext4_io_end *io_end)
+static inline void ext4_set_io_unwritten_flag(struct ext4_io_end *io_end)
 {
-	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN))
 		io_end->flag |= EXT4_IO_END_UNWRITTEN;
-		atomic_inc(&EXT4_I(inode)->i_unwritten);
-	}
 }

 static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 {
-	struct inode *inode = io_end->inode;
-
-	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+	if (io_end->flag & EXT4_IO_END_UNWRITTEN)
 		io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-		/* Wake up anyone waiting on unwritten extent conversion */
-		if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-			wake_up_all(ext4_ioend_wq(inode));
-	}
 }

 extern const struct iomap_ops ext4_iomap_ops;
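The new ext4_emergency_state() helper above folds the shutdown and emergency read-only checks into one call. A minimal sketch of the guard pattern it enables (the same pattern the file.c and fsync.c hunks later in this diff adopt); my_write_op() is a hypothetical entry point, not an ext4 function:

	/* Illustrative only: guard a modifying operation with the new helper. */
	static int my_write_op(struct super_block *sb)
	{
		int ret;

		/* -EIO after shutdown, -EROFS in emergency read-only, else 0 */
		ret = ext4_emergency_state(sb);
		if (unlikely(ret))
			return ret;

		/* ... proceed with the modification ... */
		return 0;
	}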
@@ -63,12 +63,14 @@ static void ext4_put_nojournal(handle_t *handle)
  */
 static int ext4_journal_check_start(struct super_block *sb)
 {
+	int ret;
 	journal_t *journal;

 	might_sleep();

-	if (unlikely(ext4_forced_shutdown(sb)))
-		return -EIO;
+	ret = ext4_emergency_state(sb);
+	if (unlikely(ret))
+		return ret;

 	if (WARN_ON_ONCE(sb_rdonly(sb)))
 		return -EROFS;
@@ -244,7 +246,8 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 		}
 	} else
 		ext4_check_bdev_write_error(sb);
-	if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb))
+	if (trigger_type == EXT4_JTR_NONE ||
+	    !ext4_has_feature_metadata_csum(sb))
 		return 0;
 	BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT);
 	jbd2_journal_set_triggers(bh,
@@ -331,7 +334,8 @@ int __ext4_journal_get_create_access(const char *where, unsigned int line,
 				    err);
 		return err;
 	}
-	if (trigger_type == EXT4_JTR_NONE || !ext4_has_metadata_csum(sb))
+	if (trigger_type == EXT4_JTR_NONE ||
+	    !ext4_has_feature_metadata_csum(sb))
 		return 0;
 	BUG_ON(trigger_type >= EXT4_JOURNAL_TRIGGER_COUNT);
 	jbd2_journal_set_triggers(bh,
@@ -122,90 +122,6 @@
 #define EXT4_HT_EXT_CONVERT 11
 #define EXT4_HT_MAX        12

-/**
- * struct ext4_journal_cb_entry - Base structure for callback information.
- *
- * This struct is a 'seed' structure for a using with your own callback
- * structs. If you are using callbacks you must allocate one of these
- * or another struct of your own definition which has this struct
- * as it's first element and pass it to ext4_journal_callback_add().
- */
-struct ext4_journal_cb_entry {
-	/* list information for other callbacks attached to the same handle */
-	struct list_head jce_list;
-
-	/* Function to call with this callback structure */
-	void (*jce_func)(struct super_block *sb,
-			 struct ext4_journal_cb_entry *jce, int error);
-
-	/* user data goes here */
-};
-
-/**
- * ext4_journal_callback_add: add a function to call after transaction commit
- * @handle: active journal transaction handle to register callback on
- * @func: callback function to call after the transaction has committed:
- *        @sb: superblock of current filesystem for transaction
- *        @jce: returned journal callback data
- *        @rc: journal state at commit (0 = transaction committed properly)
- * @jce: journal callback data (internal and function private data struct)
- *
- * The registered function will be called in the context of the journal thread
- * after the transaction for which the handle was created has completed.
- *
- * No locks are held when the callback function is called, so it is safe to
- * call blocking functions from within the callback, but the callback should
- * not block or run for too long, or the filesystem will be blocked waiting for
- * the next transaction to commit. No journaling functions can be used, or
- * there is a risk of deadlock.
- *
- * There is no guaranteed calling order of multiple registered callbacks on
- * the same transaction.
- */
-static inline void _ext4_journal_callback_add(handle_t *handle,
-			struct ext4_journal_cb_entry *jce)
-{
-	/* Add the jce to transaction's private list */
-	list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
-}
-
-static inline void ext4_journal_callback_add(handle_t *handle,
-			void (*func)(struct super_block *sb,
-				     struct ext4_journal_cb_entry *jce,
-				     int rc),
-			struct ext4_journal_cb_entry *jce)
-{
-	struct ext4_sb_info *sbi =
-			EXT4_SB(handle->h_transaction->t_journal->j_private);
-
-	/* Add the jce to transaction's private list */
-	jce->jce_func = func;
-	spin_lock(&sbi->s_md_lock);
-	_ext4_journal_callback_add(handle, jce);
-	spin_unlock(&sbi->s_md_lock);
-}
-
-
-/**
- * ext4_journal_callback_del: delete a registered callback
- * @handle: active journal transaction handle on which callback was registered
- * @jce: registered journal callback entry to unregister
- * Return true if object was successfully removed
- */
-static inline bool ext4_journal_callback_try_del(handle_t *handle,
-					     struct ext4_journal_cb_entry *jce)
-{
-	bool deleted;
-	struct ext4_sb_info *sbi =
-			EXT4_SB(handle->h_transaction->t_journal->j_private);
-
-	spin_lock(&sbi->s_md_lock);
-	deleted = !list_empty(&jce->jce_list);
-	list_del_init(&jce->jce_list);
-	spin_unlock(&sbi->s_md_lock);
-	return deleted;
-}
-
 int
 ext4_mark_iloc_dirty(handle_t *handle,
 		     struct inode *inode,
@@ -513,4 +429,33 @@ static inline int ext4_should_dioread_nolock(struct inode *inode)
 	return 1;
 }

+/*
+ * Pass journal explicitly as it may not be cached in the sbi->s_journal in some
+ * cases
+ */
+static inline int ext4_journal_destroy(struct ext4_sb_info *sbi, journal_t *journal)
+{
+	int err = 0;
+
+	/*
+	 * At this point only two things can be operating on the journal.
+	 * JBD2 thread performing transaction commit and s_sb_upd_work
+	 * issuing sb update through the journal. Once we set
+	 * EXT4_JOURNAL_DESTROY, new ext4_handle_error() calls will not
+	 * queue s_sb_upd_work and ext4_force_commit() makes sure any
+	 * ext4_handle_error() calls from the running transaction commit are
+	 * finished. Hence no new s_sb_upd_work can be queued after we
+	 * flush it here.
+	 */
+	ext4_set_mount_flag(sbi->s_sb, EXT4_MF_JOURNAL_DESTROY);
+
+	ext4_force_commit(sbi->s_sb);
+	flush_work(&sbi->s_sb_upd_work);
+
+	err = jbd2_journal_destroy(journal);
+	sbi->s_journal = NULL;
+
+	return err;
+}
+
 #endif	/* _EXT4_JBD2_H */
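A hedged sketch of how a teardown path would call the new ext4_journal_destroy() wrapper; the real call sites live in fs/ext4/super.c, which this diff does not show, and my_teardown() is a made-up name:

	/* Illustrative only: destroy the journal through the new wrapper. */
	static void my_teardown(struct super_block *sb)
	{
		struct ext4_sb_info *sbi = EXT4_SB(sb);

		if (sbi->s_journal) {
			/* Flushes s_sb_upd_work, then destroys the journal */
			int err = ext4_journal_destroy(sbi, sbi->s_journal);

			if (err < 0)
				ext4_msg(sb, KERN_ERR,
					 "journal destroy failed: %d", err);
		}
	}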
@@ -63,7 +63,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,
 {
 	struct ext4_extent_tail *et;

-	if (!ext4_has_metadata_csum(inode->i_sb))
+	if (!ext4_has_feature_metadata_csum(inode->i_sb))
 		return 1;

 	et = find_ext4_extent_tail(eh);
@@ -77,7 +77,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,
 {
 	struct ext4_extent_tail *et;

-	if (!ext4_has_metadata_csum(inode->i_sb))
+	if (!ext4_has_feature_metadata_csum(inode->i_sb))
 		return;

 	et = find_ext4_extent_tail(eh);
@@ -4568,131 +4568,65 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 			    loff_t len, int mode)
 {
 	struct inode *inode = file_inode(file);
 	struct address_space *mapping = file->f_mapping;
 	handle_t *handle = NULL;
-	unsigned int max_blocks;
 	loff_t new_size = 0;
-	int ret = 0;
-	int flags;
-	int credits;
-	int partial_begin, partial_end;
-	loff_t start, end;
-	ext4_lblk_t lblk;
+	loff_t end = offset + len;
+	ext4_lblk_t start_lblk, end_lblk;
+	unsigned int blocksize = i_blocksize(inode);
 	unsigned int blkbits = inode->i_blkbits;
+	int ret, flags, credits;

 	trace_ext4_zero_range(inode, offset, len, mode);
+	WARN_ON_ONCE(!inode_is_locked(inode));

-	/*
-	 * Round up offset. This is not fallocate, we need to zero out
-	 * blocks, so convert interior block aligned part of the range to
-	 * unwritten and possibly manually zero out unaligned parts of the
-	 * range. Here, start and partial_begin are inclusive, end and
-	 * partial_end are exclusive.
-	 */
-	start = round_up(offset, 1 << blkbits);
-	end = round_down((offset + len), 1 << blkbits);
-
-	if (start < offset || end > offset + len)
-		return -EINVAL;
-	partial_begin = offset & ((1 << blkbits) - 1);
-	partial_end = (offset + len) & ((1 << blkbits) - 1);
-
-	lblk = start >> blkbits;
-	max_blocks = (end >> blkbits);
-	if (max_blocks < lblk)
-		max_blocks = 0;
-	else
-		max_blocks -= lblk;
-
-	inode_lock(inode);
-
-	/*
-	 * Indirect files do not support unwritten extents
-	 */
-	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
-		ret = -EOPNOTSUPP;
-		goto out_mutex;
-	}
+	/* Indirect files do not support unwritten extents */
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+		return -EOPNOTSUPP;

 	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-	    (offset + len > inode->i_size ||
-	     offset + len > EXT4_I(inode)->i_disksize)) {
-		new_size = offset + len;
+	    (end > inode->i_size || end > EXT4_I(inode)->i_disksize)) {
+		new_size = end;
 		ret = inode_newsize_ok(inode, new_size);
 		if (ret)
-			goto out_mutex;
+			return ret;
 	}

 	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
-
-	/* Wait all existing dio workers, newcomers will block on i_rwsem */
-	inode_dio_wait(inode);
-
-	ret = file_modified(file);
-	if (ret)
-		goto out_mutex;
-
 	/* Preallocate the range including the unaligned edges */
-	if (partial_begin || partial_end) {
-		ret = ext4_alloc_file_blocks(file,
-				round_down(offset, 1 << blkbits) >> blkbits,
-				(round_up((offset + len), 1 << blkbits) -
-				 round_down(offset, 1 << blkbits)) >> blkbits,
-				new_size, flags);
-		if (ret)
-			goto out_mutex;
+	if (!IS_ALIGNED(offset | end, blocksize)) {
+		ext4_lblk_t alloc_lblk = offset >> blkbits;
+		ext4_lblk_t len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits);
+
+		ret = ext4_alloc_file_blocks(file, alloc_lblk, len_lblk,
+					     new_size, flags);
+		if (ret)
+			return ret;
 	}

+	ret = ext4_update_disksize_before_punch(inode, offset, len);
+	if (ret)
+		return ret;
+
+	/* Now release the pages and zero block aligned part of pages */
+	ret = ext4_truncate_page_cache_block_range(inode, offset, end);
+	if (ret)
+		return ret;
+
 	/* Zero range excluding the unaligned edges */
-	if (max_blocks > 0) {
-		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
-			  EXT4_EX_NOCACHE);
-
-		/*
-		 * Prevent page faults from reinstantiating pages we have
-		 * released from page cache.
-		 */
-		filemap_invalidate_lock(mapping);
-
-		ret = ext4_break_layouts(inode);
-		if (ret) {
-			filemap_invalidate_unlock(mapping);
-			goto out_mutex;
-		}
-
-		ret = ext4_update_disksize_before_punch(inode, offset, len);
-		if (ret) {
-			filemap_invalidate_unlock(mapping);
-			goto out_mutex;
-		}
-
-		/*
-		 * For journalled data we need to write (and checkpoint) pages
-		 * before discarding page cache to avoid inconsitent data on
-		 * disk in case of crash before zeroing trans is committed.
-		 */
-		if (ext4_should_journal_data(inode)) {
-			ret = filemap_write_and_wait_range(mapping, start,
-							   end - 1);
-			if (ret) {
-				filemap_invalidate_unlock(mapping);
-				goto out_mutex;
-			}
-		}
-
-		/* Now release the pages and zero block aligned part of pages */
-		truncate_pagecache_range(inode, start, end - 1);
-		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-
-		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
-					     flags);
-		filemap_invalidate_unlock(mapping);
+	start_lblk = EXT4_B_TO_LBLK(inode, offset);
+	end_lblk = end >> blkbits;
+	if (end_lblk > start_lblk) {
+		ext4_lblk_t zero_blks = end_lblk - start_lblk;
+
+		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
+		ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
+					     new_size, flags);
 		if (ret)
-			goto out_mutex;
+			return ret;
 	}
-	if (!partial_begin && !partial_end)
-		goto out_mutex;
+	/* Finish zeroing out if it doesn't contain partial block */
+	if (IS_ALIGNED(offset | end, blocksize))
+		return ret;

 	/*
 	 * In worst case we have to writeout two nonadjacent unwritten
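A standalone illustration (not kernel code) of the block-range arithmetic the rewritten ext4_zero_range() performs with EXT4_B_TO_LBLK; the 4096-byte block size and the offset/length values are assumptions chosen for the example:

	/* With blocks of 4096 bytes, offset 5000 and len 20000, only the
	 * fully covered blocks 2..5 are converted to unwritten; the partial
	 * edges are zeroed separately. */
	#include <stdio.h>

	#define BLKBITS 12				/* 4096-byte blocks, assumed */
	#define ROUND_UP(x, a) ((((x) + (a) - 1) / (a)) * (a))

	int main(void)
	{
		long offset = 5000, len = 20000, end = offset + len;
		/* mirrors EXT4_B_TO_LBLK(): round up to the next block */
		long start_lblk = ROUND_UP(offset, 1L << BLKBITS) >> BLKBITS; /* 2 */
		long end_lblk = end >> BLKBITS;                               /* 6 */

		printf("convert blocks %ld..%ld to unwritten\n",
		       start_lblk, end_lblk - 1);	/* prints 2..5 */
		return 0;
	}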
@@ -4705,27 +4639,69 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		ext4_std_error(inode->i_sb, ret);
-		goto out_mutex;
+		return ret;
 	}

 	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-	/* Zero out partial block at the edges of the range */
-	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
-	if (ret)
-		goto out_handle;
-
 	if (new_size)
 		ext4_update_inode_size(inode, new_size);
 	ret = ext4_mark_inode_dirty(handle, inode);
 	if (unlikely(ret))
 		goto out_handle;
+	/* Zero out partial block at the edges of the range */
+	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+	if (ret >= 0)
+		ext4_update_inode_fsync_trans(handle, inode, 1);

-	ext4_update_inode_fsync_trans(handle, inode, 1);
 	if (file->f_flags & O_SYNC)
 		ext4_handle_sync(handle);

 out_handle:
 	ext4_journal_stop(handle);
-out_mutex:
-	inode_unlock(inode);
+	return ret;
+}
+
+static long ext4_do_fallocate(struct file *file, loff_t offset,
+			      loff_t len, int mode)
+{
+	struct inode *inode = file_inode(file);
+	loff_t end = offset + len;
+	loff_t new_size = 0;
+	ext4_lblk_t start_lblk, len_lblk;
+	int ret;
+
+	trace_ext4_fallocate_enter(inode, offset, len, mode);
+	WARN_ON_ONCE(!inode_is_locked(inode));
+
+	start_lblk = offset >> inode->i_blkbits;
+	len_lblk = EXT4_MAX_BLOCKS(len, offset, inode->i_blkbits);
+
+	/* We only support preallocation for extent-based files only. */
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    (end > inode->i_size || end > EXT4_I(inode)->i_disksize)) {
+		new_size = end;
+		ret = inode_newsize_ok(inode, new_size);
+		if (ret)
+			goto out;
+	}
+
+	ret = ext4_alloc_file_blocks(file, start_lblk, len_lblk, new_size,
+				     EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
+	if (ret)
+		goto out;
+
+	if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
+		ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
+				     EXT4_I(inode)->i_sync_tid);
+	}
+out:
+	trace_ext4_fallocate_exit(inode, offset, len_lblk, ret);
 	return ret;
 }
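For context, a small userspace sketch of how the dispatch above is exercised: a FALLOC_FL_ZERO_RANGE request ends up in ext4_zero_range() with the inode and invalidate locks already held. The file path is hypothetical:

	/* Illustrative userspace caller; /mnt/test/file is a made-up path. */
	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/test/file", O_WRONLY);

		/* Zero 20000 bytes starting at offset 5000, keeping i_size */
		if (fd < 0 || fallocate(fd, FALLOC_FL_ZERO_RANGE, 5000, 20000))
			perror("fallocate");
		if (fd >= 0)
			close(fd);
		return 0;
	}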
@@ -4739,12 +4715,8 @@ out_mutex:
 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(file);
-	loff_t new_size = 0;
-	unsigned int max_blocks;
-	int ret = 0;
-	int flags;
-	ext4_lblk_t lblk;
-	unsigned int blkbits = inode->i_blkbits;
+	struct address_space *mapping = file->f_mapping;
+	int ret;

 	/*
 	 * Encrypted inodes can't handle collapse range or insert
@@ -4764,73 +4736,47 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)

 	inode_lock(inode);
 	ret = ext4_convert_inline_data(inode);
-	inode_unlock(inode);
 	if (ret)
-		goto exit;
+		goto out_inode_lock;

-	if (mode & FALLOC_FL_PUNCH_HOLE) {
-		ret = ext4_punch_hole(file, offset, len);
-		goto exit;
-	}
-
-	if (mode & FALLOC_FL_COLLAPSE_RANGE) {
-		ret = ext4_collapse_range(file, offset, len);
-		goto exit;
-	}
-
-	if (mode & FALLOC_FL_INSERT_RANGE) {
-		ret = ext4_insert_range(file, offset, len);
-		goto exit;
-	}
-
-	if (mode & FALLOC_FL_ZERO_RANGE) {
-		ret = ext4_zero_range(file, offset, len, mode);
-		goto exit;
-	}
-	trace_ext4_fallocate_enter(inode, offset, len, mode);
-	lblk = offset >> blkbits;
-
-	max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
-	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
-
-	inode_lock(inode);
-
-	/*
-	 * We only support preallocation for extent-based files only
-	 */
-	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-	    (offset + len > inode->i_size ||
-	     offset + len > EXT4_I(inode)->i_disksize)) {
-		new_size = offset + len;
-		ret = inode_newsize_ok(inode, new_size);
-		if (ret)
-			goto out;
-	}
-
 	/* Wait all existing dio workers, newcomers will block on i_rwsem */
 	inode_dio_wait(inode);

 	ret = file_modified(file);
 	if (ret)
-		goto out;
+		goto out_inode_lock;

-	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
-	if (ret)
-		goto out;
-
-	if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
-		ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
-					EXT4_I(inode)->i_sync_tid);
+	if ((mode & FALLOC_FL_MODE_MASK) == FALLOC_FL_ALLOCATE_RANGE) {
+		ret = ext4_do_fallocate(file, offset, len, mode);
+		goto out_inode_lock;
 	}
-out:
+
+	/*
+	 * Follow-up operations will drop page cache, hold invalidate lock
+	 * to prevent page faults from reinstantiating pages we have
+	 * released from page cache.
+	 */
+	filemap_invalidate_lock(mapping);
+
+	ret = ext4_break_layouts(inode);
+	if (ret)
+		goto out_invalidate_lock;
+
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		ret = ext4_punch_hole(file, offset, len);
+	else if (mode & FALLOC_FL_COLLAPSE_RANGE)
+		ret = ext4_collapse_range(file, offset, len);
+	else if (mode & FALLOC_FL_INSERT_RANGE)
+		ret = ext4_insert_range(file, offset, len);
+	else if (mode & FALLOC_FL_ZERO_RANGE)
+		ret = ext4_zero_range(file, offset, len, mode);
+	else
+		ret = -EOPNOTSUPP;
+
+out_invalidate_lock:
+	filemap_invalidate_unlock(mapping);
+out_inode_lock:
 	inode_unlock(inode);
-	trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
-exit:
 	return ret;
 }
@@ -5332,109 +5278,72 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
 	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	struct address_space *mapping = inode->i_mapping;
-	ext4_lblk_t punch_start, punch_stop;
+	loff_t end = offset + len;
+	ext4_lblk_t start_lblk, end_lblk;
 	handle_t *handle;
 	unsigned int credits;
-	loff_t new_size, ioffset;
+	loff_t start, new_size;
 	int ret;

-	/*
-	 * We need to test this early because xfstests assumes that a
-	 * collapse range of (0, 1) will return EOPNOTSUPP if the file
-	 * system does not support collapse range.
-	 */
+	trace_ext4_collapse_range(inode, offset, len);
+	WARN_ON_ONCE(!inode_is_locked(inode));
+
+	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return -EOPNOTSUPP;

 	/* Collapse range works only on fs cluster size aligned regions. */
 	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
 		return -EINVAL;

-	trace_ext4_collapse_range(inode, offset, len);
-
-	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
-	punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
-
-	inode_lock(inode);
 	/*
 	 * There is no need to overlap collapse range with EOF, in which case
 	 * it is effectively a truncate operation
 	 */
-	if (offset + len >= inode->i_size) {
-		ret = -EINVAL;
-		goto out_mutex;
-	}
-
-	/* Currently just for extent based files */
-	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		ret = -EOPNOTSUPP;
-		goto out_mutex;
-	}
-
-	/* Wait for existing dio to complete */
-	inode_dio_wait(inode);
-
-	ret = file_modified(file);
-	if (ret)
-		goto out_mutex;
-
-	/*
-	 * Prevent page faults from reinstantiating pages we have released from
-	 * page cache.
-	 */
-	filemap_invalidate_lock(mapping);
-
-	ret = ext4_break_layouts(inode);
-	if (ret)
-		goto out_mmap;
+	if (end >= inode->i_size)
+		return -EINVAL;

-	/*
-	 * Need to round down offset to be aligned with page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-	/*
-	 * Write tail of the last page before removed range since it will get
-	 * removed from the page cache below.
-	 */
-	ret = filemap_write_and_wait_range(mapping, ioffset, offset);
-	if (ret)
-		goto out_mmap;
-	/*
-	 * Write data that will be shifted to preserve them when discarding
-	 * page cache below. We are also protected from pages becoming dirty
-	 * by i_rwsem and invalidate_lock.
-	 */
-	ret = filemap_write_and_wait_range(mapping, offset + len,
-					   LLONG_MAX);
+	/*
+	 * Write tail of the last page before removed range and data that
+	 * will be shifted since they will get removed from the page cache
+	 * below. We are also protected from pages becoming dirty by
+	 * i_rwsem and invalidate_lock.
+	 * Need to round down offset to be aligned with page size boundary
+	 * for page size > block size.
+	 */
+	start = round_down(offset, PAGE_SIZE);
+	ret = filemap_write_and_wait_range(mapping, start, offset);
+	if (!ret)
+		ret = filemap_write_and_wait_range(mapping, end, LLONG_MAX);
 	if (ret)
-		goto out_mmap;
-	truncate_pagecache(inode, ioffset);
+		return ret;
+
+	truncate_pagecache(inode, start);

 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out_mmap;
-	}
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);

 	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

+	start_lblk = offset >> inode->i_blkbits;
+	end_lblk = (offset + len) >> inode->i_blkbits;
+
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_discard_preallocations(inode);
-	ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
+	ext4_es_remove_extent(inode, start_lblk, EXT_MAX_BLOCKS - start_lblk);

-	ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
+	ret = ext4_ext_remove_space(inode, start_lblk, end_lblk - 1);
 	if (ret) {
 		up_write(&EXT4_I(inode)->i_data_sem);
-		goto out_stop;
+		goto out_handle;
 	}
 	ext4_discard_preallocations(inode);

-	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
-				     punch_stop - punch_start, SHIFT_LEFT);
+	ret = ext4_ext_shift_extents(inode, handle, end_lblk,
+				     end_lblk - start_lblk, SHIFT_LEFT);
 	if (ret) {
 		up_write(&EXT4_I(inode)->i_data_sem);
-		goto out_stop;
+		goto out_handle;
 	}

 	new_size = inode->i_size - len;
@@ -5442,18 +5351,16 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
 	EXT4_I(inode)->i_disksize = new_size;

 	up_write(&EXT4_I(inode)->i_data_sem);
+	ret = ext4_mark_inode_dirty(handle, inode);
+	if (ret)
+		goto out_handle;
+
+	ext4_update_inode_fsync_trans(handle, inode, 1);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
-	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
-	ret = ext4_mark_inode_dirty(handle, inode);
-	ext4_update_inode_fsync_trans(handle, inode, 1);

-out_stop:
+out_handle:
 	ext4_journal_stop(handle);
-out_mmap:
-	filemap_invalidate_unlock(mapping);
-out_mutex:
-	inode_unlock(inode);
 	return ret;
 }
@@ -5473,100 +5380,63 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 	handle_t *handle;
 	struct ext4_ext_path *path;
 	struct ext4_extent *extent;
-	ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
+	ext4_lblk_t start_lblk, len_lblk, ee_start_lblk = 0;
 	unsigned int credits, ee_len;
-	int ret = 0, depth, split_flag = 0;
-	loff_t ioffset;
+	int ret, depth, split_flag = 0;
+	loff_t start;

-	/*
-	 * We need to test this early because xfstests assumes that an
-	 * insert range of (0, 1) will return EOPNOTSUPP if the file
-	 * system does not support insert range.
-	 */
+	trace_ext4_insert_range(inode, offset, len);
+	WARN_ON_ONCE(!inode_is_locked(inode));
+
+	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return -EOPNOTSUPP;

 	/* Insert range works only on fs cluster size aligned regions. */
 	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
 		return -EINVAL;

-	trace_ext4_insert_range(inode, offset, len);
-
-	offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
-	len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
-
-	inode_lock(inode);
-	/* Currently just for extent based files */
-	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-		ret = -EOPNOTSUPP;
-		goto out_mutex;
-	}
-
-	/* Check whether the maximum file size would be exceeded */
-	if (len > inode->i_sb->s_maxbytes - inode->i_size) {
-		ret = -EFBIG;
-		goto out_mutex;
-	}
-
-	/* Offset must be less than i_size */
-	if (offset >= inode->i_size) {
-		ret = -EINVAL;
-		goto out_mutex;
-	}
-
-	/* Wait for existing dio to complete */
-	inode_dio_wait(inode);
-
-	ret = file_modified(file);
-	if (ret)
-		goto out_mutex;
+	/* Offset must be less than i_size */
+	if (offset >= inode->i_size)
+		return -EINVAL;
+	/* Check whether the maximum file size would be exceeded */
+	if (len > inode->i_sb->s_maxbytes - inode->i_size)
+		return -EFBIG;

 	/*
-	 * Prevent page faults from reinstantiating pages we have released from
-	 * page cache.
+	 * Write out all dirty pages. Need to round down to align start offset
+	 * to page size boundary for page size > block size.
 	 */
-	filemap_invalidate_lock(mapping);
-
-	ret = ext4_break_layouts(inode);
+	start = round_down(offset, PAGE_SIZE);
+	ret = filemap_write_and_wait_range(mapping, start, LLONG_MAX);
 	if (ret)
-		goto out_mmap;
+		return ret;

-	/*
-	 * Need to round down to align start offset to page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-			LLONG_MAX);
-	if (ret)
-		goto out_mmap;
-	truncate_pagecache(inode, ioffset);
+	truncate_pagecache(inode, start);

 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out_mmap;
-	}
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);

 	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

 	/* Expand file to avoid data loss if there is error while shifting */
 	inode->i_size += len;
 	EXT4_I(inode)->i_disksize += len;
 	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
 	ret = ext4_mark_inode_dirty(handle, inode);
 	if (ret)
-		goto out_stop;
+		goto out_handle;

+	start_lblk = offset >> inode->i_blkbits;
+	len_lblk = len >> inode->i_blkbits;
+
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_discard_preallocations(inode);

-	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
+	path = ext4_find_extent(inode, start_lblk, NULL, 0);
 	if (IS_ERR(path)) {
 		up_write(&EXT4_I(inode)->i_data_sem);
 		ret = PTR_ERR(path);
-		goto out_stop;
+		goto out_handle;
 	}

 	depth = ext_depth(inode);
@@ -5576,16 +5446,16 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 		ee_len = ext4_ext_get_actual_len(extent);

 		/*
-		 * If offset_lblk is not the starting block of extent, split
-		 * the extent @offset_lblk
+		 * If start_lblk is not the starting block of extent, split
+		 * the extent @start_lblk
 		 */
-		if ((offset_lblk > ee_start_lblk) &&
-				(offset_lblk < (ee_start_lblk + ee_len))) {
+		if ((start_lblk > ee_start_lblk) &&
+				(start_lblk < (ee_start_lblk + ee_len))) {
 			if (ext4_ext_is_unwritten(extent))
 				split_flag = EXT4_EXT_MARK_UNWRIT1 |
 					EXT4_EXT_MARK_UNWRIT2;
 			path = ext4_split_extent_at(handle, inode, path,
-					offset_lblk, split_flag,
+					start_lblk, split_flag,
 					EXT4_EX_NOCACHE |
 					EXT4_GET_BLOCKS_PRE_IO |
 					EXT4_GET_BLOCKS_METADATA_NOFAIL);
@@ -5594,32 +5464,29 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 		if (IS_ERR(path)) {
 			up_write(&EXT4_I(inode)->i_data_sem);
 			ret = PTR_ERR(path);
-			goto out_stop;
+			goto out_handle;
 		}
 	}

 	ext4_free_ext_path(path);
-	ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);
+	ext4_es_remove_extent(inode, start_lblk, EXT_MAX_BLOCKS - start_lblk);

 	/*
-	 * if offset_lblk lies in a hole which is at start of file, use
+	 * if start_lblk lies in a hole which is at start of file, use
 	 * ee_start_lblk to shift extents
 	 */
 	ret = ext4_ext_shift_extents(inode, handle,
-		max(ee_start_lblk, offset_lblk), len_lblk, SHIFT_RIGHT);
-
+		max(ee_start_lblk, start_lblk), len_lblk, SHIFT_RIGHT);
 	up_write(&EXT4_I(inode)->i_data_sem);
+	if (ret)
+		goto out_handle;
+
+	ext4_update_inode_fsync_trans(handle, inode, 1);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
-	if (ret >= 0)
-		ext4_update_inode_fsync_trans(handle, inode, 1);

-out_stop:
+out_handle:
 	ext4_journal_stop(handle);
-out_mmap:
-	filemap_invalidate_unlock(mapping);
-out_mutex:
-	inode_unlock(inode);
 	return ret;
 }
@@ -1551,7 +1551,6 @@ retry:

 	ext4_es_print_tree(inode);
 	ext4_da_release_space(inode, reserved);
-	return;
 }

 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
@@ -688,10 +688,12 @@ out:
 static ssize_t
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
+	int ret;
 	struct inode *inode = file_inode(iocb->ki_filp);

-	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
-		return -EIO;
+	ret = ext4_emergency_state(inode->i_sb);
+	if (unlikely(ret))
+		return ret;

 #ifdef CONFIG_FS_DAX
 	if (IS_DAX(inode))
@@ -700,7 +702,6 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)

 	if (iocb->ki_flags & IOCB_ATOMIC) {
 		size_t len = iov_iter_count(from);
-		int ret;

 		if (len < EXT4_SB(inode->i_sb)->s_awu_min ||
 		    len > EXT4_SB(inode->i_sb)->s_awu_max)
@@ -800,11 +801,16 @@ static const struct vm_operations_struct ext4_file_vm_ops = {

 static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	int ret;
 	struct inode *inode = file->f_mapping->host;
 	struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;

-	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
-		return -EIO;
+	if (file->f_mode & FMODE_WRITE)
+		ret = ext4_emergency_state(inode->i_sb);
+	else
+		ret = ext4_forced_shutdown(inode->i_sb) ? -EIO : 0;
+	if (unlikely(ret))
+		return ret;

 	/*
 	 * We don't support synchronous mappings for non-DAX files and
@@ -835,7 +841,8 @@ static int ext4_sample_last_mounted(struct super_block *sb,
 	if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
 		return 0;

-	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
+	if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
+	    !sb_start_intwrite_trylock(sb))
 		return 0;

 	ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
@@ -878,8 +885,12 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
 {
 	int ret;

-	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
-		return -EIO;
+	if (filp->f_mode & FMODE_WRITE)
+		ret = ext4_emergency_state(inode->i_sb);
+	else
+		ret = ext4_forced_shutdown(inode->i_sb) ? -EIO : 0;
+	if (unlikely(ret))
+		return ret;

 	ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
 	if (ret)
|
||||
bool needs_barrier = false;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
|
||||
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
|
||||
return -EIO;
|
||||
ret = ext4_emergency_state(inode->i_sb);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ASSERT(ext4_journal_current_handle() == NULL);
|
||||
|
||||
trace_ext4_sync_file_enter(file, datasync);
|
||||
|
||||
if (sb_rdonly(inode->i_sb)) {
|
||||
/* Make sure that we read updated s_ext4_flags value */
|
||||
smp_rmb();
|
||||
if (ext4_forced_shutdown(inode->i_sb))
|
||||
ret = -EROFS;
|
||||
if (sb_rdonly(inode->i_sb))
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!EXT4_SB(inode->i_sb)->s_journal) {
|
||||
ret = ext4_fsync_nojournal(file, start, end, datasync,
|
||||
|
@@ -302,7 +302,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,

 	if (len && IS_CASEFOLDED(dir) &&
 	    (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) {
-		buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
+		buff = kzalloc(PATH_MAX, GFP_KERNEL);
 		if (!buff)
 			return -ENOMEM;
@@ -951,8 +951,9 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
 	sb = dir->i_sb;
 	sbi = EXT4_SB(sb);

-	if (unlikely(ext4_forced_shutdown(sb)))
-		return ERR_PTR(-EIO);
+	ret2 = ext4_emergency_state(sb);
+	if (unlikely(ret2))
+		return ERR_PTR(ret2);

 	ngroups = ext4_get_groups_count(sb);
 	trace_ext4_request_inode(dir, mode);
@@ -1282,7 +1283,7 @@ got:
 	inode->i_generation = get_random_u32();

 	/* Precompute checksum seed for inode metadata */
-	if (ext4_has_metadata_csum(sb)) {
+	if (ext4_has_feature_metadata_csum(sb)) {
 		__u32 csum;
 		__le32 inum = cpu_to_le32(inode->i_ino);
 		__le32 gen = cpu_to_le32(inode->i_generation);
@@ -1298,7 +1299,7 @@ got:
 	ei->i_extra_isize = sbi->s_want_extra_isize;
 	ei->i_inline_off = 0;
 	if (ext4_has_feature_inline_data(sb) &&
-	    (!(ei->i_flags & EXT4_DAX_FL) || S_ISDIR(mode)))
+	    (!(ei->i_flags & (EXT4_DAX_FL|EXT4_EA_INODE_FL)) || S_ISDIR(mode)))
 		ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
 	ret = inode;
 	err = dquot_alloc_inode(inode);
fs/ext4/inline.c (265 changed lines)
@@ -20,6 +20,11 @@
 #define EXT4_INLINE_DOTDOT_OFFSET	2
 #define EXT4_INLINE_DOTDOT_SIZE		4

+
+static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
+						 struct inode *inode,
+						 void **fsdata);
+
 static int ext4_get_inline_size(struct inode *inode)
 {
 	if (EXT4_I(inode)->i_inline_off)
@@ -228,7 +233,7 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
 	struct ext4_inode *raw_inode;
 	int cp_len = 0;

-	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
+	if (unlikely(ext4_emergency_state(inode->i_sb)))
 		return;

 	BUG_ON(!EXT4_I(inode)->i_inline_off);
@@ -652,6 +657,95 @@ out_nofolio:
 	return ret;
 }

+/*
+ * Prepare the write for the inline data.
+ * If the data can be written into the inode, we just read
+ * the page and make it uptodate, and start the journal.
+ * Otherwise read the page, makes it dirty so that it can be
+ * handle in writepages(the i_disksize update is left to the
+ * normal ext4_da_write_end).
+ */
+int ext4_generic_write_inline_data(struct address_space *mapping,
+				   struct inode *inode,
+				   loff_t pos, unsigned len,
+				   struct folio **foliop,
+				   void **fsdata, bool da)
+{
+	int ret;
+	handle_t *handle;
+	struct folio *folio;
+	struct ext4_iloc iloc;
+	int retries = 0;
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret)
+		return ret;
+
+retry_journal:
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_release_bh;
+	}
+
+	ret = ext4_prepare_inline_data(handle, inode, pos + len);
+	if (ret && ret != -ENOSPC)
+		goto out_stop_journal;
+
+	if (ret == -ENOSPC) {
+		ext4_journal_stop(handle);
+		if (!da) {
+			brelse(iloc.bh);
+			/* Retry inside */
+			return ext4_convert_inline_data_to_extent(mapping, inode);
+		}
+
+		ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata);
+		if (ret == -ENOSPC &&
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
+			goto retry_journal;
+		goto out_release_bh;
+	}
+
+	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
+				    mapping_gfp_mask(mapping));
+	if (IS_ERR(folio)) {
+		ret = PTR_ERR(folio);
+		goto out_stop_journal;
+	}
+
+	down_read(&EXT4_I(inode)->xattr_sem);
+	/* Someone else had converted it to extent */
+	if (!ext4_has_inline_data(inode)) {
+		ret = 0;
+		goto out_release_folio;
+	}
+
+	if (!folio_test_uptodate(folio)) {
+		ret = ext4_read_inline_folio(inode, folio);
+		if (ret < 0)
+			goto out_release_folio;
+	}
+
+	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE);
+	if (ret)
+		goto out_release_folio;
+	*foliop = folio;
+	up_read(&EXT4_I(inode)->xattr_sem);
+	brelse(iloc.bh);
+	return 1;
+
+out_release_folio:
+	up_read(&EXT4_I(inode)->xattr_sem);
+	folio_unlock(folio);
+	folio_put(folio);
+out_stop_journal:
+	ext4_journal_stop(handle);
+out_release_bh:
+	brelse(iloc.bh);
+	return ret;
+}
+
 /*
  * Try to write data in the inode.
  * If the inode has inline data, check whether the new write can be
@@ -663,81 +757,10 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
 				  loff_t pos, unsigned len,
 				  struct folio **foliop)
 {
-	int ret;
-	handle_t *handle;
-	struct folio *folio;
-	struct ext4_iloc iloc;
-
 	if (pos + len > ext4_get_max_inline_size(inode))
-		goto convert;
-
-	ret = ext4_get_inode_loc(inode, &iloc);
-	if (ret)
-		return ret;
-
-	/*
-	 * The possible write could happen in the inode,
-	 * so try to reserve the space in inode first.
-	 */
-	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		handle = NULL;
-		goto out;
-	}
-
-	ret = ext4_prepare_inline_data(handle, inode, pos + len);
-	if (ret && ret != -ENOSPC)
-		goto out;
-
-	/* We don't have space in inline inode, so convert it to extent. */
-	if (ret == -ENOSPC) {
-		ext4_journal_stop(handle);
-		brelse(iloc.bh);
-		goto convert;
-	}
-
-	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh,
-					    EXT4_JTR_NONE);
-	if (ret)
-		goto out;
-
-	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
-					mapping_gfp_mask(mapping));
-	if (IS_ERR(folio)) {
-		ret = PTR_ERR(folio);
-		goto out;
-	}
-
-	*foliop = folio;
-	down_read(&EXT4_I(inode)->xattr_sem);
-	if (!ext4_has_inline_data(inode)) {
-		ret = 0;
-		folio_unlock(folio);
-		folio_put(folio);
-		goto out_up_read;
-	}
-
-	if (!folio_test_uptodate(folio)) {
-		ret = ext4_read_inline_folio(inode, folio);
-		if (ret < 0) {
-			folio_unlock(folio);
-			folio_put(folio);
-			goto out_up_read;
-		}
-	}
-
-	ret = 1;
-	handle = NULL;
-out_up_read:
-	up_read(&EXT4_I(inode)->xattr_sem);
-out:
-	if (handle && (ret != 1))
-		ext4_journal_stop(handle);
-	brelse(iloc.bh);
-	return ret;
-convert:
-	return ext4_convert_inline_data_to_extent(mapping, inode);
+		return ext4_convert_inline_data_to_extent(mapping, inode);
+
+	return ext4_generic_write_inline_data(mapping, inode, pos, len,
+					      foliop, NULL, false);
 }

 int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
@@ -881,94 +904,6 @@ out:
 	return ret;
 }

-/*
- * Prepare the write for the inline data.
- * If the data can be written into the inode, we just read
- * the page and make it uptodate, and start the journal.
- * Otherwise read the page, makes it dirty so that it can be
- * handle in writepages(the i_disksize update is left to the
- * normal ext4_da_write_end).
- */
-int ext4_da_write_inline_data_begin(struct address_space *mapping,
-				    struct inode *inode,
-				    loff_t pos, unsigned len,
-				    struct folio **foliop,
-				    void **fsdata)
-{
-	int ret;
-	handle_t *handle;
-	struct folio *folio;
-	struct ext4_iloc iloc;
-	int retries = 0;
-
-	ret = ext4_get_inode_loc(inode, &iloc);
-	if (ret)
-		return ret;
-
-retry_journal:
-	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
-	}
-
-	ret = ext4_prepare_inline_data(handle, inode, pos + len);
-	if (ret && ret != -ENOSPC)
-		goto out_journal;
-
-	if (ret == -ENOSPC) {
-		ext4_journal_stop(handle);
-		ret = ext4_da_convert_inline_data_to_extent(mapping,
-							    inode,
-							    fsdata);
-		if (ret == -ENOSPC &&
-		    ext4_should_retry_alloc(inode->i_sb, &retries))
-			goto retry_journal;
-		goto out;
-	}
-
-	/*
-	 * We cannot recurse into the filesystem as the transaction
-	 * is already started.
-	 */
-	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
-				    mapping_gfp_mask(mapping));
-	if (IS_ERR(folio)) {
-		ret = PTR_ERR(folio);
-		goto out_journal;
-	}
|
||||
down_read(&EXT4_I(inode)->xattr_sem);
|
||||
if (!ext4_has_inline_data(inode)) {
|
||||
ret = 0;
|
||||
goto out_release_page;
|
||||
}
|
||||
|
||||
if (!folio_test_uptodate(folio)) {
|
||||
ret = ext4_read_inline_folio(inode, folio);
|
||||
if (ret < 0)
|
||||
goto out_release_page;
|
||||
}
|
||||
ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh,
|
||||
EXT4_JTR_NONE);
|
||||
if (ret)
|
||||
goto out_release_page;
|
||||
|
||||
up_read(&EXT4_I(inode)->xattr_sem);
|
||||
*foliop = folio;
|
||||
brelse(iloc.bh);
|
||||
return 1;
|
||||
out_release_page:
|
||||
up_read(&EXT4_I(inode)->xattr_sem);
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
out_journal:
|
||||
ext4_journal_stop(handle);
|
||||
out:
|
||||
brelse(iloc.bh);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef INLINE_DIR_DEBUG
|
||||
void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
|
||||
void *inline_start, int inline_size)
|
||||
@ -1012,7 +947,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
|
||||
int err;
|
||||
struct ext4_dir_entry_2 *de;
|
||||
|
||||
err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
|
||||
err = ext4_find_dest_de(dir, iloc->bh, inline_start,
|
||||
inline_size, fname, &de);
|
||||
if (err)
|
||||
return err;
|
||||
@ -1146,7 +1081,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
|
||||
memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
|
||||
inline_size - EXT4_INLINE_DOTDOT_SIZE);
|
||||
|
||||
if (ext4_has_metadata_csum(inode->i_sb))
|
||||
if (ext4_has_feature_metadata_csum(inode->i_sb))
|
||||
csum_size = sizeof(struct ext4_dir_entry_tail);
|
||||
|
||||
inode->i_size = inode->i_sb->s_blocksize;
|
||||
|
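The inline.c changes above route both buffered and delayed-allocation writes through the new ext4_generic_write_inline_data(), selected by the trailing da flag. Whether a small file is currently stored inline is visible from userspace through the inode flags; a minimal sketch, assuming EXT4_INLINE_DATA_FL is 0x10000000 as in fs/ext4/ext4.h (the fallback define below is that assumption):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FS_IOC_GETFLAGS */

#ifndef EXT4_INLINE_DATA_FL
#define EXT4_INLINE_DATA_FL 0x10000000	/* assumed, from fs/ext4/ext4.h */
#endif

int main(int argc, char **argv)
{
	long flags = 0;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
		perror(argv[1]);
		return 1;
	}
	/* On an fs mounted with inline_data, tiny files keep this flag set. */
	printf("%s: %s\n", argv[1],
	       (flags & EXT4_INLINE_DATA_FL) ? "inline data" : "extent/block mapped");
	close(fd);
	return 0;
}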
fs/ext4/inode.c (288 lines changed)

@@ -31,6 +31,7 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
@@ -93,7 +94,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,

	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
	    cpu_to_le32(EXT4_OS_LINUX) ||
	    !ext4_has_metadata_csum(inode->i_sb))
	    !ext4_has_feature_metadata_csum(inode->i_sb))
		return 1;

	provided = le16_to_cpu(raw->i_checksum_lo);
@@ -114,7 +115,7 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,

	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
	    cpu_to_le32(EXT4_OS_LINUX) ||
	    !ext4_has_metadata_csum(inode->i_sb))
	    !ext4_has_feature_metadata_csum(inode->i_sb))
		return;

	csum = ext4_inode_csum(inode, raw, ei);
@@ -751,7 +752,7 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
	flags &= EXT4_MAP_FLAGS;

	/* Dummy buffer_head? Set non-atomically. */
	if (!bh->b_page) {
	if (!bh->b_folio) {
		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags;
		return;
	}
@@ -1149,8 +1150,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
	pgoff_t index;
	unsigned from, to;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return -EIO;
	ret = ext4_emergency_state(inode->i_sb);
	if (unlikely(ret))
		return ret;

	trace_ext4_write_begin(inode, pos, len);
	/*
@@ -2225,7 +2227,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
			mpd->io_submit.io_end->handle = handle->h_rsv_handle;
			handle->h_rsv_handle = NULL;
		}
		ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
		ext4_set_io_unwritten_flag(mpd->io_submit.io_end);
	}

	BUG_ON(map->m_len == 0);
@@ -2273,7 +2275,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
	if (err < 0) {
		struct super_block *sb = inode->i_sb;

		if (ext4_forced_shutdown(sb))
		if (ext4_emergency_state(sb))
			goto invalidate_dirty_pages;
		/*
		 * Let the uper layers retry transient errors.
@@ -2599,10 +2601,9 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
	 * *never* be called, so if that ever happens, we would want
	 * the stack trace.
	 */
	if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) {
		ret = -EROFS;
	ret = ext4_emergency_state(mapping->host->i_sb);
	if (unlikely(ret))
		goto out_writepages;
	}

	/*
	 * If we have inline data and arrive here, it means that
@@ -2817,8 +2818,9 @@ static int ext4_writepages(struct address_space *mapping,
	int ret;
	int alloc_ctx;

	if (unlikely(ext4_forced_shutdown(sb)))
		return -EIO;
	ret = ext4_emergency_state(sb);
	if (unlikely(ret))
		return ret;

	alloc_ctx = ext4_writepages_down_read(sb);
	ret = ext4_do_writepages(&mpd);
@@ -2858,8 +2860,9 @@ static int ext4_dax_writepages(struct address_space *mapping,
	struct inode *inode = mapping->host;
	int alloc_ctx;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return -EIO;
	ret = ext4_emergency_state(inode->i_sb);
	if (unlikely(ret))
		return ret;

	alloc_ctx = ext4_writepages_down_read(inode->i_sb);
	trace_ext4_writepages(inode, wbc);
@@ -2915,8 +2918,9 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
	pgoff_t index;
	struct inode *inode = mapping->host;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return -EIO;
	ret = ext4_emergency_state(inode->i_sb);
	if (unlikely(ret))
		return ret;

	index = pos >> PAGE_SHIFT;

@@ -2929,8 +2933,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
	trace_ext4_da_write_begin(inode, pos, len);

	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
		ret = ext4_da_write_inline_data_begin(mapping, inode, pos, len,
						      foliop, fsdata);
		ret = ext4_generic_write_inline_data(mapping, inode, pos, len,
						     foliop, fsdata, true);
		if (ret < 0)
			return ret;
		if (ret == 1)
@@ -3906,6 +3910,68 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
	return ret;
}

static inline void ext4_truncate_folio(struct inode *inode,
				       loff_t start, loff_t end)
{
	unsigned long blocksize = i_blocksize(inode);
	struct folio *folio;

	/* Nothing to be done if no complete block needs to be truncated. */
	if (round_up(start, blocksize) >= round_down(end, blocksize))
		return;

	folio = filemap_lock_folio(inode->i_mapping, start >> PAGE_SHIFT);
	if (IS_ERR(folio))
		return;

	if (folio_mkclean(folio))
		folio_mark_dirty(folio);
	folio_unlock(folio);
	folio_put(folio);
}

int ext4_truncate_page_cache_block_range(struct inode *inode,
					 loff_t start, loff_t end)
{
	unsigned long blocksize = i_blocksize(inode);
	int ret;

	/*
	 * For journalled data we need to write (and checkpoint) pages
	 * before discarding page cache to avoid inconsitent data on disk
	 * in case of crash before freeing or unwritten converting trans
	 * is committed.
	 */
	if (ext4_should_journal_data(inode)) {
		ret = filemap_write_and_wait_range(inode->i_mapping, start,
						   end - 1);
		if (ret)
			return ret;
		goto truncate_pagecache;
	}

	/*
	 * If the block size is less than the page size, the file's mapped
	 * blocks within one page could be freed or converted to unwritten.
	 * So it's necessary to remove writable userspace mappings, and then
	 * ext4_page_mkwrite() can be called during subsequent write access
	 * to these partial folios.
	 */
	if (!IS_ALIGNED(start | end, PAGE_SIZE) &&
	    blocksize < PAGE_SIZE && start < inode->i_size) {
		loff_t page_boundary = round_up(start, PAGE_SIZE);

		ext4_truncate_folio(inode, start, min(page_boundary, end));
		if (end > page_boundary)
			ext4_truncate_folio(inode,
					    round_down(end, PAGE_SIZE), end);
	}

truncate_pagecache:
	truncate_pagecache_range(inode, start, end - 1);
	return 0;
}

static void ext4_wait_dax_page(struct inode *inode)
{
	filemap_invalidate_unlock(inode->i_mapping);
@@ -3950,91 +4016,50 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t first_block, stop_block;
	struct address_space *mapping = inode->i_mapping;
	loff_t first_block_offset, last_block_offset, max_length;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t start_lblk, end_lblk;
	loff_t max_end = EXT4_SB(sb)->s_bitmap_maxbytes - sb->s_blocksize;
	loff_t end = offset + length;
	handle_t *handle;
	unsigned int credits;
	int ret = 0, ret2 = 0;
	int ret;

	trace_ext4_punch_hole(inode, offset, length, 0);

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		ret = filemap_write_and_wait_range(mapping, offset,
						   offset + length - 1);
		if (ret)
			return ret;
	}

	inode_lock(inode);
	WARN_ON_ONCE(!inode_is_locked(inode));

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		goto out_mutex;
		return 0;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 * If the hole extends beyond i_size, set the hole to end after
	 * the page that contains i_size, and also make sure that the hole
	 * within one block before last range.
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
			 PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) -
			 offset;
	}
	if (end > inode->i_size)
		end = round_up(inode->i_size, PAGE_SIZE);
	if (end > max_end)
		end = max_end;
	length = end - offset;

	/*
	 * For punch hole the length + offset needs to be within one block
	 * before last range. Adjust the length if it goes beyond that limit.
	 * Attach jinode to inode for jbd2 if we do any zeroing of partial
	 * block.
	 */
	max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
	if (offset + length > max_length)
		length = max_length - offset;

	if (offset & (sb->s_blocksize - 1) ||
	    (offset + length) & (sb->s_blocksize - 1)) {
		/*
		 * Attach jinode to inode for jbd2 if we do any zeroing of
		 * partial block
		 */
	if (!IS_ALIGNED(offset | end, sb->s_blocksize)) {
		ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			goto out_mutex;

			return ret;
	}

	/* Wait all existing dio workers, newcomers will block on i_rwsem */
	inode_dio_wait(inode);

	ret = file_modified(file);
	ret = ext4_update_disksize_before_punch(inode, offset, length);
	if (ret)
		goto out_mutex;

	/*
	 * Prevent page faults from reinstantiating pages we have released from
	 * page cache.
	 */
	filemap_invalidate_lock(mapping);

	ret = ext4_break_layouts(inode);
	if (ret)
		goto out_dio;

	first_block_offset = round_up(offset, sb->s_blocksize);
	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
		return ret;

	/* Now release the pages and zero block aligned part of pages*/
	if (last_block_offset > first_block_offset) {
		ret = ext4_update_disksize_before_punch(inode, offset, length);
		if (ret)
			goto out_dio;
		truncate_pagecache_range(inode, first_block_offset,
					 last_block_offset);
	}
	ret = ext4_truncate_page_cache_block_range(inode, offset, end);
	if (ret)
		return ret;

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		credits = ext4_writepage_trans_blocks(inode);
@@ -4044,54 +4069,51 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		ext4_std_error(sb, ret);
		goto out_dio;
		return ret;
	}

	ret = ext4_zero_partial_blocks(handle, inode, offset,
				       length);
	ret = ext4_zero_partial_blocks(handle, inode, offset, length);
	if (ret)
		goto out_stop;

	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
		goto out_handle;

	/* If there are blocks to remove, do it */
	if (stop_block > first_block) {
		ext4_lblk_t hole_len = stop_block - first_block;
	start_lblk = EXT4_B_TO_LBLK(inode, offset);
	end_lblk = end >> inode->i_blkbits;

	if (end_lblk > start_lblk) {
		ext4_lblk_t hole_len = end_lblk - start_lblk;

		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);

		ext4_es_remove_extent(inode, first_block, hole_len);
		ext4_es_remove_extent(inode, start_lblk, hole_len);

		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
			ret = ext4_ext_remove_space(inode, first_block,
						    stop_block - 1);
			ret = ext4_ext_remove_space(inode, start_lblk,
						    end_lblk - 1);
		else
			ret = ext4_ind_remove_space(handle, inode, first_block,
						    stop_block);
			ret = ext4_ind_remove_space(handle, inode, start_lblk,
						    end_lblk);
		if (ret) {
			up_write(&EXT4_I(inode)->i_data_sem);
			goto out_handle;
		}

		ext4_es_insert_extent(inode, first_block, hole_len, ~0,
		ext4_es_insert_extent(inode, start_lblk, hole_len, ~0,
				      EXTENT_STATUS_HOLE, 0);
		up_write(&EXT4_I(inode)->i_data_sem);
	}
	ext4_fc_track_range(handle, inode, first_block, stop_block);
	ext4_fc_track_range(handle, inode, start_lblk, end_lblk);

	ret = ext4_mark_inode_dirty(handle, inode);
	if (unlikely(ret))
		goto out_handle;

	ext4_update_inode_fsync_trans(handle, inode, 1);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	ret2 = ext4_mark_inode_dirty(handle, inode);
	if (unlikely(ret2))
		ret = ret2;
	if (ret >= 0)
		ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
out_handle:
	ext4_journal_stop(handle);
out_dio:
	filemap_invalidate_unlock(mapping);
out_mutex:
	inode_unlock(inode);
	return ret;
}
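ext4_punch_hole() above now clamps the range once (max_end, round_up to PAGE_SIZE) and delegates page-cache work to ext4_truncate_page_cache_block_range(). The user-visible fallocate() contract is unchanged and easy to exercise; a small demo using only standard Linux interfaces, assuming a 4 KiB-block filesystem for the printed offsets:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <linux/falloc.h>	/* FALLOC_FL_PUNCH_HOLE, FALLOC_FL_KEEP_SIZE */

int main(void)
{
	char buf[4096];
	int fd = open("punchdemo", O_CREAT | O_RDWR | O_TRUNC, 0600);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(buf, 'x', sizeof(buf));
	for (int i = 0; i < 4; i++)		/* 16 KiB of data */
		write(fd, buf, sizeof(buf));

	/* Punch out the second 4 KiB block; the file size must not change. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      4096, 4096) < 0)
		perror("fallocate");

	/* SEEK_HOLE/SEEK_DATA should now see the punched range. */
	printf("first hole at %lld\n", (long long)lseek(fd, 0, SEEK_HOLE));
	printf("data resumes at %lld\n", (long long)lseek(fd, 4096, SEEK_DATA));
	close(fd);
	return 0;
}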
@@ -4678,6 +4700,11 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
	    *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
		int err;

		err = xattr_check_inode(inode, IHDR(inode, raw_inode),
					ITAIL(inode, raw_inode));
		if (err)
			return err;

		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
		err = ext4_find_inline_data_nolock(inode);
		if (!err && ext4_has_inline_data(inode))
@@ -4804,7 +4831,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
		ei->i_extra_isize = 0;

	/* Precompute checksum seed for inode metadata */
	if (ext4_has_metadata_csum(sb)) {
	if (ext4_has_feature_metadata_csum(sb)) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
		__u32 csum;
		__le32 inum = cpu_to_le32(inode->i_ino);
@@ -4891,7 +4918,8 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
	 * we'd normally treat htree data as empty space. But with metadata
	 * checksumming that corrupts checksums so forbid that.
	 */
	if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) &&
	if (!ext4_has_feature_dir_index(sb) &&
	    ext4_has_feature_metadata_csum(sb) &&
	    ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
		ext4_error_inode(inode, function, line, 0,
			 "iget: Dir with htree data on filesystem without dir_index feature.");
@@ -5011,8 +5039,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
			inode->i_op = &ext4_encrypted_symlink_inode_operations;
		} else if (ext4_inode_is_fast_symlink(inode)) {
			inode->i_op = &ext4_fast_symlink_inode_operations;
			nd_terminate_link(ei->i_data, inode->i_size,
					  sizeof(ei->i_data) - 1);
			if (inode->i_size == 0 ||
			    inode->i_size >= sizeof(ei->i_data) ||
			    strnlen((char *)ei->i_data, inode->i_size + 1) !=
								inode->i_size) {
				ext4_error_inode(inode, function, line, 0,
					"invalid fast symlink length %llu",
					(unsigned long long)inode->i_size);
				ret = -EFSCORRUPTED;
				goto bad_inode;
			}
			inode_set_cached_link(inode, (char *)ei->i_data,
					      inode->i_size);
		} else {
@@ -5232,8 +5268,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
		return 0;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return -EIO;
	err = ext4_emergency_state(inode->i_sb);
	if (unlikely(err))
		return err;

	if (EXT4_SB(inode->i_sb)->s_journal) {
		if (ext4_journal_current_handle()) {
@@ -5355,8 +5392,9 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
	const unsigned int ia_valid = attr->ia_valid;
	bool inc_ivers = true;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return -EIO;
	error = ext4_emergency_state(inode->i_sb);
	if (unlikely(error))
		return error;

	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;
@@ -5468,7 +5506,7 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		    oldsize & (inode->i_sb->s_blocksize - 1)) {
			error = ext4_inode_attach_jinode(inode);
			if (error)
				goto err_out;
				goto out_mmap_sem;
		}

		handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
@@ -5800,9 +5838,10 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
	int err = 0;

	if (unlikely(ext4_forced_shutdown(inode->i_sb))) {
	err = ext4_emergency_state(inode->i_sb);
	if (unlikely(err)) {
		put_bh(iloc->bh);
		return -EIO;
		return err;
	}
	ext4_fc_track_inode(handle, inode);

@@ -5826,8 +5865,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
{
	int err;

	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return -EIO;
	err = ext4_emergency_state(inode->i_sb);
	if (unlikely(err))
		return err;

	err = ext4_get_inode_loc(inode, iloc);
	if (!err) {
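These call sites now propagate whatever ext4_emergency_state() returns rather than a blanket -EIO, distinguishing a forced shutdown from the new emergency read-only state. A shutdown can be provoked for testing with EXT4_IOC_SHUTDOWN (requires CAP_SYS_ADMIN); a sketch assuming the ioctl definition from include/uapi/linux/ext4.h, duplicated below in case your headers predate it:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* Assumed from include/uapi/linux/ext4.h; verify against your headers. */
#ifndef EXT4_IOC_SHUTDOWN
#define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32)
#endif
#define EXT4_GOING_FLAGS_DEFAULT 0x0	/* flush the log, then shut down */

int main(int argc, char **argv)
{
	__u32 flags = EXT4_GOING_FLAGS_DEFAULT;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);	/* any file or dir on the target fs */
	if (fd < 0 || ioctl(fd, EXT4_IOC_SHUTDOWN, &flags) < 0) {
		perror("EXT4_IOC_SHUTDOWN");
		return 1;
	}
	/* Writes anywhere on this fs should now fail (EIO or EROFS). */
	puts("filesystem shut down");
	close(fd);
	return 0;
}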
fs/ext4/ioctl.c

@@ -142,7 +142,7 @@ static int ext4_update_backup_sb(struct super_block *sb,

	es = (struct ext4_super_block *) (bh->b_data + offset);
	lock_buffer(bh);
	if (ext4_has_metadata_csum(sb) &&
	if (ext4_has_feature_metadata_csum(sb) &&
	    es->s_checksum != ext4_superblock_csum(sb, es)) {
		ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
			 "superblock %llu", sb_block);
@@ -150,7 +150,7 @@ static int ext4_update_backup_sb(struct super_block *sb,
		goto out_bh;
	}
	func(es, arg);
	if (ext4_has_metadata_csum(sb))
	if (ext4_has_feature_metadata_csum(sb))
		es->s_checksum = ext4_superblock_csum(sb, es);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);
@@ -351,7 +351,7 @@ void ext4_reset_inode_seed(struct inode *inode)
	__le32 gen = cpu_to_le32(inode->i_generation);
	__u32 csum;

	if (!ext4_has_metadata_csum(inode->i_sb))
	if (!ext4_has_feature_metadata_csum(inode->i_sb))
		return;

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, sizeof(inum));
@@ -1205,7 +1205,8 @@ static int ext4_ioctl_setuuid(struct file *filp,
	 * If any checksums (group descriptors or metadata) are being used
	 * then the checksum seed feature is required to change the UUID.
	 */
	if (((ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb))
	if (((ext4_has_feature_gdt_csum(sb) ||
	      ext4_has_feature_metadata_csum(sb))
	     && !ext4_has_feature_csum_seed(sb))
	    || ext4_has_feature_stable_inodes(sb))
		return -EOPNOTSUPP;
@@ -1253,7 +1254,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
		if (!inode_owner_or_capable(idmap, inode))
			return -EPERM;

		if (ext4_has_metadata_csum(inode->i_sb)) {
		if (ext4_has_feature_metadata_csum(inode->i_sb)) {
			ext4_warning(sb, "Setting inode version is not "
				     "supported with metadata_csum enabled.");
			return -ENOTTY;
@@ -1705,7 +1706,7 @@ int ext4_update_overhead(struct super_block *sb, bool force)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sb_rdonly(sb))
	if (ext4_emergency_state(sb) || sb_rdonly(sb))
		return 0;
	if (!force &&
	    (sbi->s_overhead == 0 ||
fs/ext4/mballoc-test.c

@@ -796,6 +796,7 @@ static void test_mb_mark_used(struct kunit *test)
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buddy);
	grp = kunit_kzalloc(test, offsetof(struct ext4_group_info,
				bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);

	ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
	KUNIT_ASSERT_EQ(test, ret, 0);
@@ -860,6 +861,7 @@ static void test_mb_free_blocks(struct kunit *test)
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buddy);
	grp = kunit_kzalloc(test, offsetof(struct ext4_group_info,
				bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);

	ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
	KUNIT_ASSERT_EQ(test, ret, 0);
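Both test fixes above allocate the flexible-array tail of struct ext4_group_info through kunit_kzalloc(), so the memory is owned by the test and zeroed on allocation. For readers unfamiliar with the framework, a generic KUnit case of the same shape (a standalone illustration, not ext4's actual suite):

#include <kunit/test.h>

static void example_alloc_test(struct kunit *test)
{
	/* kunit_kzalloc() memory is freed automatically when the test ends. */
	int *counters = kunit_kzalloc(test, 8 * sizeof(*counters), GFP_KERNEL);

	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, counters);
	counters[0] = 42;
	KUNIT_EXPECT_EQ(test, counters[0], 42);
	KUNIT_EXPECT_EQ(test, counters[7], 0);	/* zeroed on allocation */
}

static struct kunit_case example_cases[] = {
	KUNIT_CASE(example_alloc_test),
	{}
};

static struct kunit_suite example_suite = {
	.name = "example-alloc",
	.test_cases = example_cases,
};
kunit_test_suite(example_suite);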
fs/ext4/mballoc.c

@@ -187,7 +187,7 @@
 * /sys/fs/ext4/<partition>/mb_min_to_scan
 * /sys/fs/ext4/<partition>/mb_max_to_scan
 * /sys/fs/ext4/<partition>/mb_order2_req
 * /sys/fs/ext4/<partition>/mb_linear_limit
 * /sys/fs/ext4/<partition>/mb_max_linear_groups
 *
 * The regular allocator uses buddy scan only if the request len is power of
 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
@@ -209,7 +209,7 @@
 * get traversed linearly. That may result in subsequent allocations being not
 * close to each other. And so, the underlying device may get filled up in a
 * non-linear fashion. While that may not matter on non-rotational devices, for
 * rotational devices that may result in higher seek times. "mb_linear_limit"
 * rotational devices that may result in higher seek times. "mb_max_linear_groups"
 * tells mballoc how many groups mballoc should search linearly before
 * performing consulting above data structures for more efficient lookups. For
 * non rotational devices, this value defaults to 0 and for rotational devices
@@ -5653,7 +5653,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
{
	ext4_group_t i, ngroups;

	if (ext4_forced_shutdown(sb))
	if (ext4_emergency_state(sb))
		return;

	ngroups = ext4_get_groups_count(sb);
@@ -5687,7 +5687,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
	struct super_block *sb = ac->ac_sb;

	if (ext4_forced_shutdown(sb))
	if (ext4_emergency_state(sb))
		return;

	mb_debug(sb, "Can't allocate:"
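The comment fix above renames the documented knob to mb_max_linear_groups. Like the other mballoc tunables it lives under the sysfs path the comment gives; a small reader, assuming only that path:

#include <stdio.h>

int main(int argc, char **argv)
{
	/* e.g. ./a.out sda1 reads the path named in the comment above */
	char path[256];
	char val[64];
	FILE *f;

	if (argc < 2)
		return 1;
	snprintf(path, sizeof(path),
		 "/sys/fs/ext4/%s/mb_max_linear_groups", argv[1]);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(val, sizeof(val), f))
		printf("%s = %s", path, val);	/* 0 on non-rotational devices */
	fclose(f);
	return 0;
}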
fs/ext4/mmp.c

@@ -21,7 +21,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)

static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
	if (!ext4_has_metadata_csum(sb))
	if (!ext4_has_feature_metadata_csum(sb))
		return 1;

	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
@@ -29,7 +29,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)

static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
	if (!ext4_has_metadata_csum(sb))
	if (!ext4_has_feature_metadata_csum(sb))
		return;

	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
@@ -162,7 +162,7 @@ static int kmmpd(void *data)
	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
	       sizeof(mmp->mmp_nodename));

	while (!kthread_should_stop() && !ext4_forced_shutdown(sb)) {
	while (!kthread_should_stop() && !ext4_emergency_state(sb)) {
		if (!ext4_has_feature_mmp(sb)) {
			ext4_warning(sb, "kmmpd being stopped since MMP feature"
				     " has been disabled.");
fs/ext4/namei.c (117 lines changed)

@@ -176,7 +176,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
		brelse(bh);
		return ERR_PTR(-EFSCORRUPTED);
	}
	if (!ext4_has_metadata_csum(inode->i_sb) ||
	if (!ext4_has_feature_metadata_csum(inode->i_sb) ||
	    buffer_verified(bh))
		return bh;

@@ -291,36 +291,6 @@ struct dx_tail {
	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
};

static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
static inline unsigned dx_get_hash(struct dx_entry *entry);
static void dx_set_hash(struct dx_entry *entry, unsigned value);
static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
static unsigned dx_node_limit(struct inode *dir);
static struct dx_frame *dx_probe(struct ext4_filename *fname,
				 struct inode *dir,
				 struct dx_hash_info *hinfo,
				 struct dx_frame *frame);
static void dx_release(struct dx_frame *frames);
static int dx_make_map(struct inode *dir, struct buffer_head *bh,
		       struct dx_hash_info *hinfo,
		       struct dx_map_entry *map_tail);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
				char *to, struct dx_map_entry *offsets,
				int count, unsigned int blocksize);
static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
						unsigned int blocksize);
static void dx_insert_block(struct dx_frame *frame,
			    u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
				 struct dx_frame *frame,
				 struct dx_frame *frames,
				 __u32 *start_hash);
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
		struct ext4_filename *fname,
		struct ext4_dir_entry_2 **res_dir);
@@ -398,7 +368,7 @@ int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
{
	struct ext4_dir_entry_tail *t;

	if (!ext4_has_metadata_csum(inode->i_sb))
	if (!ext4_has_feature_metadata_csum(inode->i_sb))
		return 1;

	t = get_dirent_tail(inode, bh);
@@ -419,7 +389,7 @@ static void ext4_dirblock_csum_set(struct inode *inode,
{
	struct ext4_dir_entry_tail *t;

	if (!ext4_has_metadata_csum(inode->i_sb))
	if (!ext4_has_feature_metadata_csum(inode->i_sb))
		return;

	t = get_dirent_tail(inode, bh);
@@ -494,7 +464,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!ext4_has_metadata_csum(inode->i_sb))
	if (!ext4_has_feature_metadata_csum(inode->i_sb))
		return 1;

	c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -523,7 +493,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
	struct dx_tail *t;
	int count_offset, limit, count;

	if (!ext4_has_metadata_csum(inode->i_sb))
	if (!ext4_has_feature_metadata_csum(inode->i_sb))
		return;

	c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -612,7 +582,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
		ext4_dir_rec_len(1, NULL) -
		ext4_dir_rec_len(2, NULL) - infosize;

	if (ext4_has_metadata_csum(dir->i_sb))
	if (ext4_has_feature_metadata_csum(dir->i_sb))
		entry_space -= sizeof(struct dx_tail);
	return entry_space / sizeof(struct dx_entry);
}
@@ -622,7 +592,7 @@ static inline unsigned dx_node_limit(struct inode *dir)
	unsigned int entry_space = dir->i_sb->s_blocksize -
			ext4_dir_rec_len(0, dir);

	if (ext4_has_metadata_csum(dir->i_sb))
	if (ext4_has_feature_metadata_csum(dir->i_sb))
		entry_space -= sizeof(struct dx_tail);
	return entry_space / sizeof(struct dx_entry);
}
@@ -1076,7 +1046,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
	struct ext4_dir_entry_2 *de, *top;
	int err = 0, count = 0;
	struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
	int csum = ext4_has_metadata_csum(dir->i_sb);
	int csum = ext4_has_feature_metadata_csum(dir->i_sb);

	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
		       (unsigned long)block));
@@ -1320,7 +1290,7 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
	struct dx_hash_info h = *hinfo;
	int blocksize = EXT4_BLOCK_SIZE(dir->i_sb);

	if (ext4_has_metadata_csum(dir->i_sb))
	if (ext4_has_feature_metadata_csum(dir->i_sb))
		buflen -= sizeof(struct ext4_dir_entry_tail);

	while ((char *) de < base + buflen) {
@@ -1462,7 +1432,8 @@ static bool ext4_match(struct inode *parent,
	 * sure cf_name was properly initialized before
	 * considering the calculated hash.
	 */
	if (IS_ENCRYPTED(parent) && fname->cf_name.name &&
	if (sb_no_casefold_compat_fallback(parent->i_sb) &&
	    IS_ENCRYPTED(parent) && fname->cf_name.name &&
	    (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
	     fname->hinfo.minor_hash != EXT4_DIRENT_MINOR_HASH(de)))
		return false;
@@ -1595,10 +1566,15 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR)
		if (IS_ERR(ret) && PTR_ERR(ret) == ERR_BAD_DX_DIR)
			dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
				       "falling back\n"));
		else if (!sb_no_casefold_compat_fallback(dir->i_sb) &&
			 *res_dir == NULL && IS_CASEFOLDED(dir))
			dxtrace(printk(KERN_DEBUG "ext4_find_entry: casefold "
				       "failed, falling back\n"));
		else
			goto cleanup_and_exit;
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
		ret = NULL;
	}
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
@@ -1945,7 +1921,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
	int csum_size = 0;
	int err = 0, i;

	if (ext4_has_metadata_csum(dir->i_sb))
	if (ext4_has_feature_metadata_csum(dir->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	bh2 = ext4_append(handle, dir, &newblock);
@@ -2060,8 +2036,7 @@ out:
	return ERR_PTR(err);
}

int ext4_find_dest_de(struct inode *dir, struct inode *inode,
		      struct buffer_head *bh,
int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh,
		      void *buf, int buf_size,
		      struct ext4_filename *fname,
		      struct ext4_dir_entry_2 **dest_de)
@@ -2143,11 +2118,11 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
	int csum_size = 0;
	int err, err2;

	if (ext4_has_metadata_csum(inode->i_sb))
	if (ext4_has_feature_metadata_csum(inode->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	if (!de) {
		err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
		err = ext4_find_dest_de(dir, bh, bh->b_data,
					blocksize - csum_size, fname, &de);
		if (err)
			return err;
@@ -2252,7 +2227,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
	struct fake_dirent *fde;
	int csum_size = 0;

	if (ext4_has_metadata_csum(inode->i_sb))
	if (ext4_has_feature_metadata_csum(inode->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	blocksize = dir->i_sb->s_blocksize;
@@ -2396,7 +2371,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
	ext4_lblk_t block, blocks;
	int csum_size = 0;

	if (ext4_has_metadata_csum(inode->i_sb))
	if (ext4_has_feature_metadata_csum(inode->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	sb = dir->i_sb;
@@ -2427,7 +2402,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
		if (!retval || (retval != ERR_BAD_DX_DIR))
			goto out;
		/* Can we just ignore htree data? */
		if (ext4_has_metadata_csum(sb)) {
		if (ext4_has_feature_metadata_csum(sb)) {
			EXT4_ERROR_INODE(dir,
				"Directory has corrupted htree index.");
			retval = -EFSCORRUPTED;
@@ -2577,8 +2552,10 @@ again:
		BUFFER_TRACE(frame->bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, sb, frame->bh,
						    EXT4_JTR_NONE);
		if (err)
		if (err) {
			brelse(bh2);
			goto journal_error;
		}
		if (!add_level) {
			unsigned icount1 = icount/2, icount2 = icount - icount1;
			unsigned hash2 = dx_get_hash(entries + icount1);
@@ -2589,8 +2566,10 @@ again:
			err = ext4_journal_get_write_access(handle, sb,
							    (frame - 1)->bh,
							    EXT4_JTR_NONE);
			if (err)
			if (err) {
				brelse(bh2);
				goto journal_error;
			}

			memcpy((char *) entries2, (char *) (entries + icount1),
			       icount2 * sizeof(struct dx_entry));
@@ -2609,8 +2588,10 @@ again:
			dxtrace(dx_show_index("node",
			       ((struct dx_node *) bh2->b_data)->entries));
			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
			if (err)
			if (err) {
				brelse(bh2);
				goto journal_error;
			}
			brelse (bh2);
			err = ext4_handle_dirty_dx_node(handle, dir,
							(frame - 1)->bh);
@@ -2635,8 +2616,10 @@ again:
				   "Creating %d level index...\n",
				   dxroot->info.indirect_levels));
			err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
			if (err)
			if (err) {
				brelse(bh2);
				goto journal_error;
			}
			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
			brelse(bh2);
			restart = 1;
@@ -2733,7 +2716,7 @@ static int ext4_delete_entry(handle_t *handle,
			return err;
	}

	if (ext4_has_metadata_csum(dir->i_sb))
	if (ext4_has_feature_metadata_csum(dir->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	BUFFER_TRACE(bh, "get_write_access");
@@ -2973,7 +2956,7 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir,
	int csum_size = 0;
	int err;

	if (ext4_has_metadata_csum(dir->i_sb))
	if (ext4_has_feature_metadata_csum(dir->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -3151,8 +3134,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
	struct ext4_dir_entry_2 *de;
	handle_t *handle = NULL;

	if (unlikely(ext4_forced_shutdown(dir->i_sb)))
		return -EIO;
	retval = ext4_emergency_state(dir->i_sb);
	if (unlikely(retval))
		return retval;

	/* Initialize quotas before so that eventual writes go in
	 * separate transaction */
@@ -3309,8 +3293,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
	int retval;

	if (unlikely(ext4_forced_shutdown(dir->i_sb)))
		return -EIO;
	retval = ext4_emergency_state(dir->i_sb);
	if (unlikely(retval))
		return retval;

	trace_ext4_unlink_enter(dir, dentry);
	/*
@@ -3376,8 +3361,9 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
	struct fscrypt_str disk_link;
	int retries = 0;

	if (unlikely(ext4_forced_shutdown(dir->i_sb)))
		return -EIO;
	err = ext4_emergency_state(dir->i_sb);
	if (unlikely(err))
		return err;

	err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
				      &disk_link);
@@ -4199,8 +4185,9 @@ static int ext4_rename2(struct mnt_idmap *idmap,
{
	int err;

	if (unlikely(ext4_forced_shutdown(old_dir->i_sb)))
		return -EIO;
	err = ext4_emergency_state(old_dir->i_sb);
	if (unlikely(err))
		return err;

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;
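The __ext4_find_entry() change above restores a linear-search fallback for casefolded directories when the dx or hash path misses, gated by the new sb_no_casefold_compat_fallback() check. The observable behaviour is that name lookups in a chattr +F directory match case-insensitively; a quick probe (only meaningful on a filesystem created with the casefold feature, run inside a +F directory):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("ReadMe.TXT", O_CREAT | O_WRONLY, 0600);

	if (fd < 0) {
		perror("create");
		return 1;
	}
	close(fd);

	/* In a casefolded dir this succeeds; elsewhere it yields ENOENT. */
	fd = open("readme.txt", O_RDONLY);
	printf("case-insensitive lookup: %s\n", fd >= 0 ? "hit" : "miss");
	if (fd >= 0)
		close(fd);
	return 0;
}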
fs/ext4/orphan.c

@@ -537,7 +537,7 @@ static int ext4_orphan_file_block_csum_verify(struct super_block *sb,
	struct ext4_orphan_block_tail *ot;
	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);

	if (!ext4_has_metadata_csum(sb))
	if (!ext4_has_feature_metadata_csum(sb))
		return 1;

	ot = ext4_orphan_block_tail(sb, bh);
fs/ext4/page-io.c

@@ -164,7 +164,8 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
}

/*
 * Check a range of space and convert unwritten extents to written. Note that
 * On successful IO, check a range of space and convert unwritten extents to
 * written. On IO failure, check if journal abort is needed. Note that
 * we are protected from truncate touching same part of extent tree by the
 * fact that truncate code waits for all DIO to finish (thus exclusion from
 * direct IO is achieved) and also waits for PageWriteback bits. Thus we
@@ -175,20 +176,36 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
{
	struct inode *inode = io_end->inode;
	handle_t *handle = io_end->handle;
	struct super_block *sb = inode->i_sb;
	int ret = 0;

	ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
		   "list->prev 0x%p\n",
		   io_end, inode->i_ino, io_end->list.next, io_end->list.prev);

	io_end->handle = NULL;	/* Following call will use up the handle */
	ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
	if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) {
		ext4_msg(inode->i_sb, KERN_EMERG,
	/*
	 * Do not convert the unwritten extents if data writeback fails,
	 * or stale data may be exposed.
	 */
	io_end->handle = NULL;	/* Following call will use up the handle */
	if (unlikely(io_end->flag & EXT4_IO_END_FAILED)) {
		ret = -EIO;
		if (handle)
			jbd2_journal_free_reserved(handle);

		if (test_opt(sb, DATA_ERR_ABORT))
			jbd2_journal_abort(EXT4_SB(sb)->s_journal, ret);
	} else {
		ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
	}
	if (ret < 0 && !ext4_emergency_state(sb) &&
	    io_end->flag & EXT4_IO_END_UNWRITTEN) {
		ext4_msg(sb, KERN_EMERG,
			 "failed to convert unwritten extents to written "
			 "extents -- potential data loss!  "
			 "(inode %lu, error %d)", inode->i_ino, ret);
	}

	ext4_clear_io_unwritten_flag(io_end);
	ext4_release_io_end(io_end);
	return ret;
@@ -217,6 +234,16 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
#endif
}

static bool ext4_io_end_defer_completion(ext4_io_end_t *io_end)
{
	if (io_end->flag & EXT4_IO_END_UNWRITTEN)
		return true;
	if (test_opt(io_end->inode->i_sb, DATA_ERR_ABORT) &&
	    io_end->flag & EXT4_IO_END_FAILED)
		return true;
	return false;
}

/* Add the io_end to per-inode completed end_io list. */
static void ext4_add_complete_io(ext4_io_end_t *io_end)
{
@@ -225,9 +252,11 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
	struct workqueue_struct *wq;
	unsigned long flags;

	/* Only reserved conversions from writeback should enter here */
	WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
	WARN_ON(!io_end->handle && sbi->s_journal);
	/* Only reserved conversions or pending IO errors will enter here. */
	WARN_ON(!(io_end->flag & EXT4_IO_END_DEFER_COMPLETION));
	WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN &&
		!io_end->handle && sbi->s_journal);

	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	wq = sbi->rsv_conversion_wq;
	if (list_empty(&ei->i_rsv_conversion_list))
@@ -252,7 +281,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode,

	while (!list_empty(&unwritten)) {
		io_end = list_entry(unwritten.next, ext4_io_end_t, list);
		BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
		BUG_ON(!(io_end->flag & EXT4_IO_END_DEFER_COMPLETION));
		list_del_init(&io_end->list);

		err = ext4_end_io_end(io_end);
@@ -263,7 +292,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
}

/*
 * work on completed IO, to convert unwritten extents to extents
 * Used to convert unwritten extents to written extents upon IO completion,
 * or used to abort the journal upon IO errors.
 */
void ext4_end_io_rsv_work(struct work_struct *work)
{
@@ -288,29 +318,25 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
	if (refcount_dec_and_test(&io_end->count)) {
		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
		    list_empty(&io_end->list_vec)) {
			ext4_release_io_end(io_end);
		if (io_end->flag & EXT4_IO_END_FAILED ||
		    (io_end->flag & EXT4_IO_END_UNWRITTEN &&
		     !list_empty(&io_end->list_vec))) {
			ext4_add_complete_io(io_end);
			return;
		}
		ext4_add_complete_io(io_end);
		ext4_release_io_end(io_end);
	}
}

int ext4_put_io_end(ext4_io_end_t *io_end)
{
	int err = 0;

	if (refcount_dec_and_test(&io_end->count)) {
		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
			err = ext4_convert_unwritten_io_end_vec(io_end->handle,
								io_end);
			io_end->handle = NULL;
			ext4_clear_io_unwritten_flag(io_end);
		}
		if (ext4_io_end_defer_completion(io_end))
			return ext4_end_io_end(io_end);

		ext4_release_io_end(io_end);
	}
	return err;
	return 0;
}

ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
@@ -344,11 +370,12 @@ static void ext4_end_bio(struct bio *bio)
			   bio->bi_status, inode->i_ino,
			   (unsigned long long)
			   bi_sector >> (inode->i_blkbits - 9));
		io_end->flag |= EXT4_IO_END_FAILED;
		mapping_set_error(inode->i_mapping,
				  blk_status_to_errno(bio->bi_status));
	}

	if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
	if (ext4_io_end_defer_completion(io_end)) {
		/*
		 * Link bio into list hanging from io_end. We have to do it
		 * atomically as bio completions can be racing against each
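With EXT4_IO_END_FAILED recorded in ext4_end_bio() and completion deferred through ext4_io_end_defer_completion(), a writeback failure both reaches the mapping via mapping_set_error() and, under data_err=abort, aborts the journal. From userspace the first reliable place to observe such an error is fsync():

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

int main(int argc, char **argv)
{
	const char msg[] = "important payload\n";
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_CREAT | O_WRONLY | O_TRUNC, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, msg, sizeof(msg) - 1) != (ssize_t)(sizeof(msg) - 1)) {
		perror("write");
		return 1;
	}
	/*
	 * A buffered write only dirties the page cache; an I/O error during
	 * writeback is reported here (typically as EIO), exactly once.
	 */
	if (fsync(fd) < 0)
		fprintf(stderr, "writeback failed: %s\n", strerror(errno));
	close(fd);
	return 0;
}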
fs/ext4/resize.c

@@ -1118,7 +1118,7 @@ static inline void ext4_set_block_group_nr(struct super_block *sb, char *data,
	struct ext4_super_block *es = (struct ext4_super_block *) data;

	es->s_block_group_nr = cpu_to_le16(group);
	if (ext4_has_metadata_csum(sb))
	if (ext4_has_feature_metadata_csum(sb))
		es->s_checksum = ext4_superblock_csum(sb, es);
}

@@ -1315,7 +1315,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
{
	struct buffer_head *bh;

	if (!ext4_has_metadata_csum(sb))
	if (!ext4_has_feature_metadata_csum(sb))
		return 0;

	bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
fs/ext4/super.c (267 lines changed)

@@ -79,7 +79,6 @@ static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static struct inode *ext4_get_journal_inode(struct super_block *sb,
@@ -302,7 +301,7 @@ __le32 ext4_superblock_csum(struct super_block *sb,
static int ext4_superblock_csum_verify(struct super_block *sb,
				       struct ext4_super_block *es)
{
	if (!ext4_has_metadata_csum(sb))
	if (!ext4_has_feature_metadata_csum(sb))
		return 1;

	return es->s_checksum == ext4_superblock_csum(sb, es);
@@ -312,7 +311,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (!ext4_has_metadata_csum(sb))
	if (!ext4_has_feature_metadata_csum(sb))
		return;

	es->s_checksum = ext4_superblock_csum(sb, es);
@@ -448,9 +447,6 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
#define ext4_get_tstamp(es, tstamp) \
	__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)

#define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
#define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */

/*
 * The ext4_maybe_update_superblock() function checks and updates the
 * superblock if needed.
@@ -458,8 +454,10 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
 * This function is designed to update the on-disk superblock only under
 * certain conditions to prevent excessive disk writes and unnecessary
 * waking of the disk from sleep. The superblock will be updated if:
 * 1. More than an hour has passed since the last superblock update, and
 * 2. More than 16MB have been written since the last superblock update.
 * 1. More than sbi->s_sb_update_sec (def: 1 hour) has passed since the last
 *    superblock update
 * 2. More than sbi->s_sb_update_kb (def: 16MB) kbs have been written since the
 *    last superblock update.
 *
 * @sb: The superblock
 */
@@ -473,14 +471,15 @@ static void ext4_maybe_update_superblock(struct super_block *sb)
	__u64 lifetime_write_kbytes;
	__u64 diff_size;

	if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) ||
	    !journal || (journal->j_flags & JBD2_UNMOUNT))
	if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
	    !(sb->s_flags & SB_ACTIVE) || !journal ||
	    journal->j_flags & JBD2_UNMOUNT)
		return;

	now = ktime_get_real_seconds();
	last_update = ext4_get_tstamp(es, s_wtime);

	if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC))
	if (likely(now - last_update < sbi->s_sb_update_sec))
		return;

	lifetime_write_kbytes = sbi->s_kbytes_written +
@@ -495,32 +494,18 @@ static void ext4_maybe_update_superblock(struct super_block *sb)
	 */
	diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);

	if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB)
	if (diff_size > sbi->s_sb_update_kb)
		schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}

static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry *jce;

	BUG_ON(txn->t_state == T_FINISHED);

	ext4_process_freed_data(sb, txn->t_tid);
	ext4_maybe_update_superblock(sb);

	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}

/*
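This hunk replaces the fixed one-hour/16 MB constants with the per-superblock s_sb_update_sec and s_sb_update_kb fields that the "Make sb update interval tunable" change exposes through sysfs. The attribute names below are assumed from the field names (sb_update_sec, sb_update_kb); the program simply lists whatever matching attributes the running kernel actually provides:

#include <stdio.h>
#include <string.h>
#include <dirent.h>

int main(int argc, char **argv)
{
	char path[256];
	struct dirent *de;
	DIR *d;

	if (argc < 2)
		return 1;
	snprintf(path, sizeof(path), "/sys/fs/ext4/%s", argv[1]);
	d = opendir(path);
	if (!d) {
		perror(path);
		return 1;
	}
	/* Look for the assumed sb_update_sec / sb_update_kb tunables. */
	while ((de = readdir(d)) != NULL)
		if (strncmp(de->d_name, "sb_update", 9) == 0)
			printf("%s/%s\n", path, de->d_name);
	closedir(d);
	return 0;
}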
@ -707,11 +692,8 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
|
||||
if (test_opt(sb, WARN_ON_ERROR))
|
||||
WARN_ON_ONCE(1);
|
||||
|
||||
if (!continue_fs && !sb_rdonly(sb)) {
|
||||
set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
|
||||
if (journal)
|
||||
jbd2_journal_abort(journal, -EIO);
|
||||
}
|
||||
if (!continue_fs && !ext4_emergency_ro(sb) && journal)
|
||||
jbd2_journal_abort(journal, -EIO);
|
||||
|
||||
if (!bdev_read_only(sb->s_bdev)) {
|
||||
save_error_info(sb, error, ino, block, func, line);
|
||||
@ -719,9 +701,13 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
|
||||
* In case the fs should keep running, we need to writeout
|
||||
* superblock through the journal. Due to lock ordering
|
||||
* constraints, it may not be safe to do it right here so we
|
||||
* defer superblock flushing to a workqueue.
|
||||
* defer superblock flushing to a workqueue. We just need to be
|
||||
* careful when the journal is already shutting down. If we get
|
||||
* here in that case, just update the sb directly as the last
|
||||
* transaction won't commit anyway.
|
||||
*/
|
||||
if (continue_fs && journal)
|
||||
if (continue_fs && journal &&
|
||||
!ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY))
|
||||
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
|
||||
else
|
||||
ext4_commit_super(sb);
|
||||
@ -737,17 +723,17 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
|
||||
sb->s_id);
|
||||
}
|
||||
|
||||
if (sb_rdonly(sb) || continue_fs)
|
||||
if (ext4_emergency_ro(sb) || continue_fs)
|
||||
return;
|
||||
|
||||
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
|
||||
/*
|
||||
* EXT4_FLAGS_SHUTDOWN was set which stops all filesystem
|
||||
* modifications. We don't set SB_RDONLY because that requires
|
||||
* sb->s_umount semaphore and setting it without proper remount
|
||||
* procedure is confusing code such as freeze_super() leading to
|
||||
* deadlocks and other problems.
|
||||
* We don't set SB_RDONLY because that requires sb->s_umount
|
||||
* semaphore and setting it without proper remount procedure is
|
||||
* confusing code such as freeze_super() leading to deadlocks
|
||||
* and other problems.
|
||||
*/
|
||||
set_bit(EXT4_FLAGS_EMERGENCY_RO, &EXT4_SB(sb)->s_ext4_flags);
|
||||
}
|
||||
|
||||
static void update_super_work(struct work_struct *work)
|
||||
@@ -765,7 +751,8 @@ static void update_super_work(struct work_struct *work)
 	 * We use directly jbd2 functions here to avoid recursing back into
 	 * ext4 error handling code during handling of previous errors.
 	 */
-	if (!sb_rdonly(sbi->s_sb) && journal) {
+	if (!ext4_emergency_state(sbi->s_sb) &&
+	    !sb_rdonly(sbi->s_sb) && journal) {
 		struct buffer_head *sbh = sbi->s_sbh;
 		bool call_notify_err = false;
 
@@ -819,7 +806,7 @@ void __ext4_error(struct super_block *sb, const char *function,
 	struct va_format vaf;
 	va_list args;
 
-	if (unlikely(ext4_forced_shutdown(sb)))
+	if (unlikely(ext4_emergency_state(sb)))
 		return;
 
 	trace_ext4_error(sb, function, line);
@@ -844,7 +831,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
 	va_list args;
 	struct va_format vaf;
 
-	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
+	if (unlikely(ext4_emergency_state(inode->i_sb)))
 		return;
 
 	trace_ext4_error(inode->i_sb, function, line);
@@ -879,7 +866,7 @@ void __ext4_error_file(struct file *file, const char *function,
 	struct inode *inode = file_inode(file);
 	char pathname[80], *path;
 
-	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
+	if (unlikely(ext4_emergency_state(inode->i_sb)))
 		return;
 
 	trace_ext4_error(inode->i_sb, function, line);
@@ -959,7 +946,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
 	char nbuf[16];
 	const char *errstr;
 
-	if (unlikely(ext4_forced_shutdown(sb)))
+	if (unlikely(ext4_emergency_state(sb)))
 		return;
 
 	/* Special case: if the error is EROFS, and we're not already
@@ -1053,7 +1040,7 @@ __acquires(bitlock)
 	struct va_format vaf;
 	va_list args;
 
-	if (unlikely(ext4_forced_shutdown(sb)))
+	if (unlikely(ext4_emergency_state(sb)))
 		return;
 
 	trace_ext4_error(sb, function, line);
@@ -1306,18 +1293,17 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_unregister_li_request(sb);
 	ext4_quotas_off(sb, EXT4_MAXQUOTAS);
 
-	flush_work(&sbi->s_sb_upd_work);
 	destroy_workqueue(sbi->rsv_conversion_wq);
 	ext4_release_orphan_info(sb);
 
 	if (sbi->s_journal) {
 		aborted = is_journal_aborted(sbi->s_journal);
-		err = jbd2_journal_destroy(sbi->s_journal);
-		sbi->s_journal = NULL;
+		err = ext4_journal_destroy(sbi, sbi->s_journal);
 		if ((err < 0) && !aborted) {
 			ext4_abort(sb, -err, "Couldn't clean up the journal");
 		}
-	}
+	} else
+		flush_work(&sbi->s_sb_upd_work);
 
 	ext4_es_unregister_shrinker(sbi);
 	timer_shutdown_sync(&sbi->s_err_report);
@@ -1325,13 +1311,14 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
 
-	if (!sb_rdonly(sb) && !aborted) {
-		ext4_clear_feature_journal_needs_recovery(sb);
-		ext4_clear_feature_orphan_present(sb);
-		es->s_state = cpu_to_le16(sbi->s_mount_state);
-	}
-	if (!sb_rdonly(sb))
+	if (!ext4_emergency_state(sb) && !sb_rdonly(sb)) {
+		if (!aborted) {
+			ext4_clear_feature_journal_needs_recovery(sb);
+			ext4_clear_feature_orphan_present(sb);
+			es->s_state = cpu_to_le16(sbi->s_mount_state);
+		}
 		ext4_commit_super(sb);
+	}
 
 	ext4_group_desc_free(sbi);
 	ext4_flex_groups_free(sbi);
@@ -1426,7 +1413,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	spin_lock_init(&ei->i_completed_io_lock);
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
-	atomic_set(&ei->i_unwritten, 0);
 	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 	ext4_fc_init_inode(&ei->vfs_inode);
 	mutex_init(&ei->i_fc_lock);
@@ -2785,6 +2771,13 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
 	}
 
 	if (is_remount) {
+		if (!sbi->s_journal &&
+		    ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT)) {
+			ext4_msg(NULL, KERN_WARNING,
+				 "Remounting fs w/o journal so ignoring data_err option");
+			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT);
+		}
+
 		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
 		    (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
 			ext4_msg(NULL, KERN_ERR, "can't mount with "
@@ -3038,6 +3031,12 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 	if (nodefs && !test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS))
 		SEQ_OPTS_PUTS("prefetch_block_bitmaps");
 
+	if (ext4_emergency_ro(sb))
+		SEQ_OPTS_PUTS("emergency_ro");
+
+	if (ext4_forced_shutdown(sb))
+		SEQ_OPTS_PUTS("shutdown");
+
 	ext4_show_quota_options(seq, sb);
 	return 0;
 }
@@ -3205,7 +3204,7 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
 	__le32 le_group = cpu_to_le32(block_group);
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (ext4_has_metadata_csum(sbi->s_sb)) {
+	if (ext4_has_feature_metadata_csum(sbi->s_sb)) {
 		/* Use new metadata_csum algorithm */
 		__u32 csum32;
 		__u16 dummy_csum = 0;
@@ -3693,7 +3692,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
 	if (group >= elr->lr_next_group) {
 		ret = 1;
 		if (elr->lr_first_not_zeroed != ngroups &&
-		    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
+		    !ext4_emergency_state(sb) && !sb_rdonly(sb) &&
+		    test_opt(sb, INIT_INODE_TABLE)) {
 			elr->lr_next_group = elr->lr_first_not_zeroed;
 			elr->lr_mode = EXT4_LI_MODE_ITABLE;
 			ret = 0;
@@ -3998,7 +3998,7 @@ int ext4_register_li_request(struct super_block *sb,
 		goto out;
 	}
 
-	if (sb_rdonly(sb) ||
+	if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
 	    (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
 	     (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
 		goto out;
@@ -4061,7 +4061,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
 	int compat, incompat;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (ext4_has_metadata_csum(sb)) {
+	if (ext4_has_feature_metadata_csum(sb)) {
 		/* journal checksum v3 */
 		compat = 0;
 		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -4349,7 +4349,7 @@ static void ext4_set_def_opts(struct super_block *sb,
 	if (ext4_has_feature_fast_commit(sb))
 		set_opt2(sb, JOURNAL_FAST_COMMIT);
 	/* don't forget to enable journal_csum when metadata_csum is enabled. */
-	if (ext4_has_metadata_csum(sb))
+	if (ext4_has_feature_metadata_csum(sb))
 		set_opt(sb, JOURNAL_CHECKSUM);
 
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
@@ -4642,7 +4642,8 @@ static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_blo
 	/* Precompute checksum seed for all metadata */
 	if (ext4_has_feature_csum_seed(sb))
 		sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
-	else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
+	else if (ext4_has_feature_metadata_csum(sb) ||
+		 ext4_has_feature_ea_inode(sb))
 		sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
 					       sizeof(es->s_uuid));
 	return 0;
@@ -4973,10 +4974,7 @@ static int ext4_load_and_init_journal(struct super_block *sb,
 	return 0;
 
 out:
-	/* flush s_sb_upd_work before destroying the journal. */
-	flush_work(&sbi->s_sb_upd_work);
-	jbd2_journal_destroy(sbi->s_journal);
-	sbi->s_journal = NULL;
+	ext4_journal_destroy(sbi, sbi->s_journal);
 	return -EINVAL;
 }
 
@@ -5013,6 +5011,24 @@ static int ext4_check_journal_data_mode(struct super_block *sb)
 	return 0;
 }
 
+static const char *ext4_has_journal_option(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
+		return "journal_async_commit";
+	if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM))
+		return "journal_checksum";
+	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
+		return "commit=";
+	if (EXT4_MOUNT_DATA_FLAGS &
+	    (sbi->s_mount_opt ^ sbi->s_def_mount_opt))
+		return "data=";
+	if (test_opt(sb, DATA_ERR_ABORT))
+		return "data_err=abort";
+	return NULL;
+}
+
 static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
 			   int silent)
 {
@@ -5263,6 +5279,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+	sbi->s_sb_update_kb = EXT4_DEF_SB_UPDATE_INTERVAL_KB;
+	sbi->s_sb_update_sec = EXT4_DEF_SB_UPDATE_INTERVAL_SEC;
 
 	/*
 	 * set default s_li_wait_mult for lazyinit, for the case there is
@@ -5404,30 +5422,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 			 "suppressed and not mounted read-only");
 		goto failed_mount3a;
 	} else {
+		const char *journal_option;
+
 		/* Nojournal mode, all journal mount options are illegal */
-		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
-			ext4_msg(sb, KERN_ERR, "can't mount with "
-				 "journal_async_commit, fs mounted w/o journal");
+		journal_option = ext4_has_journal_option(sb);
+		if (journal_option != NULL) {
+			ext4_msg(sb, KERN_ERR,
+				 "can't mount with %s, fs mounted w/o journal",
+				 journal_option);
 			goto failed_mount3a;
 		}
 
-		if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
-			ext4_msg(sb, KERN_ERR, "can't mount with "
-				 "journal_checksum, fs mounted w/o journal");
-			goto failed_mount3a;
-		}
-		if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
-			ext4_msg(sb, KERN_ERR, "can't mount with "
-				 "commit=%lu, fs mounted w/o journal",
-				 sbi->s_commit_interval / HZ);
-			goto failed_mount3a;
-		}
-		if (EXT4_MOUNT_DATA_FLAGS &
-		    (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
-			ext4_msg(sb, KERN_ERR, "can't mount with "
-				 "data=, fs mounted w/o journal");
-			goto failed_mount3a;
-		}
 		sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
 		clear_opt(sb, JOURNAL_CHECKSUM);
 		clear_opt(sb, DATA_FLAGS);
@@ -5616,9 +5621,11 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 		goto failed_mount9;
 	}
 
-	if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
+	if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) {
 		ext4_msg(sb, KERN_WARNING,
 			 "mounting with \"discard\" option, but the device does not support discard");
+		clear_opt(sb, DISCARD);
+	}
 
 	if (es->s_error_count)
 		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -5665,10 +5672,7 @@ failed_mount_wq:
 	sbi->s_ea_block_cache = NULL;
 
 	if (sbi->s_journal) {
-		/* flush s_sb_upd_work before journal destroy. */
-		flush_work(&sbi->s_sb_upd_work);
-		jbd2_journal_destroy(sbi->s_journal);
-		sbi->s_journal = NULL;
+		ext4_journal_destroy(sbi, sbi->s_journal);
 	}
 failed_mount3a:
 	ext4_es_unregister_shrinker(sbi);
@@ -5773,10 +5777,6 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
 		journal->j_flags |= JBD2_BARRIER;
 	else
 		journal->j_flags &= ~JBD2_BARRIER;
-	if (test_opt(sb, DATA_ERR_ABORT))
-		journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
-	else
-		journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
 	/*
 	 * Always enable journal cycle record option, letting the journal
 	 * records log transactions continuously between each mount.
@@ -5973,7 +5973,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb,
 	return journal;
 
 out_journal:
-	jbd2_journal_destroy(journal);
+	ext4_journal_destroy(EXT4_SB(sb), journal);
 out_bdev:
 	bdev_fput(bdev_file);
 	return ERR_PTR(errno);
@@ -6090,8 +6090,7 @@ static int ext4_load_journal(struct super_block *sb,
 	EXT4_SB(sb)->s_journal = journal;
 	err = ext4_clear_journal_err(sb, es);
 	if (err) {
-		EXT4_SB(sb)->s_journal = NULL;
-		jbd2_journal_destroy(journal);
+		ext4_journal_destroy(EXT4_SB(sb), journal);
 		return err;
 	}
 
@@ -6109,7 +6108,7 @@ static int ext4_load_journal(struct super_block *sb,
 	return 0;
 
 err_out:
-	jbd2_journal_destroy(journal);
+	ext4_journal_destroy(EXT4_SB(sb), journal);
 	return err;
 }
 
@@ -6336,8 +6335,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 	bool needs_barrier = false;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (unlikely(ext4_forced_shutdown(sb)))
-		return -EIO;
+	ret = ext4_emergency_state(sb);
+	if (unlikely(ret))
+		return ret;
 
 	trace_ext4_sync_fs(sb, wait);
 	flush_workqueue(sbi->rsv_conversion_wq);
@@ -6419,7 +6419,7 @@ out:
  */
 static int ext4_unfreeze(struct super_block *sb)
 {
-	if (ext4_forced_shutdown(sb))
+	if (ext4_emergency_state(sb))
 		return 0;
 
 	if (EXT4_SB(sb)->s_journal) {
@@ -6575,7 +6575,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
 	flush_work(&sbi->s_sb_upd_work);
 
 	if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
-		if (ext4_forced_shutdown(sb)) {
+		if (ext4_emergency_state(sb)) {
 			err = -EROFS;
 			goto restore_opts;
 		}
@@ -6780,6 +6780,7 @@ static int ext4_reconfigure(struct fs_context *fc)
 {
 	struct super_block *sb = fc->root->d_sb;
 	int ret;
+	bool old_ro = sb_rdonly(sb);
 
 	fc->s_fs_info = EXT4_SB(sb);
 
@@ -6791,9 +6792,9 @@ static int ext4_reconfigure(struct fs_context *fc)
 	if (ret < 0)
 		return ret;
 
-	ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
-		 &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
-		 ext4_quota_mode(sb));
+	ext4_msg(sb, KERN_INFO, "re-mounted %pU%s.",
+		 &sb->s_uuid,
+		 (old_ro != sb_rdonly(sb)) ? (sb_rdonly(sb) ? " ro" : " r/w") : "");
 
 	return 0;
 }
@@ -6817,22 +6818,29 @@ static int ext4_statfs_project(struct super_block *sb,
 			     dquot->dq_dqb.dqb_bhardlimit);
 	limit >>= sb->s_blocksize_bits;
 
-	if (limit && buf->f_blocks > limit) {
+	if (limit) {
+		uint64_t remaining = 0;
+
 		curblock = (dquot->dq_dqb.dqb_curspace +
 			    dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
-		buf->f_blocks = limit;
-		buf->f_bfree = buf->f_bavail =
-			(buf->f_blocks > curblock) ?
-			 (buf->f_blocks - curblock) : 0;
+		if (limit > curblock)
+			remaining = limit - curblock;
+
+		buf->f_blocks = min(buf->f_blocks, limit);
+		buf->f_bfree = min(buf->f_bfree, remaining);
+		buf->f_bavail = min(buf->f_bavail, remaining);
 	}
 
 	limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
 			     dquot->dq_dqb.dqb_ihardlimit);
-	if (limit && buf->f_files > limit) {
-		buf->f_files = limit;
-		buf->f_ffree =
-			(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
-			 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+	if (limit) {
+		uint64_t remaining = 0;
+
+		if (limit > dquot->dq_dqb.dqb_curinodes)
+			remaining = limit - dquot->dq_dqb.dqb_curinodes;
+
+		buf->f_files = min(buf->f_files, limit);
+		buf->f_ffree = min(buf->f_ffree, remaining);
 	}
 
 	spin_unlock(&dquot->dq_dqb_lock);
@@ -6935,12 +6943,25 @@ static int ext4_release_dquot(struct dquot *dquot)
 {
 	int ret, err;
 	handle_t *handle;
+	bool freeze_protected = false;
+
+	/*
+	 * Trying to sb_start_intwrite() in a running transaction
+	 * can result in a deadlock. Further, running transactions
+	 * are already protected from freezing.
+	 */
+	if (!ext4_journal_current_handle()) {
+		sb_start_intwrite(dquot->dq_sb);
+		freeze_protected = true;
+	}
 
 	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
 				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle)) {
 		/* Release dquot anyway to avoid endless cycle in dqput() */
 		dquot_release(dquot);
+		if (freeze_protected)
+			sb_end_intwrite(dquot->dq_sb);
 		return PTR_ERR(handle);
 	}
 	ret = dquot_release(dquot);
@@ -6951,6 +6972,10 @@ static int ext4_release_dquot(struct dquot *dquot)
 	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
+
+	if (freeze_protected)
+		sb_end_intwrite(dquot->dq_sb);
+
 	return ret;
 }
 
@@ -7288,7 +7313,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	}
 	lock_buffer(bh);
 	memcpy(bh->b_data+offset, data, len);
-	flush_dcache_page(bh->b_page);
+	flush_dcache_folio(bh->b_folio);
 	unlock_buffer(bh);
 	err = ext4_handle_dirty_metadata(handle, NULL, bh);
 	brelse(bh);
@@ -7381,12 +7406,9 @@ static struct file_system_type ext4_fs_type = {
 };
 MODULE_ALIAS_FS("ext4");
 
-/* Shared across all ext4 file systems */
-wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-
 static int __init ext4_init_fs(void)
 {
-	int i, err;
+	int err;
 
 	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
 	ext4_li_info = NULL;
@@ -7394,9 +7416,6 @@ static int __init ext4_init_fs(void)
 	/* Build-time check for flags consistency */
 	ext4_check_flag_values();
 
-	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
-		init_waitqueue_head(&ext4__ioend_wq[i]);
-
 	err = ext4_init_es();
 	if (err)
 		return err;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
@@ -254,6 +254,8 @@ EXT4_ATTR(journal_task, 0444, journal_task);
 EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
 EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
 EXT4_RW_ATTR_SBI_UL(last_trim_minblks, s_last_trim_minblks);
+EXT4_RW_ATTR_SBI_UI(sb_update_sec, s_sb_update_sec);
+EXT4_RW_ATTR_SBI_UI(sb_update_kb, s_sb_update_kb);
 
 static unsigned int old_bump_val = 128;
 EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -305,6 +307,8 @@ static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(mb_prefetch),
 	ATTR_LIST(mb_prefetch_limit),
 	ATTR_LIST(last_trim_minblks),
+	ATTR_LIST(sb_update_sec),
+	ATTR_LIST(sb_update_kb),
 	NULL,
 };
 ATTRIBUTE_GROUPS(ext4);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
@@ -156,7 +156,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode,
 	struct ext4_xattr_header *hdr = BHDR(bh);
 	int ret = 1;
 
-	if (ext4_has_metadata_csum(inode->i_sb)) {
+	if (ext4_has_feature_metadata_csum(inode->i_sb)) {
 		lock_buffer(bh);
 		ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
 							bh->b_blocknr, hdr));
@@ -168,7 +168,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode,
 static void ext4_xattr_block_csum_set(struct inode *inode,
 				      struct buffer_head *bh)
 {
-	if (ext4_has_metadata_csum(inode->i_sb))
+	if (ext4_has_feature_metadata_csum(inode->i_sb))
 		BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
 						bh->b_blocknr, BHDR(bh));
 }
@@ -308,7 +308,7 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
 	__ext4_xattr_check_block((inode), (bh), __func__, __LINE__)
 
 
-static inline int
+int
 __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
 		    void *end, const char *function, unsigned int line)
 {
@@ -316,9 +316,6 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
 			 function, line);
 }
 
-#define xattr_check_inode(inode, header, end) \
-	__xattr_check_inode((inode), (header), (end), __func__, __LINE__)
-
 static int
 xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry,
 		 void *end, int name_index, const char *name, int sorted)
@@ -649,10 +646,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 		return error;
 	raw_inode = ext4_raw_inode(&iloc);
 	header = IHDR(inode, raw_inode);
-	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	error = xattr_check_inode(inode, header, end);
-	if (error)
-		goto cleanup;
+	end = ITAIL(inode, raw_inode);
 	entry = IFIRST(header);
 	error = xattr_find_entry(inode, &entry, end, name_index, name, 0);
 	if (error)
@@ -783,7 +777,6 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	struct ext4_xattr_ibody_header *header;
 	struct ext4_inode *raw_inode;
 	struct ext4_iloc iloc;
-	void *end;
 	int error;
 
 	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
@@ -793,14 +786,9 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 		return error;
 	raw_inode = ext4_raw_inode(&iloc);
 	header = IHDR(inode, raw_inode);
-	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	error = xattr_check_inode(inode, header, end);
-	if (error)
-		goto cleanup;
 	error = ext4_xattr_list_entries(dentry, IFIRST(header),
 					buffer, buffer_size);
 
 cleanup:
 	brelse(iloc.bh);
 	return error;
 }
@@ -868,7 +856,6 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
 	struct ext4_xattr_ibody_header *header;
 	struct ext4_xattr_entry *entry;
 	qsize_t ea_inode_refs = 0;
-	void *end;
 	int ret;
 
 	lockdep_assert_held_read(&EXT4_I(inode)->xattr_sem);
@@ -879,10 +866,6 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
 		goto out;
 	raw_inode = ext4_raw_inode(&iloc);
 	header = IHDR(inode, raw_inode);
-	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	ret = xattr_check_inode(inode, header, end);
-	if (ret)
-		goto out;
 
 	for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
 	     entry = EXT4_XATTR_NEXT(entry))
@@ -1176,15 +1159,24 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
 {
 	struct inode *ea_inode;
 	struct ext4_xattr_entry *entry;
+	struct ext4_iloc iloc;
 	bool dirty = false;
 	unsigned int ea_ino;
 	int err;
 	int credits;
+	void *end;
+
+	if (block_csum)
+		end = (void *)bh->b_data + bh->b_size;
+	else {
+		ext4_get_inode_loc(parent, &iloc);
+		end = (void *)ext4_raw_inode(&iloc) + EXT4_SB(parent->i_sb)->s_inode_size;
+	}
 
 	/* One credit for dec ref on ea_inode, one for orphan list addition, */
 	credits = 2 + extra_credits;
 
-	for (entry = first; !IS_LAST_ENTRY(entry);
+	for (entry = first; (void *)entry < end && !IS_LAST_ENTRY(entry);
 	     entry = EXT4_XATTR_NEXT(entry)) {
 		if (!entry->e_value_inum)
 			continue;
@@ -2235,11 +2227,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
 	header = IHDR(inode, raw_inode);
 	is->s.base = is->s.first = IFIRST(header);
 	is->s.here = is->s.first;
-	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+	is->s.end = ITAIL(inode, raw_inode);
 	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
-		error = xattr_check_inode(inode, header, is->s.end);
-		if (error)
-			return error;
 		/* Find the named attribute. */
 		error = xattr_find_entry(inode, &is->s.here, is->s.end,
 					 i->name_index, i->name, 0);
@@ -2786,14 +2775,10 @@ retry:
 	 */
 
 	base = IFIRST(header);
-	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
+	end = ITAIL(inode, raw_inode);
 	min_offs = end - base;
 	total_ino = sizeof(struct ext4_xattr_ibody_header) + sizeof(u32);
 
-	error = xattr_check_inode(inode, header, end);
-	if (error)
-		goto cleanup;
-
 	ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino);
 	if (ifree >= isize_diff)
 		goto shift;
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
@@ -67,6 +67,9 @@ struct ext4_xattr_entry {
 		((void *)raw_inode + \
 		 EXT4_GOOD_OLD_INODE_SIZE + \
 		 EXT4_I(inode)->i_extra_isize))
+#define ITAIL(inode, raw_inode) \
+	((void *)(raw_inode) + \
+	 EXT4_SB((inode)->i_sb)->s_inode_size)
 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
 
 /*
@@ -206,6 +209,13 @@ extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
 extern struct mb_cache *ext4_xattr_create_cache(void);
 extern void ext4_xattr_destroy_cache(struct mb_cache *);
 
+extern int
+__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
+		    void *end, const char *function, unsigned int line);
+
+#define xattr_check_inode(inode, header, end) \
+	__xattr_check_inode((inode), (header), (end), __func__, __LINE__)
+
 #ifdef CONFIG_EXT4_FS_SECURITY
 extern int ext4_init_security(handle_t *handle, struct inode *inode,
 			      struct inode *dir, const struct qstr *qstr);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
@@ -57,8 +57,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  * So here, we have a buffer which has just come off the forget list.  Look to
  * see if we can strip all buffers from the backing page.
  *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under j_list_lock.  The caller provided us with a ref against the
+ * buffer, and we drop that here.
  */
 static void release_buffer_page(struct buffer_head *bh)
 {
@@ -738,10 +738,8 @@ start_journal_io:
 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
 	if (err) {
 		printk(KERN_WARNING
-			"JBD2: Detected IO errors while flushing file data "
-			"on %s\n", journal->j_devname);
-		if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
-			jbd2_journal_abort(journal, err);
+			"JBD2: Detected IO errors %d while flushing file data on %s\n",
+			err, journal->j_devname);
 		err = 0;
 	}
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
@@ -603,7 +603,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
 {
 	int ret = 0;
-	transaction_t *commit_trans;
+	transaction_t *commit_trans, *running_trans;
 
 	if (!(journal->j_flags & JBD2_BARRIER))
 		return 0;
@@ -613,6 +613,16 @@ int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
 		goto out;
 	commit_trans = journal->j_committing_transaction;
 	if (!commit_trans || commit_trans->t_tid != tid) {
+		running_trans = journal->j_running_transaction;
+		/*
+		 * The query transaction hasn't started committing,
+		 * it must still be running.
+		 */
+		if (WARN_ON_ONCE(!running_trans ||
+				 running_trans->t_tid != tid))
+			goto out;
+
+		running_trans->t_need_data_flush = 1;
 		ret = 1;
 		goto out;
 	}
@@ -947,7 +957,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
  * descriptor blocks we do need to generate bona fide buffers.
  *
  * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying
- * the buffer's contents they really should run flush_dcache_page(bh->b_page).
+ * the buffer's contents they really should run flush_dcache_folio(bh->b_folio).
  * But we don't bother doing that, so there will be coherency problems with
 * mmaps of blockdevs which hold live JBD-controlled filesystems.
 */
@@ -1361,7 +1371,7 @@ static int journal_check_superblock(journal_t *journal)
 		return err;
 	}
 
-	if (jbd2_journal_has_csum_v2or3_feature(journal) &&
+	if (jbd2_journal_has_csum_v2or3(journal) &&
 	    jbd2_has_feature_checksum(journal)) {
 		/* Can't have checksum v1 and v2 on at the same time! */
 		printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
@@ -1369,7 +1379,7 @@ static int journal_check_superblock(journal_t *journal)
 		return err;
 	}
 
-	if (jbd2_journal_has_csum_v2or3_feature(journal)) {
+	if (jbd2_journal_has_csum_v2or3(journal)) {
 		if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
 			printk(KERN_ERR "JBD2: Unknown checksum type\n");
 			return err;
@@ -1869,7 +1879,6 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 
 	/* Log is no longer empty */
 	write_lock(&journal->j_state_lock);
-	WARN_ON(!sb->s_sequence);
 	journal->j_flags &= ~JBD2_FLUSHED;
 	write_unlock(&journal->j_state_lock);
 
@@ -1965,17 +1974,15 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
 			return err;
 		}
 
-		if (block_start == ~0ULL) {
-			block_start = phys_block;
-			block_stop = block_start - 1;
-		}
+		if (block_start == ~0ULL)
+			block_stop = block_start = phys_block;
 
 		/*
 		 * last block not contiguous with current block,
 		 * process last contiguous region and return to this block on
 		 * next loop
 		 */
-		if (phys_block != block_stop + 1) {
+		if (phys_block != block_stop) {
 			block--;
 		} else {
 			block_stop++;
@@ -1994,11 +2001,10 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
 		 */
 		byte_start = block_start * journal->j_blocksize;
 		byte_stop = block_stop * journal->j_blocksize;
-		byte_count = (block_stop - block_start + 1) *
-				journal->j_blocksize;
+		byte_count = (block_stop - block_start) * journal->j_blocksize;
 
 		truncate_inode_pages_range(journal->j_dev->bd_mapping,
-				byte_start, byte_stop);
+				byte_start, byte_stop - 1);
 
 		if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
 			err = blkdev_issue_discard(journal->j_dev,
@@ -2013,7 +2019,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
 		}
 
 		if (unlikely(err != 0)) {
-			pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
+			pr_err("JBD2: (error %d) unable to wipe journal at physical blocks [%llu, %llu)",
 			       err, block_start, block_stop);
 			return err;
 		}
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
@@ -39,7 +39,7 @@ struct recovery_info
 
 static int do_one_pass(journal_t *journal,
 				struct recovery_info *info, enum passtype pass);
-static int scan_revoke_records(journal_t *, struct buffer_head *,
+static int scan_revoke_records(journal_t *, enum passtype, struct buffer_head *,
 			       tid_t, struct recovery_info *);
 
 #ifdef __KERNEL__
@@ -65,9 +65,8 @@ static void journal_brelse_array(struct buffer_head *b[], int n)
  */
 
 #define MAXBUF 8
-static int do_readahead(journal_t *journal, unsigned int start)
+static void do_readahead(journal_t *journal, unsigned int start)
 {
-	int err;
 	unsigned int max, nbufs, next;
 	unsigned long long blocknr;
 	struct buffer_head *bh;
@@ -85,7 +84,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 	nbufs = 0;
 
 	for (next = start; next < max; next++) {
-		err = jbd2_journal_bmap(journal, next, &blocknr);
+		int err = jbd2_journal_bmap(journal, next, &blocknr);
 
 		if (err) {
 			printk(KERN_ERR "JBD2: bad block at offset %u\n",
@@ -94,10 +93,8 @@ static int do_readahead(journal_t *journal, unsigned int start)
 		}
 
 		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
-		if (!bh) {
-			err = -ENOMEM;
+		if (!bh)
 			goto failed;
-		}
 
 		if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
 			bufs[nbufs++] = bh;
@@ -112,12 +109,10 @@ static int do_readahead(journal_t *journal, unsigned int start)
 
 	if (nbufs)
 		bh_readahead_batch(nbufs, bufs, 0);
-	err = 0;
 
 failed:
 	if (nbufs)
 		journal_brelse_array(bufs, nbufs);
-	return err;
 }
 
 #endif /* __KERNEL__ */
@@ -287,19 +282,20 @@ static int fc_do_one_pass(journal_t *journal,
 int jbd2_journal_recover(journal_t *journal)
 {
 	int err, err2;
-	journal_superblock_t *	sb;
-
 	struct recovery_info	info;
 
 	memset(&info, 0, sizeof(info));
-	sb = journal->j_superblock;
 
 	/*
 	 * The journal superblock's s_start field (the current log head)
 	 * is always zero if, and only if, the journal was cleanly
-	 * unmounted.
+	 * unmounted. We use its in-memory version j_tail here because
+	 * jbd2_journal_wipe() could have updated it without updating journal
+	 * superblock.
 	 */
-	if (!sb->s_start) {
+	if (!journal->j_tail) {
+		journal_superblock_t *sb = journal->j_superblock;
+
 		jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
 			  be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
 		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
@@ -327,6 +323,12 @@ int jbd2_journal_recover(journal_t *journal)
 		  journal->j_transaction_sequence, journal->j_head);
 
 	jbd2_journal_clear_revoke(journal);
+	/* Free revoke table allocated for replay */
+	if (journal->j_revoke != journal->j_revoke_table[0] &&
+	    journal->j_revoke != journal->j_revoke_table[1]) {
+		jbd2_journal_destroy_revoke_table(journal->j_revoke);
+		journal->j_revoke = journal->j_revoke_table[1];
+	}
 	err2 = sync_blockdev(journal->j_fs_dev);
 	if (!err)
 		err = err2;
@@ -612,6 +614,31 @@ static int do_one_pass(journal_t *journal,
 	first_commit_ID = next_commit_ID;
 	if (pass == PASS_SCAN)
 		info->start_transaction = first_commit_ID;
+	else if (pass == PASS_REVOKE) {
+		/*
+		 * Would the default revoke table have too long hash chains
+		 * during replay?
+		 */
+		if (info->nr_revokes > JOURNAL_REVOKE_DEFAULT_HASH * 16) {
+			unsigned int hash_size;
+
+			/*
+			 * Aim for average chain length of 8, limit at 1M
+			 * entries to avoid problems with malicious
+			 * filesystems.
+			 */
+			hash_size = min(roundup_pow_of_two(info->nr_revokes / 8),
+					1U << 20);
+			journal->j_revoke =
+				jbd2_journal_init_revoke_table(hash_size);
+			if (!journal->j_revoke) {
+				printk(KERN_ERR
+				       "JBD2: failed to allocate revoke table for replay with %u entries. "
+				       "Journal replay may be slow.\n", hash_size);
+				journal->j_revoke = journal->j_revoke_table[1];
+			}
+		}
+	}
 
 	jbd2_debug(1, "Starting recovery pass %d\n", pass);
 
@@ -851,6 +878,13 @@ chksum_ok:
 			continue;
 
 		case JBD2_REVOKE_BLOCK:
+			/*
+			 * If we aren't in the SCAN or REVOKE pass, then we can
+			 * just skip over this block.
+			 */
+			if (pass != PASS_REVOKE && pass != PASS_SCAN)
+				continue;
+
 			/*
 			 * Check revoke block crc in pass_scan, if csum verify
 			 * failed, check commit block time later.
@@ -863,12 +897,7 @@ chksum_ok:
 				need_check_commit_time = true;
 			}
 
-			/* If we aren't in the REVOKE pass, then we can
-			 * just skip over this block. */
-			if (pass != PASS_REVOKE)
-				continue;
-
-			err = scan_revoke_records(journal, bh,
+			err = scan_revoke_records(journal, pass, bh,
 						  next_commit_ID, info);
 			if (err)
 				goto failed;
@@ -922,8 +951,9 @@ chksum_ok:
 
 /* Scan a revoke record, marking all blocks mentioned as revoked. */
 
-static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
-			       tid_t sequence, struct recovery_info *info)
+static int scan_revoke_records(journal_t *journal, enum passtype pass,
+			       struct buffer_head *bh, tid_t sequence,
+			       struct recovery_info *info)
 {
 	jbd2_journal_revoke_header_t *header;
 	int offset, max;
@@ -944,6 +974,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 	if (jbd2_has_feature_64bit(journal))
 		record_len = 8;
 
+	if (pass == PASS_SCAN) {
+		info->nr_revokes += (max - offset) / record_len;
+		return 0;
+	}
+
 	while (offset + record_len <= max) {
 		unsigned long long blocknr;
 		int err;
@@ -956,7 +991,6 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
 		if (err)
 			return err;
-		++info->nr_revokes;
 	}
 	return 0;
 }
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
@@ -215,7 +215,7 @@ int __init jbd2_journal_init_revoke_table_cache(void)
 	return 0;
 }
 
-static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
+struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
 {
 	int shift = 0;
 	int tmp = hash_size;
@@ -231,7 +231,7 @@ static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
 	table->hash_size = hash_size;
 	table->hash_shift = shift;
 	table->hash_table =
-		kmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL);
+		kvmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL);
 	if (!table->hash_table) {
 		kmem_cache_free(jbd2_revoke_table_cache, table);
 		table = NULL;
@@ -245,7 +245,7 @@ out:
 	return table;
 }
 
-static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
+void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
 {
 	int i;
 	struct list_head *hash_list;
@@ -255,7 +255,7 @@ static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
 		J_ASSERT(list_empty(hash_list));
 	}
 
-	kfree(table->hash_table);
+	kvfree(table->hash_table);
 	kmem_cache_free(jbd2_revoke_table_cache, table);
 }
 
@@ -420,12 +420,11 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
 * do not trust the Revoked bit on buffers unless RevokeValid is also
 * set.
 */
-int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
+void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 {
 	struct jbd2_revoke_record_s *record;
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_cancel;
-	int did_revoke = 0;	/* akpm: debug */
 	struct buffer_head *bh = jh2bh(jh);
 
 	jbd2_debug(4, "journal_head %p, cancelling revoke\n", jh);
@@ -450,7 +449,6 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 			list_del(&record->hash);
 			spin_unlock(&journal->j_revoke_lock);
 			kmem_cache_free(jbd2_revoke_record_cache, record);
-			did_revoke = 1;
 		}
 	}
 
@@ -473,11 +471,10 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 			__brelse(bh2);
 		}
 	}
-	return did_revoke;
 }
 
 /*
- * journal_clear_revoked_flag clears revoked flag of buffers in
+ * jbd2_clear_buffer_revoked_flags clears revoked flag of buffers in
 * revoke table to reflect there is no revoked buffers in the next
 * transaction which is going to be started.
 */
@@ -506,9 +503,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal)
 	}
 }
 
-/* journal_switch_revoke table select j_revoke for next transaction
- * we do not want to suspend any processing until all revokes are
- * written -bzzz
+/* jbd2_journal_switch_revoke_table table select j_revoke for next
+ * transaction we do not want to suspend any processing until all
+ * revokes are written -bzzz
 */
void jbd2_journal_switch_revoke_table(journal_t *journal)
{
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
@@ -92,7 +92,6 @@ static void jbd2_get_transaction(journal_t *journal,
 	atomic_set(&transaction->t_outstanding_revokes, 0);
 	atomic_set(&transaction->t_handle_count, 0);
 	INIT_LIST_HEAD(&transaction->t_inode_list);
-	INIT_LIST_HEAD(&transaction->t_private_list);
 
 	/* Set up the commit timer for the new transaction. */
 	journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
@@ -114,12 +113,9 @@ static void jbd2_get_transaction(journal_t *journal,
  */
 
 /*
- * Update transaction's maximum wait time, if debugging is enabled.
- *
  * t_max_wait is carefully updated here with use of atomic compare exchange.
  * Note that there could be multiplre threads trying to do this simultaneously
  * hence using cmpxchg to avoid any use of locks in this case.
+ * With this t_max_wait can be updated w/o enabling jbd2_journal_enable_debug.
  */
 static inline void update_t_max_wait(transaction_t *transaction,
 				     unsigned long ts)
@@ -2079,21 +2075,6 @@ static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
 	jh->b_transaction = NULL;
 }
 
-void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
-{
-	struct buffer_head *bh = jh2bh(jh);
-
-	/* Get reference so that buffer cannot be freed before we unlock it */
-	get_bh(bh);
-	spin_lock(&jh->b_state_lock);
-	spin_lock(&journal->j_list_lock);
-	__jbd2_journal_unfile_buffer(jh);
-	spin_unlock(&journal->j_list_lock);
-	spin_unlock(&jh->b_state_lock);
-	jbd2_journal_put_journal_head(jh);
-	__brelse(bh);
-}
-
 /**
  * jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
@@ -2192,7 +2173,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 	/*
 	 * We don't want to write the buffer anymore, clear the
 	 * bit so that we don't confuse checks in
-	 * __journal_file_buffer
+	 * __jbd2_journal_file_buffer
 	 */
 	clear_buffer_dirty(bh);
 	__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
diff --git a/include/linux/fs.h b/include/linux/fs.h
@@ -1248,11 +1248,19 @@ extern int send_sigurg(struct file *file);
 #define SB_NOUSER	BIT(31)
 
 /* These flags relate to encoding and casefolding */
-#define SB_ENC_STRICT_MODE_FL	(1 << 0)
+#define SB_ENC_STRICT_MODE_FL		(1 << 0)
+#define SB_ENC_NO_COMPAT_FALLBACK_FL	(1 << 1)
 
 #define sb_has_strict_encoding(sb) \
 	(sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
 
+#if IS_ENABLED(CONFIG_UNICODE)
+#define sb_no_casefold_compat_fallback(sb) \
+	(sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL)
+#else
+#define sb_no_casefold_compat_fallback(sb) (1)
+#endif
+
 /*
 * Umount options
 */
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
@@ -459,7 +459,6 @@ struct jbd2_revoke_table_s;
 * @h_ref: Reference count on this handle.
 * @h_err: Field for caller's use to track errors through large fs operations.
 * @h_sync: Flag for sync-on-close.
- * @h_jdata: Flag to force data journaling.
 * @h_reserved: Flag for handle for reserved credits.
 * @h_aborted: Flag indicating fatal error on handle.
 * @h_type: For handle statistics.
@@ -491,7 +490,6 @@ struct jbd2_journal_handle
 
 	/* Flags [no locking] */
 	unsigned int	h_sync:		1;
-	unsigned int	h_jdata:	1;
 	unsigned int	h_reserved:	1;
 	unsigned int	h_aborted:	1;
 	unsigned int	h_type:		8;
@@ -700,12 +698,6 @@ struct transaction_s
 
 	/* Disk flush needs to be sent to fs partition [no locking] */
 	int			t_need_data_flush;
-
-	/*
-	 * For use by the filesystem to store fs-specific data
-	 * structures associated with the transaction
-	 */
-	struct list_head	t_private_list;
 };
 
 struct transaction_run_stats_s {
@@ -1388,9 +1380,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
 #define JBD2_FLUSHED	0x008	/* The journal superblock has been flushed */
 #define JBD2_LOADED	0x010	/* The journal superblock has been loaded */
 #define JBD2_BARRIER	0x020	/* Use IDE barriers */
-#define JBD2_ABORT_ON_SYNCDATA_ERR	0x040	/* Abort the journal on file
-					 * data write error in ordered
-					 * mode */
 #define JBD2_CYCLE_RECORD		0x080	/* Journal cycled record log on
 					 * clean and empty filesystem
 					 * logging area */
@@ -1407,7 +1396,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
 */
 
 /* Filing buffers */
-extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
 extern bool __jbd2_journal_refile_buffer(struct journal_head *);
 extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
 extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
@@ -1627,10 +1615,12 @@ extern void jbd2_journal_destroy_revoke_record_cache(void);
 extern void jbd2_journal_destroy_revoke_table_cache(void);
 extern int __init jbd2_journal_init_revoke_record_cache(void);
 extern int __init jbd2_journal_init_revoke_table_cache(void);
+struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size);
+void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table);
 
 extern void jbd2_journal_destroy_revoke(journal_t *);
 extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *);
-extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
+extern void jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
 extern void jbd2_journal_write_revoke_records(transaction_t *transaction,
 					      struct list_head *log_bufs);
 
@@ -1736,14 +1726,10 @@ static inline int tid_geq(tid_t x, tid_t y)
 extern int jbd2_journal_blocks_per_page(struct inode *inode);
 extern size_t journal_tag_bytes(journal_t *journal);
 
-static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j)
-{
-	return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j);
-}
-
 static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
 {
-	return jbd2_journal_has_csum_v2or3_feature(journal);
+	return jbd2_has_feature_csum2(journal) ||
+	       jbd2_has_feature_csum3(journal);
 }
 
 static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb)