mirror of https://github.com/torvalds/linux.git, synced 2025-04-12 06:49:52 +00:00
bcachefs: Fix discard path journal flushing
The discard path is supposed to issue journal flushes when there are too many empty buckets that need a journal commit before they can be written to again, but at some point this code seems to have been lost.

Bring it back with a new optimization to make sure we don't issue too many journal flushes: the journal now tracks the sequence number of the most recent flush in progress, which the discard path uses when deciding which buckets need a journal flush.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent 2ef995df0c
commit 9e9033522a
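As an illustration of the policy the commit message describes, here is a minimal sketch, not the kernel code itself: a bucket is ready to discard once the journal sequence number it is waiting on has been flushed to disk, and a new journal flush only needs to be requested when no flush covering that sequence number is already in flight. The helper and enum names below are hypothetical; only flushed_seq_ondisk and flushing_seq correspond to the journal fields touched by this commit.

/* Hypothetical sketch of the decision added by this commit (not kernel code). */
enum bucket_discard_action {
        BUCKET_DISCARD_OK,              /* seq already flushed to disk; safe to discard */
        BUCKET_DISCARD_WAIT,            /* a flush covering this seq is already in progress */
        BUCKET_DISCARD_NEED_FLUSH,      /* caller should request a journal flush */
};

static enum bucket_discard_action
bucket_discard_action(u64 seq_ready,            /* seq this bucket is waiting on, 0 if none */
                      u64 flushed_seq_ondisk,   /* most recent flush completed on disk */
                      u64 flushing_seq)         /* most recent flush in progress */
{
        if (seq_ready <= flushed_seq_ondisk)
                return BUCKET_DISCARD_OK;
        if (seq_ready <= flushing_seq)
                return BUCKET_DISCARD_WAIT;
        return BUCKET_DISCARD_NEED_FLUSH;
}

In the diff below, the discard and allocation paths implement this with bch2_bucket_journal_seq_ready(): they only bump need_journal_commit (which later triggers bch2_journal_flush_async()) when the bucket's sequence number is beyond both flushed_seq_ondisk and flushing_seq.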
@@ -1803,7 +1803,6 @@ struct discard_buckets_state {
         u64             open;
         u64             need_journal_commit;
         u64             discarded;
-        u64             need_journal_commit_this_dev;
 };
 
 static int bch2_discard_one_bucket(struct btree_trans *trans,
@@ -1827,11 +1826,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                 goto out;
         }
 
-        if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-                        c->journal.flushed_seq_ondisk,
-                        pos.inode, pos.offset)) {
-                s->need_journal_commit++;
-                s->need_journal_commit_this_dev++;
+        u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+                                                      pos.inode, pos.offset);
+        if (seq_ready > c->journal.flushed_seq_ondisk) {
+                if (seq_ready > c->journal.flushing_seq)
+                        s->need_journal_commit++;
                 goto out;
         }
 
@@ -1865,23 +1864,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                 discard_locked = true;
         }
 
-        if (!bkey_eq(*discard_pos_done, iter.pos) &&
-            ca->mi.discard && !c->opts.nochanges) {
-                /*
-                 * This works without any other locks because this is the only
-                 * thread that removes items from the need_discard tree
-                 */
-                bch2_trans_unlock_long(trans);
-                blkdev_issue_discard(ca->disk_sb.bdev,
-                                     k.k->p.offset * ca->mi.bucket_size,
-                                     ca->mi.bucket_size,
-                                     GFP_KERNEL);
-                *discard_pos_done = iter.pos;
+        if (!bkey_eq(*discard_pos_done, iter.pos)) {
+                s->discarded++;
+                *discard_pos_done = iter.pos;
 
-                ret = bch2_trans_relock_notrace(trans);
-                if (ret)
-                        goto out;
+                if (ca->mi.discard && !c->opts.nochanges) {
+                        /*
+                         * This works without any other locks because this is the only
+                         * thread that removes items from the need_discard tree
+                         */
+                        bch2_trans_unlock_long(trans);
+                        blkdev_issue_discard(ca->disk_sb.bdev,
+                                             k.k->p.offset * ca->mi.bucket_size,
+                                             ca->mi.bucket_size,
+                                             GFP_KERNEL);
+                        ret = bch2_trans_relock_notrace(trans);
+                        if (ret)
+                                goto out;
+                }
         }
 
         SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
@@ -1929,6 +1929,9 @@ static void bch2_do_discards_work(struct work_struct *work)
                         POS(ca->dev_idx, U64_MAX), 0, k,
                         bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false)));
 
+        if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
+                bch2_journal_flush_async(&c->journal, NULL);
+
         trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
                               bch2_err_str(ret));
 
@@ -2024,7 +2027,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
                         break;
         }
 
-        trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
+        trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
 
         bch2_trans_put(trans);
         percpu_ref_put(&ca->io_ref);
@@ -205,8 +205,12 @@ static inline bool may_alloc_bucket(struct bch_fs *c,
                 return false;
         }
 
-        if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-                        c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) {
+        u64 journal_seq_ready =
+                bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+                                              bucket.inode, bucket.offset);
+        if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
+                if (journal_seq_ready > c->journal.flushing_seq)
+                        s->need_journal_commit++;
                 s->skipped_need_journal_commit++;
                 return false;
         }
@@ -570,7 +574,7 @@ alloc:
                 ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
                 : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
 
-        if (s.skipped_need_journal_commit * 2 > avail)
+        if (s.need_journal_commit * 2 > avail)
                 bch2_journal_flush_async(&c->journal, NULL);
 
         if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) {
@@ -18,6 +18,7 @@ struct bucket_alloc_state {
         u64     buckets_seen;
         u64     skipped_open;
         u64     skipped_need_journal_commit;
+        u64     need_journal_commit;
         u64     skipped_nocow;
         u64     skipped_nouse;
         u64     skipped_mi_btree_bitmap;
@@ -22,23 +22,21 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_
         memset(t->d, 0, sizeof(t->d[0]) << t->bits);
 }
 
-bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
-                                      u64 flushed_seq,
-                                      unsigned dev, u64 bucket)
+u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b,
+                                  unsigned dev, u64 bucket)
 {
         struct buckets_waiting_for_journal_table *t;
         u64 dev_bucket = (u64) dev << 56 | bucket;
-        bool ret = false;
-        unsigned i;
+        u64 ret = 0;
 
         mutex_lock(&b->lock);
         t = b->t;
 
-        for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
+        for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
                 struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
 
                 if (h->dev_bucket == dev_bucket) {
-                        ret = h->journal_seq > flushed_seq;
+                        ret = h->journal_seq;
                         break;
                 }
         }
@@ -4,8 +4,8 @@
 
 #include "buckets_waiting_for_journal_types.h"
 
-bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
-                                      u64, unsigned, u64);
+u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *,
+                                  unsigned, u64);
 int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
                                          u64, unsigned, u64, u64);
 
@@ -796,6 +796,7 @@ recheck_need_open:
         }
 
         buf->must_flush = true;
+        j->flushing_seq = max(j->flushing_seq, seq);
 
         if (parent && !closure_wait(&buf->wait, parent))
                 BUG();
@@ -237,6 +237,7 @@ struct journal {
         /* seq, last_seq from the most recent journal entry successfully written */
         u64                     seq_ondisk;
         u64                     flushed_seq_ondisk;
+        u64                     flushing_seq;
         u64                     last_seq_ondisk;
         u64                     err_seq;
         u64                     last_empty_seq;
@@ -727,7 +727,7 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail,
         TP_ARGS(c, str)
 );
 
-TRACE_EVENT(discard_buckets,
+DECLARE_EVENT_CLASS(discard_buckets_class,
         TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
                  u64 need_journal_commit, u64 discarded, const char *err),
         TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
@@ -759,6 +759,18 @@ TRACE_EVENT(discard_buckets,
                   __entry->err)
 );
 
+DEFINE_EVENT(discard_buckets_class, discard_buckets,
+        TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+                 u64 need_journal_commit, u64 discarded, const char *err),
+        TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+);
+
+DEFINE_EVENT(discard_buckets_class, discard_buckets_fast,
+        TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+                 u64 need_journal_commit, u64 discarded, const char *err),
+        TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+);
+
 TRACE_EVENT(bucket_invalidate,
         TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
         TP_ARGS(c, dev, bucket, sectors),