bcachefs: Fix discard path journal flushing

The discard path is supposed to issue journal flushes when there's too
many buckets empty buckets that need a journal commit before they can be
written to again, but at some point this code seems to have been lost.

Bring it back with a new optimization to make sure we don't issue too
many journal flushes: the journal now tracks the sequence number of the
most recent flush in progress, which the discard path uses when deciding
which buckets need a journal flush.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2025-01-27 01:21:44 -05:00
parent 2ef995df0c
commit 9e9033522a
8 changed files with 55 additions and 35 deletions

View File

@ -1803,7 +1803,6 @@ struct discard_buckets_state {
u64 open;
u64 need_journal_commit;
u64 discarded;
u64 need_journal_commit_this_dev;
};
static int bch2_discard_one_bucket(struct btree_trans *trans,
@ -1827,11 +1826,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto out;
}
if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
c->journal.flushed_seq_ondisk,
pos.inode, pos.offset)) {
s->need_journal_commit++;
s->need_journal_commit_this_dev++;
u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
pos.inode, pos.offset);
if (seq_ready > c->journal.flushed_seq_ondisk) {
if (seq_ready > c->journal.flushing_seq)
s->need_journal_commit++;
goto out;
}
@ -1865,23 +1864,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
discard_locked = true;
}
if (!bkey_eq(*discard_pos_done, iter.pos) &&
ca->mi.discard && !c->opts.nochanges) {
/*
* This works without any other locks because this is the only
* thread that removes items from the need_discard tree
*/
bch2_trans_unlock_long(trans);
blkdev_issue_discard(ca->disk_sb.bdev,
k.k->p.offset * ca->mi.bucket_size,
ca->mi.bucket_size,
GFP_KERNEL);
*discard_pos_done = iter.pos;
if (!bkey_eq(*discard_pos_done, iter.pos)) {
s->discarded++;
*discard_pos_done = iter.pos;
ret = bch2_trans_relock_notrace(trans);
if (ret)
goto out;
if (ca->mi.discard && !c->opts.nochanges) {
/*
* This works without any other locks because this is the only
* thread that removes items from the need_discard tree
*/
bch2_trans_unlock_long(trans);
blkdev_issue_discard(ca->disk_sb.bdev,
k.k->p.offset * ca->mi.bucket_size,
ca->mi.bucket_size,
GFP_KERNEL);
ret = bch2_trans_relock_notrace(trans);
if (ret)
goto out;
}
}
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
@ -1929,6 +1929,9 @@ static void bch2_do_discards_work(struct work_struct *work)
POS(ca->dev_idx, U64_MAX), 0, k,
bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false)));
if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
bch2_journal_flush_async(&c->journal, NULL);
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
@ -2024,7 +2027,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
break;
}
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
bch2_trans_put(trans);
percpu_ref_put(&ca->io_ref);

View File

@ -205,8 +205,12 @@ static inline bool may_alloc_bucket(struct bch_fs *c,
return false;
}
if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) {
u64 journal_seq_ready =
bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
bucket.inode, bucket.offset);
if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
if (journal_seq_ready > c->journal.flushing_seq)
s->need_journal_commit++;
s->skipped_need_journal_commit++;
return false;
}
@ -570,7 +574,7 @@ alloc:
? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
: bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
if (s.skipped_need_journal_commit * 2 > avail)
if (s.need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) {

View File

@ -18,6 +18,7 @@ struct bucket_alloc_state {
u64 buckets_seen;
u64 skipped_open;
u64 skipped_need_journal_commit;
u64 need_journal_commit;
u64 skipped_nocow;
u64 skipped_nouse;
u64 skipped_mi_btree_bitmap;

View File

@ -22,23 +22,21 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_
memset(t->d, 0, sizeof(t->d[0]) << t->bits);
}
bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
u64 flushed_seq,
unsigned dev, u64 bucket)
u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b,
unsigned dev, u64 bucket)
{
struct buckets_waiting_for_journal_table *t;
u64 dev_bucket = (u64) dev << 56 | bucket;
bool ret = false;
unsigned i;
u64 ret = 0;
mutex_lock(&b->lock);
t = b->t;
for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
if (h->dev_bucket == dev_bucket) {
ret = h->journal_seq > flushed_seq;
ret = h->journal_seq;
break;
}
}

View File

@ -4,8 +4,8 @@
#include "buckets_waiting_for_journal_types.h"
bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
u64, unsigned, u64);
u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *,
unsigned, u64);
int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
u64, unsigned, u64, u64);

View File

@ -796,6 +796,7 @@ recheck_need_open:
}
buf->must_flush = true;
j->flushing_seq = max(j->flushing_seq, seq);
if (parent && !closure_wait(&buf->wait, parent))
BUG();

View File

@ -237,6 +237,7 @@ struct journal {
/* seq, last_seq from the most recent journal entry successfully written */
u64 seq_ondisk;
u64 flushed_seq_ondisk;
u64 flushing_seq;
u64 last_seq_ondisk;
u64 err_seq;
u64 last_empty_seq;

View File

@ -727,7 +727,7 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail,
TP_ARGS(c, str)
);
TRACE_EVENT(discard_buckets,
DECLARE_EVENT_CLASS(discard_buckets_class,
TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
u64 need_journal_commit, u64 discarded, const char *err),
TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
@ -759,6 +759,18 @@ TRACE_EVENT(discard_buckets,
__entry->err)
);
DEFINE_EVENT(discard_buckets_class, discard_buckets,
TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
u64 need_journal_commit, u64 discarded, const char *err),
TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
);
DEFINE_EVENT(discard_buckets_class, discard_buckets_fast,
TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
u64 need_journal_commit, u64 discarded, const char *err),
TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
);
TRACE_EVENT(bucket_invalidate,
TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
TP_ARGS(c, dev, bucket, sectors),