afs: Use netfslib for directories

In the AFS ecosystem, directories are just a special type of file that is
downloaded and parsed locally.  The download is done by the same mechanism
as for ordinary files and the data can be cached.  There is one important
semantic restriction on directories that doesn't apply to ordinary files:
the client must download the entire directory in one go because the server
could, for example, fabricate the contents of the blob on the fly with each
download and give a different image each time, so a piecemeal fetch could
yield an inconsistent view.

So that we can cache the directory download, switch AFS directory support
over to using the netfslib single-object API, thereby allowing directory
content to be stored in the local cache.
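
As a rough illustration of the shape this takes (a sketch only, not the
code added by this patch: the helper name afs_dir_read_into_folioq() is
invented, and netfs_read_single() is assumed to take the inode, an
optional file pointer and a destination iov_iter), the directory is
fetched in one go into the folio_queue buffer hung off the vnode (see
change (1) below):

    /* Sketch only: assumes <linux/netfs.h>, <linux/folio_queue.h> and
     * <linux/uio.h>; error handling is trimmed right down.
     */
    static ssize_t afs_dir_read_into_folioq(struct afs_vnode *dvnode,
                                            struct file *file)
    {
            struct iov_iter iter;
            size_t size = i_size_read(&dvnode->netfs.inode);
            size_t cur = dvnode->directory_size;
            int ret;

            /* Make sure the folio_queue chain covers the whole blob. */
            ret = netfs_alloc_folioq_buffer(NULL, &dvnode->directory, &cur, size,
                                            mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
            dvnode->directory_size = cur;
            if (ret < 0)
                    return ret;

            /* Describe the buffer to netfslib and fetch the lot at once. */
            iov_iter_folio_queue(&iter, ITER_DEST, dvnode->directory, 0, 0, size);
            return netfs_read_single(&dvnode->netfs.inode, file, &iter);
    }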

To make this work, the following changes are made:

 (1) A directory's contents are now stored in a folio_queue chain attached
     to the afs_vnode (inode) struct rather than its associated pagecache,
     though multipage folios are still used to hold the data.  The folio
     queue is discarded when the directory inode is evicted.

     This also helps with the phasing out of ITER_XARRAY.

 (2) Various directory operations are made to use and unuse the cache
     cookie.

 (3) The content checking, content dumping and content iteration are now
     performed with a standard iov_iter iterator over the contents of the
     folio queue (a rough sketch of such a walk follows this list).

 (4) Iteration and modification must be done with the vnode's validate_lock
     held.  In conjunction with (1), this means that the iteration can be
     done without the need to lock pages or take extra refs on them, unlike
     when accessing ->i_pages.

 (5) Convert to using netfs_read_single() to read data.

 (6) Provide a ->writepages() to call netfs_writeback_single() to save the
     data to the cache according to the VM's scheduling whilst holding the
     validate_lock read-locked as per (4) (see the writeback sketch after
     this list).

 (7) Change local directory image editing functions:

     (a) Provide a function to get a specific block by number from the
     	 folio_queue as we can no longer use the i_pages xarray to locate
     	 folios by index.  This uses a cursor to remember the current
     	 position as we need to iterate through the directory contents.
     	 The block is kmapped before being returned.

     (b) Make the function in (a) extend the directory by an extra folio if
     	 we run out of space.

     (c) Hoist the check of the block free space counter, for those blocks
         that have one, to earlier in the function so that we can avoid
         getting and mapping a block that lacks sufficient room.

     (d) Remove the page unlocking and putting done during the editing
     	 loops.  This is no longer necessary as the folio_queue holds the
     	 references and the pages are no longer in the pagecache.

     (e) Mark the inode dirty and pin the cache usage till writeback at the
     	 end of a successful edit.

 (8) Don't set the large_folios flag on the inode as we do the allocation
     ourselves rather than the VM doing it automatically.

 (9) Mark the inode as being a single object that isn't uploaded to the
     server.

(10) Enable caching on directories.

(11) Only set the upload key for writeback for regular files.
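
To give a feel for (3) and (4), here is a rough sketch (not code from this
patch: afs_dir_walk_blocks() and its process callback are invented for the
example, and the real code kmaps the folios in place rather than copying
each block out) of walking the cached directory image with an iov_iter
over the folio_queue whilst holding the validate_lock:

    static int afs_dir_walk_blocks(struct afs_vnode *dvnode,
                                   int (*process)(union afs_xdr_dir_block *, size_t))
    {
            union afs_xdr_dir_block *block;
            struct iov_iter iter;
            size_t block_nr = 0;
            int ret = 0;

            block = kmalloc(sizeof(*block), GFP_KERNEL);
            if (!block)
                    return -ENOMEM;

            /* The folio_queue holds its own refs on the folios, so no page
             * locking or extra refs are needed - just hold validate_lock.
             */
            down_read(&dvnode->validate_lock);
            iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
                                 i_size_read(&dvnode->netfs.inode));

            while (iov_iter_count(&iter) >= AFS_DIR_BLOCK_SIZE) {
                    /* Copy out one 2KiB block at a time for inspection. */
                    if (copy_from_iter(block, AFS_DIR_BLOCK_SIZE, &iter) !=
                        AFS_DIR_BLOCK_SIZE) {
                            ret = -EIO;
                            break;
                    }
                    ret = process(block, block_nr++);
                    if (ret < 0)
                            break;
            }

            up_read(&dvnode->validate_lock);
            kfree(block);
            return ret;
    }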
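
And for (6), a minimal sketch of the single-object writeback path (again
not the exact code: the real afs_single_writepages() added by this patch
does more checking, and netfs_writeback_single() is assumed here to take
the mapping, the wbc and a source iov_iter):

    static int afs_dir_writepages_sketch(struct address_space *mapping,
                                         struct writeback_control *wbc)
    {
            struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
            struct iov_iter iter;
            int ret = 0;

            /* Read-lock validate_lock so that the folio_queue can't be torn
             * down under the write to the cache, as per (4).
             */
            down_read(&dvnode->validate_lock);
            if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
                    iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory,
                                         0, 0, i_size_read(&dvnode->netfs.inode));
                    ret = netfs_writeback_single(mapping, wbc, &iter);
            }
            up_read(&dvnode->validate_lock);
            return ret;
    }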

Notes:

 (*) We keep the ->release_folio(), ->invalidate_folio() and
     ->migrate_folio() ops as we set the mapping pointer on the folio.

Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/20241216204124.3752367-22-dhowells@redhat.com
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-afs@lists.infradead.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
8 files changed, 533 insertions(+), 486 deletions(-)

[File diff suppressed because it is too large]

fs/afs/dir_edit.c

@@ -10,6 +10,7 @@
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/iversion.h>
#include <linux/folio_queue.h>
#include "internal.h"
#include "xdr_fs.h"
@@ -105,23 +106,57 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
}
/*
* Get a new directory folio.
* Get a specific block, extending the directory storage to cover it as needed.
*/
static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
static union afs_xdr_dir_block *afs_dir_get_block(struct afs_dir_iter *iter, size_t block)
{
struct address_space *mapping = vnode->netfs.inode.i_mapping;
struct folio_queue *fq;
struct afs_vnode *dvnode = iter->dvnode;
struct folio *folio;
size_t blpos = block * AFS_DIR_BLOCK_SIZE;
size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
int ret;
folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
mapping->gfp_mask);
if (IS_ERR(folio)) {
afs_invalidate_dir(vnode, afs_dir_invalid_edit_get_block);
return NULL;
if (dvnode->directory_size < blend) {
size_t cur_size = dvnode->directory_size;
ret = netfs_alloc_folioq_buffer(
NULL, &dvnode->directory, &cur_size, blend,
mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
dvnode->directory_size = cur_size;
if (ret < 0)
goto fail;
}
if (!folio_test_private(folio))
folio_attach_private(folio, (void *)1);
return folio;
fq = iter->fq;
if (!fq)
fq = dvnode->directory;
/* Search the folio queue for the folio containing the block... */
for (; fq; fq = fq->next) {
for (int s = iter->fq_slot; s < folioq_count(fq); s++) {
size_t fsize = folioq_folio_size(fq, s);
if (blend <= fpos + fsize) {
/* ... and then return the mapped block. */
folio = folioq_folio(fq, s);
if (WARN_ON_ONCE(folio_pos(folio) != fpos))
goto fail;
iter->fq = fq;
iter->fq_slot = s;
iter->fpos = fpos;
return kmap_local_folio(folio, blpos - fpos);
}
fpos += fsize;
}
iter->fq_slot = 0;
}
fail:
iter->fq = NULL;
iter->fq_slot = 0;
afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
return NULL;
}
/*
@@ -209,9 +244,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
{
union afs_xdr_dir_block *meta, *block;
union afs_xdr_dirent *de;
struct folio *folio0, *folio;
struct afs_dir_iter iter = { .dvnode = vnode };
unsigned int need_slots, nr_blocks, b;
pgoff_t index;
loff_t i_size;
int slot;
@@ -224,16 +258,13 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
return;
}
folio0 = afs_dir_get_folio(vnode, 0);
if (!folio0) {
_leave(" [fgp]");
meta = afs_dir_get_block(&iter, 0);
if (!meta)
return;
}
/* Work out how many slots we're going to need. */
need_slots = afs_dir_calc_slots(name->len);
meta = kmap_local_folio(folio0, 0);
if (i_size == 0)
goto new_directory;
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
@@ -245,18 +276,17 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
/* If the directory extended into a new folio, then we need to
* tack a new folio on the end.
*/
index = b / AFS_DIR_BLOCKS_PER_PAGE;
if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
goto error_too_many_blocks;
if (index >= folio_nr_pages(folio0)) {
folio = afs_dir_get_folio(vnode, index);
if (!folio)
goto error;
} else {
folio = folio0;
}
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
/* Lower dir blocks have a counter in the header we can check. */
if (b < AFS_DIR_BLOCKS_WITH_CTR &&
meta->meta.alloc_ctrs[b] < need_slots)
continue;
block = afs_dir_get_block(&iter, b);
if (!block)
goto error;
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
@@ -275,24 +305,16 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
}
/* Only lower dir blocks have a counter in the header. */
if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
meta->meta.alloc_ctrs[b] >= need_slots) {
/* We need to try and find one or more consecutive
* slots to hold the entry.
*/
slot = afs_find_contig_bits(block, need_slots);
if (slot >= 0) {
_debug("slot %u", slot);
goto found_space;
}
/* We need to try and find one or more consecutive slots to
* hold the entry.
*/
slot = afs_find_contig_bits(block, need_slots);
if (slot >= 0) {
_debug("slot %u", slot);
goto found_space;
}
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
}
/* There are no spare slots of sufficient size, yet the operation
@@ -307,8 +329,7 @@ new_directory:
i_size = AFS_DIR_BLOCK_SIZE;
afs_set_i_size(vnode, i_size);
slot = AFS_DIR_RESV_BLOCKS0;
folio = folio0;
block = kmap_local_folio(folio, 0);
block = afs_dir_get_block(&iter, 0);
nr_blocks = 1;
b = 0;
@@ -328,10 +349,6 @@ found_space:
/* Adjust the bitmap. */
afs_set_contig_bits(block, slot, need_slots);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
@@ -341,20 +358,16 @@ found_space:
afs_stat_v(vnode, n_dir_cr);
_debug("Insert %s in %u[%u]", name->name, b, slot);
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
out_unmap:
kunmap_local(meta);
folio_unlock(folio0);
folio_put(folio0);
_leave("");
return;
already_invalidated:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
goto out_unmap;
error_too_many_blocks:
@@ -376,9 +389,8 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
{
union afs_xdr_dir_block *meta, *block;
union afs_xdr_dirent *de;
struct folio *folio0, *folio;
struct afs_dir_iter iter = { .dvnode = vnode };
unsigned int need_slots, nr_blocks, b;
pgoff_t index;
loff_t i_size;
int slot;
@@ -393,31 +405,20 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
}
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
folio0 = afs_dir_get_folio(vnode, 0);
if (!folio0) {
_leave(" [fgp]");
meta = afs_dir_get_block(&iter, 0);
if (!meta)
return;
}
/* Work out how many slots we're going to discard. */
need_slots = afs_dir_calc_slots(name->len);
meta = kmap_local_folio(folio0, 0);
/* Find a block that has sufficient slots available. Each folio
* contains two or more directory blocks.
*/
for (b = 0; b < nr_blocks; b++) {
index = b / AFS_DIR_BLOCKS_PER_PAGE;
if (index >= folio_nr_pages(folio0)) {
folio = afs_dir_get_folio(vnode, index);
if (!folio)
goto error;
} else {
folio = folio0;
}
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
block = afs_dir_get_block(&iter, b);
if (!block)
goto error;
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
@@ -431,10 +432,6 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
}
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
}
/* Didn't find the dirent to clobber. Download the directory again. */
@@ -455,34 +452,26 @@ found_dirent:
/* Adjust the bitmap. */
afs_clear_contig_bits(block, slot, need_slots);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
meta->meta.alloc_ctrs[b] += need_slots;
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
afs_stat_v(vnode, n_dir_rm);
_debug("Remove %s from %u[%u]", name->name, b, slot);
out_unmap:
kunmap_local(meta);
folio_unlock(folio0);
folio_put(folio0);
_leave("");
return;
already_invalidated:
kunmap_local(block);
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
0, 0, 0, 0, name->name);
kunmap_local(block);
if (folio != folio0) {
folio_unlock(folio);
folio_put(folio);
}
goto out_unmap;
error:
@@ -500,9 +489,8 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
{
union afs_xdr_dir_block *block;
union afs_xdr_dirent *de;
struct folio *folio;
struct afs_dir_iter iter = { .dvnode = vnode };
unsigned int nr_blocks, b;
pgoff_t index;
loff_t i_size;
int slot;
@@ -513,19 +501,17 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_bad_size);
return;
}
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
/* Find a block that has sufficient slots available. Each folio
* contains two or more directory blocks.
*/
for (b = 0; b < nr_blocks; b++) {
index = b / AFS_DIR_BLOCKS_PER_PAGE;
folio = afs_dir_get_folio(vnode, index);
if (!folio)
block = afs_dir_get_block(&iter, b);
if (!block)
goto error;
block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
goto already_invalidated;
@@ -535,8 +521,6 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
goto found_dirent;
kunmap_local(block);
folio_unlock(folio);
folio_put(folio);
}
/* Didn't find the dirent to clobber. Download the directory again. */
@@ -554,8 +538,7 @@ found_dirent:
ntohl(de->u.vnode), ntohl(de->u.unique), "..");
kunmap_local(block);
folio_unlock(folio);
folio_put(folio);
netfs_single_mark_inode_dirty(&vnode->netfs.inode);
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
out:
@@ -564,8 +547,6 @@ out:
already_invalidated:
kunmap_local(block);
folio_unlock(folio);
folio_put(folio);
trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
0, 0, 0, 0, "..");
goto out;

fs/afs/file.c

@@ -389,6 +389,14 @@ static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
rreq->netfs_priv = key;
}
break;
case NETFS_WRITEBACK:
case NETFS_WRITETHROUGH:
case NETFS_UNBUFFERED_WRITE:
case NETFS_DIO_WRITE:
if (S_ISREG(rreq->inode->i_mode))
rreq->io_streams[0].avail = true;
break;
case NETFS_WRITEBACK_SINGLE:
default:
break;
}

fs/afs/inode.c

@@ -110,7 +110,9 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_op = &afs_dir_inode_operations;
inode->i_fop = &afs_dir_file_operations;
inode->i_mapping->a_ops = &afs_dir_aops;
mapping_set_large_folios(inode->i_mapping);
__set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags);
/* Assume locally cached directory data will be valid. */
__set_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
break;
case AFS_FTYPE_SYMLINK:
/* Symlinks with a mode of 0644 are actually mountpoints. */
@@ -440,7 +442,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
} __packed key;
struct afs_vnode_cache_aux aux;
if (vnode->status.type != AFS_FTYPE_FILE) {
if (vnode->status.type != AFS_FTYPE_FILE &&
vnode->status.type != AFS_FTYPE_DIR) {
vnode->netfs.cache = NULL;
return;
}
@@ -642,6 +645,7 @@ int afs_drop_inode(struct inode *inode)
void afs_evict_inode(struct inode *inode)
{
struct afs_vnode_cache_aux aux;
struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
struct afs_vnode *vnode = AFS_FS_I(inode);
_enter("{%llx:%llu.%d}",
@@ -653,8 +657,21 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
if ((S_ISDIR(inode->i_mode)) &&
(inode->i_state & I_DIRTY) &&
!sbi->dyn_root) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.for_sync = true,
.range_end = LLONG_MAX,
};
afs_single_writepages(inode->i_mapping, &wbc);
}
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
netfs_free_folioq_buffer(vnode->directory);
afs_set_cache_aux(vnode, &aux);
netfs_clear_inode_writeback(inode, &aux);

fs/afs/internal.h

@@ -720,7 +720,9 @@ struct afs_vnode {
#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */
#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */
struct folio_queue *directory; /* Directory contents */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
@@ -729,6 +731,7 @@ struct afs_vnode {
ktime_t locked_at; /* Time at which lock obtained */
enum afs_lock_state lock_state : 8;
afs_lock_type_t lock_type : 8;
unsigned int directory_size; /* Amount of space in ->directory */
/* outstanding callback notification on this file */
struct work_struct cb_work; /* Work for mmap'd files */
@@ -984,6 +987,16 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl
i_size_read(&vnode->netfs.inode), flags);
}
/*
* Directory iteration management.
*/
struct afs_dir_iter {
struct afs_vnode *dvnode;
struct folio_queue *fq;
unsigned int fpos;
int fq_slot;
};
#include <trace/events/afs.h>
/*****************************************************************************/
@@ -1065,8 +1078,11 @@ extern const struct inode_operations afs_dir_inode_operations;
extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
extern void afs_d_release(struct dentry *);
extern void afs_check_for_remote_deletion(struct afs_operation *);
int afs_single_writepages(struct address_space *mapping,
struct writeback_control *wbc);
/*
* dir_edit.c

View File

@@ -696,6 +696,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
vnode->volume = NULL;
vnode->lock_key = NULL;
vnode->permit_cache = NULL;
vnode->directory = NULL;
vnode->directory_size = 0;
vnode->flags = 1 << AFS_VNODE_UNSET;
vnode->lock_state = AFS_VNODE_LOCK_NONE;

fs/afs/write.c

@@ -182,8 +182,8 @@ void afs_issue_write(struct netfs_io_subrequest *subreq)
*/
void afs_begin_writeback(struct netfs_io_request *wreq)
{
afs_get_writeback_key(wreq);
wreq->io_streams[0].avail = true;
if (S_ISREG(wreq->inode->i_mode))
afs_get_writeback_key(wreq);
}
/*

include/trace/events/afs.h

@@ -930,9 +930,9 @@ TRACE_EVENT(afs_sent_data,
);
TRACE_EVENT(afs_dir_check_failed,
TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size),
TP_PROTO(struct afs_vnode *vnode, loff_t off),
TP_ARGS(vnode, off, i_size),
TP_ARGS(vnode, off),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode)
@@ -943,7 +943,7 @@ TRACE_EVENT(afs_dir_check_failed,
TP_fast_assign(
__entry->vnode = vnode;
__entry->off = off;
__entry->i_size = i_size;
__entry->i_size = i_size_read(&vnode->netfs.inode);
),
TP_printk("vn=%p %llx/%llx",