1
0
mirror of https://github.com/torvalds/linux.git synced 2025-04-10 18:27:40 +00:00
NeilBrown 88d5baf690
Change inode_operations.mkdir to return struct dentry *
Some filesystems, such as NFS, cifs, ceph, and fuse, do not have
complete control of sequencing on the actual filesystem (e.g.  on a
different server) and may find that the inode created for a mkdir
request already exists in the icache and dcache by the time the mkdir
request returns.  For example, if the filesystem is mounted twice the
directory could be visible on the other mount before it is on the
original mount, and a pair of name_to_handle_at(), open_by_handle_at()
calls could instantiate the directory inode with an IS_ROOT() dentry
before the first mkdir returns.

This means that the dentry passed to ->mkdir() may not be the one that
is associated with the inode after the ->mkdir() completes.  Some
callers need to interact with the inode after the ->mkdir completes and
they currently need to perform a lookup in the (rare) case that the
dentry is no longer hashed.

This lookup-after-mkdir requires that the directory remains locked to
avoid races.  Planned future patches to lock the dentry rather than the
directory will mean that this lookup cannot be performed atomically with
the mkdir.

To remove this barrier, this patch changes ->mkdir to return the
resulting dentry if it is different from the one passed in.
Possible returns are:
  NULL - the directory was created and no other dentry was used
  ERR_PTR() - an error occurred
  non-NULL - this other dentry was spliced in

This patch only changes file-systems to return "ERR_PTR(err)" instead of
"err" or equivalent transformations.  Subsequent patches will make
further changes to some file-systems to return a correct dentry.

Not all filesystems reliably result in a positive hashed dentry:

- NFS, cifs, hostfs will sometimes need to perform a lookup of
  the name to get inode information.  Races could result in this
  returning something different. Note that this lookup is
  non-atomic which is what we are trying to avoid.  Placing the
  lookup in filesystem code means it only happens when the filesystem
  has no other option.
- kernfs and tracefs leave the dentry negative and the ->revalidate
  operation ensures that lookup will be called to correctly populate
  the dentry.  This could be fixed but I don't think it is important
  to any of the users of vfs_mkdir() which look at the dentry.

The recommendation to use
    d_drop();d_splice_alias()
is ugly but fits with current practice.  A planned future patch will
change this.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20250227013949.536172-2-neilb@suse.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-27 20:00:17 +01:00

1256 lines
30 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/vfat/namei.c
*
* Written 1992,1993 by Werner Almesberger
*
* Windows95/Windows NT compatible extended MSDOS filesystem
* by Gordon Chaffee Copyright (C) 1995. Send bug reports for the
* VFAT filesystem to <chaffee@cs.berkeley.edu>. Specify
* what file operation caused you trouble and if you can duplicate
* the problem, send a script that demonstrates it.
*
* Short name translation 1999, 2001 by Wolfram Pienkoss <wp@bszh.de>
*
* Support Multibyte characters and cleanup by
* OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
*/
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/kernel.h>
#include <linux/iversion.h>
#include "fat.h"
static inline unsigned long vfat_d_version(struct dentry *dentry)
{
return (unsigned long) dentry->d_fsdata;
}
static inline void vfat_d_version_set(struct dentry *dentry,
unsigned long version)
{
dentry->d_fsdata = (void *) version;
}
/*
* If new entry was created in the parent, it could create the 8.3
* alias (the shortname of logname). So, the parent may have the
* negative-dentry which matches the created 8.3 alias.
*
* If it happened, the negative dentry isn't actually negative
* anymore. So, drop it.
*/
static bool vfat_revalidate_shortname(struct dentry *dentry, struct inode *dir)
{
return inode_eq_iversion(dir, vfat_d_version(dentry));
}
static int vfat_revalidate(struct inode *dir, const struct qstr *name,
struct dentry *dentry, unsigned int flags)
{
if (flags & LOOKUP_RCU)
return -ECHILD;
/* This is not negative dentry. Always valid. */
if (d_really_is_positive(dentry))
return 1;
return vfat_revalidate_shortname(dentry, dir);
}
static int vfat_revalidate_ci(struct inode *dir, const struct qstr *name,
struct dentry *dentry, unsigned int flags)
{
if (flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
*
* Note, rename() to existing directory entry will have ->d_inode,
* and will use existing name which isn't specified name by user.
*
* We may be able to drop this positive dentry here. But dropping
* positive dentry isn't good idea. So it's unsupported like
* rename("filename", "FILENAME") for now.
*/
if (d_really_is_positive(dentry))
return 1;
/*
* This may be nfsd (or something), anyway, we can't see the
* intent of this. So, since this can be for creation, drop it.
*/
if (!flags)
return 0;
/*
* Drop the negative dentry, in order to make sure to use the
* case sensitive name which is specified by user if this is
* for creation.
*/
if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
return 0;
return vfat_revalidate_shortname(dentry, dir);
}
/* returns the length of a struct qstr, ignoring trailing dots */
static unsigned int __vfat_striptail_len(unsigned int len, const char *name)
{
while (len && name[len - 1] == '.')
len--;
return len;
}
static unsigned int vfat_striptail_len(const struct qstr *qstr)
{
return __vfat_striptail_len(qstr->len, qstr->name);
}
/*
* Compute the hash for the vfat name corresponding to the dentry.
* Note: if the name is invalid, we leave the hash code unchanged so
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
static int vfat_hash(const struct dentry *dentry, struct qstr *qstr)
{
qstr->hash = full_name_hash(dentry, qstr->name, vfat_striptail_len(qstr));
return 0;
}
/*
* Compute the hash for the vfat name corresponding to the dentry.
* Note: if the name is invalid, we leave the hash code unchanged so
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
static int vfat_hashi(const struct dentry *dentry, struct qstr *qstr)
{
struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
const unsigned char *name;
unsigned int len;
unsigned long hash;
name = qstr->name;
len = vfat_striptail_len(qstr);
hash = init_name_hash(dentry);
while (len--)
hash = partial_name_hash(nls_tolower(t, *name++), hash);
qstr->hash = end_name_hash(hash);
return 0;
}
/*
* Case insensitive compare of two vfat names.
*/
static int vfat_cmpi(const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
unsigned int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
alen = vfat_striptail_len(name);
blen = __vfat_striptail_len(len, str);
if (alen == blen) {
if (nls_strnicmp(t, name->name, str, alen) == 0)
return 0;
}
return 1;
}
/*
* Case sensitive compare of two vfat names.
*/
static int vfat_cmp(const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
unsigned int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
alen = vfat_striptail_len(name);
blen = __vfat_striptail_len(len, str);
if (alen == blen) {
if (strncmp(name->name, str, alen) == 0)
return 0;
}
return 1;
}
static const struct dentry_operations vfat_ci_dentry_ops = {
.d_revalidate = vfat_revalidate_ci,
.d_hash = vfat_hashi,
.d_compare = vfat_cmpi,
};
static const struct dentry_operations vfat_dentry_ops = {
.d_revalidate = vfat_revalidate,
.d_hash = vfat_hash,
.d_compare = vfat_cmp,
};
/* Characters that are undesirable in an MS-DOS file name */
static inline bool vfat_bad_char(wchar_t w)
{
return (w < 0x0020)
|| (w == '*') || (w == '?') || (w == '<') || (w == '>')
|| (w == '|') || (w == '"') || (w == ':') || (w == '/')
|| (w == '\\');
}
static inline bool vfat_replace_char(wchar_t w)
{
return (w == '[') || (w == ']') || (w == ';') || (w == ',')
|| (w == '+') || (w == '=');
}
static wchar_t vfat_skip_char(wchar_t w)
{
return (w == '.') || (w == ' ');
}
static inline int vfat_is_used_badchars(const wchar_t *s, int len)
{
int i;
for (i = 0; i < len; i++)
if (vfat_bad_char(s[i]))
return -EINVAL;
if (s[i - 1] == ' ') /* last character cannot be space */
return -EINVAL;
return 0;
}
static int vfat_find_form(struct inode *dir, unsigned char *name)
{
struct fat_slot_info sinfo;
int err = fat_scan(dir, name, &sinfo);
if (err)
return -ENOENT;
brelse(sinfo.bh);
return 0;
}
/*
* 1) Valid characters for the 8.3 format alias are any combination of
* letters, uppercase alphabets, digits, any of the
* following special characters:
* $ % ' ` - @ { } ~ ! # ( ) & _ ^
* In this case Longfilename is not stored in disk.
*
* WinNT's Extension:
* File name and extension name is contain uppercase/lowercase
* only. And it is expressed by CASE_LOWER_BASE and CASE_LOWER_EXT.
*
* 2) File name is 8.3 format, but it contain the uppercase and
* lowercase char, muliti bytes char, etc. In this case numtail is not
* added, but Longfilename is stored.
*
* 3) When the one except for the above, or the following special
* character are contained:
* . [ ] ; , + =
* numtail is added, and Longfilename must be stored in disk .
*/
struct shortname_info {
unsigned char lower:1,
upper:1,
valid:1;
};
#define INIT_SHORTNAME_INFO(x) do { \
(x)->lower = 1; \
(x)->upper = 1; \
(x)->valid = 1; \
} while (0)
static inline int to_shortname_char(struct nls_table *nls,
unsigned char *buf, int buf_size,
wchar_t *src, struct shortname_info *info)
{
int len;
if (vfat_skip_char(*src)) {
info->valid = 0;
return 0;
}
if (vfat_replace_char(*src)) {
info->valid = 0;
buf[0] = '_';
return 1;
}
len = nls->uni2char(*src, buf, buf_size);
if (len <= 0) {
info->valid = 0;
buf[0] = '_';
len = 1;
} else if (len == 1) {
unsigned char prev = buf[0];
if (buf[0] >= 0x7F) {
info->lower = 0;
info->upper = 0;
}
buf[0] = nls_toupper(nls, buf[0]);
if (isalpha(buf[0])) {
if (buf[0] == prev)
info->lower = 0;
else
info->upper = 0;
}
} else {
info->lower = 0;
info->upper = 0;
}
return len;
}
/*
* Given a valid longname, create a unique shortname. Make sure the
* shortname does not exist
* Returns negative number on error, 0 for a normal
* return, and 1 for valid shortname
*/
static int vfat_create_shortname(struct inode *dir, struct nls_table *nls,
wchar_t *uname, int ulen,
unsigned char *name_res, unsigned char *lcase)
{
struct fat_mount_options *opts = &MSDOS_SB(dir->i_sb)->options;
wchar_t *ip, *ext_start, *end, *name_start;
unsigned char base[9], ext[4], buf[5], *p;
unsigned char charbuf[NLS_MAX_CHARSET_SIZE];
int chl, chi;
int sz = 0, extlen, baselen, i, numtail_baselen, numtail2_baselen;
int is_shortname;
struct shortname_info base_info, ext_info;
is_shortname = 1;
INIT_SHORTNAME_INFO(&base_info);
INIT_SHORTNAME_INFO(&ext_info);
/* Now, we need to create a shortname from the long name */
ext_start = end = &uname[ulen];
while (--ext_start >= uname) {
if (*ext_start == 0x002E) { /* is `.' */
if (ext_start == end - 1) {
sz = ulen;
ext_start = NULL;
}
break;
}
}
if (ext_start == uname - 1) {
sz = ulen;
ext_start = NULL;
} else if (ext_start) {
/*
* Names which start with a dot could be just
* an extension eg. "...test". In this case Win95
* uses the extension as the name and sets no extension.
*/
name_start = &uname[0];
while (name_start < ext_start) {
if (!vfat_skip_char(*name_start))
break;
name_start++;
}
if (name_start != ext_start) {
sz = ext_start - uname;
ext_start++;
} else {
sz = ulen;
ext_start = NULL;
}
}
numtail_baselen = 6;
numtail2_baselen = 2;
for (baselen = i = 0, p = base, ip = uname; i < sz; i++, ip++) {
chl = to_shortname_char(nls, charbuf, sizeof(charbuf),
ip, &base_info);
if (chl == 0)
continue;
if (baselen < 2 && (baselen + chl) > 2)
numtail2_baselen = baselen;
if (baselen < 6 && (baselen + chl) > 6)
numtail_baselen = baselen;
for (chi = 0; chi < chl; chi++) {
*p++ = charbuf[chi];
baselen++;
if (baselen >= 8)
break;
}
if (baselen >= 8) {
if ((chi < chl - 1) || (ip + 1) - uname < sz)
is_shortname = 0;
break;
}
}
if (baselen == 0) {
return -EINVAL;
}
extlen = 0;
if (ext_start) {
for (p = ext, ip = ext_start; extlen < 3 && ip < end; ip++) {
chl = to_shortname_char(nls, charbuf, sizeof(charbuf),
ip, &ext_info);
if (chl == 0)
continue;
if ((extlen + chl) > 3) {
is_shortname = 0;
break;
}
for (chi = 0; chi < chl; chi++) {
*p++ = charbuf[chi];
extlen++;
}
if (extlen >= 3) {
if (ip + 1 != end)
is_shortname = 0;
break;
}
}
}
ext[extlen] = '\0';
base[baselen] = '\0';
/* Yes, it can happen. ".\xe5" would do it. */
if (base[0] == DELETED_FLAG)
base[0] = 0x05;
/* OK, at this point we know that base is not longer than 8 symbols,
* ext is not longer than 3, base is nonempty, both don't contain
* any bad symbols (lowercase transformed to uppercase).
*/
memset(name_res, ' ', MSDOS_NAME);
memcpy(name_res, base, baselen);
memcpy(name_res + 8, ext, extlen);
*lcase = 0;
if (is_shortname && base_info.valid && ext_info.valid) {
if (vfat_find_form(dir, name_res) == 0)
return -EEXIST;
if (opts->shortname & VFAT_SFN_CREATE_WIN95) {
return (base_info.upper && ext_info.upper);
} else if (opts->shortname & VFAT_SFN_CREATE_WINNT) {
if ((base_info.upper || base_info.lower) &&
(ext_info.upper || ext_info.lower)) {
if (!base_info.upper && base_info.lower)
*lcase |= CASE_LOWER_BASE;
if (!ext_info.upper && ext_info.lower)
*lcase |= CASE_LOWER_EXT;
return 1;
}
return 0;
} else {
BUG();
}
}
if (opts->numtail == 0)
if (vfat_find_form(dir, name_res) < 0)
return 0;
/*
* Try to find a unique extension. This used to
* iterate through all possibilities sequentially,
* but that gave extremely bad performance. Windows
* only tries a few cases before using random
* values for part of the base.
*/
if (baselen > 6) {
baselen = numtail_baselen;
name_res[7] = ' ';
}
name_res[baselen] = '~';
for (i = 1; i < 10; i++) {
name_res[baselen + 1] = i + '0';
if (vfat_find_form(dir, name_res) < 0)
return 0;
}
i = jiffies;
sz = (jiffies >> 16) & 0x7;
if (baselen > 2) {
baselen = numtail2_baselen;
name_res[7] = ' ';
}
name_res[baselen + 4] = '~';
name_res[baselen + 5] = '1' + sz;
while (1) {
snprintf(buf, sizeof(buf), "%04X", i & 0xffff);
memcpy(&name_res[baselen], buf, 4);
if (vfat_find_form(dir, name_res) < 0)
break;
i -= 11;
}
return 0;
}
/* Translate a string, including coded sequences into Unicode */
static int
xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
int *longlen, int *outlen, int escape, int utf8,
struct nls_table *nls)
{
const unsigned char *ip;
unsigned char *op;
int i, fill;
int charlen;
if (utf8) {
*outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
(wchar_t *) outname, FAT_LFN_LEN + 2);
if (*outlen < 0)
return *outlen;
else if (*outlen > FAT_LFN_LEN)
return -ENAMETOOLONG;
op = &outname[*outlen * sizeof(wchar_t)];
} else {
for (i = 0, ip = name, op = outname, *outlen = 0;
i < len && *outlen < FAT_LFN_LEN;
*outlen += 1) {
if (escape && (*ip == ':')) {
u8 uc[2];
if (i > len - 5)
return -EINVAL;
if (hex2bin(uc, ip + 1, 2) < 0)
return -EINVAL;
*(wchar_t *)op = uc[0] << 8 | uc[1];
op += 2;
ip += 5;
i += 5;
} else {
charlen = nls->char2uni(ip, len - i,
(wchar_t *)op);
if (charlen < 0)
return -EINVAL;
ip += charlen;
i += charlen;
op += 2;
}
}
if (i < len)
return -ENAMETOOLONG;
}
*longlen = *outlen;
if (*outlen % 13) {
*op++ = 0;
*op++ = 0;
*outlen += 1;
if (*outlen % 13) {
fill = 13 - (*outlen % 13);
for (i = 0; i < fill; i++) {
*op++ = 0xff;
*op++ = 0xff;
}
*outlen += fill;
}
}
return 0;
}
static int vfat_build_slots(struct inode *dir, const unsigned char *name,
int len, int is_dir, int cluster,
struct timespec64 *ts,
struct msdos_dir_slot *slots, int *nr_slots)
{
struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
struct fat_mount_options *opts = &sbi->options;
struct msdos_dir_slot *ps;
struct msdos_dir_entry *de;
unsigned char cksum, lcase;
unsigned char msdos_name[MSDOS_NAME];
wchar_t *uname;
__le16 time, date;
u8 time_cs;
int err, ulen, usize, i;
loff_t offset;
*nr_slots = 0;
uname = __getname();
if (!uname)
return -ENOMEM;
err = xlate_to_uni(name, len, (unsigned char *)uname, &ulen, &usize,
opts->unicode_xlate, opts->utf8, sbi->nls_io);
if (err)
goto out_free;
err = vfat_is_used_badchars(uname, ulen);
if (err)
goto out_free;
err = vfat_create_shortname(dir, sbi->nls_disk, uname, ulen,
msdos_name, &lcase);
if (err < 0)
goto out_free;
else if (err == 1) {
de = (struct msdos_dir_entry *)slots;
err = 0;
goto shortname;
}
/* build the entry of long file name */
cksum = fat_checksum(msdos_name);
*nr_slots = usize / 13;
for (ps = slots, i = *nr_slots; i > 0; i--, ps++) {
ps->id = i;
ps->attr = ATTR_EXT;
ps->reserved = 0;
ps->alias_checksum = cksum;
ps->start = 0;
offset = (i - 1) * 13;
fatwchar_to16(ps->name0_4, uname + offset, 5);
fatwchar_to16(ps->name5_10, uname + offset + 5, 6);
fatwchar_to16(ps->name11_12, uname + offset + 11, 2);
}
slots[0].id |= 0x40;
de = (struct msdos_dir_entry *)ps;
shortname:
/* build the entry of 8.3 alias name */
(*nr_slots)++;
memcpy(de->name, msdos_name, MSDOS_NAME);
de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
de->lcase = lcase;
fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
de->time = de->ctime = time;
de->date = de->cdate = de->adate = date;
de->ctime_cs = time_cs;
fat_set_start(de, cluster);
de->size = 0;
out_free:
__putname(uname);
return err;
}
static int vfat_add_entry(struct inode *dir, const struct qstr *qname,
int is_dir, int cluster, struct timespec64 *ts,
struct fat_slot_info *sinfo)
{
struct msdos_dir_slot *slots;
unsigned int len;
int err, nr_slots;
len = vfat_striptail_len(qname);
if (len == 0)
return -ENOENT;
slots = kmalloc_array(MSDOS_SLOTS, sizeof(*slots), GFP_NOFS);
if (slots == NULL)
return -ENOMEM;
err = vfat_build_slots(dir, qname->name, len, is_dir, cluster, ts,
slots, &nr_slots);
if (err)
goto cleanup;
err = fat_add_entries(dir, slots, nr_slots, sinfo);
if (err)
goto cleanup;
/* update timestamp */
fat_truncate_time(dir, ts, S_CTIME|S_MTIME);
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
mark_inode_dirty(dir);
cleanup:
kfree(slots);
return err;
}
static int vfat_find(struct inode *dir, const struct qstr *qname,
struct fat_slot_info *sinfo)
{
unsigned int len = vfat_striptail_len(qname);
if (len == 0)
return -ENOENT;
return fat_search_long(dir, qname->name, len, sinfo);
}
static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct super_block *sb = dir->i_sb;
struct fat_slot_info sinfo;
struct inode *inode;
struct dentry *alias;
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
err = vfat_find(dir, &dentry->d_name, &sinfo);
if (err) {
if (err == -ENOENT) {
inode = NULL;
goto out;
}
goto error;
}
inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
brelse(sinfo.bh);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto error;
}
alias = d_find_alias(inode);
/*
* Checking "alias->d_parent == dentry->d_parent" to make sure
* FS is not corrupted (especially double linked dir).
*/
if (alias && alias->d_parent == dentry->d_parent) {
/*
* This inode has non anonymous-DCACHE_DISCONNECTED
* dentry. This means, the user did ->lookup() by an
* another name (longname vs 8.3 alias of it) in past.
*
* Switch to new one for reason of locality if possible.
*/
if (!S_ISDIR(inode->i_mode))
d_move(alias, dentry);
iput(inode);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return alias;
} else
dput(alias);
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
if (!inode)
vfat_d_version_set(dentry, inode_query_iversion(dir));
return d_splice_alias(inode, dentry);
error:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return ERR_PTR(err);
}
static int vfat_create(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, bool excl)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo);
if (err)
goto out;
inode_inc_iversion(dir);
inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
brelse(sinfo.bh);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto out;
}
inode_inc_iversion(inode);
d_instantiate(dentry, inode);
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return err;
}
static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct super_block *sb = dir->i_sb;
struct fat_slot_info sinfo;
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
err = fat_dir_empty(inode);
if (err)
goto out;
err = vfat_find(dir, &dentry->d_name, &sinfo);
if (err)
goto out;
err = fat_remove_entries(dir, &sinfo); /* and releases bh */
if (err)
goto out;
drop_nlink(dir);
clear_nlink(inode);
fat_truncate_time(inode, NULL, S_ATIME|S_MTIME);
fat_detach(inode);
vfat_d_version_set(dentry, inode_query_iversion(dir));
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return err;
}
static int vfat_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct super_block *sb = dir->i_sb;
struct fat_slot_info sinfo;
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
err = vfat_find(dir, &dentry->d_name, &sinfo);
if (err)
goto out;
err = fat_remove_entries(dir, &sinfo); /* and releases bh */
if (err)
goto out;
clear_nlink(inode);
fat_truncate_time(inode, NULL, S_ATIME|S_MTIME);
fat_detach(inode);
vfat_d_version_set(dentry, inode_query_iversion(dir));
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return err;
}
static struct dentry *vfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct super_block *sb = dir->i_sb;
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
int err, cluster;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
cluster = fat_alloc_new_dir(dir, &ts);
if (cluster < 0) {
err = cluster;
goto out;
}
err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo);
if (err)
goto out_free;
inode_inc_iversion(dir);
inc_nlink(dir);
inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
brelse(sinfo.bh);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
/* the directory was completed, just return a error */
goto out;
}
inode_inc_iversion(inode);
set_nlink(inode, 2);
d_instantiate(dentry, inode);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return NULL;
out_free:
fat_free_clusters(dir, cluster);
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return ERR_PTR(err);
}
static int vfat_get_dotdot_de(struct inode *inode, struct buffer_head **bh,
struct msdos_dir_entry **de)
{
if (S_ISDIR(inode->i_mode)) {
if (fat_get_dotdot_entry(inode, bh, de))
return -EIO;
}
return 0;
}
static int vfat_sync_ipos(struct inode *dir, struct inode *inode)
{
if (IS_DIRSYNC(dir))
return fat_sync_inode(inode);
mark_inode_dirty(inode);
return 0;
}
static int vfat_update_dotdot_de(struct inode *dir, struct inode *inode,
struct buffer_head *dotdot_bh,
struct msdos_dir_entry *dotdot_de)
{
fat_set_start(dotdot_de, MSDOS_I(dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, inode);
if (IS_DIRSYNC(dir))
return sync_dirty_buffer(dotdot_bh);
return 0;
}
static void vfat_update_dir_metadata(struct inode *dir, struct timespec64 *ts)
{
inode_inc_iversion(dir);
fat_truncate_time(dir, ts, S_CTIME | S_MTIME);
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
mark_inode_dirty(dir);
}
static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
struct buffer_head *dotdot_bh;
struct msdos_dir_entry *dotdot_de = NULL;
struct inode *old_inode, *new_inode;
struct fat_slot_info old_sinfo, sinfo;
struct timespec64 ts;
loff_t new_i_pos;
int err, is_dir, corrupt = 0;
struct super_block *sb = old_dir->i_sb;
old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
old_inode = d_inode(old_dentry);
new_inode = d_inode(new_dentry);
mutex_lock(&MSDOS_SB(sb)->s_lock);
err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo);
if (err)
goto out;
if (old_dir != new_dir) {
err = vfat_get_dotdot_de(old_inode, &dotdot_bh, &dotdot_de);
if (err)
goto out;
}
is_dir = S_ISDIR(old_inode->i_mode);
ts = current_time(old_dir);
if (new_inode) {
if (is_dir) {
err = fat_dir_empty(new_inode);
if (err)
goto out;
}
new_i_pos = MSDOS_I(new_inode)->i_pos;
fat_detach(new_inode);
} else {
err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0,
&ts, &sinfo);
if (err)
goto out;
new_i_pos = sinfo.i_pos;
}
inode_inc_iversion(new_dir);
fat_detach(old_inode);
fat_attach(old_inode, new_i_pos);
err = vfat_sync_ipos(new_dir, old_inode);
if (err)
goto error_inode;
if (dotdot_de) {
err = vfat_update_dotdot_de(new_dir, old_inode, dotdot_bh,
dotdot_de);
if (err)
goto error_dotdot;
drop_nlink(old_dir);
if (!new_inode)
inc_nlink(new_dir);
}
err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */
old_sinfo.bh = NULL;
if (err)
goto error_dotdot;
vfat_update_dir_metadata(old_dir, &ts);
if (new_inode) {
drop_nlink(new_inode);
if (is_dir)
drop_nlink(new_inode);
fat_truncate_time(new_inode, &ts, S_CTIME);
}
out:
brelse(sinfo.bh);
brelse(dotdot_bh);
brelse(old_sinfo.bh);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return err;
error_dotdot:
/* data cluster is shared, serious corruption */
corrupt = 1;
if (dotdot_de) {
corrupt |= vfat_update_dotdot_de(old_dir, old_inode, dotdot_bh,
dotdot_de);
}
error_inode:
fat_detach(old_inode);
fat_attach(old_inode, old_sinfo.i_pos);
if (new_inode) {
fat_attach(new_inode, new_i_pos);
if (corrupt)
corrupt |= fat_sync_inode(new_inode);
} else {
/*
* If new entry was not sharing the data cluster, it
* shouldn't be serious corruption.
*/
int err2 = fat_remove_entries(new_dir, &sinfo);
if (corrupt)
corrupt |= err2;
sinfo.bh = NULL;
}
if (corrupt < 0) {
fat_fs_error(new_dir->i_sb,
"%s: Filesystem corrupted (i_pos %lld)",
__func__, new_i_pos);
}
goto out;
}
static void vfat_exchange_ipos(struct inode *old_inode, struct inode *new_inode,
loff_t old_i_pos, loff_t new_i_pos)
{
fat_detach(old_inode);
fat_detach(new_inode);
fat_attach(old_inode, new_i_pos);
fat_attach(new_inode, old_i_pos);
}
static void vfat_move_nlink(struct inode *src, struct inode *dst)
{
drop_nlink(src);
inc_nlink(dst);
}
static int vfat_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
struct buffer_head *old_dotdot_bh = NULL, *new_dotdot_bh = NULL;
struct msdos_dir_entry *old_dotdot_de = NULL, *new_dotdot_de = NULL;
struct inode *old_inode, *new_inode;
struct timespec64 ts = current_time(old_dir);
loff_t old_i_pos, new_i_pos;
int err, corrupt = 0;
struct super_block *sb = old_dir->i_sb;
old_inode = d_inode(old_dentry);
new_inode = d_inode(new_dentry);
/* Acquire super block lock for the operation to be atomic */
mutex_lock(&MSDOS_SB(sb)->s_lock);
/* if directories are not the same, get ".." info to update */
if (old_dir != new_dir) {
err = vfat_get_dotdot_de(old_inode, &old_dotdot_bh,
&old_dotdot_de);
if (err)
goto out;
err = vfat_get_dotdot_de(new_inode, &new_dotdot_bh,
&new_dotdot_de);
if (err)
goto out;
}
old_i_pos = MSDOS_I(old_inode)->i_pos;
new_i_pos = MSDOS_I(new_inode)->i_pos;
vfat_exchange_ipos(old_inode, new_inode, old_i_pos, new_i_pos);
err = vfat_sync_ipos(old_dir, new_inode);
if (err)
goto error_exchange;
err = vfat_sync_ipos(new_dir, old_inode);
if (err)
goto error_exchange;
/* update ".." directory entry info */
if (old_dotdot_de) {
err = vfat_update_dotdot_de(new_dir, old_inode, old_dotdot_bh,
old_dotdot_de);
if (err)
goto error_old_dotdot;
}
if (new_dotdot_de) {
err = vfat_update_dotdot_de(old_dir, new_inode, new_dotdot_bh,
new_dotdot_de);
if (err)
goto error_new_dotdot;
}
/* if cross directory and only one is a directory, adjust nlink */
if (!old_dotdot_de != !new_dotdot_de) {
if (old_dotdot_de)
vfat_move_nlink(old_dir, new_dir);
else
vfat_move_nlink(new_dir, old_dir);
}
vfat_update_dir_metadata(old_dir, &ts);
/* if directories are not the same, update new_dir as well */
if (old_dir != new_dir)
vfat_update_dir_metadata(new_dir, &ts);
out:
brelse(old_dotdot_bh);
brelse(new_dotdot_bh);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return err;
error_new_dotdot:
if (new_dotdot_de) {
corrupt |= vfat_update_dotdot_de(new_dir, new_inode,
new_dotdot_bh, new_dotdot_de);
}
error_old_dotdot:
if (old_dotdot_de) {
corrupt |= vfat_update_dotdot_de(old_dir, old_inode,
old_dotdot_bh, old_dotdot_de);
}
error_exchange:
vfat_exchange_ipos(old_inode, new_inode, new_i_pos, old_i_pos);
corrupt |= vfat_sync_ipos(new_dir, new_inode);
corrupt |= vfat_sync_ipos(old_dir, old_inode);
if (corrupt < 0) {
fat_fs_error(new_dir->i_sb,
"%s: Filesystem corrupted (i_pos %lld, %lld)",
__func__, old_i_pos, new_i_pos);
}
goto out;
}
static int vfat_rename2(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
return -EINVAL;
if (flags & RENAME_EXCHANGE) {
return vfat_rename_exchange(old_dir, old_dentry,
new_dir, new_dentry);
}
/* VFS already handled RENAME_NOREPLACE, handle it as a normal rename */
return vfat_rename(old_dir, old_dentry, new_dir, new_dentry);
}
static const struct inode_operations vfat_dir_inode_operations = {
.create = vfat_create,
.lookup = vfat_lookup,
.unlink = vfat_unlink,
.mkdir = vfat_mkdir,
.rmdir = vfat_rmdir,
.rename = vfat_rename2,
.setattr = fat_setattr,
.getattr = fat_getattr,
.update_time = fat_update_time,
};
static void setup(struct super_block *sb)
{
MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
if (MSDOS_SB(sb)->options.name_check != 's')
sb->s_d_op = &vfat_ci_dentry_ops;
else
sb->s_d_op = &vfat_dentry_ops;
}
static int vfat_fill_super(struct super_block *sb, struct fs_context *fc)
{
return fat_fill_super(sb, fc, setup);
}
static int vfat_get_tree(struct fs_context *fc)
{
return get_tree_bdev(fc, vfat_fill_super);
}
static int vfat_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
return fat_parse_param(fc, param, true);
}
static const struct fs_context_operations vfat_context_ops = {
.parse_param = vfat_parse_param,
.get_tree = vfat_get_tree,
.reconfigure = fat_reconfigure,
.free = fat_free_fc,
};
static int vfat_init_fs_context(struct fs_context *fc)
{
int err;
/* Initialize with is_vfat == true */
err = fat_init_fs_context(fc, true);
if (err)
return err;
fc->ops = &vfat_context_ops;
return 0;
}
static struct file_system_type vfat_fs_type = {
.owner = THIS_MODULE,
.name = "vfat",
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
.init_fs_context = vfat_init_fs_context,
.parameters = fat_param_spec,
};
MODULE_ALIAS_FS("vfat");
static int __init init_vfat_fs(void)
{
return register_filesystem(&vfat_fs_type);
}
static void __exit exit_vfat_fs(void)
{
unregister_filesystem(&vfat_fs_type);
}
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("VFAT filesystem support");
MODULE_AUTHOR("Gordon Chaffee");
module_init(init_vfat_fs)
module_exit(exit_vfat_fs)