linux/fs/mnt_idmapping.c
Christian Brauner 37c4a9590e
statmount: allow to retrieve idmappings
This adds the STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP options.
It allows the retrieval of idmappings via statmount().

Currently it isn't possible to figure out what idmappings are applied to
an idmapped mount. This information is often crucial. Before statmount()
the only realistic options for an interface like this would have been to
add it to /proc/<pid>/fdinfo/<nr> or to expose it in
/proc/<pid>/mountinfo. Both solution would have been pretty ugly and
would've shown information that is of strong interest to some
application but not all. statmount() is perfect for this.

The idmappings applied to an idmapped mount are shown relative to the
caller's user namespace. This is the most useful solution that doesn't
risk leaking information or confuse the caller.

For example, an idmapped mount might have been created with the
following idmappings:

    mount --bind -o X-mount.idmap="0:10000:1000 2000:2000:1 3000:3000:1" /srv /opt

Listing the idmappings through statmount() in the same context shows:

    mnt_id:        2147485088
    mnt_parent_id: 2147484816
    fs_type:       btrfs
    mnt_root:      /srv
    mnt_point:     /opt
    mnt_opts:      ssd,discard=async,space_cache=v2,subvolid=5,subvol=/
    mnt_uidmap[0]: 0 10000 1000
    mnt_uidmap[1]: 2000 2000 1
    mnt_uidmap[2]: 3000 3000 1
    mnt_gidmap[0]: 0 10000 1000
    mnt_gidmap[1]: 2000 2000 1
    mnt_gidmap[2]: 3000 3000 1

But the idmappings might not always be resolvable in the caller's user
namespace. For example:

    unshare --user --map-root

In this case statmount() will skip any mappings that fil to resolve in
the caller's idmapping:

    mnt_id:        2147485087
    mnt_parent_id: 2147484016
    fs_type:       btrfs
    mnt_root:      /srv
    mnt_point:     /opt
    mnt_opts:      ssd,discard=async,space_cache=v2,subvolid=5,subvol=/

The caller can differentiate between a mount not being idmapped and a
mount that is idmapped but where all mappings fail to resolve in the
caller's idmapping by check for the STATMOUNT_MNT_{G,U}IDMAP flag being
raised but the number of mappings in ->mnt_{g,u}idmap_num being zero.

Note that statmount() requires that the whole range must be resolvable
in the caller's user namespace. If a subrange fails to map it will still
list the map as not resolvable. This is a practical compromise to avoid
having to find which subranges are resovable and wich aren't.

Idmappings are listed as a string array with each mapping separated by
zero bytes. This allows to retrieve the idmappings and immediately use
them for writing to e.g., /proc/<pid>/{g,u}id_map and it also allow for
simple iteration like:

    if (stmnt->mask & STATMOUNT_MNT_UIDMAP) {
            const char *idmap = stmnt->str + stmnt->mnt_uidmap;

            for (size_t idx = 0; idx < stmnt->mnt_uidmap_nr; idx++) {
                    printf("mnt_uidmap[%lu]: %s\n", idx, idmap);
                    idmap += strlen(idmap) + 1;
            }
    }

Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-2-007720f39f2e@kernel.org
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-02-12 12:12:27 +01:00

388 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Christian Brauner <brauner@kernel.org> */
#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
#include <linux/user_namespace.h>
#include <linux/seq_file.h>
#include "internal.h"
/*
* Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
* never from raw values. These are just internal helpers.
*/
#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }
struct mnt_idmap {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
refcount_t count;
};
/*
* Carries the initial idmapping of 0:0:4294967295 which is an identity
* mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
* mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
*/
struct mnt_idmap nop_mnt_idmap = {
.count = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL_GPL(nop_mnt_idmap);
/*
* Carries the invalid idmapping of a full 0-4294967295 {g,u}id range.
* This means that all {g,u}ids are mapped to INVALID_VFS{G,U}ID.
*/
struct mnt_idmap invalid_mnt_idmap = {
.count = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL_GPL(invalid_mnt_idmap);
/**
* initial_idmapping - check whether this is the initial mapping
* @ns: idmapping to check
*
* Check whether this is the initial mapping, mapping 0 to 0, 1 to 1,
* [...], 1000 to 1000 [...].
*
* Return: true if this is the initial mapping, false if not.
*/
static inline bool initial_idmapping(const struct user_namespace *ns)
{
return ns == &init_user_ns;
}
/**
* make_vfsuid - map a filesystem kuid according to an idmapping
* @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @kuid : kuid to be mapped
*
* Take a @kuid and remap it from @fs_userns into @idmap. Use this
* function when preparing a @kuid to be reported to userspace.
*
* If initial_idmapping() determines that this is not an idmapped mount
* we can simply return @kuid unchanged.
* If initial_idmapping() tells us that the filesystem is not mounted with an
* idmapping we know the value of @kuid won't change when calling
* from_kuid() so we can simply retrieve the value via __kuid_val()
* directly.
*
* Return: @kuid mapped according to @idmap.
* If @kuid has no mapping in either @idmap or @fs_userns INVALID_UID is
* returned.
*/
vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns,
kuid_t kuid)
{
uid_t uid;
if (idmap == &nop_mnt_idmap)
return VFSUIDT_INIT(kuid);
if (idmap == &invalid_mnt_idmap)
return INVALID_VFSUID;
if (initial_idmapping(fs_userns))
uid = __kuid_val(kuid);
else
uid = from_kuid(fs_userns, kuid);
if (uid == (uid_t)-1)
return INVALID_VFSUID;
return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
}
EXPORT_SYMBOL_GPL(make_vfsuid);
/**
* make_vfsgid - map a filesystem kgid according to an idmapping
* @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @kgid : kgid to be mapped
*
* Take a @kgid and remap it from @fs_userns into @idmap. Use this
* function when preparing a @kgid to be reported to userspace.
*
* If initial_idmapping() determines that this is not an idmapped mount
* we can simply return @kgid unchanged.
* If initial_idmapping() tells us that the filesystem is not mounted with an
* idmapping we know the value of @kgid won't change when calling
* from_kgid() so we can simply retrieve the value via __kgid_val()
* directly.
*
* Return: @kgid mapped according to @idmap.
* If @kgid has no mapping in either @idmap or @fs_userns INVALID_GID is
* returned.
*/
vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns, kgid_t kgid)
{
gid_t gid;
if (idmap == &nop_mnt_idmap)
return VFSGIDT_INIT(kgid);
if (idmap == &invalid_mnt_idmap)
return INVALID_VFSGID;
if (initial_idmapping(fs_userns))
gid = __kgid_val(kgid);
else
gid = from_kgid(fs_userns, kgid);
if (gid == (gid_t)-1)
return INVALID_VFSGID;
return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid));
}
EXPORT_SYMBOL_GPL(make_vfsgid);
/**
* from_vfsuid - map a vfsuid into the filesystem idmapping
* @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @vfsuid : vfsuid to be mapped
*
* Map @vfsuid into the filesystem idmapping. This function has to be used in
* order to e.g. write @vfsuid to inode->i_uid.
*
* Return: @vfsuid mapped into the filesystem idmapping
*/
kuid_t from_vfsuid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns, vfsuid_t vfsuid)
{
uid_t uid;
if (idmap == &nop_mnt_idmap)
return AS_KUIDT(vfsuid);
if (idmap == &invalid_mnt_idmap)
return INVALID_UID;
uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
if (uid == (uid_t)-1)
return INVALID_UID;
if (initial_idmapping(fs_userns))
return KUIDT_INIT(uid);
return make_kuid(fs_userns, uid);
}
EXPORT_SYMBOL_GPL(from_vfsuid);
/**
* from_vfsgid - map a vfsgid into the filesystem idmapping
* @idmap: the mount's idmapping
* @fs_userns: the filesystem's idmapping
* @vfsgid : vfsgid to be mapped
*
* Map @vfsgid into the filesystem idmapping. This function has to be used in
* order to e.g. write @vfsgid to inode->i_gid.
*
* Return: @vfsgid mapped into the filesystem idmapping
*/
kgid_t from_vfsgid(struct mnt_idmap *idmap,
struct user_namespace *fs_userns, vfsgid_t vfsgid)
{
gid_t gid;
if (idmap == &nop_mnt_idmap)
return AS_KGIDT(vfsgid);
if (idmap == &invalid_mnt_idmap)
return INVALID_GID;
gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
if (gid == (gid_t)-1)
return INVALID_GID;
if (initial_idmapping(fs_userns))
return KGIDT_INIT(gid);
return make_kgid(fs_userns, gid);
}
EXPORT_SYMBOL_GPL(from_vfsgid);
#ifdef CONFIG_MULTIUSER
/**
* vfsgid_in_group_p() - check whether a vfsuid matches the caller's groups
* @vfsgid: the mnt gid to match
*
* This function can be used to determine whether @vfsuid matches any of the
* caller's groups.
*
* Return: 1 if vfsuid matches caller's groups, 0 if not.
*/
int vfsgid_in_group_p(vfsgid_t vfsgid)
{
return in_group_p(AS_KGIDT(vfsgid));
}
#else
int vfsgid_in_group_p(vfsgid_t vfsgid)
{
return 1;
}
#endif
EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
static int copy_mnt_idmap(struct uid_gid_map *map_from,
struct uid_gid_map *map_to)
{
struct uid_gid_extent *forward, *reverse;
u32 nr_extents = READ_ONCE(map_from->nr_extents);
/* Pairs with smp_wmb() when writing the idmapping. */
smp_rmb();
/*
* Don't blindly copy @map_to into @map_from if nr_extents is
* smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS. Since we
* read @nr_extents someone could have written an idmapping and
* then we might end up with inconsistent data. So just don't do
* anything at all.
*/
if (nr_extents == 0)
return -EINVAL;
/*
* Here we know that nr_extents is greater than zero which means
* a map has been written. Since idmappings can't be changed
* once they have been written we know that we can safely copy
* from @map_to into @map_from.
*/
if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
*map_to = *map_from;
return 0;
}
forward = kmemdup_array(map_from->forward, nr_extents,
sizeof(struct uid_gid_extent),
GFP_KERNEL_ACCOUNT);
if (!forward)
return -ENOMEM;
reverse = kmemdup_array(map_from->reverse, nr_extents,
sizeof(struct uid_gid_extent),
GFP_KERNEL_ACCOUNT);
if (!reverse) {
kfree(forward);
return -ENOMEM;
}
/*
* The idmapping isn't exposed anywhere so we don't need to care
* about ordering between extent pointers and @nr_extents
* initialization.
*/
map_to->forward = forward;
map_to->reverse = reverse;
map_to->nr_extents = nr_extents;
return 0;
}
static void free_mnt_idmap(struct mnt_idmap *idmap)
{
if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
kfree(idmap->uid_map.forward);
kfree(idmap->uid_map.reverse);
}
if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
kfree(idmap->gid_map.forward);
kfree(idmap->gid_map.reverse);
}
kfree(idmap);
}
struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
{
struct mnt_idmap *idmap;
int ret;
idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
if (!idmap)
return ERR_PTR(-ENOMEM);
refcount_set(&idmap->count, 1);
ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map);
if (!ret)
ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map);
if (ret) {
free_mnt_idmap(idmap);
idmap = ERR_PTR(ret);
}
return idmap;
}
/**
* mnt_idmap_get - get a reference to an idmapping
* @idmap: the idmap to bump the reference on
*
* If @idmap is not the @nop_mnt_idmap bump the reference count.
*
* Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed.
*/
struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap)
{
if (idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap)
refcount_inc(&idmap->count);
return idmap;
}
EXPORT_SYMBOL_GPL(mnt_idmap_get);
/**
* mnt_idmap_put - put a reference to an idmapping
* @idmap: the idmap to put the reference on
*
* If this is a non-initial idmapping, put the reference count when a mount is
* released and free it if we're the last user.
*/
void mnt_idmap_put(struct mnt_idmap *idmap)
{
if (idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap &&
refcount_dec_and_test(&idmap->count))
free_mnt_idmap(idmap);
}
EXPORT_SYMBOL_GPL(mnt_idmap_put);
int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map)
{
struct uid_gid_map *map, *map_up;
u32 idx, nr_mappings;
if (!is_valid_mnt_idmap(idmap))
return 0;
/*
* Idmappings are shown relative to the caller's idmapping.
* This is both the most intuitive and most useful solution.
*/
if (uid_map) {
map = &idmap->uid_map;
map_up = &current_user_ns()->uid_map;
} else {
map = &idmap->gid_map;
map_up = &current_user_ns()->gid_map;
}
for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) {
uid_t lower;
struct uid_gid_extent *extent;
if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
extent = &map->extent[idx];
else
extent = &map->forward[idx];
/*
* Verify that the whole range of the mapping can be
* resolved in the caller's idmapping. If it cannot be
* resolved skip the mapping.
*/
lower = map_id_range_up(map_up, extent->lower_first, extent->count);
if (lower == (uid_t) -1)
continue;
seq_printf(seq, "%u %u %u", extent->first, lower, extent->count);
seq->count++; /* mappings are separated by \0 */
if (seq_has_overflowed(seq))
return -EAGAIN;
nr_mappings++;
}
return nr_mappings;
}