1
0
mirror of https://github.com/torvalds/linux.git synced 2025-04-11 04:53:02 +00:00

Merge tag 'md-6.15-20250312' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux into for-6.15/block

Merge MD changes from Yu:

"- fix recovery can preempt resync (Li Nan)
 - fix md-bitmap IO limit (Su Yue)
 - fix raid10 discard with REQ_NOWAIT (Xiao Ni)
 - fix raid1 memory leak (Zheng Qixing)
 - fix mddev uaf (Yu Kuai)
 - fix raid1,raid10 IO flags (Yu Kuai)
 - some refactor and cleanup (Yu Kuai)"

* tag 'md-6.15-20250312' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux:
  md/raid10: wait barrier before returning discard request with REQ_NOWAIT
  md/md-bitmap: fix wrong bitmap_limit for clustermd when write sb
  md/raid1,raid10: don't ignore IO flags
  md/raid5: merge reshape_progress checking inside get_reshape_loc()
  md: fix mddev uaf while iterating all_mddevs list
  md: switch md-cluster to use md_submodle_head
  md: don't export md_cluster_ops
  md/md-cluster: cleanup md_cluster_ops reference
  md: switch personalities to use md_submodule_head
  md: introduce struct md_submodule_head and APIs
  md: only include md-cluster.h if necessary
  md: merge common code into find_pers()
  md/raid1: fix memory leak in raid1_run() if no active rdev
  md: ensure resync is prioritized over recovery
This commit is contained in:
Jens Axboe 2025-03-13 05:34:51 -06:00
commit 017ff379b6
11 changed files with 342 additions and 273 deletions

@ -29,8 +29,10 @@
#include <linux/buffer_head.h>
#include <linux/seq_file.h>
#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"
#define BITMAP_MAJOR_LO 3
/* version 4 insists the bitmap is in little-endian order
@ -426,8 +428,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
PAGE_SHIFT;
unsigned long num_pages = bitmap->storage.file_pages;
unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT;
loff_t sboff, offset = mddev->bitmap_info.offset;
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
unsigned int size = PAGE_SIZE;
@ -436,7 +438,7 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
/* we compare length (page numbers), not page offset. */
if ((pg_index - store->sb_index) == store->file_pages - 1) {
if ((pg_index - store->sb_index) == num_pages - 1) {
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
if (last_page_size == 0)
@ -942,7 +944,7 @@ out:
bmname(bitmap), err);
goto out_no_sb;
}
bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev);
goto re_read;
}
@ -2021,7 +2023,7 @@ static void md_bitmap_free(void *data)
sysfs_put(bitmap->sysfs_can_clear);
if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev))
md_cluster_stop(bitmap->mddev);
/* Shouldn't be needed - but just in case.... */
@ -2229,7 +2231,7 @@ static int bitmap_load(struct mddev *mddev)
mddev_create_serial_pool(mddev, rdev);
if (mddev_is_clustered(mddev))
md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
/* Clear out old bitmap info first: Either there is none, or we
* are resuming after someone else has possibly changed things,

@ -1166,7 +1166,7 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
struct dlm_lock_resource *bm_lockres;
char str[64];
if (i == md_cluster_ops->slot_number(mddev))
if (i == slot_number(mddev))
continue;
bitmap = mddev->bitmap_ops->get_from_slot(mddev, i);
@ -1216,7 +1216,7 @@ out:
*/
static int cluster_check_sync_size(struct mddev *mddev)
{
int current_slot = md_cluster_ops->slot_number(mddev);
int current_slot = slot_number(mddev);
int node_num = mddev->bitmap_info.nodes;
struct dlm_lock_resource *bm_lockres;
struct md_bitmap_stats stats;
@ -1612,7 +1612,14 @@ out:
return err;
}
static const struct md_cluster_operations cluster_ops = {
static struct md_cluster_operations cluster_ops = {
.head = {
.type = MD_CLUSTER,
.id = ID_CLUSTER,
.name = "cluster",
.owner = THIS_MODULE,
},
.join = join,
.leave = leave,
.slot_number = slot_number,
@ -1642,13 +1649,12 @@ static int __init cluster_init(void)
{
pr_warn("md-cluster: support raid1 and raid10 (limited support)\n");
pr_info("Registering Cluster MD functions\n");
register_md_cluster_operations(&cluster_ops, THIS_MODULE);
return 0;
return register_md_submodule(&cluster_ops.head);
}
static void cluster_exit(void)
{
unregister_md_cluster_operations();
unregister_md_submodule(&cluster_ops.head);
}
module_init(cluster_init);

@ -10,6 +10,8 @@ struct mddev;
struct md_rdev;
struct md_cluster_operations {
struct md_submodule_head head;
int (*join)(struct mddev *mddev, int nodes);
int (*leave)(struct mddev *mddev);
int (*slot_number)(struct mddev *mddev);
@ -35,4 +37,8 @@ struct md_cluster_operations {
void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);
};
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
#endif /* _MD_CLUSTER_H */

@ -5,7 +5,6 @@
*/
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
@ -320,9 +319,13 @@ static void linear_quiesce(struct mddev *mddev, int state)
}
static struct md_personality linear_personality = {
.name = "linear",
.level = LEVEL_LINEAR,
.owner = THIS_MODULE,
.head = {
.type = MD_PERSONALITY,
.id = ID_LINEAR,
.name = "linear",
.owner = THIS_MODULE,
},
.make_request = linear_make_request,
.run = linear_run,
.free = linear_free,
@ -335,12 +338,12 @@ static struct md_personality linear_personality = {
static int __init linear_init(void)
{
return register_md_personality(&linear_personality);
return register_md_submodule(&linear_personality.head);
}
static void linear_exit(void)
{
unregister_md_personality(&linear_personality);
unregister_md_submodule(&linear_personality.head);
}
module_init(linear_init);

@ -79,16 +79,10 @@ static const char *action_name[NR_SYNC_ACTIONS] = {
[ACTION_IDLE] = "idle",
};
/* pers_list is a list of registered personalities protected by pers_lock. */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
static DEFINE_XARRAY(md_submodule);
static const struct kobj_type md_ktype;
const struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
@ -629,6 +623,12 @@ static void __mddev_put(struct mddev *mddev)
queue_work(md_misc_wq, &mddev->del_work);
}
static void mddev_put_locked(struct mddev *mddev)
{
if (atomic_dec_and_test(&mddev->active))
__mddev_put(mddev);
}
void mddev_put(struct mddev *mddev)
{
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
@ -888,16 +888,40 @@ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
static struct md_personality *find_pers(int level, char *clevel)
static struct md_personality *get_pers(int level, char *clevel)
{
struct md_personality *pers;
list_for_each_entry(pers, &pers_list, list) {
if (level != LEVEL_NONE && pers->level == level)
return pers;
if (strcmp(pers->name, clevel)==0)
return pers;
struct md_personality *ret = NULL;
struct md_submodule_head *head;
unsigned long i;
xa_lock(&md_submodule);
xa_for_each(&md_submodule, i, head) {
if (head->type != MD_PERSONALITY)
continue;
if ((level != LEVEL_NONE && head->id == level) ||
!strcmp(head->name, clevel)) {
if (try_module_get(head->owner))
ret = (void *)head;
break;
}
}
return NULL;
xa_unlock(&md_submodule);
if (!ret) {
if (level != LEVEL_NONE)
pr_warn("md: personality for level %d is not loaded!\n",
level);
else
pr_warn("md: personality for level %s is not loaded!\n",
clevel);
}
return ret;
}
static void put_pers(struct md_personality *pers)
{
module_put(pers->head.owner);
}
/* return the offset of the super block in 512byte sectors */
@ -1180,7 +1204,7 @@ int md_check_no_bitmap(struct mddev *mddev)
if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
return 0;
pr_warn("%s: bitmaps are not supported for %s\n",
mdname(mddev), mddev->pers->name);
mdname(mddev), mddev->pers->head.name);
return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);
@ -2626,11 +2650,11 @@ repeat:
force_change = 1;
if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
nospares = 1;
ret = md_cluster_ops->metadata_update_start(mddev);
ret = mddev->cluster_ops->metadata_update_start(mddev);
/* Has someone else has updated the sb */
if (!does_sb_need_changing(mddev)) {
if (ret == 0)
md_cluster_ops->metadata_update_cancel(mddev);
mddev->cluster_ops->metadata_update_cancel(mddev);
bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
BIT(MD_SB_CHANGE_DEVS) |
BIT(MD_SB_CHANGE_CLEAN));
@ -2770,7 +2794,7 @@ rewrite:
/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
if (mddev_is_clustered(mddev) && ret == 0)
md_cluster_ops->metadata_update_finish(mddev);
mddev->cluster_ops->metadata_update_finish(mddev);
if (mddev->in_sync != sync_req ||
!bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
@ -2929,7 +2953,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
else {
err = 0;
if (mddev_is_clustered(mddev))
err = md_cluster_ops->remove_disk(mddev, rdev);
err = mddev->cluster_ops->remove_disk(mddev, rdev);
if (err == 0) {
md_kick_rdev_from_array(rdev);
@ -3039,7 +3063,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
* by this node eventually
*/
if (!mddev_is_clustered(rdev->mddev) ||
(err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
(err = mddev->cluster_ops->gather_bitmaps(rdev)) == 0) {
clear_bit(Faulty, &rdev->flags);
err = add_bound_rdev(rdev);
}
@ -3847,7 +3871,7 @@ level_show(struct mddev *mddev, char *page)
spin_lock(&mddev->lock);
p = mddev->pers;
if (p)
ret = sprintf(page, "%s\n", p->name);
ret = sprintf(page, "%s\n", p->head.name);
else if (mddev->clevel[0])
ret = sprintf(page, "%s\n", mddev->clevel);
else if (mddev->level != LEVEL_NONE)
@ -3904,7 +3928,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
rv = -EINVAL;
if (!mddev->pers->quiesce) {
pr_warn("md: %s: %s does not support online personality change\n",
mdname(mddev), mddev->pers->name);
mdname(mddev), mddev->pers->head.name);
goto out_unlock;
}
@ -3918,24 +3942,20 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
if (request_module("md-%s", clevel) != 0)
request_module("md-level-%s", clevel);
spin_lock(&pers_lock);
pers = find_pers(level, clevel);
if (!pers || !try_module_get(pers->owner)) {
spin_unlock(&pers_lock);
pr_warn("md: personality %s not loaded\n", clevel);
pers = get_pers(level, clevel);
if (!pers) {
rv = -EINVAL;
goto out_unlock;
}
spin_unlock(&pers_lock);
if (pers == mddev->pers) {
/* Nothing to do! */
module_put(pers->owner);
put_pers(pers);
rv = len;
goto out_unlock;
}
if (!pers->takeover) {
module_put(pers->owner);
put_pers(pers);
pr_warn("md: %s: %s does not support personality takeover\n",
mdname(mddev), clevel);
rv = -EINVAL;
@ -3956,7 +3976,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->raid_disks -= mddev->delta_disks;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
module_put(pers->owner);
put_pers(pers);
pr_warn("md: %s: %s would not accept array\n",
mdname(mddev), clevel);
rv = PTR_ERR(priv);
@ -3971,7 +3991,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
oldpriv = mddev->private;
mddev->pers = pers;
mddev->private = priv;
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel));
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
@ -4013,7 +4033,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->to_remove = &md_redundancy_group;
}
module_put(oldpers->owner);
put_pers(oldpers);
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk < 0)
@ -5571,7 +5591,7 @@ __ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
{
if (mddev->pers == NULL || (mddev->pers->level != 1))
if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1))
return sprintf(page, "n/a\n");
else
return sprintf(page, "%d\n", mddev->serialize_policy);
@ -5597,7 +5617,7 @@ serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
err = mddev_suspend_and_lock(mddev);
if (err)
return err;
if (mddev->pers == NULL || (mddev->pers->level != 1)) {
if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1)) {
pr_err("md: serialize_policy is only effective for raid1\n");
err = -EINVAL;
goto unlock;
@ -6083,30 +6103,21 @@ int md_run(struct mddev *mddev)
goto exit_sync_set;
}
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
if (!pers || !try_module_get(pers->owner)) {
spin_unlock(&pers_lock);
if (mddev->level != LEVEL_NONE)
pr_warn("md: personality for level %d is not loaded!\n",
mddev->level);
else
pr_warn("md: personality for level %s is not loaded!\n",
mddev->clevel);
pers = get_pers(mddev->level, mddev->clevel);
if (!pers) {
err = -EINVAL;
goto abort;
}
spin_unlock(&pers_lock);
if (mddev->level != pers->level) {
mddev->level = pers->level;
mddev->new_level = pers->level;
if (mddev->level != pers->head.id) {
mddev->level = pers->head.id;
mddev->new_level = pers->head.id;
}
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel));
if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
module_put(pers->owner);
put_pers(pers);
err = -EINVAL;
goto abort;
}
@ -6233,7 +6244,7 @@ bitmap_abort:
if (mddev->private)
pers->free(mddev, mddev->private);
mddev->private = NULL;
module_put(pers->owner);
put_pers(pers);
mddev->bitmap_ops->destroy(mddev);
abort:
bioset_exit(&mddev->io_clone_set);
@ -6454,7 +6465,7 @@ static void __md_stop(struct mddev *mddev)
mddev->private = NULL;
if (pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group;
module_put(pers->owner);
put_pers(pers);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
bioset_exit(&mddev->bio_set);
@ -6970,7 +6981,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
set_bit(Candidate, &rdev->flags);
else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
/* --add initiated by this node */
err = md_cluster_ops->add_new_disk(mddev, rdev);
err = mddev->cluster_ops->add_new_disk(mddev, rdev);
if (err) {
export_rdev(rdev, mddev);
return err;
@ -6987,14 +6998,14 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
if (mddev_is_clustered(mddev)) {
if (info->state & (1 << MD_DISK_CANDIDATE)) {
if (!err) {
err = md_cluster_ops->new_disk_ack(mddev,
err == 0);
err = mddev->cluster_ops->new_disk_ack(
mddev, err == 0);
if (err)
md_kick_rdev_from_array(rdev);
}
} else {
if (err)
md_cluster_ops->add_new_disk_cancel(mddev);
mddev->cluster_ops->add_new_disk_cancel(mddev);
else
err = add_bound_rdev(rdev);
}
@ -7074,10 +7085,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
goto busy;
kick_rdev:
if (mddev_is_clustered(mddev)) {
if (md_cluster_ops->remove_disk(mddev, rdev))
goto busy;
}
if (mddev_is_clustered(mddev) &&
mddev->cluster_ops->remove_disk(mddev, rdev))
goto busy;
md_kick_rdev_from_array(rdev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@ -7380,7 +7390,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
rv = mddev->pers->resize(mddev, num_sectors);
if (!rv) {
if (mddev_is_clustered(mddev))
md_cluster_ops->update_size(mddev, old_dev_sectors);
mddev->cluster_ops->update_size(mddev, old_dev_sectors);
else if (!mddev_is_dm(mddev))
set_capacity_and_notify(mddev->gendisk,
mddev->array_sectors);
@ -7428,6 +7438,28 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
return rv;
}
static int get_cluster_ops(struct mddev *mddev)
{
xa_lock(&md_submodule);
mddev->cluster_ops = xa_load(&md_submodule, ID_CLUSTER);
if (mddev->cluster_ops &&
!try_module_get(mddev->cluster_ops->head.owner))
mddev->cluster_ops = NULL;
xa_unlock(&md_submodule);
return mddev->cluster_ops == NULL ? -ENOENT : 0;
}
static void put_cluster_ops(struct mddev *mddev)
{
if (!mddev->cluster_ops)
return;
mddev->cluster_ops->leave(mddev);
module_put(mddev->cluster_ops->head.owner);
mddev->cluster_ops = NULL;
}
/*
* update_array_info is used to change the configuration of an
* on-line array.
@ -7536,16 +7568,15 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
if (mddev->bitmap_info.nodes) {
/* hold PW on all the bitmap lock */
if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
if (mddev->cluster_ops->lock_all_bitmaps(mddev) <= 0) {
pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
rv = -EPERM;
md_cluster_ops->unlock_all_bitmaps(mddev);
mddev->cluster_ops->unlock_all_bitmaps(mddev);
goto err;
}
mddev->bitmap_info.nodes = 0;
md_cluster_ops->leave(mddev);
module_put(md_cluster_mod);
put_cluster_ops(mddev);
mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
}
mddev->bitmap_ops->destroy(mddev);
@ -7829,7 +7860,7 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
case CLUSTERED_DISK_NACK:
if (mddev_is_clustered(mddev))
md_cluster_ops->new_disk_ack(mddev, false);
mddev->cluster_ops->new_disk_ack(mddev, false);
else
err = -EINVAL;
goto unlock;
@ -8111,7 +8142,8 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
return;
mddev->pers->error_handler(mddev, rdev);
if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
if (mddev->pers->head.id == ID_RAID0 ||
mddev->pers->head.id == ID_LINEAR)
return;
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
@ -8149,14 +8181,17 @@ static void status_unused(struct seq_file *seq)
static void status_personalities(struct seq_file *seq)
{
struct md_personality *pers;
struct md_submodule_head *head;
unsigned long i;
seq_puts(seq, "Personalities : ");
spin_lock(&pers_lock);
list_for_each_entry(pers, &pers_list, list)
seq_printf(seq, "[%s] ", pers->name);
spin_unlock(&pers_lock);
xa_lock(&md_submodule);
xa_for_each(&md_submodule, i, head)
if (head->type == MD_PERSONALITY)
seq_printf(seq, "[%s] ", head->name);
xa_unlock(&md_submodule);
seq_puts(seq, "\n");
}
@ -8379,7 +8414,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " (read-only)");
if (mddev->ro == MD_AUTO_READ)
seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name);
seq_printf(seq, " %s", mddev->pers->head.name);
} else {
seq_printf(seq, "inactive");
}
@ -8448,9 +8483,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
status_unused(seq);
if (atomic_dec_and_test(&mddev->active))
__mddev_put(mddev);
mddev_put_locked(mddev);
return 0;
}
@ -8501,67 +8534,34 @@ static const struct proc_ops mdstat_proc_ops = {
.proc_poll = mdstat_poll,
};
int register_md_personality(struct md_personality *p)
int register_md_submodule(struct md_submodule_head *msh)
{
pr_debug("md: %s personality registered for level %d\n",
p->name, p->level);
spin_lock(&pers_lock);
list_add_tail(&p->list, &pers_list);
spin_unlock(&pers_lock);
return 0;
return xa_insert(&md_submodule, msh->id, msh, GFP_KERNEL);
}
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL_GPL(register_md_submodule);
int unregister_md_personality(struct md_personality *p)
void unregister_md_submodule(struct md_submodule_head *msh)
{
pr_debug("md: %s personality unregistered\n", p->name);
spin_lock(&pers_lock);
list_del_init(&p->list);
spin_unlock(&pers_lock);
return 0;
xa_erase(&md_submodule, msh->id);
}
EXPORT_SYMBOL(unregister_md_personality);
int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module)
{
int ret = 0;
spin_lock(&pers_lock);
if (md_cluster_ops != NULL)
ret = -EALREADY;
else {
md_cluster_ops = ops;
md_cluster_mod = module;
}
spin_unlock(&pers_lock);
return ret;
}
EXPORT_SYMBOL(register_md_cluster_operations);
int unregister_md_cluster_operations(void)
{
spin_lock(&pers_lock);
md_cluster_ops = NULL;
spin_unlock(&pers_lock);
return 0;
}
EXPORT_SYMBOL(unregister_md_cluster_operations);
EXPORT_SYMBOL_GPL(unregister_md_submodule);
int md_setup_cluster(struct mddev *mddev, int nodes)
{
int ret;
if (!md_cluster_ops)
request_module("md-cluster");
spin_lock(&pers_lock);
/* ensure module won't be unloaded */
if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
pr_warn("can't find md-cluster module or get its reference.\n");
spin_unlock(&pers_lock);
return -ENOENT;
}
spin_unlock(&pers_lock);
int ret = get_cluster_ops(mddev);
ret = md_cluster_ops->join(mddev, nodes);
if (ret) {
request_module("md-cluster");
ret = get_cluster_ops(mddev);
}
/* ensure module won't be unloaded */
if (ret) {
pr_warn("can't find md-cluster module or get its reference.\n");
return ret;
}
ret = mddev->cluster_ops->join(mddev, nodes);
if (!ret)
mddev->safemode_delay = 0;
return ret;
@ -8569,10 +8569,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes)
void md_cluster_stop(struct mddev *mddev)
{
if (!md_cluster_ops)
return;
md_cluster_ops->leave(mddev);
module_put(md_cluster_mod);
put_cluster_ops(mddev);
}
static int is_mddev_idle(struct mddev *mddev, int init)
@ -8965,7 +8962,7 @@ void md_do_sync(struct md_thread *thread)
}
if (mddev_is_clustered(mddev)) {
ret = md_cluster_ops->resync_start(mddev);
ret = mddev->cluster_ops->resync_start(mddev);
if (ret)
goto skip;
@ -8992,7 +8989,7 @@ void md_do_sync(struct md_thread *thread)
*
*/
if (mddev_is_clustered(mddev))
md_cluster_ops->resync_start_notify(mddev);
mddev->cluster_ops->resync_start_notify(mddev);
do {
int mddev2_minor = -1;
mddev->curr_resync = MD_RESYNC_DELAYED;
@ -9447,6 +9444,13 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
return true;
}
/* Check if resync is in progress. */
if (mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
return true;
}
/*
* Remove any failed drives, then add spares if possible. Spares are
* also removed and re-added, to allow the personality to fail the
@ -9463,13 +9467,6 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
return true;
}
/* Check if recovery is in progress. */
if (mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
return true;
}
/* Delay to choose resync/check/repair in md_do_sync(). */
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
return true;
@ -9776,7 +9773,7 @@ void md_reap_sync_thread(struct mddev *mddev)
* call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
* clustered raid */
if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
md_cluster_ops->resync_finish(mddev);
mddev->cluster_ops->resync_finish(mddev);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@ -9784,13 +9781,13 @@ void md_reap_sync_thread(struct mddev *mddev)
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
/*
* We call md_cluster_ops->update_size here because sync_size could
* We call mddev->cluster_ops->update_size here because sync_size could
* be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
* so it is time to update size across cluster.
*/
if (mddev_is_clustered(mddev) && is_reshaped
&& !test_bit(MD_CLOSING, &mddev->flags))
md_cluster_ops->update_size(mddev, old_dev_sectors);
mddev->cluster_ops->update_size(mddev, old_dev_sectors);
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_completed);
@ -9882,11 +9879,11 @@ EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
struct mddev *mddev, *n;
struct mddev *mddev;
int need_delay = 0;
spin_lock(&all_mddevs_lock);
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
if (!mddev_get(mddev))
continue;
spin_unlock(&all_mddevs_lock);
@ -9898,8 +9895,8 @@ static int md_notify_reboot(struct notifier_block *this,
mddev_unlock(mddev);
}
need_delay = 1;
mddev_put(mddev);
spin_lock(&all_mddevs_lock);
mddev_put_locked(mddev);
}
spin_unlock(&all_mddevs_lock);
@ -10016,7 +10013,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
!(le32_to_cpu(sb->feature_map) &
MD_FEATURE_RESHAPE_ACTIVE) &&
!md_cluster_ops->resync_status_get(mddev)) {
!mddev->cluster_ops->resync_status_get(mddev)) {
/*
* -1 to make raid1_add_disk() set conf->fullsync
* to 1. This could avoid skipping sync when the
@ -10232,7 +10229,7 @@ void md_autostart_arrays(int part)
static __exit void md_exit(void)
{
struct mddev *mddev, *n;
struct mddev *mddev;
int delay = 1;
unregister_blkdev(MD_MAJOR,"md");
@ -10253,7 +10250,7 @@ static __exit void md_exit(void)
remove_proc_entry("mdstat", NULL);
spin_lock(&all_mddevs_lock);
list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
if (!mddev_get(mddev))
continue;
spin_unlock(&all_mddevs_lock);
@ -10265,8 +10262,8 @@ static __exit void md_exit(void)
* the mddev for destruction by a workqueue, and the
* destroy_workqueue() below will wait for that to complete.
*/
mddev_put(mddev);
spin_lock(&all_mddevs_lock);
mddev_put_locked(mddev);
}
spin_unlock(&all_mddevs_lock);

@ -18,11 +18,37 @@
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/raid/md_u.h>
#include <trace/events/block.h>
#include "md-cluster.h"
#define MaxSector (~(sector_t)0)
enum md_submodule_type {
MD_PERSONALITY = 0,
MD_CLUSTER,
MD_BITMAP, /* TODO */
};
enum md_submodule_id {
ID_LINEAR = LEVEL_LINEAR,
ID_RAID0 = 0,
ID_RAID1 = 1,
ID_RAID4 = 4,
ID_RAID5 = 5,
ID_RAID6 = 6,
ID_RAID10 = 10,
ID_CLUSTER,
ID_BITMAP, /* TODO */
ID_LLBITMAP, /* TODO */
};
struct md_submodule_head {
enum md_submodule_type type;
enum md_submodule_id id;
const char *name;
struct module *owner;
};
/*
* These flags should really be called "NO_RETRY" rather than
* "FAILFAST" because they don't make any promise about time lapse,
@ -294,6 +320,7 @@ extern bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
extern void rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
struct md_cluster_info;
struct md_cluster_operations;
/**
* enum mddev_flags - md device flags.
@ -576,6 +603,7 @@ struct mddev {
mempool_t *serial_info_pool;
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
struct md_cluster_operations *cluster_ops;
unsigned int good_device_nr; /* good device num within cluster raid */
unsigned int noio_flag; /* for memalloc scope API */
@ -699,10 +727,8 @@ static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
struct md_personality
{
char *name;
int level;
struct list_head list;
struct module *owner;
struct md_submodule_head head;
bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
/*
* start up works that do NOT require md_thread. tasks that
@ -843,13 +869,9 @@ static inline void safe_put_page(struct page *p)
if (p) put_page(p);
}
extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
int register_md_submodule(struct md_submodule_head *msh);
void unregister_md_submodule(struct md_submodule_head *msh);
extern struct md_thread *md_register_thread(
void (*run)(struct md_thread *thread),
struct mddev *mddev,
@ -906,7 +928,6 @@ extern void md_idle_sync_thread(struct mddev *mddev);
extern void md_frozen_sync_thread(struct mddev *mddev);
extern void md_unfrozen_sync_thread(struct mddev *mddev);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev);
extern void mddev_destroy_serial_pool(struct mddev *mddev,
@ -928,7 +949,6 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
}
}
extern const struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
return mddev->cluster_info && mddev->bitmap_info.nodes > 1;

@ -811,9 +811,13 @@ static void raid0_quiesce(struct mddev *mddev, int quiesce)
static struct md_personality raid0_personality=
{
.name = "raid0",
.level = 0,
.owner = THIS_MODULE,
.head = {
.type = MD_PERSONALITY,
.id = ID_RAID0,
.name = "raid0",
.owner = THIS_MODULE,
},
.make_request = raid0_make_request,
.run = raid0_run,
.free = raid0_free,
@ -824,14 +828,14 @@ static struct md_personality raid0_personality=
.error_handler = raid0_error,
};
static int __init raid0_init (void)
static int __init raid0_init(void)
{
return register_md_personality (&raid0_personality);
return register_md_submodule(&raid0_personality.head);
}
static void raid0_exit (void)
static void __exit raid0_exit(void)
{
unregister_md_personality (&raid0_personality);
unregister_md_submodule(&raid0_personality.head);
}
module_init(raid0_init);

@ -287,8 +287,8 @@ static inline bool raid1_should_read_first(struct mddev *mddev,
return true;
if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, READ, this_sector,
this_sector + len))
mddev->cluster_ops->area_resyncing(mddev, READ, this_sector,
this_sector + len))
return true;
return false;

@ -36,6 +36,7 @@
#include "md.h"
#include "raid1.h"
#include "md-bitmap.h"
#include "md-cluster.h"
#define UNSUPPORTED_MDDEV_FLAGS \
((1L << MD_HAS_JOURNAL) | \
@ -45,6 +46,7 @@
static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
static void raid1_free(struct mddev *mddev, void *priv);
#define RAID_1_10_NAME "raid1"
#include "raid1-10.c"
@ -1315,8 +1317,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
struct r1conf *conf = mddev->private;
struct raid1_info *mirror;
struct bio *read_bio;
const enum req_op op = bio_op(bio);
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
int rdisk, error;
bool r1bio_existed = !!r1_bio;
@ -1404,7 +1404,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
read_bio->bi_end_io = raid1_end_read_request;
read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &mirror->rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@ -1467,7 +1466,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
bool is_discard = (bio_op(bio) == REQ_OP_DISCARD);
if (mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
mddev->cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio))) {
DEFINE_WAIT(w);
@ -1478,7 +1477,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (;;) {
prepare_to_wait(&conf->wait_barrier,
&w, TASK_IDLE);
if (!md_cluster_ops->area_resyncing(mddev, WRITE,
if (!mddev->cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector,
bio_end_sector(bio)))
break;
@ -1653,8 +1652,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
mbio->bi_end_io = raid1_end_write_request;
mbio->bi_opf = bio_op(bio) |
(bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC));
if (test_bit(FailFast, &rdev->flags) &&
!test_bit(WriteMostly, &rdev->flags) &&
conf->raid_disks - mddev->degraded > 1)
@ -3038,9 +3035,9 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
conf->cluster_sync_low = mddev->curr_resync_completed;
conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
/* Send resync message */
md_cluster_ops->resync_info_update(mddev,
conf->cluster_sync_low,
conf->cluster_sync_high);
mddev->cluster_ops->resync_info_update(mddev,
conf->cluster_sync_low,
conf->cluster_sync_high);
}
/* For a user-requested sync, we read all readable devices and do a
@ -3258,8 +3255,11 @@ static int raid1_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid1_set_limits(mddev);
if (ret)
if (ret) {
if (!mddev->private)
raid1_free(mddev, conf);
return ret;
}
}
mddev->degraded = 0;
@ -3273,6 +3273,8 @@ static int raid1_run(struct mddev *mddev)
*/
if (conf->raid_disks - mddev->degraded < 1) {
md_unregister_thread(mddev, &conf->thread);
if (!mddev->private)
raid1_free(mddev, conf);
return -EINVAL;
}
@ -3493,9 +3495,13 @@ static void *raid1_takeover(struct mddev *mddev)
static struct md_personality raid1_personality =
{
.name = "raid1",
.level = 1,
.owner = THIS_MODULE,
.head = {
.type = MD_PERSONALITY,
.id = ID_RAID1,
.name = "raid1",
.owner = THIS_MODULE,
},
.make_request = raid1_make_request,
.run = raid1_run,
.free = raid1_free,
@ -3512,18 +3518,18 @@ static struct md_personality raid1_personality =
.takeover = raid1_takeover,
};
static int __init raid_init(void)
static int __init raid1_init(void)
{
return register_md_personality(&raid1_personality);
return register_md_submodule(&raid1_personality.head);
}
static void raid_exit(void)
static void __exit raid1_exit(void)
{
unregister_md_personality(&raid1_personality);
unregister_md_submodule(&raid1_personality.head);
}
module_init(raid_init);
module_exit(raid_exit);
module_init(raid1_init);
module_exit(raid1_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
MODULE_ALIAS("md-personality-3"); /* RAID1 */

@ -24,6 +24,7 @@
#include "raid10.h"
#include "raid0.h"
#include "md-bitmap.h"
#include "md-cluster.h"
/*
* RAID10 provides a combination of RAID0 and RAID1 functionality.
@ -1146,8 +1147,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
{
struct r10conf *conf = mddev->private;
struct bio *read_bio;
const enum req_op op = bio_op(bio);
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
struct md_rdev *rdev;
char b[BDEVNAME_SIZE];
@ -1228,7 +1227,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
read_bio->bi_end_io = raid10_end_read_request;
read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &rdev->flags) &&
test_bit(R10BIO_FailFast, &r10_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@ -1247,10 +1245,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
struct bio *bio, bool replacement,
int n_copy)
{
const enum req_op op = bio_op(bio);
const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC;
unsigned long flags;
struct r10conf *conf = mddev->private;
struct md_rdev *rdev;
@ -1269,7 +1263,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
mbio->bi_end_io = raid10_end_write_request;
mbio->bi_opf = op | do_sync | do_fua | do_atomic;
if (!replacement && test_bit(FailFast,
&conf->mirrors[devnum].rdev->flags)
&& enough(conf, devnum))
@ -1355,9 +1348,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
int error;
if ((mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector,
bio_end_sector(bio)))) {
mddev->cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector,
bio_end_sector(bio)))) {
DEFINE_WAIT(w);
/* Bail out if REQ_NOWAIT is set for the bio */
if (bio->bi_opf & REQ_NOWAIT) {
@ -1367,7 +1360,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
for (;;) {
prepare_to_wait(&conf->wait_barrier,
&w, TASK_IDLE);
if (!md_cluster_ops->area_resyncing(mddev, WRITE,
if (!mddev->cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio)))
break;
schedule();
@ -1631,11 +1624,10 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return -EAGAIN;
if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
bio_wouldblock_error(bio);
return 0;
}
wait_barrier(conf, false);
/*
* Check reshape again to avoid reshape happens after checking
@ -3716,7 +3708,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
conf->cluster_sync_low = mddev->curr_resync_completed;
raid10_set_cluster_sync_high(conf);
/* Send resync message */
md_cluster_ops->resync_info_update(mddev,
mddev->cluster_ops->resync_info_update(mddev,
conf->cluster_sync_low,
conf->cluster_sync_high);
}
@ -3749,7 +3741,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
}
if (broadcast_msg) {
raid10_set_cluster_sync_high(conf);
md_cluster_ops->resync_info_update(mddev,
mddev->cluster_ops->resync_info_update(mddev,
conf->cluster_sync_low,
conf->cluster_sync_high);
}
@ -4543,7 +4535,7 @@ static int raid10_start_reshape(struct mddev *mddev)
if (ret)
goto abort;
ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
ret = mddev->cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
if (ret) {
mddev->bitmap_ops->resize(mddev, oldsize, 0, false);
goto abort;
@ -4834,7 +4826,7 @@ read_more:
conf->cluster_sync_low = sb_reshape_pos;
}
md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
mddev->cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
conf->cluster_sync_high);
}
@ -4979,7 +4971,7 @@ static void raid10_update_reshape_pos(struct mddev *mddev)
struct r10conf *conf = mddev->private;
sector_t lo, hi;
md_cluster_ops->resync_info_get(mddev, &lo, &hi);
mddev->cluster_ops->resync_info_get(mddev, &lo, &hi);
if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo))
|| mddev->reshape_position == MaxSector)
conf->reshape_progress = mddev->reshape_position;
@ -5125,9 +5117,13 @@ static void raid10_finish_reshape(struct mddev *mddev)
static struct md_personality raid10_personality =
{
.name = "raid10",
.level = 10,
.owner = THIS_MODULE,
.head = {
.type = MD_PERSONALITY,
.id = ID_RAID10,
.name = "raid10",
.owner = THIS_MODULE,
},
.make_request = raid10_make_request,
.run = raid10_run,
.free = raid10_free,
@ -5147,18 +5143,18 @@ static struct md_personality raid10_personality =
.update_reshape_pos = raid10_update_reshape_pos,
};
static int __init raid_init(void)
static int __init raid10_init(void)
{
return register_md_personality(&raid10_personality);
return register_md_submodule(&raid10_personality.head);
}
static void raid_exit(void)
static void __exit raid10_exit(void)
{
unregister_md_personality(&raid10_personality);
unregister_md_submodule(&raid10_personality.head);
}
module_init(raid_init);
module_exit(raid_exit);
module_init(raid10_init);
module_exit(raid10_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
MODULE_ALIAS("md-personality-9"); /* RAID10 */

@ -5858,6 +5858,9 @@ static enum reshape_loc get_reshape_loc(struct mddev *mddev,
struct r5conf *conf, sector_t logical_sector)
{
sector_t reshape_progress, reshape_safe;
if (likely(conf->reshape_progress == MaxSector))
return LOC_NO_RESHAPE;
/*
* Spinlock is needed as reshape_progress may be
* 64bit on a 32bit platform, and so it might be
@ -5935,22 +5938,19 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
const int rw = bio_data_dir(bi);
enum stripe_result ret;
struct stripe_head *sh;
enum reshape_loc loc;
sector_t new_sector;
int previous = 0, flags = 0;
int seq, dd_idx;
seq = read_seqcount_begin(&conf->gen_lock);
if (unlikely(conf->reshape_progress != MaxSector)) {
enum reshape_loc loc = get_reshape_loc(mddev, conf,
logical_sector);
if (loc == LOC_INSIDE_RESHAPE) {
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out;
}
if (loc == LOC_AHEAD_OF_RESHAPE)
previous = 1;
loc = get_reshape_loc(mddev, conf, logical_sector);
if (loc == LOC_INSIDE_RESHAPE) {
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out;
}
if (loc == LOC_AHEAD_OF_RESHAPE)
previous = 1;
new_sector = raid5_compute_sector(conf, logical_sector, previous,
&dd_idx, NULL);
@ -6127,7 +6127,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
(conf->reshape_progress != MaxSector) &&
get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
bio_wouldblock_error(bi);
if (rw == WRITE)
@ -8954,9 +8953,13 @@ static void raid5_prepare_suspend(struct mddev *mddev)
static struct md_personality raid6_personality =
{
.name = "raid6",
.level = 6,
.owner = THIS_MODULE,
.head = {
.type = MD_PERSONALITY,
.id = ID_RAID6,
.name = "raid6",
.owner = THIS_MODULE,
},
.make_request = raid5_make_request,
.run = raid5_run,
.start = raid5_start,
@ -8980,9 +8983,13 @@ static struct md_personality raid6_personality =
};
static struct md_personality raid5_personality =
{
.name = "raid5",
.level = 5,
.owner = THIS_MODULE,
.head = {
.type = MD_PERSONALITY,
.id = ID_RAID5,
.name = "raid5",
.owner = THIS_MODULE,
},
.make_request = raid5_make_request,
.run = raid5_run,
.start = raid5_start,
@ -9007,9 +9014,13 @@ static struct md_personality raid5_personality =
static struct md_personality raid4_personality =
{
.name = "raid4",
.level = 4,
.owner = THIS_MODULE,
.head = {
.type = MD_PERSONALITY,
.id = ID_RAID4,
.name = "raid4",
.owner = THIS_MODULE,
},
.make_request = raid5_make_request,
.run = raid5_run,
.start = raid5_start,
@ -9045,21 +9056,39 @@ static int __init raid5_init(void)
"md/raid5:prepare",
raid456_cpu_up_prepare,
raid456_cpu_dead);
if (ret) {
destroy_workqueue(raid5_wq);
return ret;
}
register_md_personality(&raid6_personality);
register_md_personality(&raid5_personality);
register_md_personality(&raid4_personality);
if (ret)
goto err_destroy_wq;
ret = register_md_submodule(&raid6_personality.head);
if (ret)
goto err_cpuhp_remove;
ret = register_md_submodule(&raid5_personality.head);
if (ret)
goto err_unregister_raid6;
ret = register_md_submodule(&raid4_personality.head);
if (ret)
goto err_unregister_raid5;
return 0;
err_unregister_raid5:
unregister_md_submodule(&raid5_personality.head);
err_unregister_raid6:
unregister_md_submodule(&raid6_personality.head);
err_cpuhp_remove:
cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
err_destroy_wq:
destroy_workqueue(raid5_wq);
return ret;
}
static void raid5_exit(void)
static void __exit raid5_exit(void)
{
unregister_md_personality(&raid6_personality);
unregister_md_personality(&raid5_personality);
unregister_md_personality(&raid4_personality);
unregister_md_submodule(&raid6_personality.head);
unregister_md_submodule(&raid5_personality.head);
unregister_md_submodule(&raid4_personality.head);
cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
destroy_workqueue(raid5_wq);
}