3 Star 0 Fork 0

mirrors_git_kernel_org/pub_scm_utils_mdadm_mdadm

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
Manage.c 48.66 KB
一键复制 编辑 原始数据 按行查看 历史
Mariusz Tkaczyk 提交于 2024-10-07 11:45 . sysfs: add sysfs_open_memb_attr()
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851
/*
* mdadm - manage Linux "md" devices aka RAID arrays.
*
* Copyright (C) 2001-2013 Neil Brown <[email protected]>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Author: Neil Brown
* Email: <[email protected]>
*/
#include "mdadm.h"
#include "md_u.h"
#include "md_p.h"
#include "udev.h"
#include "xmalloc.h"
#include <ctype.h>
/*
 * Manage_ro() - switch an array to read-only or read-write.
 * @devname: array name, used only in error messages.
 * @fd: open descriptor of the md device.  On the externally-managed
 *	read-only path close_fd() is called on it before the state change.
 * @readonly: > 0 requests read-only.  For natively managed arrays < 0
 *	requests read-write and 0 changes nothing; for externally-managed
 *	subarrays any value <= 0 takes the read-write path.
 *
 * Natively managed arrays must be active and are switched with the
 * STOP_ARRAY_RO / RESTART_ARRAY_RW ioctls.  Externally-managed subarrays
 * are switched through sysfs, after rewriting metadata_version so that
 * mdmon doesn't undo our change.
 *
 * Return: 0 on success, 1 on failure.
 */
int Manage_ro(char *devname, int fd, int readonly)
{
	struct mdinfo *mdi;
	int rv = 0;

	mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_VERSION);

	if (mdi &&
	    mdi->array.major_version == -1 &&
	    is_subarray(mdi->text_version)) {
		char vers[64];

		/* vers[9] is the first character of text_version; it is
		 * overwritten below with '-' or '/'.
		 */
		snprintf(vers, sizeof(vers), "external:%s", mdi->text_version);

		if (readonly > 0) {
			/* We set readonly ourselves. */
			vers[9] = '-';
			sysfs_set_str(mdi, NULL, "metadata_version", vers);

			close_fd(&fd);
			if (sysfs_set_str(mdi, NULL, "array_state",
					  "readonly") < 0) {
				pr_err("failed to set readonly for %s: %s\n",
				       devname, strerror(errno));

				/* Put the original metadata_version back. */
				vers[9] = mdi->text_version[0];
				sysfs_set_str(mdi, NULL, "metadata_version", vers);
				/* Report the failure through the function's
				 * own result; a shadowing local used to
				 * swallow it.
				 */
				rv = 1;
			}
		} else {
			char *cp;

			/* We cannot set read/write - must signal mdmon */
			vers[9] = '/';
			sysfs_set_str(mdi, NULL, "metadata_version", vers);

			/* Keep only the first path component after the
			 * leading '/' when pinging the monitor.
			 */
			cp = strchr(vers+10, '/');
			if (cp)
				*cp = 0;
			ping_monitor(vers+10);
			if (mdi->array.level <= 0)
				sysfs_set_str(mdi, NULL, "array_state", "active");
		}
		goto out;
	}

	if (!md_array_active(fd)) {
		pr_err("%s does not appear to be active.\n", devname);
		rv = 1;
		goto out;
	}

	if (readonly > 0) {
		if (ioctl(fd, STOP_ARRAY_RO, NULL)) {
			pr_err("failed to set readonly for %s: %s\n",
			       devname, strerror(errno));
			rv = 1;
			goto out;
		}
	} else if (readonly < 0) {
		if (ioctl(fd, RESTART_ARRAY_RW, NULL)) {
			pr_err("failed to set writable for %s: %s\n",
			       devname, strerror(errno));
			rv = 1;
			goto out;
		}
	}
out:
	sysfs_free(mdi);
	return rv;
}
/*
 * remove_devices() - remove names at 'path' that link to an md device.
 * @devnm: kernel name of the md device; "/dev/<devnm>" is the link target
 *	that is looked for.
 * @path: name to examine; NULL or empty means there is nothing to do.
 *
 * Remove names at 'path' - possibly with partition suffixes - which link
 * to the 'standard' name for devnm.  These were probably created by mdadm
 * when the array was assembled.  The bare name and partitions 1 to 15 are
 * checked; a name is unlinked only when its symlink target is exactly the
 * matching "/dev/<devnm>[p<N>]".
 */
static void remove_devices(char *devnm, char *path)
{
	char base[40];
	char *path2;
	char link[1024];
	int n;
	int part;
	char *be;
	char *pe;

	/* An empty path has no last character to inspect below. */
	if (!path || !*path)
		return;

	snprintf(base, sizeof(base), "/dev/%s", devnm);
	be = base + strlen(base);

	/* Room for the original name plus a "p<N>" suffix. */
	path2 = xmalloc(strlen(path)+20);
	strcpy(path2, path);
	pe = path2 + strlen(path2);

	for (part = 0; part < 16; part++) {
		if (part) {
			snprintf(be, sizeof(base) - (size_t)(be - base),
				 "p%d", part);

			/* A name ending in a digit needs a 'p' separator
			 * before the partition number.  The cast keeps
			 * isdigit() defined for bytes >= 0x80.
			 */
			if (isdigit((unsigned char)pe[-1]))
				snprintf(pe, 20, "p%d", part);
			else
				snprintf(pe, 20, "%d", part);
		}
		/* readlink() does not NUL-terminate, so compare by length. */
		n = readlink(path2, link, sizeof(link));
		if (n > 0 && (int)strlen(base) == n &&
		    strncmp(link, base, n) == 0)
			unlink(path2);
	}
	free(path2);
}
/*
 * Manage_run() - run an array that has already been configured.
 * @devname: array name, used only in the error message.
 * @fd: open descriptor of the md device.
 * @c: context handed on to IncrementalScan().
 *
 * Looks up the device name for @fd and passes a private copy of it to
 * IncrementalScan().
 *
 * Return: 1 if the device name cannot be found, otherwise whatever
 * IncrementalScan() returns.
 */
int Manage_run(char *devname, int fd, struct context *c)
{
	char nm[32];
	char *found = fd2devnm(fd);

	if (found == NULL) {
		pr_err("Cannot find %s in sysfs!!\n", devname);
		return 1;
	}

	/* Work on a local copy of the name. */
	snprintf(nm, sizeof(nm), "%s", found);

	return IncrementalScan(c, nm);
}
int Manage_stop(char *devname, int fd, int verbose, int will_retry)
{
/* Stop the array. Array must already be configured
* 'will_retry' means that error messages are not wanted.
*/
int rv = 0;
struct map_ent *map = NULL;
struct mdinfo *mdi;
char devnm[32];
char container[MD_NAME_MAX] = {0};
int err;
int count;
char buf[SYSFS_MAX_BUF_SIZE];
unsigned long long rd1, rd2;
if (will_retry && verbose == 0)
verbose = -1;
snprintf(devnm, sizeof(devnm), "%s", fd2devnm(fd));
/* Get EXCL access first. If this fails, then attempting
* to stop is probably a bad idea.
*/
mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION);
if (mdi && is_subarray(mdi->text_version))
sysfs_get_container_devnm(mdi, container);
close_fd(&fd);
count = 5;
while (((fd = ((devname[0] == '/')
?open(devname, O_RDONLY|O_EXCL)
:open_dev_flags(devnm, O_RDONLY|O_EXCL))) < 0 ||
strcmp(fd2devnm(fd), devnm) != 0) && container[0] &&
mdmon_running(container) && count) {
/* Can't open, so something might be wrong. However it
* is a container, so we might be racing with mdmon, so
* retry for a bit.
*/
close_fd(&fd);
flush_mdmon(container);
count--;
}
if (fd < 0 || strcmp(fd2devnm(fd), devnm) != 0) {
close_fd(&fd);
if (verbose >= 0)
pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n",
devname);
sysfs_free(mdi);
return 1;
}
/* If this is an mdmon managed array, just write 'inactive'
* to the array state and let mdmon clear up.
*/
if (mdi &&
mdi->array.level > 0 &&
is_subarray(mdi->text_version)) {
int err;
/* This is mdmon managed. */
close_fd(&fd);
/* As we had an O_EXCL open, any use of the device
* which blocks STOP_ARRAY is probably a transient use,
* so it is reasonable to retry for a while - 5 seconds.
*/
count = 25;
while (count &&
(err = sysfs_set_str(mdi, NULL,
"array_state",
"inactive")) < 0 &&
errno == EBUSY) {
err = errno;
sleep_for(0, MSEC_TO_NSEC(200), true);
count--;
}
if (err) {
if (verbose >= 0)
pr_err("failed to stop array %s: %s\n",
devname, strerror(err));
rv = 1;
goto out;
}
/* Give monitor a chance to act */
ping_monitor(mdi->text_version);
fd = open_dev_excl(devnm);
if (fd < 0) {
if (verbose >= 0)
pr_err("failed to completely stop %s: Device is busy\n",
devname);
rv = 1;
goto out;
}
} else if (mdi &&
mdi->array.major_version == -1 &&
mdi->array.minor_version == -2 &&
!is_subarray(mdi->text_version)) {
struct mdstat_ent *mds, *m;
/* container, possibly mdmon-managed.
* Make sure mdmon isn't opening it, which
* would interfere with the 'stop'
*/
ping_monitor(mdi->sys_name);
/* now check that there are no existing arrays
* which are members of this array
*/
mds = mdstat_read(0, 0);
for (m = mds; m; m = m->next)
if (is_mdstat_ent_external(m) &&
metadata_container_matches(m->metadata_version + 9, devnm)) {
if (verbose >= 0)
pr_err("Cannot stop container %s: member %s still active\n",
devname, m->devnm);
free_mdstat(mds);
rv = 1;
goto out;
}
}
/* If the array is undergoing a reshape which changes the number
* of devices, then it would be nice to stop it at a point where
* it has completed a full number of stripes in both old and
* new layouts as this will allow the reshape to be reverted.
* So if 'sync_action' is "reshape" and 'raid_disks' shows two
* different numbers, then
* - freeze reshape
* - set sync_max to next multiple of both data_disks and
* chunk sizes (or next but one)
* - unfreeze reshape
* - wait on 'sync_completed' for that point to be reached.
*/
if (mdi && is_level456(mdi->array.level) &&
sysfs_attribute_available(mdi, NULL, "sync_action") &&
sysfs_attribute_available(mdi, NULL, "reshape_direction") &&
sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf)) > 0 &&
strcmp(buf, "reshape\n") == 0 &&
sysfs_get_two(mdi, NULL, "raid_disks", &rd1, &rd2) == 2) {
unsigned long long position, curr;
unsigned long long chunk1, chunk2;
unsigned long long rddiv, chunkdiv;
unsigned long long sectors;
unsigned long long sync_max, old_sync_max;
unsigned long long completed;
int backwards = 0;
int delay;
int scfd;
delay = 40;
while (rd1 > rd2 && delay > 0 &&
sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) == 0) {
/* must be in the critical section - wait a bit */
delay -= 1;
sleep_for(0, MSEC_TO_NSEC(100), true);
}
if (sysfs_set_str(mdi, NULL, "sync_action", "frozen") != 0)
goto done;
/* Array is frozen */
rd1 -= mdi->array.level == 6 ? 2 : 1;
rd2 -= mdi->array.level == 6 ? 2 : 1;
sysfs_get_str(mdi, NULL, "reshape_direction", buf, sizeof(buf));
if (strncmp(buf, "back", 4) == 0)
backwards = 1;
if (sysfs_get_ll(mdi, NULL, "reshape_position", &position) != 0) {
/* reshape must have finished now */
sysfs_set_str(mdi, NULL, "sync_action", "idle");
goto done;
}
sysfs_get_two(mdi, NULL, "chunk_size", &chunk1, &chunk2);
chunk1 /= 512;
chunk2 /= 512;
rddiv = GCD(rd1, rd2);
chunkdiv = GCD(chunk1, chunk2);
sectors = (chunk1/chunkdiv) * chunk2 * (rd1/rddiv) * rd2;
if (backwards) {
/* Need to subtract 'reshape_position' from
* array size to get equivalent of sync_max.
* Size calculation based on raid5_size in kernel.
*/
unsigned long long size = mdi->component_size;
size &= ~(chunk1-1);
size &= ~(chunk2-1);
/* rd1 must be smaller */
/* Reshape may have progressed further backwards than
* recorded, so target even further back (hence "-1")
*/
position = (position / sectors - 1) * sectors;
/* rd1 is always the conversion factor between 'sync'
* position and 'reshape' position.
* We read 1 "new" stripe worth of data from where-ever,
* and when write out that full stripe.
*/
sync_max = size - position/rd1;
} else {
/* Reshape will very likely be beyond position, and it may
* be too late to stop at '+1', so aim for '+2'
*/
position = (position / sectors + 2) * sectors;
sync_max = position/rd1;
}
if (sysfs_get_ll(mdi, NULL, "sync_max", &old_sync_max) < 0)
old_sync_max = mdi->component_size;
/* Must not advance sync_max as that could confuse
* the reshape monitor */
if (sync_max < old_sync_max)
sysfs_set_num(mdi, NULL, "sync_max", sync_max);
sysfs_set_str(mdi, NULL, "sync_action", "idle");
/* That should have set things going again. Now we
* wait a little while (3 second max) for sync_completed
* to reach the target.
* The reshape process can block for 500msec if
* the sync speed limit is hit, so we need to wait
* a lot longer than that. 1 second is usually
* enough. 3 is safe.
*/
delay = 3000;
scfd = sysfs_open(mdi->sys_name, NULL, "sync_completed");
while (scfd >= 0 && delay > 0 && old_sync_max > 0) {
unsigned long long max_completed;
sysfs_get_ll(mdi, NULL, "reshape_position", &curr);
sysfs_fd_get_str(scfd, buf, sizeof(buf));
if (str_is_none(buf) == true) {
/* Either reshape has aborted, or hasn't
* quite started yet. Wait a bit and
* check 'sync_action' to see.
*/
sleep_for(0, MSEC_TO_NSEC(10), true);
sysfs_get_str(mdi, NULL, "sync_action", buf, sizeof(buf));
if (strncmp(buf, "reshape", 7) != 0)
break;
}
if (sysfs_fd_get_two(scfd, &completed,
&max_completed) == 2 &&
/* 'completed' sometimes reads as max-uulong */
completed < max_completed &&
(completed > sync_max ||
(completed == sync_max && curr != position))) {
while (completed > sync_max) {
sync_max += sectors / rd1;
if (backwards)
position -= sectors;
else
position += sectors;
}
if (sync_max < old_sync_max)
sysfs_set_num(mdi, NULL, "sync_max", sync_max);
}
if (!backwards && curr >= position)
break;
if (backwards && curr <= position)
break;
sysfs_wait(scfd, &delay);
}
close_fd(&scfd);
}
done:
/* As we have an O_EXCL open, any use of the device
* which blocks STOP_ARRAY is probably a transient use,
* so it is reasonable to retry for a while - 5 seconds.
*/
count = 25; err = 0;
while (count && fd >= 0 &&
(err = ioctl(fd, STOP_ARRAY, NULL)) < 0 && errno == EBUSY) {
err = errno;
sleep_for(0, MSEC_TO_NSEC(200), true);
count --;
}
if (fd >= 0 && err) {
if (verbose >= 0) {
pr_err("failed to stop array %s: %s\n",
devname, strerror(err));
if (err == EBUSY)
cont_err("Perhaps a running process, mounted filesystem or active volume group?\n");
}
rv = 1;
goto out;
}
if (devnm[0] && udev_is_available()) {
struct map_ent *mp = map_by_devnm(&map, devnm);
remove_devices(devnm, mp ? mp->path : NULL);
}
if (verbose >= 0)
pr_info("stopped %s\n", devname);
map_lock(&map);
map_remove(&map, devnm);
map_unlock(&map);
out:
sysfs_free(mdi);
close_fd(&fd);
return rv;
}
/*
 * add_one() - insert a new device entry directly after @dv.
 * @dv: list element after which the new entry is linked.
 * @name: device name; a copy is stored in the new entry.
 * @disp: disposition character recorded in the new entry.
 *
 * All other fields of the new entry are zeroed.
 *
 * Return: the newly inserted entry.
 */
static struct mddev_dev *add_one(struct mddev_dev *dv, char *name, char disp)
{
	struct mddev_dev *node = xmalloc(sizeof(*node));

	memset(node, 0, sizeof(*node));
	node->devname = xstrdup(name);
	node->disposition = disp;

	/* Splice in between dv and its former successor. */
	node->next = dv->next;
	dv->next = node;

	return node;
}
/*
 * add_faulty() - queue every faulty member of an array after @dv.
 * @dv: list element after which "major:minor" entries are appended.
 * @fd: open descriptor of the md array.
 * @disp: disposition given to each added entry.
 *
 * Slots are scanned until either MAX_DISKS slots were tried or as many
 * present devices as the array reports (nr_disks) were seen.
 */
static void add_faulty(struct mddev_dev *dv, int fd, char disp)
{
	mdu_array_info_t array;
	mdu_disk_info_t disk;
	int left;
	int slot;

	if (md_get_array_info(fd, &array) != 0)
		return;

	left = array.nr_disks;
	for (slot = 0; slot < MAX_DISKS && left > 0; slot++) {
		char devid[40];

		disk.number = slot;
		/* Skip slots that cannot be queried or hold no device. */
		if (md_get_disk_info(fd, &disk) != 0 ||
		    (disk.major == 0 && disk.minor == 0))
			continue;
		left--;

		if (disk.state & 1) {
			/* faulty */
			sprintf(devid, "%d:%d", disk.major, disk.minor);
			dv = add_one(dv, devid, disp);
		}
	}
}
/*
 * add_detached() - queue every detached member of an array after @dv.
 * @dv: list element after which "major:minor" entries are appended.
 * @fd: open descriptor of the md array.
 * @disp: disposition given to each added entry; with 'f', members that
 *	are already faulty are skipped.
 *
 * A member counts as detached when opening its "major:minor" fails with
 * ENXIO.  Slots are scanned until either MAX_DISKS slots were tried or
 * as many present devices as the array reports (nr_disks) were seen.
 */
static void add_detached(struct mddev_dev *dv, int fd, char disp)
{
	mdu_array_info_t array;
	mdu_disk_info_t disk;
	int left;
	int slot;

	if (md_get_array_info(fd, &array) != 0)
		return;

	left = array.nr_disks;
	for (slot = 0; slot < MAX_DISKS && left > 0; slot++) {
		char devid[40];
		int sfd;

		disk.number = slot;
		/* Skip slots that cannot be queried or hold no device. */
		if (md_get_disk_info(fd, &disk) != 0 ||
		    (disk.major == 0 && disk.minor == 0))
			continue;
		left--;

		/* already faulty */
		if (disp == 'f' && (disk.state & 1) != 0)
			continue;

		sprintf(devid, "%d:%d", disk.major, disk.minor);
		sfd = dev_open(devid, O_RDONLY);
		if (sfd >= 0) {
			/* Not detached */
			close(sfd);
			continue;
		}

		/* Any error other than ENXIO: probably not detached */
		if (errno == ENXIO)
			dv = add_one(dv, devid, disp);
	}
}
/*
 * add_set() - queue all members of one set of a level-10 array after @dv.
 * @dv: list element after which "major:minor" entries are appended; each
 *	new entry takes the disposition of the element it is added after.
 * @fd: open descriptor of the md array.
 * @set_char: letter of the wanted set; 'A' is set 0, 'B' is set 1, ...
 *
 * Nothing is added unless the array is level 10 and raid_disks is a
 * multiple of the copy count, which is the product of the two low bytes
 * of the layout.  A device belongs to set (raid_disk % copies).
 */
static void add_set(struct mddev_dev *dv, int fd, char set_char)
{
	mdu_array_info_t array;
	mdu_disk_info_t disk;
	int left;
	int copies;
	int slot;

	if (md_get_array_info(fd, &array) != 0)
		return;
	if (array.level != 10)
		return;

	copies = (array.layout & 0xff) * ((array.layout >> 8) & 0xff);
	if (array.raid_disks % copies)
		return;

	left = array.nr_disks;
	for (slot = 0; slot < MAX_DISKS && left > 0; slot++) {
		char devid[40];

		disk.number = slot;
		/* Skip slots that cannot be queried or hold no device. */
		if (md_get_disk_info(fd, &disk) != 0 ||
		    (disk.major == 0 && disk.minor == 0))
			continue;
		left--;

		/* Only devices of the requested set are wanted. */
		if (set_char - 'A' != disk.raid_disk % copies)
			continue;

		sprintf(devid, "%d:%d", disk.major, disk.minor);
		dv = add_one(dv, devid, dv->disposition);
	}
}
/*
 * attempt_re_add() - try to re-add a device that carries array metadata.
 * @fd: open descriptor of the md array.
 * @tfd: descriptor of the device; used for remove_partitions().  The
 *	local copy is replaced by a fresh O_RDWR open when the superblock
 *	has to be rewritten.
 * @dv: device entry (name, disposition, writemostly/failfast requests).
 * @dev_st: supertype with the device's own superblock loaded.
 * @tst: supertype of the array; its superblock may be absent.
 * @rdev: device number of the device being added.
 * @update: optional superblock update to apply before re-adding.
 * @devname: array name, passed to update_super().
 * @verbose: verbosity; the success message is printed when >= 0.
 * @array: array info, used to detect clustered arrays.
 *
 * Return: 1 if the device was re-added; 0 if a re-add was not attempted
 * or did not work and the caller may carry on; -1 on a hard failure.
 */
int attempt_re_add(int fd, int tfd, struct mddev_dev *dv,
		   struct supertype *dev_st, struct supertype *tst,
		   unsigned long rdev, enum update_opt update,
		   char *devname, int verbose, mdu_array_info_t *array)
{
	struct mdinfo info;
	int arr_uuid[4];
	int dev_uuid[4];
	mdu_disk_info_t slot;

	dev_st->ss->getinfo_super(dev_st, &info, NULL);
	dev_st->ss->uuid_from_super(dev_st, dev_uuid);

	if (tst->sb)
		tst->ss->uuid_from_super(tst, arr_uuid);
	else
		/* Assume uuid matches: kernel will check */
		memcpy(arr_uuid, dev_uuid, sizeof(dev_uuid));

	/* Only an active, non-faulty device with the array's uuid is
	 * worth a try.
	 */
	if (!(info.disk.state & (1<<MD_DISK_ACTIVE)) ||
	    (info.disk.state & (1<<MD_DISK_FAULTY)) ||
	    memcmp(arr_uuid, dev_uuid, sizeof(dev_uuid)) != 0)
		return 0;

	/* Need to make sure kernel will accept it though: the slot the
	 * device remembers must currently be empty.
	 */
	slot.number = info.disk.number;
	if (md_get_disk_info(fd, &slot) != 0 ||
	    slot.major != 0 || slot.minor != 0)
		return 0;

	slot.major = major(rdev);
	slot.minor = minor(rdev);
	slot.number = info.disk.number;
	slot.raid_disk = info.disk.raid_disk;
	slot.state = info.disk.state;

	if (array->state & (1 << MD_SB_CLUSTERED)) {
		/* extra flags are needed when adding to a cluster as
		 * there are two cases to distinguish
		 */
		slot.state |= 1 << (dv->disposition == 'c' ?
				    MD_DISK_CANDIDATE : MD_DISK_CLUSTER_ADD);
	}

	if (dv->writemostly == FlagSet)
		slot.state |= 1 << MD_DISK_WRITEMOSTLY;
	if (dv->writemostly == FlagClear)
		slot.state &= ~(1 << MD_DISK_WRITEMOSTLY);
	if (dv->failfast == FlagSet)
		slot.state |= 1 << MD_DISK_FAILFAST;
	if (dv->failfast == FlagClear)
		slot.state &= ~(1 << MD_DISK_FAILFAST);

	remove_partitions(tfd);

	if (update || dv->writemostly != FlagDefault ||
	    dv->failfast != FlagDefault) {
		/* The on-disk superblock has to be changed first. */
		int status = -1;

		tfd = dev_open(dv->devname, O_RDWR);
		if (tfd < 0) {
			pr_err("failed to open %s for superblock update during re-add\n", dv->devname);
			return -1;
		}

		if (dv->writemostly == FlagSet)
			status = dev_st->ss->update_super(
				dev_st, NULL, UOPT_SPEC_WRITEMOSTLY,
				devname, verbose, 0, NULL);
		if (dv->writemostly == FlagClear)
			status = dev_st->ss->update_super(
				dev_st, NULL, UOPT_SPEC_READWRITE,
				devname, verbose, 0, NULL);
		if (dv->failfast == FlagSet)
			status = dev_st->ss->update_super(
				dev_st, NULL, UOPT_SPEC_FAILFAST,
				devname, verbose, 0, NULL);
		if (dv->failfast == FlagClear)
			status = dev_st->ss->update_super(
				dev_st, NULL, UOPT_SPEC_NOFAILFAST,
				devname, verbose, 0, NULL);
		if (update)
			status = dev_st->ss->update_super(
				dev_st, NULL, update,
				devname, verbose, 0, NULL);

		/* Only the result of the last update decides whether the
		 * superblock is stored.
		 */
		if (status == 0)
			status = dev_st->ss->store_super(dev_st, tfd);
		close_fd(&tfd);
		if (status != 0) {
			pr_err("failed to update superblock during re-add\n");
			return -1;
		}
	}

	/* don't even try if disk is marked as faulty */
	errno = 0;
	if (ioctl(fd, ADD_NEW_DISK, &slot) == 0) {
		if (verbose >= 0)
			pr_err("re-added %s\n", dv->devname);
		return 1;
	}

	/* Only ENOMEM and EROFS are reported here; any other error
	 * returns 0 to the caller.
	 */
	if (errno != ENOMEM && errno != EROFS)
		return 0;

	pr_err("add new device failed for %s: %s\n",
	       dv->devname, strerror(errno));
	return dv->disposition == 'M' ? 0 : -1;
}
/**
 * manage_add_external() - Add disk to external container.
 * @st: external supertype pointer, must not be NULL, superblock is released here.
 * @fd: container file descriptor, must not have O_EXCL mode.
 * @disk_name: name of the device to add.
 * @disc: disk info.
 *
 * The container is opened exclusively for the duration of the call and the
 * disk is opened here with O_RDWR | O_EXCL | O_DIRECT. The superblock is
 * released before sysfs_add_disk() because any open fd with O_EXCL will block it.
 *
 * Return: MDADM_STATUS_SUCCESS when the disk was added, MDADM_STATUS_ERROR otherwise.
 */
mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name,
				   mdu_disk_info_t *disc)
{
	char container_devpath[MD_NAME_MAX];
	mdadm_status_t status = MDADM_STATUS_ERROR;
	struct dev_policy *policies = NULL;
	struct mdinfo *container_sra = NULL;
	struct mdinfo disk_info;
	int disk_fd = -1;
	int container_fd;

	snprintf(container_devpath, sizeof(container_devpath), "%s", fd2devnm(fd));

	container_fd = open_dev_excl(container_devpath);
	if (!is_fd_valid(container_fd)) {
		pr_err("Failed to get exclusive access to container %s\n", container_devpath);
		return MDADM_STATUS_ERROR;
	}

	/* Check if metadata handler is able to accept the drive */
	if (!st->ss->validate_geometry(st, LEVEL_CONTAINER, 0, 1, NULL, 0, 0, disk_name, NULL,
				       0, 1))
		goto out;

	if (mddev_test_and_add_drive_policies(st, &policies, container_fd, 1))
		goto out;

	Kill(disk_name, NULL, 0, -1, 0);

	disk_fd = dev_open(disk_name, O_RDWR | O_EXCL | O_DIRECT);
	if (!is_fd_valid(disk_fd)) {
		pr_err("Failed to exclusively open %s\n", disk_name);
		goto out;
	}

	if (drive_test_and_add_policies(st, &policies, disk_fd, 1))
		goto out;

	if (st->ss->add_to_super(st, disc, disk_fd, disk_name, INVALID_SECTORS))
		goto out;

	/* Without a running mdmon the metadata is synced from here. */
	if (!mdmon_running(st->container_devnm))
		st->ss->sync_metadata(st);

	container_sra = sysfs_read(container_fd, NULL, 0);
	if (!container_sra) {
		pr_err("Failed to read sysfs for %s\n", disk_name);
		goto out;
	}
	container_sra->array.level = LEVEL_CONTAINER;

	/* Need to set data_offset and component_size */
	st->ss->getinfo_super(st, &disk_info, NULL);
	disk_info.disk.major = disc->major;
	disk_info.disk.minor = disc->minor;
	disk_info.recovery_start = 0;

	/* Release the superblock before touching sysfs, see above. */
	st->ss->free_super(st);

	if (sysfs_add_disk(container_sra, &disk_info, 0) != 0) {
		pr_err("Failed to add %s to container %s\n", disk_name, container_devpath);
		goto out;
	}

	ping_monitor(container_devpath);
	status = MDADM_STATUS_SUCCESS;

out:
	close_fd(&container_fd);
	dev_policy_free(policies);

	if (container_sra)
		sysfs_free(container_sra);

	if (status != MDADM_STATUS_SUCCESS)
		/* Metadata handler records this descriptor, so release it only on failure. */
		close_fd(&disk_fd);

	if (st->sb)
		st->ss->free_super(st);

	return status;
}
/*
 * Manage_add() - add (or re-add) one device to an array.
 * @fd: open descriptor of the md array.
 * @tfd: open descriptor of the device being added.
 * @dv: device entry; its disposition selects the behaviour ('j' is a
 *	journal, 'M' failures are returned as 0 instead of -1, 'A' and 'M'
 *	only allow a re-add on persistent arrays, 'c' marks a cluster
 *	candidate, 'S' skips the re-add attempt).
 * @tst: supertype of the array; a sample superblock is loaded into it
 *	when none is present yet.
 * @array: array info as read from the kernel.
 * @force: allow devices larger than 4TB on v0.90 metadata.
 * @verbose: the "added" message is printed when >= 0.
 * @devname: array name, used in messages.
 * @update: superblock update passed on to attempt_re_add().
 * @rdev: device number of the device being added.
 * @array_size: minimum size the device must provide.
 * @raid_slot: disk number to use, or < 0 to pick the first free number
 *	at or above raid_disks.
 *
 * Return: 1 (or attempt_re_add()'s non-zero result) when the device was
 * handled, 0 when an 'M' device was skipped, -1 on error.
 */
int Manage_add(int fd, int tfd, struct mddev_dev *dv,
	       struct supertype *tst, mdu_array_info_t *array,
	       int force, int verbose, char *devname,
	       enum update_opt update, unsigned long rdev,
	       unsigned long long array_size, int raid_slot)
{
	unsigned long long ldsize;
	struct supertype *dev_st;
	int j;
	mdu_disk_info_t disc;
	struct map_ent *map = NULL;
	bool add_new_super = false;

	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
		if (dv->disposition == 'M')
			return 0;
		else
			return -1;
	}

	if (tst->ss == &super0 && ldsize > 4ULL*1024*1024*1024*1024) {
		/* More than 4TB is wasted on v0.90 */
		if (!force) {
			pr_err("%s is larger than %s can effectively use.\n"
			       "       Add --force is you really want to add this device.\n",
			       dv->devname, devname);
			return -1;
		}
		pr_err("%s is larger than %s can effectively use.\n"
		       "       Adding anyway as --force was given.\n",
		       dv->devname, devname);
	}

	if (array->not_persistent == 0 || tst->ss->external) {
		/* need to find a sample superblock to copy, and
		 * a spare slot to use.
		 * For 'external' array (well, container based),
		 * We can just load the metadata for the array->
		 */
		int array_failed;

		if (tst->sb) {
			/* already loaded */
		} else if (tst->ss->external) {
			tst->ss->load_container(tst, fd, NULL);
		} else {
			for (j = 0; j < tst->max_devs; j++) {
				char *dev;
				int dfd;

				disc.number = j;
				if (md_get_disk_info(fd, &disc))
					continue;
				if (disc.major==0 && disc.minor==0)
					continue;
				if ((disc.state & 4)==0) /* sync */
					continue;
				/* Looks like a good device to try */
				dev = map_dev(disc.major, disc.minor, 1);
				if (!dev)
					continue;
				dfd = dev_open(dev, O_RDONLY);
				if (dfd < 0)
					continue;
				if (tst->ss->load_super(tst, dfd,
							NULL)) {
					close_fd(&dfd);
					continue;
				}
				close_fd(&dfd);
				break;
			}
		}
		/* FIXME this is a bad test to be using */
		if (!tst->sb && (dv->disposition != 'a' &&
				 dv->disposition != 'S')) {
			/* we are re-adding a device to a
			 * completely dead array - have to depend
			 * on kernel to check
			 */
		} else if (!tst->sb) {
			pr_err("cannot load array metadata from %s\n", devname);
			return -1;
		}

		/* Make sure device is large enough */
		if (dv->disposition != 'j' &&  /* skip size check for Journal */
		    tst->sb &&
		    tst->ss->avail_size(tst, ldsize/512, INVALID_SECTORS) <
		    array_size) {
			if (dv->disposition == 'M')
				return 0;
			pr_err("%s not large enough to join array\n",
			       dv->devname);
			return -1;
		}

		/* Possibly this device was recently part of
		 * the array and was temporarily removed, and
		 * is now being re-added.  If so, we can
		 * simply re-add it.
		 */
		if (array->not_persistent == 0 && dv->disposition != 'S') {
			int rv = 0;

			dev_st = dup_super(tst);
			dev_st->ss->load_super(dev_st, tfd, NULL);
			if (dev_st->sb) {
				rv = attempt_re_add(fd, tfd, dv, dev_st, tst, rdev, update,
						    devname, verbose, array);
				dev_st->ss->free_super(dev_st);
			}
			free(dev_st);
			if (rv)
				return rv;
		}
		if (dv->disposition == 'M') {
			if (verbose > 0)
				pr_err("--re-add for %s to %s is not possible\n",
				       dv->devname, devname);
			return 0;
		}
		if (dv->disposition == 'A') {
			pr_err("--re-add for %s to %s is not possible\n",
			       dv->devname, devname);
			return -1;
		}
		if (array->active_disks < array->raid_disks) {
			/* Degraded: work out whether the in-sync devices
			 * are still enough for the array to function.
			 */
			char *avail = xcalloc(array->raid_disks, 1);
			int d;
			int found = 0;

			for (d = 0; d < MAX_DISKS && found < array->nr_disks; d++) {
				disc.number = d;
				if (md_get_disk_info(fd, &disc))
					continue;
				if (disc.major == 0 && disc.minor == 0)
					continue;
				if (!(disc.state & (1<<MD_DISK_SYNC)))
					continue;
				/* Never write outside the raid_disks-sized
				 * map, whatever slot the kernel reports.
				 */
				if (disc.raid_disk >= 0 &&
				    disc.raid_disk < array->raid_disks)
					avail[disc.raid_disk] = 1;
				found++;
			}
			array_failed = !enough(array->level, array->raid_disks,
					       array->layout, 1, avail);
			free(avail);
		} else
			array_failed = 0;
		if (array_failed) {
			pr_err("%s has failed so using --add cannot work and might destroy\n",
			       devname);
			pr_err("data on %s.  You should stop the array and re-assemble it.\n",
			       dv->devname);
			return -1;
		}
	} else {
		/* non-persistent. Must ensure that new drive
		 * is at least array->size big.
		 */
		if (ldsize/512 < array_size) {
			pr_err("%s not large enough to join array\n",
			       dv->devname);
			return -1;
		}
	}
	/* committed to really trying this device now*/
	remove_partitions(tfd);

	/* in 2.6.17 and earlier, version-1 superblocks won't
	 * use the number we write, but will choose a free number.
	 * we must choose the same free number, which requires
	 * starting at 'raid_disks' and counting up
	 */
	for (j = array->raid_disks; j < tst->max_devs; j++) {
		disc.number = j;
		if (md_get_disk_info(fd, &disc))
			break;
		if (disc.major==0 && disc.minor==0)
			break;
		if (disc.state & 8) /* removed */
			break;
	}
	disc.major = major(rdev);
	disc.minor = minor(rdev);
	if (raid_slot < 0)
		disc.number = j;
	else
		disc.number = raid_slot;
	disc.state = 0;

	/* only add journal to array that supports journaling */
	if (dv->disposition == 'j') {
		struct mdinfo *mdp;

		mdp = sysfs_read(fd, NULL, GET_ARRAY_STATE);
		if (!mdp) {
			pr_err("%s unable to read array state.\n", devname);
			return -1;
		}

		if (mdp->array_state != ARRAY_READONLY) {
			sysfs_free(mdp);
			pr_err("%s is not readonly, cannot add journal.\n", devname);
			return -1;
		}

		sysfs_free(mdp);

		disc.raid_disk = 0;
	}

	/* The mapfile lock is held from here until 'unlock' or success. */
	if (map_lock(&map))
		pr_err("failed to get exclusive lock on mapfile when add disk\n");

	if (array->not_persistent==0) {
		int dfd;

		if (dv->disposition == 'j')
			disc.state |= (1 << MD_DISK_JOURNAL) | (1 << MD_DISK_SYNC);
		if (dv->writemostly == FlagSet)
			disc.state |= 1 << MD_DISK_WRITEMOSTLY;
		if (dv->failfast == FlagSet)
			disc.state |= 1 << MD_DISK_FAILFAST;
		/* NOTE(review): dfd is handed to add_to_super() without a
		 * validity check and is not closed here - presumably the
		 * metadata handler takes ownership; confirm.
		 */
		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
		if (tst->ss->add_to_super(tst, &disc, dfd,
					  dv->devname, INVALID_SECTORS))
			goto unlock;
		if (tst->ss->write_init_super(tst))
			goto unlock;
		/* Metadata is on the device now; erase it if we fail later. */
		add_new_super = true;
	} else if (dv->disposition == 'A') {
		/*  this had better be raid1.
		 * As we are "--re-add"ing we must find a spare slot
		 * to fill.
		 */
		char *used = xcalloc(array->raid_disks, 1);

		for (j = 0; j < tst->max_devs; j++) {
			mdu_disk_info_t disc2;

			disc2.number = j;
			if (md_get_disk_info(fd, &disc2))
				continue;
			if (disc2.major==0 && disc2.minor==0)
				continue;
			if (disc2.state & 8) /* removed */
				continue;
			if (disc2.raid_disk < 0)
				continue;
			/* 'used' has raid_disks entries, so raid_disks
			 * itself is already out of range.
			 */
			if (disc2.raid_disk >= array->raid_disks)
				continue;
			used[disc2.raid_disk] = 1;
		}
		for (j = 0 ; j < array->raid_disks; j++)
			if (!used[j]) {
				disc.raid_disk = j;
				disc.state |= (1<<MD_DISK_SYNC);
				break;
			}
		free(used);
	}

	if (array->state & (1 << MD_SB_CLUSTERED)) {
		if (dv->disposition == 'c')
			disc.state |= (1 << MD_DISK_CANDIDATE);
		else
			disc.state |= (1 << MD_DISK_CLUSTER_ADD);
	}

	if (dv->writemostly == FlagSet)
		disc.state |= (1 << MD_DISK_WRITEMOSTLY);
	if (dv->failfast == FlagSet)
		disc.state |= (1 << MD_DISK_FAILFAST);

	if (tst->ss->external) {
		if (manage_add_external(tst, fd, dv->devname, &disc) != MDADM_STATUS_SUCCESS)
			goto unlock;
	} else {
		tst->ss->free_super(tst);
		if (ioctl(fd, ADD_NEW_DISK, &disc)) {
			if (dv->disposition == 'j')
				pr_err("Failed to hot add %s as journal, "
				       "please try restart %s.\n", dv->devname, devname);
			else
				pr_err("add new device failed for %s as %d: %s\n",
				       dv->devname, j, strerror(errno));
			goto unlock;
		}
		if (dv->disposition == 'j') {
			pr_err("Journal added successfully, making %s read-write\n", devname);
			if (Manage_ro(devname, fd, -1))
				pr_err("Failed to make %s read-write\n", devname);
		}
	}
	if (verbose >= 0)
		pr_err("added %s\n", dv->devname);
	map_unlock(&map);
	return 1;
unlock:
	if (add_new_super)
		Kill(dv->devname, tst, 0, -1, 0);

	map_unlock(&map);
	return -1;
}
/**
 * Manage_remove() - Hot remove one member device.
 * @tst: supertype of the array or container the device belongs to.
 * @fd: file descriptor of the array or container.
 * @dv: device entry being removed; only @dv->devname is used (messages).
 * @sysfd: when >= 0, descriptor handed to sys_hot_remove_disk(); used when
 *	the major:minor number of the device is not known.
 * @rdev: device number of the member; 0 when unknown.
 * @force: passed through to the hot-remove helpers.
 * @verbose: a success message is printed when this is >= 0.
 * @devname: name of the array, used in messages only.
 *
 * For external metadata the container is opened exclusively for the
 * duration of the removal and the 'holders' of the device are checked
 * first, so that a device still used by a member array is not removed.
 *
 * Return: 1 when the device was removed, -1 on failure.
 */
int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
		  int sysfd, unsigned long rdev, int force, int verbose, char *devname)
{
	int lfd = -1;
	int err;

	if (tst->ss->external) {
		/* To remove a device from a container, we must
		 * check that it isn't in use in an array.
		 * This involves looking in the 'holders'
		 * directory - there must be just one entry,
		 * the container.
		 * To ensure that it doesn't get used as a
		 * hot spare while we are checking, we
		 * get an O_EXCL open on the container
		 */
		int ret;
		char devnm[32];
		char *container_devnm = fd2devnm(fd);

		/* fd2devnm() can fail; never hand NULL to a "%s" conversion */
		if (!container_devnm) {
			pr_err("unable to get container name\n");
			return -1;
		}
		snprintf(devnm, sizeof(devnm), "%s", container_devnm);
		lfd = open_dev_excl(devnm);
		if (lfd < 0) {
			pr_err("Cannot get exclusive access to container - odd\n");
			return -1;
		}
		/* We may not be able to check on holders in
		 * sysfs, either because we don't have the dev num
		 * (rdev == 0) or because the device has been detached
		 * and the 'holders' directory no longer exists
		 * (ret == -1).  In that case, assume it is OK to
		 * remove.
		 */
		if (rdev == 0)
			ret = -1;
		else {
			/*
			 * The drive has already been set to 'faulty', however
			 * monitor might not have had time to process it and the
			 * drive might still have an entry in the 'holders'
			 * directory. Try a few times to avoid a false error
			 */
			int count = 20;

			do {
				ret = sysfs_unique_holder(devnm, rdev);
				if (ret < 2)
					break;
				sleep_for(0, MSEC_TO_NSEC(100), true);
			} while (--count > 0);

			if (ret == 0) {
				pr_err("%s is not a member, cannot remove.\n",
				       dv->devname);
				close_fd(&lfd);
				return -1;
			}
			if (ret >= 2) {
				pr_err("%s is still in use, cannot remove.\n",
				       dv->devname);
				close_fd(&lfd);
				return -1;
			}
		}
	}
	/* FIXME check that it is a current member */
	if (sysfd >= 0) {
		/* device has been removed and we don't know
		 * the major:minor number
		 */
		err = sys_hot_remove_disk(sysfd, force);
	} else {
		err = hot_remove_disk(fd, rdev, force);
		if (err && errno == ENODEV) {
			/* Old kernels rejected this if no personality
			 * is registered */
			struct mdinfo *sra = sysfs_read(fd, NULL, GET_DEVS);
			struct mdinfo *dev = NULL;

			if (!sra) {
				err = -1;
			} else {
				for (dev = sra->devs; dev ; dev = dev->next)
					if (dev->disk.major == (int)major(rdev) &&
					    dev->disk.minor == (int)minor(rdev))
						break;
				/* fall back to asking for removal through sysfs */
				if (dev)
					err = sysfs_set_str(sra, dev,
							    "state", "remove");
				sysfs_free(sra);
			}
		}
	}
	if (err) {
		pr_err("hot remove failed for %s: %s\n", dv->devname,
		       strerror(errno));
		close_fd(&lfd);
		return -1;
	}
	if (tst->ss->external) {
		/*
		 * Before dropping our exclusive open we make an
		 * attempt at preventing mdmon from seeing an
		 * 'add' event before reconciling this 'remove'
		 * event.
		 */
		char *devnm = fd2devnm(fd);

		if (!devnm) {
			pr_err("unable to get container name\n");
			close_fd(&lfd);
			return -1;
		}

		ping_manager(devnm);
	}
	close_fd(&lfd);
	if (verbose >= 0)
		pr_err("hot removed %s from %s\n",
		       dv->devname, devname);
	return 1;
}
/**
 * Manage_replace() - Mark an active member as wanting replacement.
 * @tst: supertype of the array; external metadata is rejected.
 * @fd: file descriptor of the array.
 * @dv: device entry to mark; the list after it is searched for a 'W' entry.
 * @rdev: device number of the member.
 * @verbose: a success message is printed when this is >= 0.
 * @devname: name of the array, used in messages only.
 *
 * Writes "want_replacement" to the member's sysfs "state" attribute. If an
 * entry with disposition 'W' follows in the list, it is changed to 'w' and
 * its 'used' field records the raid disk number being replaced.
 *
 * Return: 1 on success, -1 on failure.
 */
int Manage_replace(struct supertype *tst, int fd, struct mddev_dev *dv,
		   unsigned long rdev, int verbose, char *devname)
{
	struct mdinfo *sra, *member;
	struct mddev_dev *with;
	int ret = -1;

	if (tst->ss->external) {
		pr_err("--replace only supported for native metadata (0.90 or 1.x)\n");
		return -1;
	}

	/* The request is made through the member's sysfs 'state' attribute */
	sra = sysfs_read(fd, NULL, GET_DEVS);
	if (!sra || !sra->devs) {
		pr_err("Cannot find status of %s to enable replacement - strange\n",
		       devname);
		goto out;
	}

	/* Locate the member whose device number matches rdev */
	member = sra->devs;
	while (member &&
	       !(member->disk.major == (int)major(rdev) &&
		 member->disk.minor == (int)minor(rdev)))
		member = member->next;

	if (!member) {
		pr_err("%s not found in %s so cannot --replace it\n",
		       dv->devname, devname);
		goto out;
	}

	if (member->disk.raid_disk < 0) {
		pr_err("%s is not active and so cannot be replaced.\n",
		       dv->devname);
		goto out;
	}

	if (sysfs_set_str(sra, member, "state", "want_replacement")) {
		pr_err("Failed to request replacement for %s\n",
		       dv->devname);
		goto out;
	}

	if (verbose >= 0)
		pr_err("Marked %s (device %d in %s) for replacement\n",
		       dv->devname, member->disk.raid_disk, devname);

	/* A matching 'with' entry must be told which raid disk it replaces */
	for (with = dv; with && with->disposition != 'W'; with = with->next)
		;
	if (with) {
		with->disposition = 'w';
		with->used = member->disk.raid_disk;
	}
	ret = 1;

out:
	sysfs_free(sra);
	return ret;
}
/**
 * Manage_with() - Make a device the preferred replacement for a slot.
 * @tst: supertype of the array (not used here).
 * @fd: file descriptor of the array.
 * @dv: device entry; @dv->used holds the slot number to request.
 * @rdev: device number of the candidate replacement.
 * @verbose: a success message is printed when this is >= 0.
 * @devname: name of the array, used in messages only.
 *
 * Writes @dv->used to the sysfs "slot" attribute of the member matching
 * @rdev, provided that member is neither faulty nor already active.
 *
 * Return: 1 on success, -1 on failure.
 */
int Manage_with(struct supertype *tst, int fd, struct mddev_dev *dv,
		unsigned long rdev, int verbose, char *devname)
{
	struct mdinfo *sra, *cand;
	int ret = -1;

	sra = sysfs_read(fd, NULL, GET_DEVS|GET_STATE);
	if (!sra || !sra->devs) {
		pr_err("Cannot find status of %s to enable replacement - strange\n",
		       devname);
		goto out;
	}

	/* Locate the member whose device number matches rdev */
	cand = sra->devs;
	while (cand &&
	       !(cand->disk.major == (int)major(rdev) &&
		 cand->disk.minor == (int)minor(rdev)))
		cand = cand->next;

	if (!cand) {
		pr_err("%s not found in %s so cannot make it preferred replacement\n",
		       dv->devname, devname);
		goto out;
	}

	if (cand->disk.state & (1<<MD_DISK_FAULTY)) {
		pr_err("%s is faulty and cannot be a replacement\n",
		       dv->devname);
		goto out;
	}

	if (cand->disk.raid_disk >= 0) {
		pr_err("%s is active and cannot be a replacement\n",
		       dv->devname);
		goto out;
	}

	if (sysfs_set_num(sra, cand, "slot", dv->used)) {
		pr_err("Failed to set %s as preferred replacement.\n",
		       dv->devname);
		goto out;
	}

	if (verbose >= 0)
		pr_err("Marked %s in %s as replacement for device %d\n",
		       dv->devname, devname, dv->used);
	ret = 1;

out:
	sysfs_free(sra);
	return ret;
}
/**
 * is_remove_safe() - Check if remove is safe.
 * @array: Array info.
 * @fd: Array file descriptor.
 * @devname: Name of device to remove. A leading "/dev/" is stripped before
 *	the name is resolved; any other name is resolved as given.
 * @verbose: Verbose.
 *
 * The function determines if array will be operational
 * after removing &devname. It collects every in-sync member that holds a
 * raid slot, leaves out the one matching &devname, and asks enough()
 * whether the remaining set is sufficient.
 *
 * Return: True if array will be operational, false otherwise (including
 * when the sysfs attributes cannot be read).
 */
bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const int verbose)
{
	char *kname = devname;
	dev_t devid;
	struct mdinfo *mdi;
	struct mdinfo *disk;

	/*
	 * Callers may pass names without a "/dev/" prefix (kernel-internal
	 * names, "major:minor"); skipping 5 bytes unconditionally would read
	 * past the end of short names or truncate longer ones.
	 */
	if (strncmp(devname, "/dev/", 5) == 0)
		kname = devname + 5;
	devid = devnm2devid(kname);

	mdi = sysfs_read(fd, NULL, GET_DEVS | GET_DISKS | GET_STATE);
	if (!mdi) {
		if (verbose)
			pr_err("Failed to read sysfs attributes for %s\n", devname);
		return false;
	}

	char *avail = xcalloc(array->raid_disks, sizeof(char));

	for (disk = mdi->devs; disk; disk = disk->next) {
		/* only members that currently hold a slot can count */
		if (disk->disk.raid_disk < 0)
			continue;
		/* avail[] has exactly raid_disks entries; never index past it */
		if (disk->disk.raid_disk >= array->raid_disks)
			continue;
		if (!(disk->disk.state & (1 << MD_DISK_SYNC)))
			continue;
		/* the device being removed must not be counted as available */
		if (makedev(disk->disk.major, disk->disk.minor) == devid)
			continue;

		avail[disk->disk.raid_disk] = 1;
	}
	sysfs_free(mdi);

	bool is_enough = enough(array->level, array->raid_disks,
				array->layout, 1, avail);

	free(avail);
	return is_enough;
}
/**
 * Manage_subdevs() - Execute operation depending on devmode.
 *
 * @devname: name of the device.
 * @fd: file descriptor.
 * @devlist: list of sub-devices to manage.
 * @verbose: verbose level.
 * @test: test flag.
 * @update: type of update.
 * @force: force flag.
 *
 * This function executes operation defined by devmode
 * for each dev from devlist.
 * Devmode can be:
 * 'a' - add the device
 * 'S' - add the device as a spare - don't try re-add
 * 'j' - add the device as a journal device
 * 'A' - re-add the device
 * 'r' - remove the device: HOT_REMOVE_DISK
 *       device can be 'faulty' or 'detached' in which case all
 *       matching devices are removed.
 * 'f' - set the device faulty SET_DISK_FAULTY
 *       device can be 'detached' in which case any device that
 *       is inaccessible will be marked faulty.
 * 'I' - remove device by using incremental fail
 *       which is executed when device is removed surprisingly.
 * 'R' - mark this device as wanting replacement.
 * 'W' - this device is added if necessary and activated as
 *       a replacement for a previous 'R' device.
 * -----
 * 'w' - 'W' will be changed to 'w' when it is paired with
 *       a 'R' device.  If a 'W' is found while walking the list
 *       it must be unpaired, and is an error.
 * 'M' - this is created by a 'missing' target.  It is a slight
 *       variant on 'A'
 * 'F' - Another variant of 'A', where the device was faulty
 *       so must be removed from the array first.
 * 'c' - confirm the device as found (for clustered environments)
 *
 * For 'f' and 'r', the device can also be a kernel-internal
 * name such as 'sdb'.
 *
 * Return: 0 on success. 2 when @test is set and no action was counted, or
 * when @test is not set and setting a device faulty failed with EBUSY.
 * 1 on any other failure.
 */
int Manage_subdevs(char *devname, int fd,
		   struct mddev_dev *devlist, int verbose, int test,
		   enum update_opt update, int force)
{
	mdu_array_info_t array;
	unsigned long long array_size;
	struct mddev_dev *dv;
	int tfd = -1;
	struct supertype *tst = NULL;
	char *subarray = NULL;
	int sysfd = -1;
	int count = 0; /* number of actions taken */
	struct mdinfo info;
	struct mdinfo devinfo;
	int frozen = 0;
	int busy = 0;
	int raid_slot = -1;

	if (sysfs_init(&info, fd, NULL)) {
		pr_err("sysfs not availabile for %s\n", devname);
		goto abort;
	}

	if (md_get_array_info(fd, &array)) {
		pr_err("Cannot get array info for %s\n", devname);
		goto abort;
	}
	/* array.size is only 32 bits and may be truncated.
	 * So read from sysfs if possible, and record number of sectors
	 */
	array_size = get_component_size(fd);
	if (array_size <= 0)
		array_size = array.size * 2;

	tst = super_by_fd(fd, &subarray);
	if (!tst) {
		pr_err("unsupport array - version %d.%d\n",
		       array.major_version, array.minor_version);
		goto abort;
	}

	for (dv = devlist; dv; dv = dv->next) {
		dev_t rdev = 0; /* device to add/remove etc */
		int rv, err = 0;
		int mj, mn;

		raid_slot = -1;
		if (dv->disposition == 'c') {
			rv = parse_cluster_confirm_arg(dv->devname, &dv->devname, &raid_slot);
			if (rv) {
				pr_err("Could not get the devname of cluster\n");
				goto abort;
			}
		}

		if (strcmp(dv->devname, "failed") == 0 || strcmp(dv->devname, "faulty") == 0) {
			if (dv->disposition != 'A' && dv->disposition != 'r') {
				pr_err("%s only meaningful with -r or --re-add, not -%c\n",
				       dv->devname, dv->disposition);
				goto abort;
			}

			/* expands into one list entry per matching member */
			add_faulty(dv, fd, (dv->disposition == 'A' ? 'F' : 'r'));
			continue;
		}

		if (strcmp(dv->devname, "detached") == 0) {
			if (dv->disposition != 'r' && dv->disposition != 'f') {
				pr_err("%s only meaningful with -r of -f, not -%c\n",
				       dv->devname, dv->disposition);
				goto abort;
			}

			add_detached(dv, fd, dv->disposition);
			continue;
		}

		if (strcmp(dv->devname, "missing") == 0) {
			struct mddev_dev *add_devlist;
			struct mddev_dev **dp;

			if (dv->disposition == 'c') {
				rv = ioctl(fd, CLUSTERED_DISK_NACK, NULL);
				break;
			}

			if (dv->disposition != 'A') {
				pr_err("'missing' only meaningful with --re-add\n");
				goto abort;
			}

			add_devlist = conf_get_devs();
			if (add_devlist == NULL) {
				pr_err("no devices to scan for missing members.\n");
				continue;
			}

			for (dp = &add_devlist; *dp; dp = &(*dp)->next)
				/* 'M' (for 'missing') is like 'A' without errors */
				(*dp)->disposition = 'M';

			/* splice the configured devices in right after dv */
			*dp = dv->next;
			dv->next = add_devlist;
			continue;
		}

		if (strncmp(dv->devname, "set-", 4) == 0 && strlen(dv->devname) == 5) {
			int copies;

			if (dv->disposition != 'r' && dv->disposition != 'f') {
				pr_err("'%s' only meaningful with -r or -f\n", dv->devname);
				goto abort;
			}

			if (array.level != 10) {
				pr_err("'%s' only meaningful with RAID10 arrays\n", dv->devname);
				goto abort;
			}

			copies = ((array.layout & 0xff) * ((array.layout >> 8) & 0xff));

			if (array.raid_disks % copies != 0 || dv->devname[4] < 'A' ||
			    dv->devname[4] >= 'A' + copies || copies > 26) {
				pr_err("'%s' not meaningful with this array\n", dv->devname);
				goto abort;
			}

			add_set(dv, fd, dv->devname[4]);
			continue;
		}

		if (!strchr(dv->devname, '/') && !strchr(dv->devname, ':') &&
		    strlen(dv->devname) < 50) {
			char *array_devnm = fd2devnm(fd);

			/* This is a kernel-internal name like 'sda1' */

			if (!strchr("rfI", dv->disposition)) {
				pr_err("%s only meaningful with -r, -f or -I, not -%c\n",
				       dv->devname, dv->disposition);
				goto abort;
			}

			sysfd = sysfs_open_memb_attr(array_devnm, dv->devname, "state", O_RDWR);
			if (!is_fd_valid(sysfd)) {
				pr_err("%s does not appear to be a component of %s\n", dv->devname,
				       devname);
				goto abort;
			}
		} else if (strchr("rf", dv->disposition) && get_maj_min(dv->devname, &mj, &mn)) {
			/* for 'fail' and 'remove', the device might not exist. */
			rdev = makedev(mj, mn);
		} else {
			tfd = dev_open(dv->devname, O_RDONLY);
			if (tfd >= 0) {
				fstat_is_blkdev(tfd, dv->devname, &rdev);
				close_fd(&tfd);
			} else {
				/* keep the open() error; stat below may change errno */
				int open_err = errno;

				if (!stat_is_blkdev(dv->devname, &rdev)) {
					if (dv->disposition == 'M')
						/* non-fatal. Also improbable */
						continue;
					goto abort;
				}
				if (dv->disposition == 'r')
					/* Be happy, the stat worked, that is
					 * enough for --remove
					 */
					;
				else {
					if (dv->disposition == 'M')
						/* non-fatal */
						continue;
					pr_err("Cannot open %s: %s\n", dv->devname,
					       strerror(open_err));
					goto abort;
				}
			}
		}
		switch (dv->disposition) {
		default:
			pr_err("internal error - devmode[%s]=%d\n", dv->devname, dv->disposition);
			goto abort;
		case 'a':
		case 'S': /* --add-spare */
		case 'j': /* --add-journal */
		case 'A':
		case 'M': /* --re-add missing */
		case 'F': /* --re-add faulty  */
		case 'c': /* --cluster-confirm */
			/* add the device */
			if (subarray) {
				pr_err("Cannot add disks to a \'member\' array, perform this operation on the parent container\n");
				goto abort;
			}

			/* Let's first try to write re-add to sysfs */
			if (rdev != 0 && (dv->disposition == 'A' || dv->disposition == 'F')) {
				sysfs_init_dev(&devinfo, rdev);
				if (sysfs_set_str(&info, &devinfo, "state", "re-add") == 0) {
					pr_err("re-add %s to %s succeed\n", dv->devname,
					       info.sys_name);
					break;
				}
			}

			if (dv->disposition == 'F')
				/* Need to remove first */
				hot_remove_disk(fd, rdev, force);

			/* Make sure it isn't in use (in 2.6 or later) */
			tfd = dev_open(dv->devname, O_RDONLY|O_EXCL);
			if (tfd >= 0) {
				/* We know no-one else is using it.  We'll
				 * need non-exclusive access to add it, so
				 * do that now.
				 */
				close_fd(&tfd);
				tfd = dev_open(dv->devname, O_RDONLY);
			}
			if (tfd < 0) {
				if (dv->disposition == 'M')
					continue;
				pr_err("Cannot open %s: %s\n",
				       dv->devname, strerror(errno));
				goto abort;
			}
			if (!frozen) {
				if (sysfs_freeze_array(&info) == 1)
					frozen = 1;
				else
					frozen = -1;
			}
			rv = Manage_add(fd, tfd, dv, tst, &array, force, verbose, devname, update,
					rdev, array_size, raid_slot);
			close_fd(&tfd);
			if (rv < 0)
				goto abort;
			if (rv > 0)
				count++;
			break;

		case 'r':
			/* hot remove */
			if (subarray) {
				pr_err("Cannot remove disks from a \'member\' array, perform this operation on the parent container\n");
				rv = -1;
			} else
				/* argument order follows the prototype: force, then verbose */
				rv = Manage_remove(tst, fd, dv, sysfd, rdev, force, verbose,
						   devname);
			close_fd(&sysfd);

			if (rv < 0)
				goto abort;
			if (rv > 0)
				count++;
			break;

		case 'f': /* set faulty */
			if (!is_remove_safe(&array, fd, dv->devname, verbose)) {
				pr_err("Cannot remove %s from %s, array will be failed.\n",
				       dv->devname, devname);
				close_fd(&sysfd);
				goto abort;
			}
			/* FALLTHRU - 'f' shares the fail logic with 'I' */
		case 'I':
			if (is_fd_valid(sysfd)) {
				static const char val[] = "faulty";

				rv = sysfs_write_descriptor(sysfd, val, strlen(val), &err);
			} else {
				rv = ioctl(fd, SET_DISK_FAULTY, rdev);
				if (rv)
					err = errno;
			}

			close_fd(&sysfd);

			if (rv == MDADM_STATUS_SUCCESS) {
				count++;

				pr_vrb("set %s faulty in %s\n", dv->devname, devname);
				break;
			}

			/* remembered so that the abort path can report "busy" */
			if (err == EBUSY)
				busy = 1;

			pr_err("set device faulty failed for %s:  %s\n", dv->devname, strerror(err));
			goto abort;
		case 'R': /* Mark as replaceable */
			if (subarray) {
				pr_err("Cannot replace disks in a \'member\' array, perform this operation on the parent container\n");
				rv = -1;
			} else {
				if (!frozen) {
					if (sysfs_freeze_array(&info) == 1)
						frozen = 1;
					else
						frozen = -1;
				}
				rv = Manage_replace(tst, fd, dv, rdev, verbose, devname);
			}
			if (rv < 0)
				goto abort;
			if (rv > 0)
				count++;
			break;
		case 'W': /* --with device that doesn't match */
			pr_err("No matching --replace device for --with %s\n", dv->devname);
			goto abort;
		case 'w': /* --with device which was matched */
			rv = Manage_with(tst, fd, dv, rdev, verbose, devname);
			if (rv < 0)
				goto abort;
			break;
		}
	}
	free(tst);
	free(subarray);
	/* undo the freeze taken before add/replace */
	if (frozen > 0)
		sysfs_set_str(&info, NULL, "sync_action", "idle");
	if (test && count == 0)
		return 2;
	return 0;

abort:
	free(tst);
	free(subarray);
	if (frozen > 0)
		sysfs_set_str(&info, NULL, "sync_action", "idle");
	return !test && busy ? 2 : 1;
}
/**
 * autodetect() - Ask the kernel to auto-start arrays.
 *
 * Opens the device "9:0" and issues the RAID_AUTORUN ioctl on it.
 *
 * Return: 0 when the ioctl succeeded, 1 when the device could not be
 * opened or the ioctl failed.
 */
int autodetect(void)
{
	int md_fd = dev_open("9:0", O_RDONLY);
	int status;

	if (md_fd < 0)
		return 1;

	status = (ioctl(md_fd, RAID_AUTORUN, 0) == 0) ? 0 : 1;
	close_fd(&md_fd);

	return status;
}
/**
 * Update_subarray() - Apply a metadata update to one subarray of a container.
 * @dev: name of the container device.
 * @subarray: identifier of the subarray inside the container.
 * @update: which update to perform.
 * @ident: identity data handed to the metadata handler's update_subarray().
 * @verbose: messages guarded by this are printed when it is >= 0.
 *
 * Only UOPT_PPL and UOPT_NO_PPL are allowed while the subarray is active.
 * When mdmon is running for the container the update is queued and flushed
 * to it, otherwise the metadata is synced directly.
 *
 * Return: 0 on success; 2 when the subarray cannot be opened or a
 * precondition fails; otherwise the non-zero value returned by the metadata
 * handler's update_subarray().
 */
int Update_subarray(char *dev, char *subarray, enum update_opt update,
		    struct mddev_ident *ident, int verbose)
{
	struct supertype supertype, *st = &supertype;
	int fd, rv = 2;
	struct mdinfo *info = NULL;
	char *update_verb = map_num(update_options, update);
	bool allow_active = update == UOPT_PPL || update == UOPT_NO_PPL;

	memset(st, 0, sizeof(*st));

	fd = open_subarray(dev, subarray, st, verbose < 0);
	if (fd < 0)
		return 2;

	if (!st->ss->update_subarray) {
		if (verbose >= 0)
			pr_err("Operation not supported for %s metadata\n",
			       st->ss->name);
		goto free_super;
	}

	if (!allow_active && is_subarray_active(subarray, st->devnm)) {
		if (verbose >= 0)
			pr_err("Subarray %s in %s is active, cannot update %s\n",
			       subarray, dev, update_verb);
		goto free_super;
	}

	/* with mdmon running, updates are queued rather than written here */
	if (mdmon_running(st->devnm))
		st->update_tail = &st->updates;

	info = st->ss->container_content(st, subarray);

	if (update == UOPT_PPL) {
		/* the level check needs the subarray details; do not dereference NULL */
		if (!info) {
			pr_err("Cannot read details of subarray %s in %s\n",
			       subarray, dev);
			goto free_super;
		}
		if (!is_level456(info->array.level)) {
			pr_err("RWH policy ppl is supported only for raid4, raid5 and raid6.\n");
			goto free_super;
		}
	}

	rv = st->ss->update_subarray(st, subarray, update, ident);

	if (rv) {
		if (verbose >= 0)
			pr_err("Failed to update %s of subarray-%s in %s\n",
			       update_verb, subarray, dev);
	} else if (st->update_tail)
		flush_metadata_updates(st);
	else
		st->ss->sync_metadata(st);

	if (rv == 0 && update == UOPT_NAME && verbose >= 0)
		pr_err("Updated subarray-%s name from %s, UUIDs may have changed\n",
		       subarray, dev);

free_super:
	/* NOTE(review): only the head of the list is freed, as before - confirm
	 * whether container_content() results need sysfs_free() instead.
	 */
	free(info);
	st->ss->free_super(st);
	close_fd(&fd);

	return rv;
}
/* Move spare from one array to another.  If adding to the destination
 * array fails, add it back to the original array.
 * The device is addressed as "major:minor", built from devid.
 * Returns 1 on success, 0 on failure */
int move_spare(char *from_devname, char *to_devname, dev_t devid)
{
	struct mddev_dev devlist;
	/* large enough for two 32-bit decimal numbers, ':' and the NUL */
	char devname[32];

	/* try to remove and add */
	int fd1 = open(to_devname, O_RDONLY);
	int fd2 = open(from_devname, O_RDONLY);

	if (fd1 < 0 || fd2 < 0) {
		close_fd(&fd1);
		close_fd(&fd2);
		return 0;
	}

	/* start from a fully defined entry; no field is left indeterminate */
	memset(&devlist, 0, sizeof(devlist));
	devlist.next = NULL;
	devlist.used = 0;
	devlist.writemostly = FlagDefault;
	devlist.failfast = FlagDefault;
	devlist.devname = devname;
	snprintf(devname, sizeof(devname), "%u:%u",
		 (unsigned int)major(devid), (unsigned int)minor(devid));

	devlist.disposition = 'r';
	if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, UOPT_UNDEFINED, 0) == 0) {
		devlist.disposition = 'a';
		if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0,
				   UOPT_UNDEFINED, 0) == 0) {
			/* make sure manager is aware of changes */
			ping_manager(to_devname);
			ping_manager(from_devname);
			close_fd(&fd1);
			close_fd(&fd2);
			return 1;
		}
		else
			/* adding failed: put the spare back where it came from */
			Manage_subdevs(from_devname, fd2, &devlist,
				       -1, 0, UOPT_UNDEFINED, 0);
	}
	close_fd(&fd1);
	close_fd(&fd2);
	return 0;
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/mirrors_git_kernel_org/pub_scm_utils_mdadm_mdadm.git
[email protected]:mirrors_git_kernel_org/pub_scm_utils_mdadm_mdadm.git
mirrors_git_kernel_org
pub_scm_utils_mdadm_mdadm
pub_scm_utils_mdadm_mdadm
master

搜索帮助