Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md: allow resync_start to be set while an array is active.
  md/raid10: reformat some loops with less indenting.
  md/raid10: remove unused variable.
  md/raid10: make more use of 'slot' in raid10d.
  md/raid10: some tidying up in fix_read_error.
  md/raid1: improve handling of pages allocated for write-behind.
  md/raid1: try fix_sync_read_error before process_checks.
  md/raid1: tidy up new functions: process_checks and fix_sync_read_error.
  md/raid1: split out two sub-functions from sync_request_write
  md: make error_handler functions more uniform and correct.
  md/multipath: discard ->working_disks in favour of ->degraded
  md/raid1: clean up read_balance.
  md: simplify raid10 read_balance
  md/bitmap: fix saving of events_cleared and other state.
  md: reject a re-add request that cannot be honoured.
  md: Fix race when creating a new md device.
This commit is contained in:
commit
4b382d0643
8 changed files with 538 additions and 537 deletions
|
@ -493,11 +493,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
|
|||
spin_unlock_irqrestore(&bitmap->lock, flags);
|
||||
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
|
||||
sb->events = cpu_to_le64(bitmap->mddev->events);
|
||||
if (bitmap->mddev->events < bitmap->events_cleared) {
|
||||
if (bitmap->mddev->events < bitmap->events_cleared)
|
||||
/* rocking back to read-only */
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
|
||||
}
|
||||
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
|
||||
sb->state = cpu_to_le32(bitmap->flags);
|
||||
/* Just in case these have been changed via sysfs: */
|
||||
sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
|
||||
sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
|
||||
|
@ -618,7 +618,7 @@ success:
|
|||
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
|
||||
bitmap->flags |= BITMAP_HOSTENDIAN;
|
||||
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
|
||||
if (sb->state & cpu_to_le32(BITMAP_STALE))
|
||||
if (bitmap->flags & BITMAP_STALE)
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
err = 0;
|
||||
out:
|
||||
|
@ -652,9 +652,11 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
|
|||
switch (op) {
|
||||
case MASK_SET:
|
||||
sb->state |= cpu_to_le32(bits);
|
||||
bitmap->flags |= bits;
|
||||
break;
|
||||
case MASK_UNSET:
|
||||
sb->state &= cpu_to_le32(~bits);
|
||||
bitmap->flags &= ~bits;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
|
|
|
@ -3324,7 +3324,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
|
|||
char *e;
|
||||
unsigned long long n = simple_strtoull(buf, &e, 10);
|
||||
|
||||
if (mddev->pers)
|
||||
if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
|
||||
return -EBUSY;
|
||||
if (cmd_match(buf, "none"))
|
||||
n = MaxSector;
|
||||
|
@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name)
|
|||
disk->fops = &md_fops;
|
||||
disk->private_data = mddev;
|
||||
disk->queue = mddev->queue;
|
||||
blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
|
||||
/* Allow extended partitions. This makes the
|
||||
* 'mdp' device redundant, but we can't really
|
||||
* remove it now.
|
||||
*/
|
||||
disk->flags |= GENHD_FL_EXT_DEVT;
|
||||
add_disk(disk);
|
||||
mddev->gendisk = disk;
|
||||
/* As soon as we call add_disk(), another thread could get
|
||||
* through to md_open, so make sure it doesn't get too far
|
||||
*/
|
||||
mutex_lock(&mddev->open_mutex);
|
||||
add_disk(disk);
|
||||
|
||||
error = kobject_init_and_add(&mddev->kobj, &md_ktype,
|
||||
&disk_to_dev(disk)->kobj, "%s", "md");
|
||||
if (error) {
|
||||
|
@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name)
|
|||
if (mddev->kobj.sd &&
|
||||
sysfs_create_group(&mddev->kobj, &md_bitmap_group))
|
||||
printk(KERN_DEBUG "pointless warning\n");
|
||||
|
||||
blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
|
||||
mutex_unlock(&mddev->open_mutex);
|
||||
abort:
|
||||
mutex_unlock(&disks_mutex);
|
||||
if (!error && mddev->kobj.sd) {
|
||||
|
@ -5211,6 +5216,16 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
|
|||
} else
|
||||
super_types[mddev->major_version].
|
||||
validate_super(mddev, rdev);
|
||||
if ((info->state & (1<<MD_DISK_SYNC)) &&
|
||||
(!test_bit(In_sync, &rdev->flags) ||
|
||||
rdev->raid_disk != info->raid_disk)) {
|
||||
/* This was a hot-add request, but events doesn't
|
||||
* match, so reject it.
|
||||
*/
|
||||
export_rdev(rdev);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags))
|
||||
rdev->saved_raid_disk = rdev->raid_disk;
|
||||
else
|
||||
|
|
|
@ -146,7 +146,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
|
|||
int i;
|
||||
|
||||
seq_printf (seq, " [%d/%d] [", conf->raid_disks,
|
||||
conf->working_disks);
|
||||
conf->raid_disks - mddev->degraded);
|
||||
for (i = 0; i < conf->raid_disks; i++)
|
||||
seq_printf (seq, "%s",
|
||||
conf->multipaths[i].rdev &&
|
||||
|
@ -186,35 +186,36 @@ static int multipath_congested(void *data, int bits)
|
|||
static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
|
||||
{
|
||||
multipath_conf_t *conf = mddev->private;
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
if (conf->working_disks <= 1) {
|
||||
if (conf->raid_disks - mddev->degraded <= 1) {
|
||||
/*
|
||||
* Uh oh, we can do nothing if this is our last path, but
|
||||
* first check if this is a queued request for a device
|
||||
* which has just failed.
|
||||
*/
|
||||
printk(KERN_ALERT
|
||||
"multipath: only one IO path left and IO error.\n");
|
||||
"multipath: only one IO path left and IO error.\n");
|
||||
/* leave it active... it's all we have */
|
||||
} else {
|
||||
/*
|
||||
* Mark disk as unusable
|
||||
*/
|
||||
if (!test_bit(Faulty, &rdev->flags)) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
conf->working_disks--;
|
||||
mddev->degraded++;
|
||||
printk(KERN_ALERT "multipath: IO failure on %s,"
|
||||
" disabling IO path.\n"
|
||||
"multipath: Operation continuing"
|
||||
" on %d IO paths.\n",
|
||||
bdevname (rdev->bdev,b),
|
||||
conf->working_disks);
|
||||
}
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Mark disk as unusable
|
||||
*/
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
mddev->degraded++;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
}
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
printk(KERN_ALERT "multipath: IO failure on %s,"
|
||||
" disabling IO path.\n"
|
||||
"multipath: Operation continuing"
|
||||
" on %d IO paths.\n",
|
||||
bdevname(rdev->bdev, b),
|
||||
conf->raid_disks - mddev->degraded);
|
||||
}
|
||||
|
||||
static void print_multipath_conf (multipath_conf_t *conf)
|
||||
|
@ -227,7 +228,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
|
|||
printk("(conf==NULL)\n");
|
||||
return;
|
||||
}
|
||||
printk(" --- wd:%d rd:%d\n", conf->working_disks,
|
||||
printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
|
||||
conf->raid_disks);
|
||||
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
|
@ -274,10 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
PAGE_CACHE_SIZE - 1);
|
||||
}
|
||||
|
||||
conf->working_disks++;
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
mddev->degraded--;
|
||||
rdev->raid_disk = path;
|
||||
set_bit(In_sync, &rdev->flags);
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
rcu_assign_pointer(p->rdev, rdev);
|
||||
err = 0;
|
||||
md_integrity_add_rdev(rdev, mddev);
|
||||
|
@ -391,6 +393,7 @@ static int multipath_run (mddev_t *mddev)
|
|||
int disk_idx;
|
||||
struct multipath_info *disk;
|
||||
mdk_rdev_t *rdev;
|
||||
int working_disks;
|
||||
|
||||
if (md_check_no_bitmap(mddev))
|
||||
return -EINVAL;
|
||||
|
@ -424,7 +427,7 @@ static int multipath_run (mddev_t *mddev)
|
|||
goto out_free_conf;
|
||||
}
|
||||
|
||||
conf->working_disks = 0;
|
||||
working_disks = 0;
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||
disk_idx = rdev->raid_disk;
|
||||
if (disk_idx < 0 ||
|
||||
|
@ -446,7 +449,7 @@ static int multipath_run (mddev_t *mddev)
|
|||
}
|
||||
|
||||
if (!test_bit(Faulty, &rdev->flags))
|
||||
conf->working_disks++;
|
||||
working_disks++;
|
||||
}
|
||||
|
||||
conf->raid_disks = mddev->raid_disks;
|
||||
|
@ -454,12 +457,12 @@ static int multipath_run (mddev_t *mddev)
|
|||
spin_lock_init(&conf->device_lock);
|
||||
INIT_LIST_HEAD(&conf->retry_list);
|
||||
|
||||
if (!conf->working_disks) {
|
||||
if (!working_disks) {
|
||||
printk(KERN_ERR "multipath: no operational IO paths for %s\n",
|
||||
mdname(mddev));
|
||||
goto out_free_conf;
|
||||
}
|
||||
mddev->degraded = conf->raid_disks - conf->working_disks;
|
||||
mddev->degraded = conf->raid_disks - working_disks;
|
||||
|
||||
conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
|
||||
sizeof(struct multipath_bh));
|
||||
|
@ -481,7 +484,8 @@ static int multipath_run (mddev_t *mddev)
|
|||
|
||||
printk(KERN_INFO
|
||||
"multipath: array %s active with %d out of %d IO paths\n",
|
||||
mdname(mddev), conf->working_disks, mddev->raid_disks);
|
||||
mdname(mddev), conf->raid_disks - mddev->degraded,
|
||||
mddev->raid_disks);
|
||||
/*
|
||||
* Ok, everything is just fine now
|
||||
*/
|
||||
|
|
|
@ -9,7 +9,6 @@ struct multipath_private_data {
|
|||
mddev_t *mddev;
|
||||
struct multipath_info *multipaths;
|
||||
int raid_disks;
|
||||
int working_disks;
|
||||
spinlock_t device_lock;
|
||||
struct list_head retry_list;
|
||||
|
||||
|
|
|
@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
|
|||
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
|
||||
}
|
||||
|
||||
static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv,
|
||||
int behind)
|
||||
static void r1_bio_write_done(r1bio_t *r1_bio)
|
||||
{
|
||||
if (atomic_dec_and_test(&r1_bio->remaining))
|
||||
{
|
||||
/* it really is the end of this request */
|
||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
|
||||
/* free extra copy of the data pages */
|
||||
int i = vcnt;
|
||||
int i = r1_bio->behind_page_count;
|
||||
while (i--)
|
||||
safe_put_page(bv[i].bv_page);
|
||||
safe_put_page(r1_bio->behind_pages[i]);
|
||||
kfree(r1_bio->behind_pages);
|
||||
r1_bio->behind_pages = NULL;
|
||||
}
|
||||
/* clear the bitmap if all writes complete successfully */
|
||||
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
|
||||
r1_bio->sectors,
|
||||
!test_bit(R1BIO_Degraded, &r1_bio->state),
|
||||
behind);
|
||||
test_bit(R1BIO_BehindIO, &r1_bio->state));
|
||||
md_write_end(r1_bio->mddev);
|
||||
raid_end_bio_io(r1_bio);
|
||||
}
|
||||
|
@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
|
|||
* Let's see if all mirrored write operations have finished
|
||||
* already.
|
||||
*/
|
||||
r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
|
||||
r1_bio_write_done(r1_bio);
|
||||
|
||||
if (to_put)
|
||||
bio_put(to_put);
|
||||
|
@ -411,10 +412,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|||
{
|
||||
const sector_t this_sector = r1_bio->sector;
|
||||
const int sectors = r1_bio->sectors;
|
||||
int new_disk = -1;
|
||||
int start_disk;
|
||||
int best_disk;
|
||||
int i;
|
||||
sector_t new_distance, current_distance;
|
||||
sector_t best_dist;
|
||||
mdk_rdev_t *rdev;
|
||||
int choose_first;
|
||||
|
||||
|
@ -425,6 +426,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|||
* We take the first readable disk when above the resync window.
|
||||
*/
|
||||
retry:
|
||||
best_disk = -1;
|
||||
best_dist = MaxSector;
|
||||
if (conf->mddev->recovery_cp < MaxSector &&
|
||||
(this_sector + sectors >= conf->next_resync)) {
|
||||
choose_first = 1;
|
||||
|
@ -434,8 +437,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|||
start_disk = conf->last_used;
|
||||
}
|
||||
|
||||
/* make sure the disk is operational */
|
||||
for (i = 0 ; i < conf->raid_disks ; i++) {
|
||||
sector_t dist;
|
||||
int disk = start_disk + i;
|
||||
if (disk >= conf->raid_disks)
|
||||
disk -= conf->raid_disks;
|
||||
|
@ -443,60 +446,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|||
rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
if (r1_bio->bios[disk] == IO_BLOCKED
|
||||
|| rdev == NULL
|
||||
|| !test_bit(In_sync, &rdev->flags))
|
||||
|| test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
|
||||
new_disk = disk;
|
||||
if (!test_bit(WriteMostly, &rdev->flags))
|
||||
break;
|
||||
}
|
||||
|
||||
if (new_disk < 0 || choose_first)
|
||||
goto rb_out;
|
||||
|
||||
/*
|
||||
* Don't change to another disk for sequential reads:
|
||||
*/
|
||||
if (conf->next_seq_sect == this_sector)
|
||||
goto rb_out;
|
||||
if (this_sector == conf->mirrors[new_disk].head_position)
|
||||
goto rb_out;
|
||||
|
||||
current_distance = abs(this_sector
|
||||
- conf->mirrors[new_disk].head_position);
|
||||
|
||||
/* look for a better disk - i.e. head is closer */
|
||||
start_disk = new_disk;
|
||||
for (i = 1; i < conf->raid_disks; i++) {
|
||||
int disk = start_disk + 1;
|
||||
if (disk >= conf->raid_disks)
|
||||
disk -= conf->raid_disks;
|
||||
|
||||
rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
if (r1_bio->bios[disk] == IO_BLOCKED
|
||||
|| rdev == NULL
|
||||
|| !test_bit(In_sync, &rdev->flags)
|
||||
|| test_bit(WriteMostly, &rdev->flags))
|
||||
if (!test_bit(In_sync, &rdev->flags) &&
|
||||
rdev->recovery_offset < this_sector + sectors)
|
||||
continue;
|
||||
|
||||
if (!atomic_read(&rdev->nr_pending)) {
|
||||
new_disk = disk;
|
||||
if (test_bit(WriteMostly, &rdev->flags)) {
|
||||
/* Don't balance among write-mostly, just
|
||||
* use the first as a last resort */
|
||||
if (best_disk < 0)
|
||||
best_disk = disk;
|
||||
continue;
|
||||
}
|
||||
/* This is a reasonable device to use. It might
|
||||
* even be best.
|
||||
*/
|
||||
dist = abs(this_sector - conf->mirrors[disk].head_position);
|
||||
if (choose_first
|
||||
/* Don't change to another disk for sequential reads */
|
||||
|| conf->next_seq_sect == this_sector
|
||||
|| dist == 0
|
||||
/* If device is idle, use it */
|
||||
|| atomic_read(&rdev->nr_pending) == 0) {
|
||||
best_disk = disk;
|
||||
break;
|
||||
}
|
||||
new_distance = abs(this_sector - conf->mirrors[disk].head_position);
|
||||
if (new_distance < current_distance) {
|
||||
current_distance = new_distance;
|
||||
new_disk = disk;
|
||||
if (dist < best_dist) {
|
||||
best_dist = dist;
|
||||
best_disk = disk;
|
||||
}
|
||||
}
|
||||
|
||||
rb_out:
|
||||
if (new_disk >= 0) {
|
||||
rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
|
||||
if (best_disk >= 0) {
|
||||
rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
|
||||
if (!rdev)
|
||||
goto retry;
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
if (!test_bit(In_sync, &rdev->flags)) {
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
/* cannot risk returning a device that failed
|
||||
* before we inc'ed nr_pending
|
||||
*/
|
||||
|
@ -504,11 +490,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
|
|||
goto retry;
|
||||
}
|
||||
conf->next_seq_sect = this_sector + sectors;
|
||||
conf->last_used = new_disk;
|
||||
conf->last_used = best_disk;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return new_disk;
|
||||
return best_disk;
|
||||
}
|
||||
|
||||
static int raid1_congested(void *data, int bits)
|
||||
|
@ -675,37 +661,36 @@ static void unfreeze_array(conf_t *conf)
|
|||
|
||||
|
||||
/* duplicate the data pages for behind I/O
|
||||
* We return a list of bio_vec rather than just page pointers
|
||||
* as it makes freeing easier
|
||||
*/
|
||||
static struct bio_vec *alloc_behind_pages(struct bio *bio)
|
||||
static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
|
||||
{
|
||||
int i;
|
||||
struct bio_vec *bvec;
|
||||
struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
|
||||
struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
|
||||
GFP_NOIO);
|
||||
if (unlikely(!pages))
|
||||
goto do_sync_io;
|
||||
return;
|
||||
|
||||
bio_for_each_segment(bvec, bio, i) {
|
||||
pages[i].bv_page = alloc_page(GFP_NOIO);
|
||||
if (unlikely(!pages[i].bv_page))
|
||||
pages[i] = alloc_page(GFP_NOIO);
|
||||
if (unlikely(!pages[i]))
|
||||
goto do_sync_io;
|
||||
memcpy(kmap(pages[i].bv_page) + bvec->bv_offset,
|
||||
memcpy(kmap(pages[i]) + bvec->bv_offset,
|
||||
kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
|
||||
kunmap(pages[i].bv_page);
|
||||
kunmap(pages[i]);
|
||||
kunmap(bvec->bv_page);
|
||||
}
|
||||
|
||||
return pages;
|
||||
r1_bio->behind_pages = pages;
|
||||
r1_bio->behind_page_count = bio->bi_vcnt;
|
||||
set_bit(R1BIO_BehindIO, &r1_bio->state);
|
||||
return;
|
||||
|
||||
do_sync_io:
|
||||
if (pages)
|
||||
for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++)
|
||||
put_page(pages[i].bv_page);
|
||||
for (i = 0; i < bio->bi_vcnt; i++)
|
||||
if (pages[i])
|
||||
put_page(pages[i]);
|
||||
kfree(pages);
|
||||
PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int make_request(mddev_t *mddev, struct bio * bio)
|
||||
|
@ -717,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
|||
int i, targets = 0, disks;
|
||||
struct bitmap *bitmap;
|
||||
unsigned long flags;
|
||||
struct bio_vec *behind_pages = NULL;
|
||||
const int rw = bio_data_dir(bio);
|
||||
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
|
||||
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
|
||||
|
@ -870,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
|||
if (bitmap &&
|
||||
(atomic_read(&bitmap->behind_writes)
|
||||
< mddev->bitmap_info.max_write_behind) &&
|
||||
!waitqueue_active(&bitmap->behind_wait) &&
|
||||
(behind_pages = alloc_behind_pages(bio)) != NULL)
|
||||
set_bit(R1BIO_BehindIO, &r1_bio->state);
|
||||
!waitqueue_active(&bitmap->behind_wait))
|
||||
alloc_behind_pages(bio, r1_bio);
|
||||
|
||||
atomic_set(&r1_bio->remaining, 1);
|
||||
atomic_set(&r1_bio->behind_remaining, 0);
|
||||
|
@ -893,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
|||
mbio->bi_rw = WRITE | do_flush_fua | do_sync;
|
||||
mbio->bi_private = r1_bio;
|
||||
|
||||
if (behind_pages) {
|
||||
if (r1_bio->behind_pages) {
|
||||
struct bio_vec *bvec;
|
||||
int j;
|
||||
|
||||
|
@ -905,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
|||
* them all
|
||||
*/
|
||||
__bio_for_each_segment(bvec, mbio, j, 0)
|
||||
bvec->bv_page = behind_pages[j].bv_page;
|
||||
bvec->bv_page = r1_bio->behind_pages[j];
|
||||
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
|
||||
atomic_inc(&r1_bio->behind_remaining);
|
||||
}
|
||||
|
@ -915,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
|||
bio_list_add(&conf->pending_bio_list, mbio);
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
}
|
||||
r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
|
||||
kfree(behind_pages); /* the behind pages are attached to the bios now */
|
||||
r1_bio_write_done(r1_bio);
|
||||
|
||||
/* In case raid1d snuck in to freeze_array */
|
||||
wake_up(&conf->wait_barrier);
|
||||
|
@ -1196,6 +1178,193 @@ static void end_sync_write(struct bio *bio, int error)
|
|||
}
|
||||
}
|
||||
|
||||
static int fix_sync_read_error(r1bio_t *r1_bio)
|
||||
{
|
||||
/* Try some synchronous reads of other devices to get
|
||||
* good data, much like with normal read errors. Only
|
||||
* read into the pages we already have so we don't
|
||||
* need to re-issue the read request.
|
||||
* We don't need to freeze the array, because being in an
|
||||
* active sync request, there is no normal IO, and
|
||||
* no overlapping syncs.
|
||||
*/
|
||||
mddev_t *mddev = r1_bio->mddev;
|
||||
conf_t *conf = mddev->private;
|
||||
struct bio *bio = r1_bio->bios[r1_bio->read_disk];
|
||||
sector_t sect = r1_bio->sector;
|
||||
int sectors = r1_bio->sectors;
|
||||
int idx = 0;
|
||||
|
||||
while(sectors) {
|
||||
int s = sectors;
|
||||
int d = r1_bio->read_disk;
|
||||
int success = 0;
|
||||
mdk_rdev_t *rdev;
|
||||
int start;
|
||||
|
||||
if (s > (PAGE_SIZE>>9))
|
||||
s = PAGE_SIZE >> 9;
|
||||
do {
|
||||
if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
|
||||
/* No rcu protection needed here devices
|
||||
* can only be removed when no resync is
|
||||
* active, and resync is currently active
|
||||
*/
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (sync_page_io(rdev,
|
||||
sect,
|
||||
s<<9,
|
||||
bio->bi_io_vec[idx].bv_page,
|
||||
READ, false)) {
|
||||
success = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
d++;
|
||||
if (d == conf->raid_disks)
|
||||
d = 0;
|
||||
} while (!success && d != r1_bio->read_disk);
|
||||
|
||||
if (!success) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
/* Cannot read from anywhere, array is toast */
|
||||
md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
|
||||
printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
|
||||
" for block %llu\n",
|
||||
mdname(mddev),
|
||||
bdevname(bio->bi_bdev, b),
|
||||
(unsigned long long)r1_bio->sector);
|
||||
md_done_sync(mddev, r1_bio->sectors, 0);
|
||||
put_buf(r1_bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
start = d;
|
||||
/* write it back and re-read */
|
||||
while (d != r1_bio->read_disk) {
|
||||
if (d == 0)
|
||||
d = conf->raid_disks;
|
||||
d--;
|
||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||
continue;
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (sync_page_io(rdev,
|
||||
sect,
|
||||
s<<9,
|
||||
bio->bi_io_vec[idx].bv_page,
|
||||
WRITE, false) == 0) {
|
||||
r1_bio->bios[d]->bi_end_io = NULL;
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
md_error(mddev, rdev);
|
||||
} else
|
||||
atomic_add(s, &rdev->corrected_errors);
|
||||
}
|
||||
d = start;
|
||||
while (d != r1_bio->read_disk) {
|
||||
if (d == 0)
|
||||
d = conf->raid_disks;
|
||||
d--;
|
||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||
continue;
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (sync_page_io(rdev,
|
||||
sect,
|
||||
s<<9,
|
||||
bio->bi_io_vec[idx].bv_page,
|
||||
READ, false) == 0)
|
||||
md_error(mddev, rdev);
|
||||
}
|
||||
sectors -= s;
|
||||
sect += s;
|
||||
idx ++;
|
||||
}
|
||||
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
||||
set_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int process_checks(r1bio_t *r1_bio)
|
||||
{
|
||||
/* We have read all readable devices. If we haven't
|
||||
* got the block, then there is no hope left.
|
||||
* If we have, then we want to do a comparison
|
||||
* and skip the write if everything is the same.
|
||||
* If any blocks failed to read, then we need to
|
||||
* attempt an over-write
|
||||
*/
|
||||
mddev_t *mddev = r1_bio->mddev;
|
||||
conf_t *conf = mddev->private;
|
||||
int primary;
|
||||
int i;
|
||||
|
||||
for (primary = 0; primary < conf->raid_disks; primary++)
|
||||
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
|
||||
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
|
||||
r1_bio->bios[primary]->bi_end_io = NULL;
|
||||
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
|
||||
break;
|
||||
}
|
||||
r1_bio->read_disk = primary;
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
int j;
|
||||
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
|
||||
struct bio *pbio = r1_bio->bios[primary];
|
||||
struct bio *sbio = r1_bio->bios[i];
|
||||
int size;
|
||||
|
||||
if (r1_bio->bios[i]->bi_end_io != end_sync_read)
|
||||
continue;
|
||||
|
||||
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
|
||||
for (j = vcnt; j-- ; ) {
|
||||
struct page *p, *s;
|
||||
p = pbio->bi_io_vec[j].bv_page;
|
||||
s = sbio->bi_io_vec[j].bv_page;
|
||||
if (memcmp(page_address(p),
|
||||
page_address(s),
|
||||
PAGE_SIZE))
|
||||
break;
|
||||
}
|
||||
} else
|
||||
j = 0;
|
||||
if (j >= 0)
|
||||
mddev->resync_mismatches += r1_bio->sectors;
|
||||
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
|
||||
&& test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
|
||||
/* No need to write to this device. */
|
||||
sbio->bi_end_io = NULL;
|
||||
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
|
||||
continue;
|
||||
}
|
||||
/* fixup the bio for reuse */
|
||||
sbio->bi_vcnt = vcnt;
|
||||
sbio->bi_size = r1_bio->sectors << 9;
|
||||
sbio->bi_idx = 0;
|
||||
sbio->bi_phys_segments = 0;
|
||||
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||
sbio->bi_flags |= 1 << BIO_UPTODATE;
|
||||
sbio->bi_next = NULL;
|
||||
sbio->bi_sector = r1_bio->sector +
|
||||
conf->mirrors[i].rdev->data_offset;
|
||||
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
||||
size = sbio->bi_size;
|
||||
for (j = 0; j < vcnt ; j++) {
|
||||
struct bio_vec *bi;
|
||||
bi = &sbio->bi_io_vec[j];
|
||||
bi->bv_offset = 0;
|
||||
if (size > PAGE_SIZE)
|
||||
bi->bv_len = PAGE_SIZE;
|
||||
else
|
||||
bi->bv_len = size;
|
||||
size -= PAGE_SIZE;
|
||||
memcpy(page_address(bi->bv_page),
|
||||
page_address(pbio->bi_io_vec[j].bv_page),
|
||||
PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
|
||||
{
|
||||
conf_t *conf = mddev->private;
|
||||
|
@ -1205,185 +1374,14 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
|
|||
|
||||
bio = r1_bio->bios[r1_bio->read_disk];
|
||||
|
||||
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||
/* We have read all readable devices. If we haven't
|
||||
* got the block, then there is no hope left.
|
||||
* If we have, then we want to do a comparison
|
||||
* and skip the write if everything is the same.
|
||||
* If any blocks failed to read, then we need to
|
||||
* attempt an over-write
|
||||
*/
|
||||
int primary;
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
||||
for (i=0; i<mddev->raid_disks; i++)
|
||||
if (r1_bio->bios[i]->bi_end_io == end_sync_read)
|
||||
md_error(mddev, conf->mirrors[i].rdev);
|
||||
|
||||
md_done_sync(mddev, r1_bio->sectors, 1);
|
||||
put_buf(r1_bio);
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
|
||||
/* ouch - failed to read all of that. */
|
||||
if (!fix_sync_read_error(r1_bio))
|
||||
return;
|
||||
}
|
||||
for (primary=0; primary<mddev->raid_disks; primary++)
|
||||
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
|
||||
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
|
||||
r1_bio->bios[primary]->bi_end_io = NULL;
|
||||
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
|
||||
break;
|
||||
}
|
||||
r1_bio->read_disk = primary;
|
||||
for (i=0; i<mddev->raid_disks; i++)
|
||||
if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
|
||||
int j;
|
||||
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
|
||||
struct bio *pbio = r1_bio->bios[primary];
|
||||
struct bio *sbio = r1_bio->bios[i];
|
||||
|
||||
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
|
||||
for (j = vcnt; j-- ; ) {
|
||||
struct page *p, *s;
|
||||
p = pbio->bi_io_vec[j].bv_page;
|
||||
s = sbio->bi_io_vec[j].bv_page;
|
||||
if (memcmp(page_address(p),
|
||||
page_address(s),
|
||||
PAGE_SIZE))
|
||||
break;
|
||||
}
|
||||
} else
|
||||
j = 0;
|
||||
if (j >= 0)
|
||||
mddev->resync_mismatches += r1_bio->sectors;
|
||||
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
|
||||
&& test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
|
||||
sbio->bi_end_io = NULL;
|
||||
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
|
||||
} else {
|
||||
/* fixup the bio for reuse */
|
||||
int size;
|
||||
sbio->bi_vcnt = vcnt;
|
||||
sbio->bi_size = r1_bio->sectors << 9;
|
||||
sbio->bi_idx = 0;
|
||||
sbio->bi_phys_segments = 0;
|
||||
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||
sbio->bi_flags |= 1 << BIO_UPTODATE;
|
||||
sbio->bi_next = NULL;
|
||||
sbio->bi_sector = r1_bio->sector +
|
||||
conf->mirrors[i].rdev->data_offset;
|
||||
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
||||
size = sbio->bi_size;
|
||||
for (j = 0; j < vcnt ; j++) {
|
||||
struct bio_vec *bi;
|
||||
bi = &sbio->bi_io_vec[j];
|
||||
bi->bv_offset = 0;
|
||||
if (size > PAGE_SIZE)
|
||||
bi->bv_len = PAGE_SIZE;
|
||||
else
|
||||
bi->bv_len = size;
|
||||
size -= PAGE_SIZE;
|
||||
memcpy(page_address(bi->bv_page),
|
||||
page_address(pbio->bi_io_vec[j].bv_page),
|
||||
PAGE_SIZE);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
||||
/* ouch - failed to read all of that.
|
||||
* Try some synchronous reads of other devices to get
|
||||
* good data, much like with normal read errors. Only
|
||||
* read into the pages we already have so we don't
|
||||
* need to re-issue the read request.
|
||||
* We don't need to freeze the array, because being in an
|
||||
* active sync request, there is no normal IO, and
|
||||
* no overlapping syncs.
|
||||
*/
|
||||
sector_t sect = r1_bio->sector;
|
||||
int sectors = r1_bio->sectors;
|
||||
int idx = 0;
|
||||
|
||||
while(sectors) {
|
||||
int s = sectors;
|
||||
int d = r1_bio->read_disk;
|
||||
int success = 0;
|
||||
mdk_rdev_t *rdev;
|
||||
|
||||
if (s > (PAGE_SIZE>>9))
|
||||
s = PAGE_SIZE >> 9;
|
||||
do {
|
||||
if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
|
||||
/* No rcu protection needed here devices
|
||||
* can only be removed when no resync is
|
||||
* active, and resync is currently active
|
||||
*/
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (sync_page_io(rdev,
|
||||
sect,
|
||||
s<<9,
|
||||
bio->bi_io_vec[idx].bv_page,
|
||||
READ, false)) {
|
||||
success = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
d++;
|
||||
if (d == conf->raid_disks)
|
||||
d = 0;
|
||||
} while (!success && d != r1_bio->read_disk);
|
||||
|
||||
if (success) {
|
||||
int start = d;
|
||||
/* write it back and re-read */
|
||||
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
||||
while (d != r1_bio->read_disk) {
|
||||
if (d == 0)
|
||||
d = conf->raid_disks;
|
||||
d--;
|
||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||
continue;
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
atomic_add(s, &rdev->corrected_errors);
|
||||
if (sync_page_io(rdev,
|
||||
sect,
|
||||
s<<9,
|
||||
bio->bi_io_vec[idx].bv_page,
|
||||
WRITE, false) == 0)
|
||||
md_error(mddev, rdev);
|
||||
}
|
||||
d = start;
|
||||
while (d != r1_bio->read_disk) {
|
||||
if (d == 0)
|
||||
d = conf->raid_disks;
|
||||
d--;
|
||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||
continue;
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
if (sync_page_io(rdev,
|
||||
sect,
|
||||
s<<9,
|
||||
bio->bi_io_vec[idx].bv_page,
|
||||
READ, false) == 0)
|
||||
md_error(mddev, rdev);
|
||||
}
|
||||
} else {
|
||||
char b[BDEVNAME_SIZE];
|
||||
/* Cannot read from anywhere, array is toast */
|
||||
md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
|
||||
printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
|
||||
" for block %llu\n",
|
||||
mdname(mddev),
|
||||
bdevname(bio->bi_bdev, b),
|
||||
(unsigned long long)r1_bio->sector);
|
||||
md_done_sync(mddev, r1_bio->sectors, 0);
|
||||
put_buf(r1_bio);
|
||||
return;
|
||||
}
|
||||
sectors -= s;
|
||||
sect += s;
|
||||
idx ++;
|
||||
}
|
||||
}
|
||||
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
if (process_checks(r1_bio) < 0)
|
||||
return;
|
||||
/*
|
||||
* schedule writes
|
||||
*/
|
||||
|
@ -2063,7 +2061,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
|
|||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
revalidate_disk(mddev->gendisk);
|
||||
if (sectors > mddev->dev_sectors &&
|
||||
mddev->recovery_cp == MaxSector) {
|
||||
mddev->recovery_cp > mddev->dev_sectors) {
|
||||
mddev->recovery_cp = mddev->dev_sectors;
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
}
|
||||
|
|
|
@ -94,7 +94,9 @@ struct r1bio_s {
|
|||
int read_disk;
|
||||
|
||||
struct list_head retry_list;
|
||||
struct bitmap_update *bitmap_update;
|
||||
/* Next two are only valid when R1BIO_BehindIO is set */
|
||||
struct page **behind_pages;
|
||||
int behind_page_count;
|
||||
/*
|
||||
* if the IO is in WRITE direction, then multiple bios are used.
|
||||
* We choose the number when they are allocated.
|
||||
|
|
|
@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error)
|
|||
*/
|
||||
set_bit(R10BIO_Uptodate, &r10_bio->state);
|
||||
raid_end_bio_io(r10_bio);
|
||||
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
|
||||
} else {
|
||||
/*
|
||||
* oops, read error:
|
||||
* oops, read error - keep the refcount on the rdev
|
||||
*/
|
||||
char b[BDEVNAME_SIZE];
|
||||
if (printk_ratelimit())
|
||||
|
@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error)
|
|||
bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
|
||||
reschedule_retry(r10_bio);
|
||||
}
|
||||
|
||||
rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
|
||||
}
|
||||
|
||||
static void raid10_end_write_request(struct bio *bio, int error)
|
||||
|
@ -488,13 +487,19 @@ static int raid10_mergeable_bvec(struct request_queue *q,
|
|||
static int read_balance(conf_t *conf, r10bio_t *r10_bio)
|
||||
{
|
||||
const sector_t this_sector = r10_bio->sector;
|
||||
int disk, slot, nslot;
|
||||
int disk, slot;
|
||||
const int sectors = r10_bio->sectors;
|
||||
sector_t new_distance, current_distance;
|
||||
sector_t new_distance, best_dist;
|
||||
mdk_rdev_t *rdev;
|
||||
int do_balance;
|
||||
int best_slot;
|
||||
|
||||
raid10_find_phys(conf, r10_bio);
|
||||
rcu_read_lock();
|
||||
retry:
|
||||
best_slot = -1;
|
||||
best_dist = MaxSector;
|
||||
do_balance = 1;
|
||||
/*
|
||||
* Check if we can balance. We can balance on the whole
|
||||
* device if no resync is going on (recovery is ok), or below
|
||||
|
@ -502,86 +507,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
|
|||
* above the resync window.
|
||||
*/
|
||||
if (conf->mddev->recovery_cp < MaxSector
|
||||
&& (this_sector + sectors >= conf->next_resync)) {
|
||||
/* make sure that disk is operational */
|
||||
slot = 0;
|
||||
disk = r10_bio->devs[slot].devnum;
|
||||
&& (this_sector + sectors >= conf->next_resync))
|
||||
do_balance = 0;
|
||||
|
||||
while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
|
||||
r10_bio->devs[slot].bio == IO_BLOCKED ||
|
||||
!test_bit(In_sync, &rdev->flags)) {
|
||||
slot++;
|
||||
if (slot == conf->copies) {
|
||||
slot = 0;
|
||||
disk = -1;
|
||||
break;
|
||||
}
|
||||
disk = r10_bio->devs[slot].devnum;
|
||||
}
|
||||
goto rb_out;
|
||||
}
|
||||
|
||||
|
||||
/* make sure the disk is operational */
|
||||
slot = 0;
|
||||
disk = r10_bio->devs[slot].devnum;
|
||||
while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
|
||||
r10_bio->devs[slot].bio == IO_BLOCKED ||
|
||||
!test_bit(In_sync, &rdev->flags)) {
|
||||
slot ++;
|
||||
if (slot == conf->copies) {
|
||||
disk = -1;
|
||||
goto rb_out;
|
||||
}
|
||||
disk = r10_bio->devs[slot].devnum;
|
||||
}
|
||||
|
||||
|
||||
current_distance = abs(r10_bio->devs[slot].addr -
|
||||
conf->mirrors[disk].head_position);
|
||||
|
||||
/* Find the disk whose head is closest,
|
||||
* or - for far > 1 - find the closest to partition beginning */
|
||||
|
||||
for (nslot = slot; nslot < conf->copies; nslot++) {
|
||||
int ndisk = r10_bio->devs[nslot].devnum;
|
||||
|
||||
|
||||
if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
|
||||
r10_bio->devs[nslot].bio == IO_BLOCKED ||
|
||||
!test_bit(In_sync, &rdev->flags))
|
||||
for (slot = 0; slot < conf->copies ; slot++) {
|
||||
if (r10_bio->devs[slot].bio == IO_BLOCKED)
|
||||
continue;
|
||||
disk = r10_bio->devs[slot].devnum;
|
||||
rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
if (rdev == NULL)
|
||||
continue;
|
||||
if (!test_bit(In_sync, &rdev->flags))
|
||||
continue;
|
||||
|
||||
if (!do_balance)
|
||||
break;
|
||||
|
||||
/* This optimisation is debatable, and completely destroys
|
||||
* sequential read speed for 'far copies' arrays. So only
|
||||
* keep it for 'near' arrays, and review those later.
|
||||
*/
|
||||
if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) {
|
||||
disk = ndisk;
|
||||
slot = nslot;
|
||||
if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending))
|
||||
break;
|
||||
}
|
||||
|
||||
/* for far > 1 always use the lowest address */
|
||||
if (conf->far_copies > 1)
|
||||
new_distance = r10_bio->devs[nslot].addr;
|
||||
new_distance = r10_bio->devs[slot].addr;
|
||||
else
|
||||
new_distance = abs(r10_bio->devs[nslot].addr -
|
||||
conf->mirrors[ndisk].head_position);
|
||||
if (new_distance < current_distance) {
|
||||
current_distance = new_distance;
|
||||
disk = ndisk;
|
||||
slot = nslot;
|
||||
new_distance = abs(r10_bio->devs[slot].addr -
|
||||
conf->mirrors[disk].head_position);
|
||||
if (new_distance < best_dist) {
|
||||
best_dist = new_distance;
|
||||
best_slot = slot;
|
||||
}
|
||||
}
|
||||
if (slot == conf->copies)
|
||||
slot = best_slot;
|
||||
|
||||
rb_out:
|
||||
r10_bio->read_slot = slot;
|
||||
/* conf->next_seq_sect = this_sector + sectors;*/
|
||||
|
||||
if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
|
||||
atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
|
||||
else
|
||||
if (slot >= 0) {
|
||||
disk = r10_bio->devs[slot].devnum;
|
||||
rdev = rcu_dereference(conf->mirrors[disk].rdev);
|
||||
if (!rdev)
|
||||
goto retry;
|
||||
atomic_inc(&rdev->nr_pending);
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
/* Cannot risk returning a device that failed
|
||||
* before we inc'ed nr_pending
|
||||
*/
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
goto retry;
|
||||
}
|
||||
r10_bio->read_slot = slot;
|
||||
} else
|
||||
disk = -1;
|
||||
rcu_read_unlock();
|
||||
|
||||
|
@ -1460,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
int d = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
|
||||
rcu_read_lock();
|
||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
||||
if (rdev) { /* If rdev is not NULL */
|
||||
char b[BDEVNAME_SIZE];
|
||||
int cur_read_error_count = 0;
|
||||
/* still own a reference to this rdev, so it cannot
|
||||
* have been cleared recently.
|
||||
*/
|
||||
rdev = conf->mirrors[d].rdev;
|
||||
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
/* drive has already been failed, just ignore any
|
||||
more fix_read_error() attempts */
|
||||
return;
|
||||
|
||||
check_decay_read_errors(mddev, rdev);
|
||||
atomic_inc(&rdev->read_errors);
|
||||
if (atomic_read(&rdev->read_errors) > max_read_errors) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
bdevname(rdev->bdev, b);
|
||||
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
rcu_read_unlock();
|
||||
/* drive has already been failed, just ignore any
|
||||
more fix_read_error() attempts */
|
||||
return;
|
||||
}
|
||||
|
||||
check_decay_read_errors(mddev, rdev);
|
||||
atomic_inc(&rdev->read_errors);
|
||||
cur_read_error_count = atomic_read(&rdev->read_errors);
|
||||
if (cur_read_error_count > max_read_errors) {
|
||||
rcu_read_unlock();
|
||||
printk(KERN_NOTICE
|
||||
"md/raid10:%s: %s: Raid device exceeded "
|
||||
"read_error threshold "
|
||||
"[cur %d:max %d]\n",
|
||||
mdname(mddev),
|
||||
b, cur_read_error_count, max_read_errors);
|
||||
printk(KERN_NOTICE
|
||||
"md/raid10:%s: %s: Failing raid "
|
||||
"device\n", mdname(mddev), b);
|
||||
md_error(mddev, conf->mirrors[d].rdev);
|
||||
return;
|
||||
}
|
||||
printk(KERN_NOTICE
|
||||
"md/raid10:%s: %s: Raid device exceeded "
|
||||
"read_error threshold [cur %d:max %d]\n",
|
||||
mdname(mddev), b,
|
||||
atomic_read(&rdev->read_errors), max_read_errors);
|
||||
printk(KERN_NOTICE
|
||||
"md/raid10:%s: %s: Failing raid device\n",
|
||||
mdname(mddev), b);
|
||||
md_error(mddev, conf->mirrors[d].rdev);
|
||||
return;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
while(sectors) {
|
||||
int s = sectors;
|
||||
|
@ -1562,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
"write failed"
|
||||
" (%d sectors at %llu on %s)\n",
|
||||
mdname(mddev), s,
|
||||
(unsigned long long)(sect+
|
||||
rdev->data_offset),
|
||||
(unsigned long long)(
|
||||
sect + rdev->data_offset),
|
||||
bdevname(rdev->bdev, b));
|
||||
printk(KERN_NOTICE "md/raid10:%s: %s: failing "
|
||||
"drive\n",
|
||||
|
@ -1599,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
"corrected sectors"
|
||||
" (%d sectors at %llu on %s)\n",
|
||||
mdname(mddev), s,
|
||||
(unsigned long long)(sect+
|
||||
rdev->data_offset),
|
||||
(unsigned long long)(
|
||||
sect + rdev->data_offset),
|
||||
bdevname(rdev->bdev, b));
|
||||
printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
|
||||
mdname(mddev),
|
||||
|
@ -1612,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
"md/raid10:%s: read error corrected"
|
||||
" (%d sectors at %llu on %s)\n",
|
||||
mdname(mddev), s,
|
||||
(unsigned long long)(sect+
|
||||
rdev->data_offset),
|
||||
(unsigned long long)(
|
||||
sect + rdev->data_offset),
|
||||
bdevname(rdev->bdev, b));
|
||||
}
|
||||
|
||||
|
@ -1663,7 +1633,8 @@ static void raid10d(mddev_t *mddev)
|
|||
else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
|
||||
recovery_request_write(mddev, r10_bio);
|
||||
else {
|
||||
int mirror;
|
||||
int slot = r10_bio->read_slot;
|
||||
int mirror = r10_bio->devs[slot].devnum;
|
||||
/* we got a read error. Maybe the drive is bad. Maybe just
|
||||
* the block and we can fix it.
|
||||
* We freeze all other IO, and try reading the block from
|
||||
|
@ -1677,9 +1648,10 @@ static void raid10d(mddev_t *mddev)
|
|||
fix_read_error(conf, mddev, r10_bio);
|
||||
unfreeze_array(conf);
|
||||
}
|
||||
rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
|
||||
|
||||
bio = r10_bio->devs[r10_bio->read_slot].bio;
|
||||
r10_bio->devs[r10_bio->read_slot].bio =
|
||||
bio = r10_bio->devs[slot].bio;
|
||||
r10_bio->devs[slot].bio =
|
||||
mddev->ro ? IO_BLOCKED : NULL;
|
||||
mirror = read_balance(conf, r10_bio);
|
||||
if (mirror == -1) {
|
||||
|
@ -1693,6 +1665,7 @@ static void raid10d(mddev_t *mddev)
|
|||
} else {
|
||||
const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
|
||||
bio_put(bio);
|
||||
slot = r10_bio->read_slot;
|
||||
rdev = conf->mirrors[mirror].rdev;
|
||||
if (printk_ratelimit())
|
||||
printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
|
||||
|
@ -1702,8 +1675,8 @@ static void raid10d(mddev_t *mddev)
|
|||
(unsigned long long)r10_bio->sector);
|
||||
bio = bio_clone_mddev(r10_bio->master_bio,
|
||||
GFP_NOIO, mddev);
|
||||
r10_bio->devs[r10_bio->read_slot].bio = bio;
|
||||
bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
|
||||
r10_bio->devs[slot].bio = bio;
|
||||
bio->bi_sector = r10_bio->devs[slot].addr
|
||||
+ rdev->data_offset;
|
||||
bio->bi_bdev = rdev->bdev;
|
||||
bio->bi_rw = READ | do_sync;
|
||||
|
@ -1763,13 +1736,13 @@ static int init_resync(conf_t *conf)
|
|||
*
|
||||
*/
|
||||
|
||||
static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
|
||||
static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
|
||||
int *skipped, int go_faster)
|
||||
{
|
||||
conf_t *conf = mddev->private;
|
||||
r10bio_t *r10_bio;
|
||||
struct bio *biolist = NULL, *bio;
|
||||
sector_t max_sector, nr_sectors;
|
||||
int disk;
|
||||
int i;
|
||||
int max_sync;
|
||||
sector_t sync_blocks;
|
||||
|
@ -1858,108 +1831,114 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
|
|||
int j, k;
|
||||
r10_bio = NULL;
|
||||
|
||||
for (i=0 ; i<conf->raid_disks; i++)
|
||||
if (conf->mirrors[i].rdev &&
|
||||
!test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
|
||||
int still_degraded = 0;
|
||||
/* want to reconstruct this device */
|
||||
r10bio_t *rb2 = r10_bio;
|
||||
sector_t sect = raid10_find_virt(conf, sector_nr, i);
|
||||
int must_sync;
|
||||
/* Unless we are doing a full sync, we only need
|
||||
* to recover the block if it is set in the bitmap
|
||||
for (i=0 ; i<conf->raid_disks; i++) {
|
||||
int still_degraded;
|
||||
r10bio_t *rb2;
|
||||
sector_t sect;
|
||||
int must_sync;
|
||||
|
||||
if (conf->mirrors[i].rdev == NULL ||
|
||||
test_bit(In_sync, &conf->mirrors[i].rdev->flags))
|
||||
continue;
|
||||
|
||||
still_degraded = 0;
|
||||
/* want to reconstruct this device */
|
||||
rb2 = r10_bio;
|
||||
sect = raid10_find_virt(conf, sector_nr, i);
|
||||
/* Unless we are doing a full sync, we only need
|
||||
* to recover the block if it is set in the bitmap
|
||||
*/
|
||||
must_sync = bitmap_start_sync(mddev->bitmap, sect,
|
||||
&sync_blocks, 1);
|
||||
if (sync_blocks < max_sync)
|
||||
max_sync = sync_blocks;
|
||||
if (!must_sync &&
|
||||
!conf->fullsync) {
|
||||
/* yep, skip the sync_blocks here, but don't assume
|
||||
* that there will never be anything to do here
|
||||
*/
|
||||
must_sync = bitmap_start_sync(mddev->bitmap, sect,
|
||||
&sync_blocks, 1);
|
||||
if (sync_blocks < max_sync)
|
||||
max_sync = sync_blocks;
|
||||
if (!must_sync &&
|
||||
!conf->fullsync) {
|
||||
/* yep, skip the sync_blocks here, but don't assume
|
||||
* that there will never be anything to do here
|
||||
*/
|
||||
chunks_skipped = -1;
|
||||
continue;
|
||||
}
|
||||
chunks_skipped = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
|
||||
raise_barrier(conf, rb2 != NULL);
|
||||
atomic_set(&r10_bio->remaining, 0);
|
||||
r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
|
||||
raise_barrier(conf, rb2 != NULL);
|
||||
atomic_set(&r10_bio->remaining, 0);
|
||||
|
||||
r10_bio->master_bio = (struct bio*)rb2;
|
||||
if (rb2)
|
||||
atomic_inc(&rb2->remaining);
|
||||
r10_bio->mddev = mddev;
|
||||
set_bit(R10BIO_IsRecover, &r10_bio->state);
|
||||
r10_bio->sector = sect;
|
||||
r10_bio->master_bio = (struct bio*)rb2;
|
||||
if (rb2)
|
||||
atomic_inc(&rb2->remaining);
|
||||
r10_bio->mddev = mddev;
|
||||
set_bit(R10BIO_IsRecover, &r10_bio->state);
|
||||
r10_bio->sector = sect;
|
||||
|
||||
raid10_find_phys(conf, r10_bio);
|
||||
raid10_find_phys(conf, r10_bio);
|
||||
|
||||
/* Need to check if the array will still be
|
||||
* degraded
|
||||
*/
|
||||
for (j=0; j<conf->raid_disks; j++)
|
||||
if (conf->mirrors[j].rdev == NULL ||
|
||||
test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
|
||||
still_degraded = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
must_sync = bitmap_start_sync(mddev->bitmap, sect,
|
||||
&sync_blocks, still_degraded);
|
||||
|
||||
for (j=0; j<conf->copies;j++) {
|
||||
int d = r10_bio->devs[j].devnum;
|
||||
if (conf->mirrors[d].rdev &&
|
||||
test_bit(In_sync, &conf->mirrors[d].rdev->flags)) {
|
||||
/* This is where we read from */
|
||||
bio = r10_bio->devs[0].bio;
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
bio->bi_private = r10_bio;
|
||||
bio->bi_end_io = end_sync_read;
|
||||
bio->bi_rw = READ;
|
||||
bio->bi_sector = r10_bio->devs[j].addr +
|
||||
conf->mirrors[d].rdev->data_offset;
|
||||
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
|
||||
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
/* and we write to 'i' */
|
||||
|
||||
for (k=0; k<conf->copies; k++)
|
||||
if (r10_bio->devs[k].devnum == i)
|
||||
break;
|
||||
BUG_ON(k == conf->copies);
|
||||
bio = r10_bio->devs[1].bio;
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
bio->bi_private = r10_bio;
|
||||
bio->bi_end_io = end_sync_write;
|
||||
bio->bi_rw = WRITE;
|
||||
bio->bi_sector = r10_bio->devs[k].addr +
|
||||
conf->mirrors[i].rdev->data_offset;
|
||||
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
||||
|
||||
r10_bio->devs[0].devnum = d;
|
||||
r10_bio->devs[1].devnum = i;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == conf->copies) {
|
||||
/* Cannot recover, so abort the recovery */
|
||||
put_buf(r10_bio);
|
||||
if (rb2)
|
||||
atomic_dec(&rb2->remaining);
|
||||
r10_bio = rb2;
|
||||
if (!test_and_set_bit(MD_RECOVERY_INTR,
|
||||
&mddev->recovery))
|
||||
printk(KERN_INFO "md/raid10:%s: insufficient "
|
||||
"working devices for recovery.\n",
|
||||
mdname(mddev));
|
||||
/* Need to check if the array will still be
|
||||
* degraded
|
||||
*/
|
||||
for (j=0; j<conf->raid_disks; j++)
|
||||
if (conf->mirrors[j].rdev == NULL ||
|
||||
test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
|
||||
still_degraded = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
must_sync = bitmap_start_sync(mddev->bitmap, sect,
|
||||
&sync_blocks, still_degraded);
|
||||
|
||||
for (j=0; j<conf->copies;j++) {
|
||||
int d = r10_bio->devs[j].devnum;
|
||||
if (!conf->mirrors[d].rdev ||
|
||||
!test_bit(In_sync, &conf->mirrors[d].rdev->flags))
|
||||
continue;
|
||||
/* This is where we read from */
|
||||
bio = r10_bio->devs[0].bio;
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
bio->bi_private = r10_bio;
|
||||
bio->bi_end_io = end_sync_read;
|
||||
bio->bi_rw = READ;
|
||||
bio->bi_sector = r10_bio->devs[j].addr +
|
||||
conf->mirrors[d].rdev->data_offset;
|
||||
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
|
||||
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
|
||||
atomic_inc(&r10_bio->remaining);
|
||||
/* and we write to 'i' */
|
||||
|
||||
for (k=0; k<conf->copies; k++)
|
||||
if (r10_bio->devs[k].devnum == i)
|
||||
break;
|
||||
BUG_ON(k == conf->copies);
|
||||
bio = r10_bio->devs[1].bio;
|
||||
bio->bi_next = biolist;
|
||||
biolist = bio;
|
||||
bio->bi_private = r10_bio;
|
||||
bio->bi_end_io = end_sync_write;
|
||||
bio->bi_rw = WRITE;
|
||||
bio->bi_sector = r10_bio->devs[k].addr +
|
||||
conf->mirrors[i].rdev->data_offset;
|
||||
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
||||
|
||||
r10_bio->devs[0].devnum = d;
|
||||
r10_bio->devs[1].devnum = i;
|
||||
|
||||
break;
|
||||
}
|
||||
if (j == conf->copies) {
|
||||
/* Cannot recover, so abort the recovery */
|
||||
put_buf(r10_bio);
|
||||
if (rb2)
|
||||
atomic_dec(&rb2->remaining);
|
||||
r10_bio = rb2;
|
||||
if (!test_and_set_bit(MD_RECOVERY_INTR,
|
||||
&mddev->recovery))
|
||||
printk(KERN_INFO "md/raid10:%s: insufficient "
|
||||
"working devices for recovery.\n",
|
||||
mdname(mddev));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (biolist == NULL) {
|
||||
while (r10_bio) {
|
||||
r10bio_t *rb2 = r10_bio;
|
||||
|
@ -1977,7 +1956,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
|
|||
|
||||
if (!bitmap_start_sync(mddev->bitmap, sector_nr,
|
||||
&sync_blocks, mddev->degraded) &&
|
||||
!conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||
!conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
|
||||
&mddev->recovery)) {
|
||||
/* We can skip this block */
|
||||
*skipped = 1;
|
||||
return sync_blocks + sectors_skipped;
|
||||
|
@ -2022,7 +2002,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
|
|||
for (i=0; i<conf->copies; i++) {
|
||||
int d = r10_bio->devs[i].devnum;
|
||||
if (r10_bio->devs[i].bio->bi_end_io)
|
||||
rdev_dec_pending(conf->mirrors[d].rdev, mddev);
|
||||
rdev_dec_pending(conf->mirrors[d].rdev,
|
||||
mddev);
|
||||
}
|
||||
put_buf(r10_bio);
|
||||
biolist = NULL;
|
||||
|
@ -2047,26 +2028,27 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
|
|||
do {
|
||||
struct page *page;
|
||||
int len = PAGE_SIZE;
|
||||
disk = 0;
|
||||
if (sector_nr + (len>>9) > max_sector)
|
||||
len = (max_sector - sector_nr) << 9;
|
||||
if (len == 0)
|
||||
break;
|
||||
for (bio= biolist ; bio ; bio=bio->bi_next) {
|
||||
struct bio *bio2;
|
||||
page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
|
||||
if (bio_add_page(bio, page, len, 0) == 0) {
|
||||
/* stop here */
|
||||
struct bio *bio2;
|
||||
bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
|
||||
for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) {
|
||||
/* remove last page from this bio */
|
||||
bio2->bi_vcnt--;
|
||||
bio2->bi_size -= len;
|
||||
bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
|
||||
}
|
||||
goto bio_full;
|
||||
if (bio_add_page(bio, page, len, 0))
|
||||
continue;
|
||||
|
||||
/* stop here */
|
||||
bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
|
||||
for (bio2 = biolist;
|
||||
bio2 && bio2 != bio;
|
||||
bio2 = bio2->bi_next) {
|
||||
/* remove last page from this bio */
|
||||
bio2->bi_vcnt--;
|
||||
bio2->bi_size -= len;
|
||||
bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
|
||||
}
|
||||
disk = i;
|
||||
goto bio_full;
|
||||
}
|
||||
nr_sectors += len>>9;
|
||||
sector_nr += len>>9;
|
||||
|
|
|
@ -1700,27 +1700,25 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
raid5_conf_t *conf = mddev->private;
|
||||
pr_debug("raid456: error called\n");
|
||||
|
||||
if (!test_bit(Faulty, &rdev->flags)) {
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
mddev->degraded++;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
/*
|
||||
* if recovery was running, make sure it aborts.
|
||||
*/
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
}
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
printk(KERN_ALERT
|
||||
"md/raid:%s: Disk failure on %s, disabling device.\n"
|
||||
"md/raid:%s: Operation continuing on %d devices.\n",
|
||||
mdname(mddev),
|
||||
bdevname(rdev->bdev, b),
|
||||
mdname(mddev),
|
||||
conf->raid_disks - mddev->degraded);
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
mddev->degraded++;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
/*
|
||||
* if recovery was running, make sure it aborts.
|
||||
*/
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
}
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
printk(KERN_ALERT
|
||||
"md/raid:%s: Disk failure on %s, disabling device.\n"
|
||||
"md/raid:%s: Operation continuing on %d devices.\n",
|
||||
mdname(mddev),
|
||||
bdevname(rdev->bdev, b),
|
||||
mdname(mddev),
|
||||
conf->raid_disks - mddev->degraded);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -5391,7 +5389,8 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
|
|||
return -EINVAL;
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
revalidate_disk(mddev->gendisk);
|
||||
if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
|
||||
if (sectors > mddev->dev_sectors &&
|
||||
mddev->recovery_cp > mddev->dev_sectors) {
|
||||
mddev->recovery_cp = mddev->dev_sectors;
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue