md updates for 3.4

Mostly tidying up code in preparation for some bigger changes next time.
A few bug fixes tagged for -stable.  Main functionality change is that
some RAID10 arrays can now grow to use extra space that may have been
made available on the individual devices.

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.18 (GNU/Linux)

iQIVAwUAT2bLBjnsnt1WYoG5AQKN3xAAv1UlR5Kem5WN7Ex4lmR9xj3lr9dbURYT
TtvrUuCy3pYYWdTuijb+IBqkbODF0kPDHIhUiBx9fXUfMavkp/b9heXS/vJ3pcH4
1j99NUbOGL/AylD1TPRV9TQxGTKhEjK3n26bY0t/amLc92bWJaytMO1B9cz38LN+
qx6ufpIepz4DPXXtPYpnkBR4cZ6L4/ZXQvjf5BqG6WfKwc+0Nyncg8ipYEqhBWy7
R7ztF5yPo0yl96Wopa2KG91OroWflmyZo1DNYcbUbKtbNGGtYC92GFadOH+wNupM
FnmXv10ivfVGU5w4SpshAwOg+4OSUqmWNsBxUhpYbf8ChbN+lOl0VZdH6UBxo19D
3SqZWT/yz4I4HYd5rtr35MXFdOeBNM++CHQs4F68BLA0B6OcHfWsA9bvly2tnBVx
iEBFPd277qWztUr8m6yz7AFf/0dgyXuIhuB3d7IkVrG5yG3FX6hPi2T0FSA33qMx
Lwi5w6O4DREg5tG09xEYEnXgXe+PnB8HsKb1U/m76XMQ0UScvX6dLA6934Vg+DCv
xf+AYqob0Tc/Op5I7h2PbVXq7DciNXwlX1WvM0m+TEaV+3fl1FB0VsCcANAV6JVn
uRLmvtePQRt0hxAog2p7OsumVnxMhbuEo5h8rJMKWM7IbhueKNoz+gBwpcFLzBmY
ygWc4peLQpE=
=MGuM
-----END PGP SIGNATURE-----

Merge tag 'md-3.4' of git://neil.brown.name/md

Pull md updates for 3.4 from Neil Brown:
 "Mostly tidying up code in preparation for some bigger changes next
  time.  A few bug fixes tagged for -stable.

  Main functionality change is that some RAID10 arrays can now grow to
  use extra space that may have been made available on the individual
  devices."

Fixed up trivial conflicts with the k[un]map_atomic() cleanups in
drivers/md/bitmap.c.

* tag 'md-3.4' of git://neil.brown.name/md: (22 commits)
  md: Add judgement bb->unacked_exist in function md_ack_all_badblocks().
  md: fix clearing of the 'changed' flags for the bad blocks list.
  md/bitmap: discard CHUNK_BLOCK_SHIFT macro
  md/bitmap: remove unnecessary indirection when allocating.
  md/bitmap: remove some pointless locking.
  md/bitmap: change a 'goto' to a normal 'if' construct.
  md/bitmap: move printing of bitmap status to bitmap.c
  md/bitmap: remove some unused noise from bitmap.h
  md/raid10 - support resizing some RAID10 arrays.
  md/raid1: handle merge_bvec_fn in member devices.
  md/raid10: handle merge_bvec_fn in member devices.
  md: add proper merge_bvec handling to RAID0 and Linear.
  md: tidy up rdev_for_each usage.
  md/raid1,raid10: avoid deadlock during resync/recovery.
  md/bitmap: ensure to load bitmap when creating via sysfs.
  md: don't set md arrays to readonly on shutdown.
  md: allow re-add to failed arrays.
  md/raid5: use atomic_dec_return() instead of atomic_dec() and atomic_read().
  md: Use existed macros instead of numbers
  md/raid5: removed unused 'added_devices' variable.
  ...
commit 267d7b23dd
14 changed files with 496 additions and 374 deletions
drivers/md/bitmap.c
  - Include <linux/seq_file.h> for the new bitmap_status().
  - Drop the bitmap_alloc_page()/bitmap_free_page() placeholder wrappers;
    bitmap_checkpage() and bitmap_checkfree() call kzalloc(PAGE_SIZE, GFP_NOIO)
    and kfree() directly.
  - read_sb_page(): convert list_for_each_entry(rdev, &mddev->disks, same_set)
    to rdev_for_each(rdev, mddev).
  - bitmap_update_sb() and bitmap_mask_state(): remove the pointless
    spin_lock_irqsave(&bitmap->lock, flags) taken only to test bitmap->sb_page;
    the test is now a plain "if (!bitmap->sb_page) return".
  - bitmap_read_sb(): replace the "goto success" construct with a normal
    "if (bitmap->mddev->persistent) { ... }" block around the UUID and
    event-counter checks (forcing full recovery when the bitmap file is out
    of date).
  - Discard the CHUNK_BLOCK_SHIFT(bitmap) macro: bitmap_file_set_bit(),
    bitmap_init_from_disk(), bitmap_count_page(), bitmap_daemon_work(),
    bitmap_get_counter(), bitmap_endwrite(), bitmap_end_sync(),
    bitmap_cond_end_sync(), bitmap_set_memory_bits() and bitmap_dirty_bits()
    all shift by bitmap->chunkshift instead, and bitmap_create() now sets
    chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT.
  - Add bitmap_status(), which prints the "bitmap: x/y pages [zKB], ... chunk"
    line and the bitmap file path into a seq_file under bitmap->lock (moved
    here from md_seq_show() in md.c).
  - location_store(): after a successful bitmap_create(), also call
    bitmap_load() so that a bitmap created via sysfs is actually loaded.
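
The allocation tidy-up in bitmap_checkpage() is representative of the rest; the
following is a condensed sketch of the resulting flow, not the verbatim kernel
code (the failure branch is abbreviated):

    /* Sketch: allocate the counter page directly while the lock is
     * dropped, and fall back to kfree() if another CPU raced us. */
    spin_unlock_irq(&bitmap->lock);
    mappage = kzalloc(PAGE_SIZE, GFP_NOIO);        /* was bitmap_alloc_page() */
    spin_lock_irq(&bitmap->lock);

    if (mappage == NULL) {
            /* failed - mark the page hijacked so the pointer itself can
             * serve as a (coarser) counter, as in the real function */
            if (!bitmap->bp[page].map)
                    bitmap->bp[page].hijacked = 1;
    } else if (bitmap->bp[page].map || bitmap->bp[page].hijacked) {
            /* somebody beat us to getting the page */
            kfree(mappage);                         /* was bitmap_free_page() */
    } else {
            /* no page was in place and we have one, so install it */
            bitmap->bp[page].map = mappage;
            bitmap->missing_pages--;
    }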
drivers/md/bitmap.h
  - Remove unused noise: the BITMAP_MINOR and BITMAP_BLOCK_SIZE defines, the
    CHUNK_BLOCK_SHIFT()/CHUNK_BLOCK_MASK() macros, the PAGEPTR_BLOCK_RATIO/
    PAGEPTR_BLOCK_SHIFT/PAGEPTR_BLOCK_MASK macros, and the unused
    struct page_list.
  - struct bitmap: the chunkshift comment becomes
    "chunksize = 2^(chunkshift+9)"; chunkshift is now the log2 of the number
    of 512-byte blocks per chunk rather than of the chunk size in bytes.
  - Declare the new bitmap_status(struct seq_file *seq, struct bitmap *bitmap).
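
A small stand-alone illustration of the new chunkshift convention may help;
this is ordinary user-space C, not kernel code, and uses __builtin_ctzl() in
place of the kernel's ffz(~x) (they agree for the power-of-two chunk sizes md
uses):

    #include <assert.h>
    #include <stdio.h>

    #define BITMAP_BLOCK_SHIFT 9    /* 512-byte blocks, as in bitmap.h */

    int main(void)
    {
            unsigned long chunksize = 64 * 1024;    /* example 64KB chunk */
            /* kernel: chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT */
            unsigned long chunkshift =
                    __builtin_ctzl(chunksize) - BITMAP_BLOCK_SHIFT;

            /* chunksize = 2^(chunkshift + 9), per the updated comment */
            assert(chunksize == 1UL << (chunkshift + BITMAP_BLOCK_SHIFT));
            printf("chunksize %lu -> chunkshift %lu (%lu sectors/chunk)\n",
                   chunksize, chunkshift, 1UL << chunkshift);
            return 0;
    }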
drivers/md/dm-raid.c
  - Follow the rdev_for_each() API change: the iterator no longer takes a
    'tmp' cursor, so super_sync(), super_init_validation() and
    analyse_superblocks() drop their now-unused temporary md_rdev pointers and
    iterate with rdev_for_each(rdev, mddev).
drivers/md/faulty.c
  - run(): convert list_for_each_entry(rdev, &mddev->disks, same_set) to
    rdev_for_each(rdev, mddev).
drivers/md/linear.c
  - linear_mergeable_bvec() now honours a merge_bvec_fn on the member device:
    it looks up the member's request_queue, re-points bvm->bi_bdev and
    bvm->bi_sector at the member device, and clamps the answer to whatever
    that queue's merge_bvec_fn allows (never more than the current
    biovec->bv_len) as well as to the space remaining before the device
    boundary.
  - linear_conf(): drop the old workaround that limited max_segments to a
    single page whenever a member device had a merge_bvec_fn, and convert the
    rdev loop to rdev_for_each(rdev, mddev).
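
The core of the new behaviour is the chaining step; the sketch below condenses
it out of linear_mergeable_bvec() (dev0 is linear's per-device struct dev_info;
the surrounding size checks of the real function are omitted):

    int maxbytes = biovec->bv_len;
    struct request_queue *subq = bdev_get_queue(dev0->rdev->bdev);

    if (subq->merge_bvec_fn) {
            /* Re-point the merge query at the member device and let its
             * own merge_bvec_fn further restrict how much may be added. */
            bvm->bi_bdev = dev0->rdev->bdev;
            bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors;
            maxbytes = min(maxbytes,
                           subq->merge_bvec_fn(subq, bvm, biovec));
    }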
drivers/md/md.c (140 lines changed)
  - Tidy up rdev iteration throughout: list_for_each_entry(rdev,
    &mddev->disks, same_set) becomes rdev_for_each(rdev, mddev), the RCU
    variants become rdev_for_each_rcu(rdev, mddev), and the loops in
    export_array() and analyze_sbs(), which may unlink devices while walking,
    become rdev_for_each_safe(rdev, tmp, mddev).
  - Fix clearing of the 'changed' flags for the bad blocks list: the
    superblock sync path clears bb->changed before the read_seqretry() check
    instead of after it, and md_update_sb() clears rdev->badblocks.changed
    before calling md_ack_all_badblocks().
  - md_ack_all_badblocks() only walks the table when there is something to
    acknowledge: "if (bb->changed == 0 && bb->unacked_exist)".
  - md_clean(): also reset the new mddev->merge_check_needed flag.
  - md_seq_show(): the inline bitmap status printing (pages, chunk size, file
    path) is replaced by a call to the new bitmap_status(seq, mddev->bitmap).
  - md_notify_reboot(): on SYS_DOWN/SYS_HALT/SYS_POWER_OFF, arrays are no
    longer forced read-only with md_set_readonly(mddev, 100); instead each
    array that can be trylocked has its writes stopped (__md_stop_writes())
    and safemode raised to 2, keeping the one-second mdelay() for SCSI
    devices that are volatile across early reboots.
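
A condensed sketch of the reworked shutdown path described above (simplified
from the hunk, not the verbatim kernel code):

    static int md_notify_reboot(struct notifier_block *this,
                                unsigned long code, void *x)
    {
            struct list_head *tmp;
            struct mddev *mddev;
            int need_delay = 0;

            if (code == SYS_DOWN || code == SYS_HALT ||
                code == SYS_POWER_OFF) {
                    for_each_mddev(mddev, tmp) {
                            if (mddev_trylock(mddev)) {
                                    /* stop writes instead of forcing the
                                     * array read-only */
                                    __md_stop_writes(mddev);
                                    mddev->safemode = 2;
                                    mddev_unlock(mddev);
                            }
                            need_delay = 1;
                    }
                    /* give volatile SCSI devices a moment to settle
                     * before the reboot proceeds */
                    if (need_delay)
                            mdelay(1000);
            }
            return NOTIFY_DONE;
    }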
drivers/md/md.h
  - Add an Unmerged rdev flag: the device is being added to the array and
    should be considered by bvec_merge_fn handling but not yet used for
    actual I/O.
  - Add mddev->merge_check_needed, set when at least one member device has a
    merge_bvec_fn.
  - Tidy up the iterators: rdev_for_each(rdev, mddev) is now a plain
    list_for_each_entry() over mddev->disks, and the old three-argument form
    survives as rdev_for_each_safe(rdev, tmp, mddev) for callers that may
    remove entries; rdev_for_each_rcu() is unchanged.
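
A hedged sketch of how a caller reads after the conversion (not verbatim
kernel code; the removal in the second loop is purely illustrative):

    static void rdev_walk_sketch(struct mddev *mddev)
    {
            struct md_rdev *rdev, *tmp;
            char b[BDEVNAME_SIZE];

            /* plain walk: no scratch cursor needed any more */
            rdev_for_each(rdev, mddev)
                    pr_debug("md: considering %s\n",
                             bdevname(rdev->bdev, b));

            /* walk that may unlink entries: use the _safe form, as
             * export_array() and analyze_sbs() now do */
            rdev_for_each_safe(rdev, tmp, mddev)
                    if (test_bit(Faulty, &rdev->flags))
                            list_del_init(&rdev->same_set);
    }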
drivers/md/multipath.c
  - multipath_run(): convert list_for_each_entry(rdev, &mddev->disks,
    same_set) to rdev_for_each(rdev, mddev).
drivers/md/raid0.c
  - create_strip_zones(): convert the rdev loops to rdev_for_each(), and drop
    the old workaround that limited max_segments to a single page when a
    member device had a merge_bvec_fn; such a device now just sets
    conf->has_merge_bvec = 1.
  - The find_zone() and map_sector() helpers (zone lookup, and sector
    remapping with a fast power-of-two chunk path and a general path) now
    appear at this point in the file; this page is cut off part-way through
    map_sector().
/* quotient is the chunk in real device*/
|
||||||
|
sector_div(chunk, zone->nb_dev << chunksect_bits);
|
||||||
|
} else{
|
||||||
|
sect_in_chunk = sector_div(sector, chunk_sects);
|
||||||
|
chunk = *sector_offset;
|
||||||
|
sector_div(chunk, chunk_sects * zone->nb_dev);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* position the bio over the real device
|
||||||
|
* real sector = chunk in device + starting of zone
|
||||||
|
* + the position in the chunk
|
||||||
|
*/
|
||||||
|
*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
|
||||||
|
return conf->devlist[(zone - conf->strip_zone)*raid_disks
|
||||||
|
+ sector_div(sector, zone->nb_dev)];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
|
* raid0_mergeable_bvec -- tell bio layer if two requests can be merged
|
||||||
* @q: request queue
|
* @q: request queue
|
||||||
* @bvm: properties of new bio
|
* @bvm: properties of new bio
|
||||||
* @biovec: the request that could be merged to it.
|
* @biovec: the request that could be merged to it.
|
||||||
|
@ -303,10 +353,15 @@ static int raid0_mergeable_bvec(struct request_queue *q,
|
||||||
struct bio_vec *biovec)
|
struct bio_vec *biovec)
|
||||||
{
|
{
|
||||||
struct mddev *mddev = q->queuedata;
|
struct mddev *mddev = q->queuedata;
|
||||||
|
struct r0conf *conf = mddev->private;
|
||||||
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
|
sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
|
||||||
|
sector_t sector_offset = sector;
|
||||||
int max;
|
int max;
|
||||||
unsigned int chunk_sectors = mddev->chunk_sectors;
|
unsigned int chunk_sectors = mddev->chunk_sectors;
|
||||||
unsigned int bio_sectors = bvm->bi_size >> 9;
|
unsigned int bio_sectors = bvm->bi_size >> 9;
|
||||||
|
struct strip_zone *zone;
|
||||||
|
struct md_rdev *rdev;
|
||||||
|
struct request_queue *subq;
|
||||||
|
|
||||||
if (is_power_of_2(chunk_sectors))
|
if (is_power_of_2(chunk_sectors))
|
||||||
max = (chunk_sectors - ((sector & (chunk_sectors-1))
|
max = (chunk_sectors - ((sector & (chunk_sectors-1))
|
||||||
|
@ -314,10 +369,27 @@ static int raid0_mergeable_bvec(struct request_queue *q,
|
||||||
else
|
else
|
||||||
max = (chunk_sectors - (sector_div(sector, chunk_sectors)
|
max = (chunk_sectors - (sector_div(sector, chunk_sectors)
|
||||||
+ bio_sectors)) << 9;
|
+ bio_sectors)) << 9;
|
||||||
if (max < 0) max = 0; /* bio_add cannot handle a negative return */
|
if (max < 0)
|
||||||
|
max = 0; /* bio_add cannot handle a negative return */
|
||||||
if (max <= biovec->bv_len && bio_sectors == 0)
|
if (max <= biovec->bv_len && bio_sectors == 0)
|
||||||
return biovec->bv_len;
|
return biovec->bv_len;
|
||||||
else
|
if (max < biovec->bv_len)
|
||||||
|
/* too small already, no need to check further */
|
||||||
|
return max;
|
||||||
|
if (!conf->has_merge_bvec)
|
||||||
|
return max;
|
||||||
|
|
||||||
|
/* May need to check subordinate device */
|
||||||
|
sector = sector_offset;
|
||||||
|
zone = find_zone(mddev->private, §or_offset);
|
||||||
|
rdev = map_sector(mddev, zone, sector, §or_offset);
|
||||||
|
subq = bdev_get_queue(rdev->bdev);
|
||||||
|
if (subq->merge_bvec_fn) {
|
||||||
|
bvm->bi_bdev = rdev->bdev;
|
||||||
|
bvm->bi_sector = sector_offset + zone->dev_start +
|
||||||
|
rdev->data_offset;
|
||||||
|
return min(max, subq->merge_bvec_fn(subq, bvm, biovec));
|
||||||
|
} else
|
||||||
return max;
|
return max;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -329,7 +401,7 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
|
||||||
WARN_ONCE(sectors || raid_disks,
|
WARN_ONCE(sectors || raid_disks,
|
||||||
"%s does not support generic reshape\n", __func__);
|
"%s does not support generic reshape\n", __func__);
|
||||||
|
|
||||||
list_for_each_entry(rdev, &mddev->disks, same_set)
|
rdev_for_each(rdev, mddev)
|
||||||
array_sectors += rdev->sectors;
|
array_sectors += rdev->sectors;
|
||||||
|
|
||||||
return array_sectors;
|
return array_sectors;
|
||||||
|
@ -397,62 +469,6 @@ static int raid0_stop(struct mddev *mddev)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the zone which holds a particular offset
|
|
||||||
* Update *sectorp to be an offset in that zone
|
|
||||||
*/
|
|
||||||
static struct strip_zone *find_zone(struct r0conf *conf,
|
|
||||||
sector_t *sectorp)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
struct strip_zone *z = conf->strip_zone;
|
|
||||||
sector_t sector = *sectorp;
|
|
||||||
|
|
||||||
for (i = 0; i < conf->nr_strip_zones; i++)
|
|
||||||
if (sector < z[i].zone_end) {
|
|
||||||
if (i)
|
|
||||||
*sectorp = sector - z[i-1].zone_end;
|
|
||||||
return z + i;
|
|
||||||
}
|
|
||||||
BUG();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* remaps the bio to the target device. we separate two flows.
|
|
||||||
* power 2 flow and a general flow for the sake of perfromance
|
|
||||||
*/
|
|
||||||
static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
|
|
||||||
sector_t sector, sector_t *sector_offset)
|
|
||||||
{
|
|
||||||
unsigned int sect_in_chunk;
|
|
||||||
sector_t chunk;
|
|
||||||
struct r0conf *conf = mddev->private;
|
|
||||||
int raid_disks = conf->strip_zone[0].nb_dev;
|
|
||||||
unsigned int chunk_sects = mddev->chunk_sectors;
|
|
||||||
|
|
||||||
if (is_power_of_2(chunk_sects)) {
|
|
||||||
int chunksect_bits = ffz(~chunk_sects);
|
|
||||||
/* find the sector offset inside the chunk */
|
|
||||||
sect_in_chunk = sector & (chunk_sects - 1);
|
|
||||||
sector >>= chunksect_bits;
|
|
||||||
/* chunk in zone */
|
|
||||||
chunk = *sector_offset;
|
|
||||||
/* quotient is the chunk in real device*/
|
|
||||||
sector_div(chunk, zone->nb_dev << chunksect_bits);
|
|
||||||
} else{
|
|
||||||
sect_in_chunk = sector_div(sector, chunk_sects);
|
|
||||||
chunk = *sector_offset;
|
|
||||||
sector_div(chunk, chunk_sects * zone->nb_dev);
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* position the bio over the real device
|
|
||||||
* real sector = chunk in device + starting of zone
|
|
||||||
* + the position in the chunk
|
|
||||||
*/
|
|
||||||
*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
|
|
||||||
return conf->devlist[(zone - conf->strip_zone)*raid_disks
|
|
||||||
+ sector_div(sector, zone->nb_dev)];
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is io distribute over 1 or more chunks ?
|
* Is io distribute over 1 or more chunks ?
|
||||||
*/
|
*/
|
||||||
|
@ -505,7 +521,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
|
||||||
}
|
}
|
||||||
|
|
||||||
sector_offset = bio->bi_sector;
|
sector_offset = bio->bi_sector;
|
||||||
zone = find_zone(mddev->private, §or_offset);
|
zone = find_zone(mddev->private, §or_offset);
|
||||||
tmp_dev = map_sector(mddev, zone, bio->bi_sector,
|
tmp_dev = map_sector(mddev, zone, bio->bi_sector,
|
||||||
§or_offset);
|
§or_offset);
|
||||||
bio->bi_bdev = tmp_dev->bdev;
|
bio->bi_bdev = tmp_dev->bdev;
|
||||||
|
@ -543,7 +559,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
rdev_for_each(rdev, mddev) {
|
||||||
/* check slot number for a disk */
|
/* check slot number for a disk */
|
||||||
if (rdev->raid_disk == mddev->raid_disks-1) {
|
if (rdev->raid_disk == mddev->raid_disks-1) {
|
||||||
printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
|
printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
|
||||||
|
|
|
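For a single-zone layout the arithmetic in find_zone()/map_sector() reduces to plain chunked striping: offset-in-chunk, chunk number, member index, and the chunk's position on that member. A user-space sketch of the same steps with made-up geometry (3 members, 64-sector chunks; no kernel helpers such as sector_div are used):

```c
#include <stdio.h>
#include <stdint.h>

/* Assumed single-zone RAID0 layout: 3 members, 64-sector (32 KiB) chunks. */
#define NB_DEV      3u
#define CHUNK_SECTS 64u   /* power of two, so the shift/mask path applies */

int main(void)
{
	uint64_t sector = 1000;                     /* logical array sector */

	uint32_t sect_in_chunk = sector & (CHUNK_SECTS - 1);     /* offset in chunk   */
	uint64_t chunk_no      = sector / CHUNK_SECTS;            /* array-wide chunk  */
	uint32_t dev           = chunk_no % NB_DEV;               /* member index      */
	uint64_t dev_chunk     = sector / (CHUNK_SECTS * NB_DEV); /* chunk on member   */
	uint64_t dev_sector    = dev_chunk * CHUNK_SECTS + sect_in_chunk;

	/* sector 1000 -> chunk 15 -> member 0, member sector 5*64 + 40 = 360 */
	printf("logical %llu -> member %u, member sector %llu\n",
	       (unsigned long long)sector, dev, (unsigned long long)dev_sector);
	return 0;
}
```

The mergeable_bvec change above then forwards exactly this mapped (member, member-sector) pair to the member's own merge_bvec_fn when one exists, instead of the old workaround of capping every request at a single page.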
--- a/drivers/md/raid0.h
+++ b/drivers/md/raid0.h
@@ -4,13 +4,16 @@
 struct strip_zone {
 	sector_t zone_end;	/* Start of the next zone (in sectors) */
 	sector_t dev_start;	/* Zone offset in real dev (in sectors) */
 	int	 nb_dev;	/* # of devices attached to the zone */
 };
 
 struct r0conf {
 	struct strip_zone	*strip_zone;
-	struct md_rdev		**devlist; /* lists of rdevs, pointed to by strip_zone->dev */
+	struct md_rdev		**devlist; /* lists of rdevs, pointed to
+					    * by strip_zone->dev */
 	int			nr_strip_zones;
+	int			has_merge_bvec;	/* at least one member has
+						 * a merge_bvec_fn */
 };
 
 #endif
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
 		    || rdev == NULL
+		    || test_bit(Unmerged, &rdev->flags)
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
 		if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	return best_disk;
 }
 
+static int raid1_mergeable_bvec(struct request_queue *q,
+				struct bvec_merge_data *bvm,
+				struct bio_vec *biovec)
+{
+	struct mddev *mddev = q->queuedata;
+	struct r1conf *conf = mddev->private;
+	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+	int max = biovec->bv_len;
+
+	if (mddev->merge_check_needed) {
+		int disk;
+		rcu_read_lock();
+		for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+			struct md_rdev *rdev = rcu_dereference(
+				conf->mirrors[disk].rdev);
+			if (rdev && !test_bit(Faulty, &rdev->flags)) {
+				struct request_queue *q =
+					bdev_get_queue(rdev->bdev);
+				if (q->merge_bvec_fn) {
+					bvm->bi_sector = sector +
+						rdev->data_offset;
+					bvm->bi_bdev = rdev->bdev;
+					max = min(max, q->merge_bvec_fn(
+							  q, bvm, biovec));
+				}
+			}
+		}
+		rcu_read_unlock();
+	}
+	return max;
+
+}
+
 int md_raid1_congested(struct mddev *mddev, int bits)
 {
 	struct r1conf *conf = mddev->private;
@@ -737,9 +771,22 @@ static void wait_barrier(struct r1conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
 		conf->nr_waiting++;
-		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+		/* Wait for the barrier to drop.
+		 * However if there are already pending
+		 * requests (preventing the barrier from
+		 * rising completely), and the
+		 * pre-process bio queue isn't empty,
+		 * then don't wait, as we need to empty
+		 * that queue to get the nr_pending
+		 * count down.
+		 */
+		wait_event_lock_irq(conf->wait_barrier,
+				    !conf->barrier ||
+				    (conf->nr_pending &&
+				     current->bio_list &&
+				     !bio_list_empty(current->bio_list)),
 				    conf->resync_lock,
 			);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1002,7 +1049,8 @@ read_again:
 			break;
 		}
 		r1_bio->bios[i] = NULL;
-		if (!rdev || test_bit(Faulty, &rdev->flags)) {
+		if (!rdev || test_bit(Faulty, &rdev->flags)
+		    || test_bit(Unmerged, &rdev->flags)) {
 			if (i < conf->raid_disks)
 				set_bit(R1BIO_Degraded, &r1_bio->state);
 			continue;
@@ -1322,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct mirror_info *p;
 	int first = 0;
 	int last = conf->raid_disks - 1;
+	struct request_queue *q = bdev_get_queue(rdev->bdev);
 
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
@@ -1329,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
+	if (q->merge_bvec_fn) {
+		set_bit(Unmerged, &rdev->flags);
+		mddev->merge_check_needed = 1;
+	}
+
 	for (mirror = first; mirror <= last; mirror++) {
 		p = conf->mirrors+mirror;
 		if (!p->rdev) {
 
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
-			/* as we don't honour merge_bvec_fn, we must
-			 * never risk violating it, so limit
-			 * ->max_segments to one lying with a single
-			 * page, as a one page request is never in
-			 * violation.
-			 */
-			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-				blk_queue_max_segments(mddev->queue, 1);
-				blk_queue_segment_boundary(mddev->queue,
-							   PAGE_CACHE_SIZE - 1);
-			}
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -1370,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
+	if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+		/* Some requests might not have seen this new
+		 * merge_bvec_fn.  We must wait for them to complete
+		 * before merging the device fully.
+		 * First we make sure any code which has tested
+		 * our function has submitted the request, then
+		 * we wait for all outstanding requests to complete.
+		 */
+		synchronize_sched();
+		raise_barrier(conf);
+		lower_barrier(conf);
+		clear_bit(Unmerged, &rdev->flags);
+	}
 	md_integrity_add_rdev(rdev, mddev);
 	print_conf(conf);
 	return err;
@@ -2491,7 +2547,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 
 	err = -EINVAL;
 	spin_lock_init(&conf->device_lock);
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		int disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
@@ -2609,20 +2665,11 @@ static int run(struct mddev *mddev)
 	if (IS_ERR(conf))
 		return PTR_ERR(conf);
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		if (!mddev->gendisk)
 			continue;
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_segments to 1 lying within
-		 * a single page, as a one page request is never in violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 	}
 
 	mddev->degraded = 0;
@@ -2656,6 +2703,7 @@ static int run(struct mddev *mddev)
 	if (mddev->queue) {
 		mddev->queue->backing_dev_info.congested_fn = raid1_congested;
 		mddev->queue->backing_dev_info.congested_data = mddev;
+		blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
 	}
 	return md_integrity_register(mddev);
 }
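The core of raid1_mergeable_bvec (and of its RAID10 counterpart below) is a min-reduction: a mirrored write goes to every member, so the limit reported upward must be the smallest limit any member would accept. A toy user-space sketch of that reduction, with a hypothetical per-member callback standing in for a queue's merge_bvec_fn:

```c
#include <stdio.h>

/* Hypothetical per-member limit: how many extra bytes member 'i' would
 * accept at this offset (a stand-in for a member queue's merge_bvec_fn). */
typedef int (*member_limit_fn)(int member, int requested);

/* A mirrored write is issued to every member, so report the minimum. */
static int mirror_merge_limit(member_limit_fn fn, int nmembers, int requested)
{
	int max = requested;

	for (int i = 0; i < nmembers; i++) {
		int allowed = fn(i, requested);
		if (allowed < max)
			max = allowed;
	}
	return max;
}

/* Toy limits: member 1 only accepts 512 more bytes. */
static int toy_limit(int member, int requested)
{
	return member == 1 ? 512 : requested;
}

int main(void)
{
	printf("%d\n", mirror_merge_limit(toy_limit, 3, 4096)); /* prints 512 */
	return 0;
}
```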
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -586,25 +586,68 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
  * @biovec: the request that could be merged to it.
  *
  * Return amount of bytes we can accept at this offset
- * If near_copies == raid_disk, there are no striping issues,
- * but in that case, the function isn't called at all.
+ * This requires checking for end-of-chunk if near_copies != raid_disks,
+ * and for subordinate merge_bvec_fns if merge_check_needed.
  */
 static int raid10_mergeable_bvec(struct request_queue *q,
 				 struct bvec_merge_data *bvm,
 				 struct bio_vec *biovec)
 {
 	struct mddev *mddev = q->queuedata;
+	struct r10conf *conf = mddev->private;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 	int max;
 	unsigned int chunk_sectors = mddev->chunk_sectors;
 	unsigned int bio_sectors = bvm->bi_size >> 9;
 
-	max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
-	if (max < 0) max = 0; /* bio_add cannot handle a negative return */
-	if (max <= biovec->bv_len && bio_sectors == 0)
-		return biovec->bv_len;
-	else
-		return max;
+	if (conf->near_copies < conf->raid_disks) {
+		max = (chunk_sectors - ((sector & (chunk_sectors - 1))
+					+ bio_sectors)) << 9;
+		if (max < 0)
+			/* bio_add cannot handle a negative return */
+			max = 0;
+		if (max <= biovec->bv_len && bio_sectors == 0)
+			return biovec->bv_len;
+	} else
+		max = biovec->bv_len;
+
+	if (mddev->merge_check_needed) {
+		struct r10bio r10_bio;
+		int s;
+		r10_bio.sector = sector;
+		raid10_find_phys(conf, &r10_bio);
+		rcu_read_lock();
+		for (s = 0; s < conf->copies; s++) {
+			int disk = r10_bio.devs[s].devnum;
+			struct md_rdev *rdev = rcu_dereference(
+				conf->mirrors[disk].rdev);
+			if (rdev && !test_bit(Faulty, &rdev->flags)) {
+				struct request_queue *q =
+					bdev_get_queue(rdev->bdev);
+				if (q->merge_bvec_fn) {
+					bvm->bi_sector = r10_bio.devs[s].addr
+						+ rdev->data_offset;
+					bvm->bi_bdev = rdev->bdev;
+					max = min(max, q->merge_bvec_fn(
+							  q, bvm, biovec));
+				}
+			}
+			rdev = rcu_dereference(conf->mirrors[disk].replacement);
+			if (rdev && !test_bit(Faulty, &rdev->flags)) {
+				struct request_queue *q =
+					bdev_get_queue(rdev->bdev);
+				if (q->merge_bvec_fn) {
+					bvm->bi_sector = r10_bio.devs[s].addr
+						+ rdev->data_offset;
+					bvm->bi_bdev = rdev->bdev;
+					max = min(max, q->merge_bvec_fn(
+							  q, bvm, biovec));
+				}
+			}
+		}
+		rcu_read_unlock();
+	}
+	return max;
 }
 
 /*
@@ -668,11 +711,12 @@ retry:
 		disk = r10_bio->devs[slot].devnum;
 		rdev = rcu_dereference(conf->mirrors[disk].replacement);
 		if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
+		    test_bit(Unmerged, &rdev->flags) ||
 		    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
 			rdev = rcu_dereference(conf->mirrors[disk].rdev);
-		if (rdev == NULL)
-			continue;
-		if (test_bit(Faulty, &rdev->flags))
+		if (rdev == NULL ||
+		    test_bit(Faulty, &rdev->flags) ||
+		    test_bit(Unmerged, &rdev->flags))
 			continue;
 		if (!test_bit(In_sync, &rdev->flags) &&
 		    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
@@ -863,9 +907,22 @@ static void wait_barrier(struct r10conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
 		conf->nr_waiting++;
-		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+		/* Wait for the barrier to drop.
+		 * However if there are already pending
+		 * requests (preventing the barrier from
+		 * rising completely), and the
+		 * pre-process bio queue isn't empty,
+		 * then don't wait, as we need to empty
+		 * that queue to get the nr_pending
+		 * count down.
+		 */
+		wait_event_lock_irq(conf->wait_barrier,
+				    !conf->barrier ||
+				    (conf->nr_pending &&
+				     current->bio_list &&
+				     !bio_list_empty(current->bio_list)),
 				    conf->resync_lock,
 			);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1121,12 +1178,14 @@ retry_write:
 			blocked_rdev = rrdev;
 			break;
 		}
-		if (rrdev && test_bit(Faulty, &rrdev->flags))
+		if (rrdev && (test_bit(Faulty, &rrdev->flags)
+			      || test_bit(Unmerged, &rrdev->flags)))
 			rrdev = NULL;
 
 		r10_bio->devs[i].bio = NULL;
 		r10_bio->devs[i].repl_bio = NULL;
-		if (!rdev || test_bit(Faulty, &rdev->flags)) {
+		if (!rdev || test_bit(Faulty, &rdev->flags) ||
+		    test_bit(Unmerged, &rdev->flags)) {
 			set_bit(R10BIO_Degraded, &r10_bio->state);
 			continue;
 		}
@@ -1477,18 +1536,24 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	int mirror;
 	int first = 0;
 	int last = conf->raid_disks - 1;
+	struct request_queue *q = bdev_get_queue(rdev->bdev);
 
 	if (mddev->recovery_cp < MaxSector)
 		/* only hot-add to in-sync arrays, as recovery is
 		 * very different from resync
 		 */
 		return -EBUSY;
-	if (!enough(conf, -1))
+	if (rdev->saved_raid_disk < 0 && !enough(conf, -1))
 		return -EINVAL;
 
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
+	if (q->merge_bvec_fn) {
+		set_bit(Unmerged, &rdev->flags);
+		mddev->merge_check_needed = 1;
+	}
+
 	if (rdev->saved_raid_disk >= first &&
 	    conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
 		mirror = rdev->saved_raid_disk;
@@ -1508,11 +1573,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			err = 0;
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
-			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-				blk_queue_max_segments(mddev->queue, 1);
-				blk_queue_segment_boundary(mddev->queue,
-							   PAGE_CACHE_SIZE - 1);
-			}
 			conf->fullsync = 1;
 			rcu_assign_pointer(p->replacement, rdev);
 			break;
@@ -1520,17 +1580,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must
-		 * never risk violating it, so limit
-		 * ->max_segments to one lying with a single
-		 * page, as a one page request is never in
-		 * violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 
 		p->head_position = 0;
 		p->recovery_disabled = mddev->recovery_disabled - 1;
@@ -1541,7 +1590,19 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		rcu_assign_pointer(p->rdev, rdev);
 		break;
 	}
+	if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+		/* Some requests might not have seen this new
+		 * merge_bvec_fn.  We must wait for them to complete
+		 * before merging the device fully.
+		 * First we make sure any code which has tested
+		 * our function has submitted the request, then
+		 * we wait for all outstanding requests to complete.
+		 */
+		synchronize_sched();
+		raise_barrier(conf, 0);
+		lower_barrier(conf);
+		clear_bit(Unmerged, &rdev->flags);
+	}
 	md_integrity_add_rdev(rdev, mddev);
 	print_conf(conf);
 	return err;
@@ -1682,10 +1743,8 @@ static void end_sync_write(struct bio *bio, int error)
 	d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 	if (repl)
 		rdev = conf->mirrors[d].replacement;
-	if (!rdev) {
-		smp_mb();
+	else
 		rdev = conf->mirrors[d].rdev;
-	}
 
 	if (!uptodate) {
 		if (repl)
@@ -2087,6 +2146,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 		d = r10_bio->devs[sl].devnum;
 		rdev = rcu_dereference(conf->mirrors[d].rdev);
 		if (rdev &&
+		    !test_bit(Unmerged, &rdev->flags) &&
 		    test_bit(In_sync, &rdev->flags) &&
 		    is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
 				&first_bad, &bad_sectors) == 0) {
@@ -2140,6 +2200,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 		d = r10_bio->devs[sl].devnum;
 		rdev = rcu_dereference(conf->mirrors[d].rdev);
 		if (!rdev ||
+		    test_bit(Unmerged, &rdev->flags) ||
 		    !test_bit(In_sync, &rdev->flags))
 			continue;
 
@@ -3242,7 +3303,7 @@ static int run(struct mddev *mddev)
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks / conf->near_copies));
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 
 		disk_idx = rdev->raid_disk;
 		if (disk_idx >= conf->raid_disks
@@ -3262,15 +3323,6 @@ static int run(struct mddev *mddev)
 
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit max_segments to 1 lying
-		 * within a single page.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 
 		disk->head_position = 0;
 	}
@@ -3334,8 +3386,7 @@ static int run(struct mddev *mddev)
 		mddev->queue->backing_dev_info.ra_pages = 2* stripe;
 	}
 
-	if (conf->near_copies < conf->raid_disks)
-		blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
+	blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
 
 	if (md_integrity_register(mddev))
 		goto out_free_conf;
@@ -3385,6 +3436,43 @@ static void raid10_quiesce(struct mddev *mddev, int state)
 	}
 }
 
+static int raid10_resize(struct mddev *mddev, sector_t sectors)
+{
+	/* Resize of 'far' arrays is not supported.
+	 * For 'near' and 'offset' arrays we can set the
+	 * number of sectors used to be an appropriate multiple
+	 * of the chunk size.
+	 * For 'offset', this is far_copies*chunksize.
+	 * For 'near' the multiplier is the LCM of
+	 * near_copies and raid_disks.
+	 * So if far_copies > 1 && !far_offset, fail.
+	 * Else find LCM(raid_disks, near_copy)*far_copies and
+	 * multiply by chunk_size.  Then round to this number.
+	 * This is mostly done by raid10_size()
+	 */
+	struct r10conf *conf = mddev->private;
+	sector_t oldsize, size;
+
+	if (conf->far_copies > 1 && !conf->far_offset)
+		return -EINVAL;
+
+	oldsize = raid10_size(mddev, 0, 0);
+	size = raid10_size(mddev, sectors, 0);
+	md_set_array_sectors(mddev, size);
+	if (mddev->array_sectors > size)
+		return -EINVAL;
+	set_capacity(mddev->gendisk, mddev->array_sectors);
+	revalidate_disk(mddev->gendisk);
+	if (sectors > mddev->dev_sectors &&
+	    mddev->recovery_cp > oldsize) {
+		mddev->recovery_cp = oldsize;
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	}
+	mddev->dev_sectors = sectors;
+	mddev->resync_max_sectors = size;
+	return 0;
+}
+
 static void *raid10_takeover_raid0(struct mddev *mddev)
 {
 	struct md_rdev *rdev;
@@ -3408,7 +3496,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev)
 
 	conf = setup_conf(mddev);
 	if (!IS_ERR(conf)) {
-		list_for_each_entry(rdev, &mddev->disks, same_set)
+		rdev_for_each(rdev, mddev)
 			if (rdev->raid_disk >= 0)
 				rdev->new_raid_disk = rdev->raid_disk * 2;
 		conf->barrier = 1;
@@ -3454,6 +3542,7 @@ static struct md_personality raid10_personality =
 	.sync_request	= sync_request,
 	.quiesce	= raid10_quiesce,
 	.size		= raid10_size,
+	.resize		= raid10_resize,
 	.takeover	= raid10_takeover,
 };
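The rounding rule described in the raid10_resize() comment is easiest to see with numbers. A user-space sketch that only computes the multiple named in that comment, LCM(raid_disks, near_copies) * far_copies * chunk, and rounds a sector count down to it; the geometry values are made up and nothing here claims to reproduce raid10_size() itself:

```c
#include <stdio.h>
#include <stdint.h>

static uint64_t gcd(uint64_t a, uint64_t b)
{
	while (b) {
		uint64_t t = a % b;
		a = b;
		b = t;
	}
	return a;
}

static uint64_t lcm(uint64_t a, uint64_t b)
{
	return a / gcd(a, b) * b;
}

int main(void)
{
	/* Assumed geometry: 6 disks, near_copies=2, far_copies=1, 1024-sector chunks */
	uint64_t raid_disks = 6, near_copies = 2, far_copies = 1;
	uint64_t chunk_sects = 1024;
	uint64_t sectors = 1000000;	/* candidate size to round down */

	uint64_t unit = lcm(raid_disks, near_copies) * far_copies * chunk_sects;
	uint64_t rounded = sectors - sectors % unit;

	/* unit = 6 * 1 * 1024 = 6144; 1000000 rounds down to 995328 */
	printf("rounding unit %llu, %llu -> %llu sectors\n",
	       (unsigned long long)unit, (unsigned long long)sectors,
	       (unsigned long long)rounded);
	return 0;
}
```

'far' layouts without far_offset are rejected outright because their second copy sits in a separate region whose start would also have to move when the device size changes.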
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -208,11 +208,10 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
 				md_wakeup_thread(conf->mddev->thread);
 	} else {
 		BUG_ON(stripe_operations_active(sh));
-		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-			atomic_dec(&conf->preread_active_stripes);
-			if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			if (atomic_dec_return(&conf->preread_active_stripes)
+			    < IO_THRESHOLD)
 				md_wakeup_thread(conf->mddev->thread);
-		}
 		atomic_dec(&conf->active_stripes);
 		if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
 			list_add_tail(&sh->lru, &conf->inactive_list);
@@ -4843,7 +4842,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 
 	pr_debug("raid456: run(%s) called.\n", mdname(mddev));
 
-	list_for_each_entry(rdev, &mddev->disks, same_set) {
+	rdev_for_each(rdev, mddev) {
 		raid_disk = rdev->raid_disk;
 		if (raid_disk >= max_disks
 		    || raid_disk < 0)
@@ -5178,7 +5177,7 @@ static int run(struct mddev *mddev)
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks - conf->max_degraded));
 
-		list_for_each_entry(rdev, &mddev->disks, same_set)
+		rdev_for_each(rdev, mddev)
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
 	}
@@ -5362,7 +5361,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
 
-	if (has_failed(conf))
+	if (rdev->saved_raid_disk < 0 && has_failed(conf))
 		/* no point adding a device */
 		return -EINVAL;
 
@@ -5501,7 +5500,7 @@ static int raid5_start_reshape(struct mddev *mddev)
 	if (!check_stripe_cache(mddev))
 		return -ENOSPC;
 
-	list_for_each_entry(rdev, &mddev->disks, same_set)
+	rdev_for_each(rdev, mddev)
 		if (!test_bit(In_sync, &rdev->flags)
 		    && !test_bit(Faulty, &rdev->flags))
 			spares++;
@@ -5547,16 +5546,14 @@ static int raid5_start_reshape(struct mddev *mddev)
 	 * such devices during the reshape and confusion could result.
 	 */
 	if (mddev->delta_disks >= 0) {
-		int added_devices = 0;
-		list_for_each_entry(rdev, &mddev->disks, same_set)
+		rdev_for_each(rdev, mddev)
 			if (rdev->raid_disk < 0 &&
 			    !test_bit(Faulty, &rdev->flags)) {
 				if (raid5_add_disk(mddev, rdev) == 0) {
 					if (rdev->raid_disk
-					    >= conf->previous_raid_disks) {
+					    >= conf->previous_raid_disks)
 						set_bit(In_sync, &rdev->flags);
-						added_devices++;
-					} else
+					else
 						rdev->recovery_offset = 0;
 
 					if (sysfs_link_rdev(mddev, rdev))
@@ -5566,7 +5563,6 @@ static int raid5_start_reshape(struct mddev *mddev)
 			    && !test_bit(Faulty, &rdev->flags)) {
 				/* This is a spare that was manually added */
 				set_bit(In_sync, &rdev->flags);
-				added_devices++;
 			}
 
 	/* When a reshape changes the number of devices,
@@ -5592,6 +5588,7 @@ static int raid5_start_reshape(struct mddev *mddev)
 		spin_lock_irq(&conf->device_lock);
 		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
 		conf->reshape_progress = MaxSector;
+		mddev->reshape_position = MaxSector;
 		spin_unlock_irq(&conf->device_lock);
 		return -EAGAIN;
 	}
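The __release_stripe change folds a decrement and a separate read into one atomic_dec_return(), so the threshold test sees the value this decrement produced rather than whatever another CPU has since made the counter. A user-space C11 sketch of the difference, using stdatomic and toy names (not kernel code):

```c
#include <stdatomic.h>
#include <stdio.h>

#define IO_THRESHOLD 2

static atomic_int preread_active = 5;

/* Racy pattern: another thread may change the counter between the
 * fetch_sub and the load, so the threshold test can use a stale value. */
static int release_racy(void)
{
	atomic_fetch_sub(&preread_active, 1);
	return atomic_load(&preread_active) < IO_THRESHOLD;
}

/* fetch_sub returns the previous value, so (old - 1) is exactly the value
 * this decrement produced -- the same idea as atomic_dec_return(). */
static int release_dec_return(void)
{
	return atomic_fetch_sub(&preread_active, 1) - 1 < IO_THRESHOLD;
}

int main(void)
{
	printf("racy: %d\n", release_racy());		 /* counter 5 -> 4 */
	printf("dec_return: %d\n", release_dec_return()); /* counter 4 -> 3 */
	return 0;
}
```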
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -281,6 +281,10 @@ struct mdp_superblock_1 {
 					   * active device with same 'role'.
 					   * 'recovery_offset' is also set.
 					   */
-#define	MD_FEATURE_ALL		(1|2|4|8|16)
+#define	MD_FEATURE_ALL		(MD_FEATURE_BITMAP_OFFSET	\
+				|MD_FEATURE_RECOVERY_OFFSET	\
+				|MD_FEATURE_RESHAPE_ACTIVE	\
+				|MD_FEATURE_BAD_BLOCKS		\
+				|MD_FEATURE_REPLACEMENT)
 
 #endif
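This "use existing macros instead of numbers" cleanup is only safe if the named feature bits are exactly the old literals. A stand-alone sketch that checks the equivalence at compile time; the #define values are restated assumptions about the v1.x superblock feature bits, not taken from this diff:

```c
#include <assert.h>

/* Assumed values of the v1.x superblock feature bits. */
#define MD_FEATURE_BITMAP_OFFSET	1
#define MD_FEATURE_RECOVERY_OFFSET	2
#define MD_FEATURE_RESHAPE_ACTIVE	4
#define MD_FEATURE_BAD_BLOCKS		8
#define MD_FEATURE_REPLACEMENT		16

#define MD_FEATURE_ALL	(MD_FEATURE_BITMAP_OFFSET	\
			|MD_FEATURE_RECOVERY_OFFSET	\
			|MD_FEATURE_RESHAPE_ACTIVE	\
			|MD_FEATURE_BAD_BLOCKS		\
			|MD_FEATURE_REPLACEMENT)

/* The rewritten macro must still evaluate to the old literal mask. */
static_assert(MD_FEATURE_ALL == (1|2|4|8|16), "feature mask changed");

int main(void) { return 0; }
```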