Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (27 commits) md: add 'recovery_start' per-device sysfs attribute md: rcu_read_lock() walk of mddev->disks in md_do_sync() md: integrate spares into array at earliest opportunity. md: move compat_ioctl handling into md.c md: revise Kconfig help for MD_MULTIPATH md: add MODULE_DESCRIPTION for all md related modules. raid: improve MD/raid10 handling of correctable read errors. md/raid10: print more useful messages on device failure. md/bitmap: update dirty flag when bitmap bits are explicitly set. md: Support write-intent bitmaps with externally managed metadata. md/bitmap: move setting of daemon_lastrun out of bitmap_read_sb md: support updating bitmap parameters via sysfs. md: factor out parsing of fixed-point numbers md: support bitmap offset appropriate for external-metadata arrays. md: remove needless setting of thread->timeout in raid10_quiesce md: change daemon_sleep to be in 'jiffies' rather than 'seconds'. md: move offset, daemon_sleep and chunksize out of bitmap structure md: collect bitmap-specific fields into one structure. md/raid1: add takeover support for raid5->raid1 md: add honouring of suspend_{lo,hi} to raid1. ...
This commit is contained in:
commit
37222e1c9e
17 changed files with 1148 additions and 317 deletions
|
@ -233,9 +233,9 @@ All md devices contain:
|
|||
|
||||
resync_start
|
||||
The point at which resync should start. If no resync is needed,
|
||||
this will be a very large number. At array creation it will
|
||||
default to 0, though starting the array as 'clean' will
|
||||
set it much larger.
|
||||
this will be a very large number (or 'none' since 2.6.30-rc1). At
|
||||
array creation it will default to 0, though starting the array as
|
||||
'clean' will set it much larger.
|
||||
|
||||
new_dev
|
||||
This file can be written but not read. The value written should
|
||||
|
@ -296,6 +296,51 @@ All md devices contain:
|
|||
active-idle
|
||||
like active, but no writes have been seen for a while (safe_mode_delay).
|
||||
|
||||
bitmap/location
|
||||
This indicates where the write-intent bitmap for the array is
|
||||
stored.
|
||||
It can be one of "none", "file" or "[+-]N".
|
||||
"file" may later be extended to "file:/file/name"
|
||||
"[+-]N" means that many sectors from the start of the metadata.
|
||||
This is replicated on all devices. For arrays with externally
|
||||
managed metadata, the offset is from the beginning of the
|
||||
device.
|
||||
bitmap/chunksize
|
||||
The size, in bytes, of the chunk which will be represented by a
|
||||
single bit. For RAID456, it is a portion of an individual
|
||||
device. For RAID10, it is a portion of the array. For RAID1, it
|
||||
is both (they come to the same thing).
|
||||
bitmap/time_base
|
||||
The time, in seconds, between looking for bits in the bitmap to
|
||||
be cleared. In the current implementation, a bit will be cleared
|
||||
between 2 and 3 times "time_base" after all the covered blocks
|
||||
are known to be in-sync.
|
||||
bitmap/backlog
|
||||
When write-mostly devices are active in a RAID1, write requests
|
||||
to those devices proceed in the background - the filesystem (or
|
||||
other user of the device) does not have to wait for them.
|
||||
'backlog' sets a limit on the number of concurrent background
|
||||
writes. If there are more than this, new writes will be
|
||||
synchronous.
|
||||
bitmap/metadata
|
||||
This can be either 'internal' or 'external'.
|
||||
'internal' is the default and means the metadata for the bitmap
|
||||
is stored in the first 256 bytes of the allocated space and is
|
||||
managed by the md module.
|
||||
'external' means that bitmap metadata is managed externally to
|
||||
the kernel (i.e. by some userspace program)
|
||||
bitmap/can_clear
|
||||
This is either 'true' or 'false'. If 'true', then bits in the
|
||||
bitmap will be cleared when the corresponding blocks are thought
|
||||
to be in-sync. If 'false', bits will never be cleared.
|
||||
This is automatically set to 'false' if a write happens on a
|
||||
degraded array, or if the array becomes degraded during a write.
|
||||
When metadata is managed externally, it should be set to true
|
||||
once the array becomes non-degraded, and this fact has been
|
||||
recorded in the metadata.
|
||||
|
||||
|
||||
|
||||
|
||||
As component devices are added to an md array, they appear in the 'md'
|
||||
directory as new directories named
|
||||
|
@ -334,8 +379,9 @@ Each directory contains:
|
|||
Writing "writemostly" sets the writemostly flag.
|
||||
Writing "-writemostly" clears the writemostly flag.
|
||||
Writing "blocked" sets the "blocked" flag.
|
||||
Writing "-blocked" clear the "blocked" flag and allows writes
|
||||
Writing "-blocked" clears the "blocked" flag and allows writes
|
||||
to complete.
|
||||
Writing "in_sync" sets the in_sync flag.
|
||||
|
||||
This file responds to select/poll. Any change to 'faulty'
|
||||
or 'blocked' causes an event.
|
||||
|
@ -372,6 +418,24 @@ Each directory contains:
|
|||
array. If a value less than the current component_size is
|
||||
written, it will be rejected.
|
||||
|
||||
recovery_start
|
||||
|
||||
When the device is not 'in_sync', this records the number of
|
||||
sectors from the start of the device which are known to be
|
||||
correct. This is normally zero, but during a recovery
|
||||
operation it will steadily increase, and if the recovery is
|
||||
interrupted, restoring this value can cause recovery to
|
||||
avoid repeating the earlier blocks. With v1.x metadata, this
|
||||
value is saved and restored automatically.
|
||||
|
||||
This can be set whenever the device is not an active member of
|
||||
the array, either before the array is activated, or before
|
||||
the 'slot' is set.
|
||||
|
||||
Setting this to 'none' is equivalent to setting 'in_sync'.
|
||||
Setting to any other value also clears the 'in_sync' flag.
|
||||
|
||||
|
||||
|
||||
An active md device will also contain an entry for each active device
|
||||
in the array. These are named
|
||||
|
|
|
@ -185,11 +185,10 @@ config MD_MULTIPATH
|
|||
tristate "Multipath I/O support"
|
||||
depends on BLK_DEV_MD
|
||||
help
|
||||
Multipath-IO is the ability of certain devices to address the same
|
||||
physical disk over multiple 'IO paths'. The code ensures that such
|
||||
paths can be defined and handled at runtime, and ensures that a
|
||||
transparent failover to the backup path(s) happens if a IO errors
|
||||
arrives on the primary path.
|
||||
MD_MULTIPATH provides a simple multi-path personality for use with
|
||||
the MD framework. It is not under active development. New
|
||||
projects should consider using DM_MULTIPATH which has more
|
||||
features and more testing.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
|
|
|
@ -212,7 +212,7 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
|
|||
*/
|
||||
|
||||
/* IO operations when bitmap is stored near all superblocks */
|
||||
static struct page *read_sb_page(mddev_t *mddev, long offset,
|
||||
static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
|
||||
struct page *page,
|
||||
unsigned long index, int size)
|
||||
{
|
||||
|
@ -287,27 +287,36 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
|||
|
||||
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
|
||||
int size = PAGE_SIZE;
|
||||
loff_t offset = mddev->bitmap_info.offset;
|
||||
if (page->index == bitmap->file_pages-1)
|
||||
size = roundup(bitmap->last_page_size,
|
||||
bdev_logical_block_size(rdev->bdev));
|
||||
/* Just make sure we aren't corrupting data or
|
||||
* metadata
|
||||
*/
|
||||
if (bitmap->offset < 0) {
|
||||
if (mddev->external) {
|
||||
/* Bitmap could be anywhere. */
|
||||
if (rdev->sb_start + offset + (page->index *(PAGE_SIZE/512)) >
|
||||
rdev->data_offset &&
|
||||
rdev->sb_start + offset <
|
||||
rdev->data_offset + mddev->dev_sectors +
|
||||
(PAGE_SIZE/512))
|
||||
goto bad_alignment;
|
||||
} else if (offset < 0) {
|
||||
/* DATA BITMAP METADATA */
|
||||
if (bitmap->offset
|
||||
if (offset
|
||||
+ (long)(page->index * (PAGE_SIZE/512))
|
||||
+ size/512 > 0)
|
||||
/* bitmap runs in to metadata */
|
||||
goto bad_alignment;
|
||||
if (rdev->data_offset + mddev->dev_sectors
|
||||
> rdev->sb_start + bitmap->offset)
|
||||
> rdev->sb_start + offset)
|
||||
/* data runs in to bitmap */
|
||||
goto bad_alignment;
|
||||
} else if (rdev->sb_start < rdev->data_offset) {
|
||||
/* METADATA BITMAP DATA */
|
||||
if (rdev->sb_start
|
||||
+ bitmap->offset
|
||||
+ offset
|
||||
+ page->index*(PAGE_SIZE/512) + size/512
|
||||
> rdev->data_offset)
|
||||
/* bitmap runs in to data */
|
||||
|
@ -316,7 +325,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
|
|||
/* DATA METADATA BITMAP - no problems */
|
||||
}
|
||||
md_super_write(mddev, rdev,
|
||||
rdev->sb_start + bitmap->offset
|
||||
rdev->sb_start + offset
|
||||
+ page->index * (PAGE_SIZE/512),
|
||||
size,
|
||||
page);
|
||||
|
@ -488,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
|
|||
|
||||
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
|
||||
return;
|
||||
if (bitmap->mddev->bitmap_info.external)
|
||||
return;
|
||||
spin_lock_irqsave(&bitmap->lock, flags);
|
||||
if (!bitmap->sb_page) { /* no superblock */
|
||||
spin_unlock_irqrestore(&bitmap->lock, flags);
|
||||
|
@ -501,6 +512,9 @@ void bitmap_update_sb(struct bitmap *bitmap)
|
|||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
|
||||
}
|
||||
/* Just in case these have been changed via sysfs: */
|
||||
sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
|
||||
sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
|
||||
kunmap_atomic(sb, KM_USER0);
|
||||
write_page(bitmap, bitmap->sb_page, 1);
|
||||
}
|
||||
|
@ -550,7 +564,8 @@ static int bitmap_read_sb(struct bitmap *bitmap)
|
|||
|
||||
bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
|
||||
} else {
|
||||
bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset,
|
||||
bitmap->sb_page = read_sb_page(bitmap->mddev,
|
||||
bitmap->mddev->bitmap_info.offset,
|
||||
NULL,
|
||||
0, sizeof(bitmap_super_t));
|
||||
}
|
||||
|
@ -563,7 +578,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
|
|||
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
|
||||
|
||||
chunksize = le32_to_cpu(sb->chunksize);
|
||||
daemon_sleep = le32_to_cpu(sb->daemon_sleep);
|
||||
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
|
||||
write_behind = le32_to_cpu(sb->write_behind);
|
||||
|
||||
/* verify that the bitmap-specific fields are valid */
|
||||
|
@ -576,7 +591,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
|
|||
reason = "bitmap chunksize too small";
|
||||
else if ((1 << ffz(~chunksize)) != chunksize)
|
||||
reason = "bitmap chunksize not a power of 2";
|
||||
else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ)
|
||||
else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
|
||||
reason = "daemon sleep period out of range";
|
||||
else if (write_behind > COUNTER_MAX)
|
||||
reason = "write-behind limit out of range (0 - 16383)";
|
||||
|
@ -610,10 +625,9 @@ static int bitmap_read_sb(struct bitmap *bitmap)
|
|||
}
|
||||
success:
|
||||
/* assign fields using values from superblock */
|
||||
bitmap->chunksize = chunksize;
|
||||
bitmap->daemon_sleep = daemon_sleep;
|
||||
bitmap->daemon_lastrun = jiffies;
|
||||
bitmap->max_write_behind = write_behind;
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
|
||||
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
|
||||
bitmap->flags |= le32_to_cpu(sb->state);
|
||||
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
|
||||
bitmap->flags |= BITMAP_HOSTENDIAN;
|
||||
|
@ -664,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
|
|||
* general bitmap file operations
|
||||
*/
|
||||
|
||||
/*
|
||||
* on-disk bitmap:
|
||||
*
|
||||
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
|
||||
* file a page at a time. There's a superblock at the start of the file.
|
||||
*/
|
||||
/* calculate the index of the page that contains this bit */
|
||||
static inline unsigned long file_page_index(unsigned long chunk)
|
||||
static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
|
||||
{
|
||||
return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
|
||||
if (!bitmap->mddev->bitmap_info.external)
|
||||
chunk += sizeof(bitmap_super_t) << 3;
|
||||
return chunk >> PAGE_BIT_SHIFT;
|
||||
}
|
||||
|
||||
/* calculate the (bit) offset of this bit within a page */
|
||||
static inline unsigned long file_page_offset(unsigned long chunk)
|
||||
static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
|
||||
{
|
||||
return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
|
||||
if (!bitmap->mddev->bitmap_info.external)
|
||||
chunk += sizeof(bitmap_super_t) << 3;
|
||||
return chunk & (PAGE_BITS - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -686,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
|
|||
static inline struct page *filemap_get_page(struct bitmap *bitmap,
|
||||
unsigned long chunk)
|
||||
{
|
||||
if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
|
||||
return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
|
||||
if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
|
||||
return bitmap->filemap[file_page_index(bitmap, chunk)
|
||||
- file_page_index(bitmap, 0)];
|
||||
}
|
||||
|
||||
|
||||
|
@ -710,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
|
|||
spin_unlock_irqrestore(&bitmap->lock, flags);
|
||||
|
||||
while (pages--)
|
||||
if (map[pages]->index != 0) /* 0 is sb_page, release it below */
|
||||
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
|
||||
free_buffers(map[pages]);
|
||||
kfree(map);
|
||||
kfree(attr);
|
||||
|
@ -821,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
|||
|
||||
page = filemap_get_page(bitmap, chunk);
|
||||
if (!page) return;
|
||||
bit = file_page_offset(chunk);
|
||||
bit = file_page_offset(bitmap, chunk);
|
||||
|
||||
/* set the bit */
|
||||
kaddr = kmap_atomic(page, KM_USER0);
|
||||
|
@ -907,7 +932,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
|||
chunks = bitmap->chunks;
|
||||
file = bitmap->file;
|
||||
|
||||
BUG_ON(!file && !bitmap->offset);
|
||||
BUG_ON(!file && !bitmap->mddev->bitmap_info.offset);
|
||||
|
||||
#ifdef INJECT_FAULTS_3
|
||||
outofdate = 1;
|
||||
|
@ -919,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
|||
"recovery\n", bmname(bitmap));
|
||||
|
||||
bytes = (chunks + 7) / 8;
|
||||
if (!bitmap->mddev->bitmap_info.external)
|
||||
bytes += sizeof(bitmap_super_t);
|
||||
|
||||
num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
|
||||
if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
|
||||
if (file && i_size_read(file->f_mapping->host) < bytes) {
|
||||
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
|
||||
bmname(bitmap),
|
||||
(unsigned long) i_size_read(file->f_mapping->host),
|
||||
bytes + sizeof(bitmap_super_t));
|
||||
bytes);
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -947,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
|||
|
||||
for (i = 0; i < chunks; i++) {
|
||||
int b;
|
||||
index = file_page_index(i);
|
||||
bit = file_page_offset(i);
|
||||
index = file_page_index(bitmap, i);
|
||||
bit = file_page_offset(bitmap, i);
|
||||
if (index != oldindex) { /* this is a new page, read it in */
|
||||
int count;
|
||||
/* unmap the old page, we're done with it */
|
||||
if (index == num_pages-1)
|
||||
count = bytes + sizeof(bitmap_super_t)
|
||||
- index * PAGE_SIZE;
|
||||
count = bytes - index * PAGE_SIZE;
|
||||
else
|
||||
count = PAGE_SIZE;
|
||||
if (index == 0) {
|
||||
if (index == 0 && bitmap->sb_page) {
|
||||
/*
|
||||
* if we're here then the superblock page
|
||||
* contains some bits (PAGE_SIZE != sizeof sb)
|
||||
|
@ -967,14 +994,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
|
|||
offset = sizeof(bitmap_super_t);
|
||||
if (!file)
|
||||
read_sb_page(bitmap->mddev,
|
||||
bitmap->offset,
|
||||
bitmap->mddev->bitmap_info.offset,
|
||||
page,
|
||||
index, count);
|
||||
} else if (file) {
|
||||
page = read_page(file, index, bitmap, count);
|
||||
offset = 0;
|
||||
} else {
|
||||
page = read_sb_page(bitmap->mddev, bitmap->offset,
|
||||
page = read_sb_page(bitmap->mddev,
|
||||
bitmap->mddev->bitmap_info.offset,
|
||||
NULL,
|
||||
index, count);
|
||||
offset = 0;
|
||||
|
@ -1078,23 +1106,32 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
|
|||
* out to disk
|
||||
*/
|
||||
|
||||
void bitmap_daemon_work(struct bitmap *bitmap)
|
||||
void bitmap_daemon_work(mddev_t *mddev)
|
||||
{
|
||||
struct bitmap *bitmap;
|
||||
unsigned long j;
|
||||
unsigned long flags;
|
||||
struct page *page = NULL, *lastpage = NULL;
|
||||
int blocks;
|
||||
void *paddr;
|
||||
|
||||
if (bitmap == NULL)
|
||||
/* Use a mutex to guard daemon_work against
|
||||
* bitmap_destroy.
|
||||
*/
|
||||
mutex_lock(&mddev->bitmap_info.mutex);
|
||||
bitmap = mddev->bitmap;
|
||||
if (bitmap == NULL) {
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
return;
|
||||
if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
|
||||
}
|
||||
if (time_before(jiffies, bitmap->daemon_lastrun
|
||||
+ bitmap->mddev->bitmap_info.daemon_sleep))
|
||||
goto done;
|
||||
|
||||
bitmap->daemon_lastrun = jiffies;
|
||||
if (bitmap->allclean) {
|
||||
bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
return;
|
||||
goto done;
|
||||
}
|
||||
bitmap->allclean = 1;
|
||||
|
||||
|
@ -1142,7 +1179,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
|
|||
/* We are possibly going to clear some bits, so make
|
||||
* sure that events_cleared is up-to-date.
|
||||
*/
|
||||
if (bitmap->need_sync) {
|
||||
if (bitmap->need_sync &&
|
||||
bitmap->mddev->bitmap_info.external == 0) {
|
||||
bitmap_super_t *sb;
|
||||
bitmap->need_sync = 0;
|
||||
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
|
||||
|
@ -1152,7 +1190,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
|
|||
write_page(bitmap, bitmap->sb_page, 1);
|
||||
}
|
||||
spin_lock_irqsave(&bitmap->lock, flags);
|
||||
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
||||
if (!bitmap->need_sync)
|
||||
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
||||
}
|
||||
bmc = bitmap_get_counter(bitmap,
|
||||
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
|
||||
|
@ -1167,7 +1206,7 @@ void bitmap_daemon_work(struct bitmap *bitmap)
|
|||
if (*bmc == 2) {
|
||||
*bmc=1; /* maybe clear the bit next time */
|
||||
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
|
||||
} else if (*bmc == 1) {
|
||||
} else if (*bmc == 1 && !bitmap->need_sync) {
|
||||
/* we can clear the bit */
|
||||
*bmc = 0;
|
||||
bitmap_count_page(bitmap,
|
||||
|
@ -1177,9 +1216,11 @@ void bitmap_daemon_work(struct bitmap *bitmap)
|
|||
/* clear the bit */
|
||||
paddr = kmap_atomic(page, KM_USER0);
|
||||
if (bitmap->flags & BITMAP_HOSTENDIAN)
|
||||
clear_bit(file_page_offset(j), paddr);
|
||||
clear_bit(file_page_offset(bitmap, j),
|
||||
paddr);
|
||||
else
|
||||
ext2_clear_bit(file_page_offset(j), paddr);
|
||||
ext2_clear_bit(file_page_offset(bitmap, j),
|
||||
paddr);
|
||||
kunmap_atomic(paddr, KM_USER0);
|
||||
}
|
||||
} else
|
||||
|
@ -1202,7 +1243,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
|
|||
|
||||
done:
|
||||
if (bitmap->allclean == 0)
|
||||
bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ;
|
||||
bitmap->mddev->thread->timeout =
|
||||
bitmap->mddev->bitmap_info.daemon_sleep;
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
}
|
||||
|
||||
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
|
||||
|
@ -1332,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
|
|||
bitmap->events_cleared < bitmap->mddev->events) {
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->need_sync = 1;
|
||||
sysfs_notify_dirent(bitmap->sysfs_can_clear);
|
||||
}
|
||||
|
||||
if (!success && ! (*bmc & NEEDED_MASK))
|
||||
|
@ -1470,7 +1514,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
|
|||
return;
|
||||
}
|
||||
if (time_before(jiffies, (bitmap->last_end_sync
|
||||
+ bitmap->daemon_sleep * HZ)))
|
||||
+ bitmap->mddev->bitmap_info.daemon_sleep)))
|
||||
return;
|
||||
wait_event(bitmap->mddev->recovery_wait,
|
||||
atomic_read(&bitmap->mddev->recovery_active) == 0);
|
||||
|
@ -1522,6 +1566,12 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
|
|||
sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
|
||||
bitmap_set_memory_bits(bitmap, sec, 1);
|
||||
bitmap_file_set_bit(bitmap, sec);
|
||||
if (sec < bitmap->mddev->recovery_cp)
|
||||
/* We are asserting that the array is dirty,
|
||||
* so move the recovery_cp address back so
|
||||
* that it is obvious that it is dirty
|
||||
*/
|
||||
bitmap->mddev->recovery_cp = sec;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1531,7 +1581,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
|
|||
void bitmap_flush(mddev_t *mddev)
|
||||
{
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
int sleep;
|
||||
long sleep;
|
||||
|
||||
if (!bitmap) /* there was no bitmap */
|
||||
return;
|
||||
|
@ -1539,12 +1589,13 @@ void bitmap_flush(mddev_t *mddev)
|
|||
/* run the daemon_work three times to ensure everything is flushed
|
||||
* that can be
|
||||
*/
|
||||
sleep = bitmap->daemon_sleep;
|
||||
bitmap->daemon_sleep = 0;
|
||||
bitmap_daemon_work(bitmap);
|
||||
bitmap_daemon_work(bitmap);
|
||||
bitmap_daemon_work(bitmap);
|
||||
bitmap->daemon_sleep = sleep;
|
||||
sleep = mddev->bitmap_info.daemon_sleep * 2;
|
||||
bitmap->daemon_lastrun -= sleep;
|
||||
bitmap_daemon_work(mddev);
|
||||
bitmap->daemon_lastrun -= sleep;
|
||||
bitmap_daemon_work(mddev);
|
||||
bitmap->daemon_lastrun -= sleep;
|
||||
bitmap_daemon_work(mddev);
|
||||
bitmap_update_sb(bitmap);
|
||||
}
|
||||
|
||||
|
@ -1574,6 +1625,7 @@ static void bitmap_free(struct bitmap *bitmap)
|
|||
kfree(bp);
|
||||
kfree(bitmap);
|
||||
}
|
||||
|
||||
void bitmap_destroy(mddev_t *mddev)
|
||||
{
|
||||
struct bitmap *bitmap = mddev->bitmap;
|
||||
|
@ -1581,10 +1633,15 @@ void bitmap_destroy(mddev_t *mddev)
|
|||
if (!bitmap) /* there was no bitmap */
|
||||
return;
|
||||
|
||||
mutex_lock(&mddev->bitmap_info.mutex);
|
||||
mddev->bitmap = NULL; /* disconnect from the md device */
|
||||
mutex_unlock(&mddev->bitmap_info.mutex);
|
||||
if (mddev->thread)
|
||||
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
|
||||
if (bitmap->sysfs_can_clear)
|
||||
sysfs_put(bitmap->sysfs_can_clear);
|
||||
|
||||
bitmap_free(bitmap);
|
||||
}
|
||||
|
||||
|
@ -1598,16 +1655,17 @@ int bitmap_create(mddev_t *mddev)
|
|||
sector_t blocks = mddev->resync_max_sectors;
|
||||
unsigned long chunks;
|
||||
unsigned long pages;
|
||||
struct file *file = mddev->bitmap_file;
|
||||
struct file *file = mddev->bitmap_info.file;
|
||||
int err;
|
||||
sector_t start;
|
||||
struct sysfs_dirent *bm;
|
||||
|
||||
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
|
||||
|
||||
if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
|
||||
if (!file && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
|
||||
return 0;
|
||||
|
||||
BUG_ON(file && mddev->bitmap_offset);
|
||||
BUG_ON(file && mddev->bitmap_info.offset);
|
||||
|
||||
bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
|
||||
if (!bitmap)
|
||||
|
@ -1620,8 +1678,14 @@ int bitmap_create(mddev_t *mddev)
|
|||
|
||||
bitmap->mddev = mddev;
|
||||
|
||||
bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
|
||||
if (bm) {
|
||||
bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
|
||||
sysfs_put(bm);
|
||||
} else
|
||||
bitmap->sysfs_can_clear = NULL;
|
||||
|
||||
bitmap->file = file;
|
||||
bitmap->offset = mddev->bitmap_offset;
|
||||
if (file) {
|
||||
get_file(file);
|
||||
/* As future accesses to this file will use bmap,
|
||||
|
@ -1630,12 +1694,22 @@ int bitmap_create(mddev_t *mddev)
|
|||
*/
|
||||
vfs_fsync(file, file->f_dentry, 1);
|
||||
}
|
||||
/* read superblock from bitmap file (this sets bitmap->chunksize) */
|
||||
err = bitmap_read_sb(bitmap);
|
||||
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
|
||||
if (!mddev->bitmap_info.external)
|
||||
err = bitmap_read_sb(bitmap);
|
||||
else {
|
||||
err = 0;
|
||||
if (mddev->bitmap_info.chunksize == 0 ||
|
||||
mddev->bitmap_info.daemon_sleep == 0)
|
||||
/* chunksize and time_base need to be
|
||||
* set first. */
|
||||
err = -EINVAL;
|
||||
}
|
||||
if (err)
|
||||
goto error;
|
||||
|
||||
bitmap->chunkshift = ffz(~bitmap->chunksize);
|
||||
bitmap->daemon_lastrun = jiffies;
|
||||
bitmap->chunkshift = ffz(~mddev->bitmap_info.chunksize);
|
||||
|
||||
/* now that chunksize and chunkshift are set, we can use these macros */
|
||||
chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
|
||||
|
@ -1677,7 +1751,8 @@ int bitmap_create(mddev_t *mddev)
|
|||
|
||||
mddev->bitmap = bitmap;
|
||||
|
||||
mddev->thread->timeout = bitmap->daemon_sleep * HZ;
|
||||
mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
|
||||
md_wakeup_thread(mddev->thread);
|
||||
|
||||
bitmap_update_sb(bitmap);
|
||||
|
||||
|
@ -1688,6 +1763,264 @@ int bitmap_create(mddev_t *mddev)
|
|||
return err;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
location_show(mddev_t *mddev, char *page)
|
||||
{
|
||||
ssize_t len;
|
||||
if (mddev->bitmap_info.file) {
|
||||
len = sprintf(page, "file");
|
||||
} else if (mddev->bitmap_info.offset) {
|
||||
len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
|
||||
} else
|
||||
len = sprintf(page, "none");
|
||||
len += sprintf(page+len, "\n");
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
location_store(mddev_t *mddev, const char *buf, size_t len)
|
||||
{
|
||||
|
||||
if (mddev->pers) {
|
||||
if (!mddev->pers->quiesce)
|
||||
return -EBUSY;
|
||||
if (mddev->recovery || mddev->sync_thread)
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (mddev->bitmap || mddev->bitmap_info.file ||
|
||||
mddev->bitmap_info.offset) {
|
||||
/* bitmap already configured. Only option is to clear it */
|
||||
if (strncmp(buf, "none", 4) != 0)
|
||||
return -EBUSY;
|
||||
if (mddev->pers) {
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
bitmap_destroy(mddev);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
}
|
||||
mddev->bitmap_info.offset = 0;
|
||||
if (mddev->bitmap_info.file) {
|
||||
struct file *f = mddev->bitmap_info.file;
|
||||
mddev->bitmap_info.file = NULL;
|
||||
restore_bitmap_write_access(f);
|
||||
fput(f);
|
||||
}
|
||||
} else {
|
||||
/* No bitmap, OK to set a location */
|
||||
long long offset;
|
||||
if (strncmp(buf, "none", 4) == 0)
|
||||
/* nothing to be done */;
|
||||
else if (strncmp(buf, "file:", 5) == 0) {
|
||||
/* Not supported yet */
|
||||
return -EINVAL;
|
||||
} else {
|
||||
int rv;
|
||||
if (buf[0] == '+')
|
||||
rv = strict_strtoll(buf+1, 10, &offset);
|
||||
else
|
||||
rv = strict_strtoll(buf, 10, &offset);
|
||||
if (rv)
|
||||
return rv;
|
||||
if (offset == 0)
|
||||
return -EINVAL;
|
||||
if (mddev->bitmap_info.external == 0 &&
|
||||
mddev->major_version == 0 &&
|
||||
offset != mddev->bitmap_info.default_offset)
|
||||
return -EINVAL;
|
||||
mddev->bitmap_info.offset = offset;
|
||||
if (mddev->pers) {
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
rv = bitmap_create(mddev);
|
||||
if (rv) {
|
||||
bitmap_destroy(mddev);
|
||||
mddev->bitmap_info.offset = 0;
|
||||
}
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
if (rv)
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!mddev->external) {
|
||||
/* Ensure new bitmap info is stored in
|
||||
* metadata promptly.
|
||||
*/
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry bitmap_location =
|
||||
__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
|
||||
|
||||
static ssize_t
|
||||
timeout_show(mddev_t *mddev, char *page)
|
||||
{
|
||||
ssize_t len;
|
||||
unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
|
||||
unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
|
||||
|
||||
len = sprintf(page, "%lu", secs);
|
||||
if (jifs)
|
||||
len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
|
||||
len += sprintf(page+len, "\n");
|
||||
return len;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
timeout_store(mddev_t *mddev, const char *buf, size_t len)
|
||||
{
|
||||
/* timeout can be set at any time */
|
||||
unsigned long timeout;
|
||||
int rv = strict_strtoul_scaled(buf, &timeout, 4);
|
||||
if (rv)
|
||||
return rv;
|
||||
|
||||
/* just to make sure we don't overflow... */
|
||||
if (timeout >= LONG_MAX / HZ)
|
||||
return -EINVAL;
|
||||
|
||||
timeout = timeout * HZ / 10000;
|
||||
|
||||
if (timeout >= MAX_SCHEDULE_TIMEOUT)
|
||||
timeout = MAX_SCHEDULE_TIMEOUT-1;
|
||||
if (timeout < 1)
|
||||
timeout = 1;
|
||||
mddev->bitmap_info.daemon_sleep = timeout;
|
||||
if (mddev->thread) {
|
||||
/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
|
||||
* the bitmap is all clean and we don't need to
|
||||
* adjust the timeout right now
|
||||
*/
|
||||
if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
|
||||
mddev->thread->timeout = timeout;
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry bitmap_timeout =
|
||||
__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
|
||||
|
||||
static ssize_t
|
||||
backlog_show(mddev_t *mddev, char *page)
|
||||
{
|
||||
return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
backlog_store(mddev_t *mddev, const char *buf, size_t len)
|
||||
{
|
||||
unsigned long backlog;
|
||||
int rv = strict_strtoul(buf, 10, &backlog);
|
||||
if (rv)
|
||||
return rv;
|
||||
if (backlog > COUNTER_MAX)
|
||||
return -EINVAL;
|
||||
mddev->bitmap_info.max_write_behind = backlog;
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry bitmap_backlog =
|
||||
__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
|
||||
|
||||
static ssize_t
|
||||
chunksize_show(mddev_t *mddev, char *page)
|
||||
{
|
||||
return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
chunksize_store(mddev_t *mddev, const char *buf, size_t len)
|
||||
{
|
||||
/* Can only be changed when no bitmap is active */
|
||||
int rv;
|
||||
unsigned long csize;
|
||||
if (mddev->bitmap)
|
||||
return -EBUSY;
|
||||
rv = strict_strtoul(buf, 10, &csize);
|
||||
if (rv)
|
||||
return rv;
|
||||
if (csize < 512 ||
|
||||
!is_power_of_2(csize))
|
||||
return -EINVAL;
|
||||
mddev->bitmap_info.chunksize = csize;
|
||||
return len;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry bitmap_chunksize =
|
||||
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
|
||||
|
||||
/*
 * Read handler for the sysfs 'metadata' attribute: prints "external" when
 * bitmap_info.external is set and "internal" otherwise, followed by a
 * newline.  Returns the number of characters written to @page.
 */
static ssize_t metadata_show(mddev_t *mddev, char *page)
{
	if (mddev->bitmap_info.external)
		return sprintf(page, "%s\n", "external");

	return sprintf(page, "%s\n", "internal");
}
|
||||
|
||||
/*
 * sysfs store handler for "metadata": select external or internal bitmap
 * metadata.
 *
 * Refused with -EBUSY while a bitmap exists or a bitmap file/offset is
 * configured.  Only the first eight characters of @buf are compared, so
 * any trailing text (such as a newline) is ignored.  Returns @len on
 * success and -EINVAL for an unrecognised keyword.
 */
static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
{
	int external;

	if (mddev->bitmap || mddev->bitmap_info.file ||
	    mddev->bitmap_info.offset)
		return -EBUSY;

	if (!strncmp(buf, "external", 8))
		external = 1;
	else if (!strncmp(buf, "internal", 8))
		external = 0;
	else
		return -EINVAL;

	mddev->bitmap_info.external = external;
	return len;
}
|
||||
|
||||
/*
 * sysfs attribute "metadata": readable by everyone (S_IRUGO), writable by
 * the owner (S_IWUSR); served by metadata_show() / metadata_store().
 */
static struct md_sysfs_entry bitmap_metadata =
|
||||
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
|
||||
|
||||
/*
 * sysfs show handler for "can_clear".
 *
 * With no bitmap present only a newline is emitted.  Otherwise prints
 * "false" while the bitmap's need_sync flag is set and "true" when it is
 * clear.
 */
static ssize_t can_clear_show(mddev_t *mddev, char *page)
{
	if (!mddev->bitmap)
		return sprintf(page, "\n");

	if (mddev->bitmap->need_sync)
		return sprintf(page, "%s\n", "false");

	return sprintf(page, "%s\n", "true");
}
|
||||
|
||||
/*
 * sysfs store handler for "can_clear".
 *
 * "false" sets the bitmap's need_sync flag; "true" clears it, but is
 * refused with -EBUSY while the array is degraded.  Only the keyword
 * prefix is compared, so trailing text is ignored.
 *
 * Returns @len on success, -ENOENT when there is no bitmap, and -EINVAL
 * for any other input.
 */
static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
{
	if (!mddev->bitmap)
		return -ENOENT;

	if (!strncmp(buf, "false", 5)) {
		mddev->bitmap->need_sync = 1;
		return len;
	}

	if (strncmp(buf, "true", 4) != 0)
		return -EINVAL;

	if (mddev->degraded)
		return -EBUSY;

	mddev->bitmap->need_sync = 0;
	return len;
}
|
||||
|
||||
/*
 * sysfs attribute "can_clear": readable by everyone (S_IRUGO), writable by
 * the owner (S_IWUSR); served by can_clear_show() / can_clear_store().
 */
static struct md_sysfs_entry bitmap_can_clear =
|
||||
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
|
||||
|
||||
/* NULL-terminated list of the bitmap sysfs attributes, used by md_bitmap_group. */
static struct attribute *md_bitmap_attrs[] = {
|
||||
&bitmap_location.attr,
|
||||
&bitmap_timeout.attr,
|
||||
&bitmap_backlog.attr,
|
||||
&bitmap_chunksize.attr,
|
||||
&bitmap_metadata.attr,
|
||||
&bitmap_can_clear.attr,
|
||||
NULL
|
||||
};
|
||||
/*
 * Attribute group named "bitmap" bundling md_bitmap_attrs.  Not static:
 * md.c registers and removes it on the mddev kobject.
 */
struct attribute_group md_bitmap_group = {
|
||||
.name = "bitmap",
|
||||
.attrs = md_bitmap_attrs,
|
||||
};
|
||||
|
||||
|
||||
/* the bitmap API -- for raid personalities */
|
||||
EXPORT_SYMBOL(bitmap_startwrite);
|
||||
EXPORT_SYMBOL(bitmap_endwrite);
|
||||
|
|
|
@ -106,7 +106,7 @@ typedef __u16 bitmap_counter_t;
|
|||
#define BITMAP_BLOCK_SHIFT 9
|
||||
|
||||
/* how many blocks per chunk? (this is variable) */
|
||||
#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
|
||||
#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->mddev->bitmap_info.chunksize >> BITMAP_BLOCK_SHIFT)
|
||||
#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
|
||||
#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
|
||||
|
||||
|
@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
|
|||
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
|
||||
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
|
||||
|
||||
/*
|
||||
* on-disk bitmap:
|
||||
*
|
||||
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
|
||||
* file a page at a time. There's a superblock at the start of the file.
|
||||
*/
|
||||
|
||||
/* map chunks (bits) to file pages - offset by the size of the superblock */
|
||||
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -209,7 +199,6 @@ struct bitmap {
|
|||
int counter_bits; /* how many bits per block counter */
|
||||
|
||||
/* bitmap chunksize -- how much data does each bit represent? */
|
||||
unsigned long chunksize;
|
||||
unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
|
||||
unsigned long chunks; /* total number of data chunks for the array */
|
||||
|
||||
|
@ -226,7 +215,6 @@ struct bitmap {
|
|||
/* bitmap spinlock */
|
||||
spinlock_t lock;
|
||||
|
||||
long offset; /* offset from superblock if file is NULL */
|
||||
struct file *file; /* backing disk file */
|
||||
struct page *sb_page; /* cached copy of the bitmap file superblock */
|
||||
struct page **filemap; /* list of cache pages for the file */
|
||||
|
@ -238,7 +226,6 @@ struct bitmap {
|
|||
|
||||
int allclean;
|
||||
|
||||
unsigned long max_write_behind; /* write-behind mode */
|
||||
atomic_t behind_writes;
|
||||
|
||||
/*
|
||||
|
@ -246,7 +233,6 @@ struct bitmap {
|
|||
* file, cleaning up bits and flushing out pages to disk as necessary
|
||||
*/
|
||||
unsigned long daemon_lastrun; /* jiffies of last run */
|
||||
unsigned long daemon_sleep; /* how many seconds between updates? */
|
||||
unsigned long last_end_sync; /* when we lasted called end_sync to
|
||||
* update bitmap with resync progress */
|
||||
|
||||
|
@ -254,6 +240,7 @@ struct bitmap {
|
|||
wait_queue_head_t write_wait;
|
||||
wait_queue_head_t overflow_wait;
|
||||
|
||||
struct sysfs_dirent *sysfs_can_clear;
|
||||
};
|
||||
|
||||
/* the bitmap API */
|
||||
|
@ -282,7 +269,7 @@ void bitmap_close_sync(struct bitmap *bitmap);
|
|||
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
|
||||
|
||||
void bitmap_unplug(struct bitmap *bitmap);
|
||||
void bitmap_daemon_work(struct bitmap *bitmap);
|
||||
void bitmap_daemon_work(mddev_t *mddev);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -360,6 +360,7 @@ static void raid_exit(void)
|
|||
module_init(raid_init);
|
||||
module_exit(raid_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Fault injection personality for MD");
|
||||
MODULE_ALIAS("md-personality-10"); /* faulty */
|
||||
MODULE_ALIAS("md-faulty");
|
||||
MODULE_ALIAS("md-level--5");
|
||||
|
|
|
@ -292,7 +292,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
|
|||
int cpu;
|
||||
|
||||
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
|
||||
bio_endio(bio, -EOPNOTSUPP);
|
||||
md_barrier_request(mddev, bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -383,6 +383,7 @@ static void linear_exit (void)
|
|||
module_init(linear_init);
|
||||
module_exit(linear_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Linear device concatenation personality for MD");
|
||||
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
|
||||
MODULE_ALIAS("md-linear");
|
||||
MODULE_ALIAS("md-level--1");
|
||||
|
|
395
drivers/md/md.c
395
drivers/md/md.c
|
@ -44,6 +44,7 @@
|
|||
#include <linux/random.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/raid/md_p.h>
|
||||
#include <linux/raid/md_u.h>
|
||||
|
@ -67,6 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
|
|||
|
||||
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
|
||||
|
||||
/*
|
||||
* Default number of read corrections we'll attempt on an rdev
|
||||
* before ejecting it from the array. We divide the read error
|
||||
* count by 2 for every hour elapsed between read errors.
|
||||
*/
|
||||
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
|
||||
/*
|
||||
* Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
|
||||
* is 1000 KB/sec, so the extra system load does not show up that much.
|
||||
|
@ -213,12 +220,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
|
|||
return 0;
|
||||
}
|
||||
rcu_read_lock();
|
||||
if (mddev->suspended) {
|
||||
if (mddev->suspended || mddev->barrier) {
|
||||
DEFINE_WAIT(__wait);
|
||||
for (;;) {
|
||||
prepare_to_wait(&mddev->sb_wait, &__wait,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
if (!mddev->suspended)
|
||||
if (!mddev->suspended && !mddev->barrier)
|
||||
break;
|
||||
rcu_read_unlock();
|
||||
schedule();
|
||||
|
@ -260,10 +267,110 @@ static void mddev_resume(mddev_t *mddev)
|
|||
|
||||
/*
 * Report whether the array should currently be treated as congested:
 * 1 while a barrier request is being processed, otherwise the value of
 * mddev->suspended.  @bits is not consulted.
 */
int mddev_congested(mddev_t *mddev, int bits)
{
	return mddev->barrier ? 1 : mddev->suspended;
}
|
||||
EXPORT_SYMBOL(mddev_congested);
|
||||
|
||||
/*
|
||||
* Generic barrier handling for md
|
||||
*/
|
||||
|
||||
/*
 * Sentinel stored in mddev->barrier in place of a bio pointer while the
 * barriers issued after the request itself are still in flight.
 */
#define POST_REQUEST_BARRIER ((void*)1)
|
||||
|
||||
/*
 * Completion handler for the per-device barrier bios built by
 * submit_barriers().  bio->bi_private carries the rdev the bio was sent to.
 *
 * When the last outstanding barrier completes (flush_pending drops to
 * zero), either the whole barrier sequence is finished (post-request
 * phase) or barrier_work is scheduled to continue it (pre-request phase).
 */
static void md_end_barrier(struct bio *bio, int err)
|
||||
{
|
||||
mdk_rdev_t *rdev = bio->bi_private;
|
||||
mddev_t *mddev = rdev->mddev;
|
||||
/*
 * During the pre-request phase mddev->barrier is still the caller's bio;
 * record the "not supported" result on it.
 */
if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
|
||||
set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
|
||||
|
||||
/* Drop the nr_pending reference taken for this request. */
rdev_dec_pending(rdev, mddev);
|
||||
|
||||
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
||||
if (mddev->barrier == POST_REQUEST_BARRIER) {
|
||||
/* This was a post-request barrier */
|
||||
mddev->barrier = NULL;
|
||||
wake_up(&mddev->sb_wait);
|
||||
} else
|
||||
/* The pre-request barrier has finished */
|
||||
schedule_work(&mddev->barrier_work);
|
||||
}
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
 * Send an empty WRITE_BARRIER bio to every member device that occupies a
 * raid slot and is not Faulty.
 *
 * Each submitted bio raises mddev->flush_pending by one; md_end_barrier()
 * lowers it again on completion.  The device list is walked under
 * rcu_read_lock(), which is dropped around the allocation and submission
 * of each bio.
 */
static void submit_barriers(mddev_t *mddev)
{
	mdk_rdev_t *rdev;

	rcu_read_lock();
	list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			/*
			 * We are on the I/O submission path and the array is
			 * holding back new requests while the barrier is
			 * processed.  The allocation must therefore not start
			 * I/O to reclaim memory, or it could wait on this very
			 * array: use GFP_NOIO rather than GFP_KERNEL.
			 */
			bi = bio_alloc(GFP_NOIO, 0);
			bi->bi_end_io = md_end_barrier;
			bi->bi_private = rdev;
			bi->bi_bdev = rdev->bdev;
			atomic_inc(&mddev->flush_pending);
			submit_bio(WRITE_BARRIER, bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
}
|
||||
|
||||
/*
 * Work handler (mddev->barrier_work), scheduled once the pre-request
 * barriers have completed.
 *
 * Finishes the caller's barrier bio immediately if a device reported
 * -EOPNOTSUPP or if the bio carries no data.  Otherwise it strips the
 * barrier flag, hands the bio to the personality, and issues the
 * post-request barriers.
 */
static void md_submit_barrier(struct work_struct *ws)
|
||||
{
|
||||
mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
|
||||
struct bio *bio = mddev->barrier;
|
||||
|
||||
/*
 * Start the count at 1; it is dropped again at the end of this function.
 * NOTE(review): presumably this keeps completions that arrive while
 * submit_barriers() is still running from reaching zero early - confirm.
 */
atomic_set(&mddev->flush_pending, 1);
|
||||
|
||||
if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
|
||||
bio_endio(bio, -EOPNOTSUPP);
|
||||
else if (bio->bi_size == 0)
|
||||
/* an empty barrier - all done */
|
||||
bio_endio(bio, 0);
|
||||
else {
|
||||
/* Pass the payload on as an ordinary request, without the barrier flag. */
bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
|
||||
if (mddev->pers->make_request(mddev->queue, bio))
|
||||
generic_make_request(bio);
|
||||
/* From here on completions belong to the post-request phase. */
mddev->barrier = POST_REQUEST_BARRIER;
|
||||
submit_barriers(mddev);
|
||||
}
|
||||
/* Drop the initial count; if nothing is outstanding the sequence is complete. */
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
||||
mddev->barrier = NULL;
|
||||
wake_up(&mddev->sb_wait);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Entry point for personalities that receive a barrier bio.
 *
 * Waits until no other barrier is being processed, installs @bio as
 * mddev->barrier, and issues the pre-request barriers to the member
 * devices.  The rest of the sequence runs from md_submit_barrier() via
 * mddev->barrier_work.
 */
void md_barrier_request(mddev_t *mddev, struct bio *bio)
|
||||
{
|
||||
/* Only one barrier at a time: sleep on sb_wait until mddev->barrier is clear. */
spin_lock_irq(&mddev->write_lock);
|
||||
wait_event_lock_irq(mddev->sb_wait,
|
||||
!mddev->barrier,
|
||||
mddev->write_lock, /*nothing*/);
|
||||
mddev->barrier = bio;
|
||||
spin_unlock_irq(&mddev->write_lock);
|
||||
|
||||
/* Start the count at 1; it is dropped again after submit_barriers() below. */
atomic_set(&mddev->flush_pending, 1);
|
||||
INIT_WORK(&mddev->barrier_work, md_submit_barrier);
|
||||
|
||||
submit_barriers(mddev);
|
||||
|
||||
/* If nothing is outstanding by now, continue with the request straight away. */
if (atomic_dec_and_test(&mddev->flush_pending))
|
||||
schedule_work(&mddev->barrier_work);
|
||||
}
|
||||
EXPORT_SYMBOL(md_barrier_request);
|
||||
|
||||
static inline mddev_t *mddev_get(mddev_t *mddev)
|
||||
{
|
||||
|
@ -363,6 +470,7 @@ static mddev_t * mddev_find(dev_t unit)
|
|||
|
||||
mutex_init(&new->open_mutex);
|
||||
mutex_init(&new->reconfig_mutex);
|
||||
mutex_init(&new->bitmap_info.mutex);
|
||||
INIT_LIST_HEAD(&new->disks);
|
||||
INIT_LIST_HEAD(&new->all_mddevs);
|
||||
init_timer(&new->safemode_timer);
|
||||
|
@ -370,6 +478,7 @@ static mddev_t * mddev_find(dev_t unit)
|
|||
atomic_set(&new->openers, 0);
|
||||
atomic_set(&new->active_io, 0);
|
||||
spin_lock_init(&new->write_lock);
|
||||
atomic_set(&new->flush_pending, 0);
|
||||
init_waitqueue_head(&new->sb_wait);
|
||||
init_waitqueue_head(&new->recovery_wait);
|
||||
new->reshape_position = MaxSector;
|
||||
|
@ -748,7 +857,7 @@ struct super_type {
|
|||
*/
|
||||
int md_check_no_bitmap(mddev_t *mddev)
|
||||
{
|
||||
if (!mddev->bitmap_file && !mddev->bitmap_offset)
|
||||
if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
|
||||
return 0;
|
||||
printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
|
||||
mdname(mddev), mddev->pers->name);
|
||||
|
@ -876,8 +985,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
mddev->raid_disks = sb->raid_disks;
|
||||
mddev->dev_sectors = sb->size * 2;
|
||||
mddev->events = ev1;
|
||||
mddev->bitmap_offset = 0;
|
||||
mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
|
||||
mddev->bitmap_info.offset = 0;
|
||||
mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
|
||||
|
||||
if (mddev->minor_version >= 91) {
|
||||
mddev->reshape_position = sb->reshape_position;
|
||||
|
@ -911,8 +1020,9 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
mddev->max_disks = MD_SB_DISKS;
|
||||
|
||||
if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
|
||||
mddev->bitmap_file == NULL)
|
||||
mddev->bitmap_offset = mddev->default_bitmap_offset;
|
||||
mddev->bitmap_info.file == NULL)
|
||||
mddev->bitmap_info.offset =
|
||||
mddev->bitmap_info.default_offset;
|
||||
|
||||
} else if (mddev->pers == NULL) {
|
||||
/* Insist on good event counter while assembling */
|
||||
|
@ -1029,7 +1139,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
sb->layout = mddev->layout;
|
||||
sb->chunk_size = mddev->chunk_sectors << 9;
|
||||
|
||||
if (mddev->bitmap && mddev->bitmap_file == NULL)
|
||||
if (mddev->bitmap && mddev->bitmap_info.file == NULL)
|
||||
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
|
||||
|
||||
sb->disks[0].state = (1<<MD_DISK_REMOVED);
|
||||
|
@ -1107,7 +1217,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
|
|||
{
|
||||
if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
|
||||
return 0; /* component must fit device */
|
||||
if (rdev->mddev->bitmap_offset)
|
||||
if (rdev->mddev->bitmap_info.offset)
|
||||
return 0; /* can't move bitmap */
|
||||
rdev->sb_start = calc_dev_sboffset(rdev->bdev);
|
||||
if (!num_sectors || num_sectors > rdev->sb_start)
|
||||
|
@ -1286,8 +1396,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
mddev->raid_disks = le32_to_cpu(sb->raid_disks);
|
||||
mddev->dev_sectors = le64_to_cpu(sb->size);
|
||||
mddev->events = ev1;
|
||||
mddev->bitmap_offset = 0;
|
||||
mddev->default_bitmap_offset = 1024 >> 9;
|
||||
mddev->bitmap_info.offset = 0;
|
||||
mddev->bitmap_info.default_offset = 1024 >> 9;
|
||||
|
||||
mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
|
||||
memcpy(mddev->uuid, sb->set_uuid, 16);
|
||||
|
@ -1295,8 +1405,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
mddev->max_disks = (4096-256)/2;
|
||||
|
||||
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
|
||||
mddev->bitmap_file == NULL )
|
||||
mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
|
||||
mddev->bitmap_info.file == NULL )
|
||||
mddev->bitmap_info.offset =
|
||||
(__s32)le32_to_cpu(sb->bitmap_offset);
|
||||
|
||||
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
|
||||
mddev->reshape_position = le64_to_cpu(sb->reshape_position);
|
||||
|
@ -1390,19 +1501,17 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
sb->level = cpu_to_le32(mddev->level);
|
||||
sb->layout = cpu_to_le32(mddev->layout);
|
||||
|
||||
if (mddev->bitmap && mddev->bitmap_file == NULL) {
|
||||
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
|
||||
if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
|
||||
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
|
||||
sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
|
||||
}
|
||||
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(In_sync, &rdev->flags)) {
|
||||
if (rdev->recovery_offset > 0) {
|
||||
sb->feature_map |=
|
||||
cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
|
||||
sb->recovery_offset =
|
||||
cpu_to_le64(rdev->recovery_offset);
|
||||
}
|
||||
sb->feature_map |=
|
||||
cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
|
||||
sb->recovery_offset =
|
||||
cpu_to_le64(rdev->recovery_offset);
|
||||
}
|
||||
|
||||
if (mddev->reshape_position != MaxSector) {
|
||||
|
@ -1436,7 +1545,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||
sb->dev_roles[i] = cpu_to_le16(0xfffe);
|
||||
else if (test_bit(In_sync, &rdev2->flags))
|
||||
sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
|
||||
else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
|
||||
else if (rdev2->raid_disk >= 0)
|
||||
sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
|
||||
else
|
||||
sb->dev_roles[i] = cpu_to_le16(0xffff);
|
||||
|
@ -1458,7 +1567,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
|
|||
max_sectors -= rdev->data_offset;
|
||||
if (!num_sectors || num_sectors > max_sectors)
|
||||
num_sectors = max_sectors;
|
||||
} else if (rdev->mddev->bitmap_offset) {
|
||||
} else if (rdev->mddev->bitmap_info.offset) {
|
||||
/* minor version 0 with bitmap we can't move */
|
||||
return 0;
|
||||
} else {
|
||||
|
@ -2442,12 +2551,49 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
|
|||
static struct rdev_sysfs_entry rdev_size =
|
||||
__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
|
||||
|
||||
|
||||
/*
 * sysfs show handler for the per-device "recovery_start" attribute.
 *
 * Prints "none" when the device is In_sync or its recovery offset equals
 * MaxSector; otherwise prints the recovery offset in decimal.
 */
static ssize_t recovery_start_show(mdk_rdev_t *rdev, char *page)
{
	unsigned long long offset = rdev->recovery_offset;
	int needs_recovery = !test_bit(In_sync, &rdev->flags) &&
			     offset != MaxSector;

	if (needs_recovery)
		return sprintf(page, "%llu\n", offset);

	return sprintf(page, "none\n");
}
|
||||
|
||||
/*
 * sysfs store handler for the per-device "recovery_start" attribute.
 *
 * Accepts "none" (meaning MaxSector) or a decimal sector number.  The
 * input is validated first; the write is then refused with -EBUSY if the
 * array has a personality and this device occupies a raid slot.
 *
 * On success the recovery offset is updated, In_sync is set for
 * MaxSector and cleared otherwise, and @len is returned.
 */
static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
	unsigned long long offset;

	if (cmd_match(buf, "none")) {
		offset = MaxSector;
	} else {
		if (strict_strtoull(buf, 10, &offset))
			return -EINVAL;
	}

	if (rdev->mddev->pers && rdev->raid_disk >= 0)
		return -EBUSY;

	rdev->recovery_offset = offset;
	if (offset != MaxSector)
		clear_bit(In_sync, &rdev->flags);
	else
		set_bit(In_sync, &rdev->flags);

	return len;
}
|
||||
|
||||
/*
 * Per-device sysfs attribute "recovery_start": readable by everyone
 * (S_IRUGO), writable by the owner (S_IWUSR); served by
 * recovery_start_show() / recovery_start_store().
 */
static struct rdev_sysfs_entry rdev_recovery_start =
|
||||
__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
|
||||
|
||||
static struct attribute *rdev_default_attrs[] = {
|
||||
&rdev_state.attr,
|
||||
&rdev_errors.attr,
|
||||
&rdev_slot.attr,
|
||||
&rdev_offset.attr,
|
||||
&rdev_size.attr,
|
||||
&rdev_recovery_start.attr,
|
||||
NULL,
|
||||
};
|
||||
static ssize_t
|
||||
|
@ -2549,6 +2695,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
|
|||
rdev->flags = 0;
|
||||
rdev->data_offset = 0;
|
||||
rdev->sb_events = 0;
|
||||
rdev->last_read_error.tv_sec = 0;
|
||||
rdev->last_read_error.tv_nsec = 0;
|
||||
atomic_set(&rdev->nr_pending, 0);
|
||||
atomic_set(&rdev->read_errors, 0);
|
||||
atomic_set(&rdev->corrected_errors, 0);
|
||||
|
@ -2659,6 +2807,47 @@ static void analyze_sbs(mddev_t * mddev)
|
|||
}
|
||||
}
|
||||
|
||||
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale',
 * all without any floating-point arithmetic.
 *
 * @cp:    NUL-terminated input; one trailing newline is tolerated.
 * @res:   receives the scaled value; written only on success.
 * @scale: number of fractional digits kept; further digits are parsed
 *         but discarded (the value is truncated, not rounded).
 *
 * Returns 0 on success, -EINVAL if the text contains no digit at all or
 * has trailing garbage (including a second '.'), and -ERANGE if the
 * scaled value does not fit in an unsigned long.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
{
	const unsigned long max = ~0UL;
	unsigned long result = 0;
	long decimals = -1;	/* -1 until a '.' has been seen */
	int seen_digit = 0;

	while (isdigit((unsigned char)*cp) || (*cp == '.' && decimals < 0)) {
		if (*cp == '.')
			decimals = 0;
		else {
			seen_digit = 1;
			if (decimals < scale) {
				unsigned int value = *cp - '0';

				/* result * 10 + value must not wrap */
				if (result > (max - value) / 10)
					return -ERANGE;
				result = result * 10 + value;
				if (decimals >= 0)
					decimals++;
			}
		}
		cp++;
	}
	if (*cp == '\n')
		cp++;
	/* "", "\n" and "." are not numbers: be as strict as strict_strtoul() */
	if (*cp || !seen_digit)
		return -EINVAL;
	if (decimals < 0)
		decimals = 0;
	/* pad with zeros for fractional digits that were not supplied */
	while (decimals < scale) {
		if (result > max / 10)
			return -ERANGE;
		result *= 10;
		decimals++;
	}
	*res = result;
	return 0;
}
|
||||
|
||||
|
||||
static void md_safemode_timeout(unsigned long data);
|
||||
|
||||
static ssize_t
|
||||
|
@ -2670,31 +2859,10 @@ safe_delay_show(mddev_t *mddev, char *page)
|
|||
static ssize_t
|
||||
safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
|
||||
{
|
||||
int scale=1;
|
||||
int dot=0;
|
||||
int i;
|
||||
unsigned long msec;
|
||||
char buf[30];
|
||||
|
||||
/* remove a period, and count digits after it */
|
||||
if (len >= sizeof(buf))
|
||||
if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
|
||||
return -EINVAL;
|
||||
strlcpy(buf, cbuf, sizeof(buf));
|
||||
for (i=0; i<len; i++) {
|
||||
if (dot) {
|
||||
if (isdigit(buf[i])) {
|
||||
buf[i-1] = buf[i];
|
||||
scale *= 10;
|
||||
}
|
||||
buf[i] = 0;
|
||||
} else if (buf[i] == '.') {
|
||||
dot=1;
|
||||
buf[i] = 0;
|
||||
}
|
||||
}
|
||||
if (strict_strtoul(buf, 10, &msec) < 0)
|
||||
return -EINVAL;
|
||||
msec = (msec * 1000) / scale;
|
||||
if (msec == 0)
|
||||
mddev->safemode_delay = 0;
|
||||
else {
|
||||
|
@ -2970,7 +3138,9 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
|
|||
|
||||
if (mddev->pers)
|
||||
return -EBUSY;
|
||||
if (!*buf || (*e && *e != '\n'))
|
||||
if (cmd_match(buf, "none"))
|
||||
n = MaxSector;
|
||||
else if (!*buf || (*e && *e != '\n'))
|
||||
return -EINVAL;
|
||||
|
||||
mddev->recovery_cp = n;
|
||||
|
@ -3165,6 +3335,29 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
|
|||
static struct md_sysfs_entry md_array_state =
|
||||
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
|
||||
|
||||
static ssize_t
|
||||
max_corrected_read_errors_show(mddev_t *mddev, char *page) {
|
||||
return sprintf(page, "%d\n",
|
||||
atomic_read(&mddev->max_corr_read_errors));
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
|
||||
{
|
||||
char *e;
|
||||
unsigned long n = simple_strtoul(buf, &e, 10);
|
||||
|
||||
if (*buf && (*e == 0 || *e == '\n')) {
|
||||
atomic_set(&mddev->max_corr_read_errors, n);
|
||||
return len;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * sysfs attribute "max_read_errors": readable by everyone (S_IRUGO),
 * writable by the owner (S_IWUSR); served by
 * max_corrected_read_errors_show() / max_corrected_read_errors_store().
 */
static struct md_sysfs_entry max_corr_read_errors =
|
||||
__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
|
||||
max_corrected_read_errors_store);
|
||||
|
||||
static ssize_t
|
||||
null_show(mddev_t *mddev, char *page)
|
||||
{
|
||||
|
@ -3790,6 +3983,7 @@ static struct attribute *md_default_attrs[] = {
|
|||
&md_array_state.attr,
|
||||
&md_reshape_position.attr,
|
||||
&md_array_size.attr,
|
||||
&max_corr_read_errors.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -3894,6 +4088,7 @@ static void mddev_delayed_delete(struct work_struct *ws)
|
|||
mddev->sysfs_action = NULL;
|
||||
mddev->private = NULL;
|
||||
}
|
||||
sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
|
||||
kobject_del(&mddev->kobj);
|
||||
kobject_put(&mddev->kobj);
|
||||
}
|
||||
|
@ -3985,6 +4180,8 @@ static int md_alloc(dev_t dev, char *name)
|
|||
disk->disk_name);
|
||||
error = 0;
|
||||
}
|
||||
if (sysfs_create_group(&mddev->kobj, &md_bitmap_group))
|
||||
printk(KERN_DEBUG "pointless warning\n");
|
||||
abort:
|
||||
mutex_unlock(&disks_mutex);
|
||||
if (!error) {
|
||||
|
@ -4206,6 +4403,8 @@ static int do_md_run(mddev_t * mddev)
|
|||
mddev->ro = 0;
|
||||
|
||||
atomic_set(&mddev->writes_pending,0);
|
||||
atomic_set(&mddev->max_corr_read_errors,
|
||||
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
|
||||
mddev->safemode = 0;
|
||||
mddev->safemode_timer.function = md_safemode_timeout;
|
||||
mddev->safemode_timer.data = (unsigned long) mddev;
|
||||
|
@ -4310,7 +4509,7 @@ static int deny_bitmap_write_access(struct file * file)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void restore_bitmap_write_access(struct file *file)
|
||||
void restore_bitmap_write_access(struct file *file)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
|
||||
|
@ -4405,12 +4604,12 @@ out:
|
|||
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
|
||||
|
||||
bitmap_destroy(mddev);
|
||||
if (mddev->bitmap_file) {
|
||||
restore_bitmap_write_access(mddev->bitmap_file);
|
||||
fput(mddev->bitmap_file);
|
||||
mddev->bitmap_file = NULL;
|
||||
if (mddev->bitmap_info.file) {
|
||||
restore_bitmap_write_access(mddev->bitmap_info.file);
|
||||
fput(mddev->bitmap_info.file);
|
||||
mddev->bitmap_info.file = NULL;
|
||||
}
|
||||
mddev->bitmap_offset = 0;
|
||||
mddev->bitmap_info.offset = 0;
|
||||
|
||||
/* make sure all md_delayed_delete calls have finished */
|
||||
flush_scheduled_work();
|
||||
|
@ -4451,6 +4650,11 @@ out:
|
|||
mddev->degraded = 0;
|
||||
mddev->barriers_work = 0;
|
||||
mddev->safemode = 0;
|
||||
mddev->bitmap_info.offset = 0;
|
||||
mddev->bitmap_info.default_offset = 0;
|
||||
mddev->bitmap_info.chunksize = 0;
|
||||
mddev->bitmap_info.daemon_sleep = 0;
|
||||
mddev->bitmap_info.max_write_behind = 0;
|
||||
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
|
||||
if (mddev->hold_active == UNTIL_STOP)
|
||||
mddev->hold_active = 0;
|
||||
|
@ -4636,7 +4840,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
|
|||
info.state = 0;
|
||||
if (mddev->in_sync)
|
||||
info.state = (1<<MD_SB_CLEAN);
|
||||
if (mddev->bitmap && mddev->bitmap_offset)
|
||||
if (mddev->bitmap && mddev->bitmap_info.offset)
|
||||
info.state = (1<<MD_SB_BITMAP_PRESENT);
|
||||
info.active_disks = insync;
|
||||
info.working_disks = working;
|
||||
|
@ -4994,23 +5198,23 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
|
|||
if (fd >= 0) {
|
||||
if (mddev->bitmap)
|
||||
return -EEXIST; /* cannot add when bitmap is present */
|
||||
mddev->bitmap_file = fget(fd);
|
||||
mddev->bitmap_info.file = fget(fd);
|
||||
|
||||
if (mddev->bitmap_file == NULL) {
|
||||
if (mddev->bitmap_info.file == NULL) {
|
||||
printk(KERN_ERR "%s: error: failed to get bitmap file\n",
|
||||
mdname(mddev));
|
||||
return -EBADF;
|
||||
}
|
||||
|
||||
err = deny_bitmap_write_access(mddev->bitmap_file);
|
||||
err = deny_bitmap_write_access(mddev->bitmap_info.file);
|
||||
if (err) {
|
||||
printk(KERN_ERR "%s: error: bitmap file is already in use\n",
|
||||
mdname(mddev));
|
||||
fput(mddev->bitmap_file);
|
||||
mddev->bitmap_file = NULL;
|
||||
fput(mddev->bitmap_info.file);
|
||||
mddev->bitmap_info.file = NULL;
|
||||
return err;
|
||||
}
|
||||
mddev->bitmap_offset = 0; /* file overrides offset */
|
||||
mddev->bitmap_info.offset = 0; /* file overrides offset */
|
||||
} else if (mddev->bitmap == NULL)
|
||||
return -ENOENT; /* cannot remove what isn't there */
|
||||
err = 0;
|
||||
|
@ -5025,11 +5229,11 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
|
|||
mddev->pers->quiesce(mddev, 0);
|
||||
}
|
||||
if (fd < 0) {
|
||||
if (mddev->bitmap_file) {
|
||||
restore_bitmap_write_access(mddev->bitmap_file);
|
||||
fput(mddev->bitmap_file);
|
||||
if (mddev->bitmap_info.file) {
|
||||
restore_bitmap_write_access(mddev->bitmap_info.file);
|
||||
fput(mddev->bitmap_info.file);
|
||||
}
|
||||
mddev->bitmap_file = NULL;
|
||||
mddev->bitmap_info.file = NULL;
|
||||
}
|
||||
|
||||
return err;
|
||||
|
@ -5096,8 +5300,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
|
|||
mddev->flags = 0;
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
|
||||
mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
|
||||
mddev->bitmap_offset = 0;
|
||||
mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
|
||||
mddev->bitmap_info.offset = 0;
|
||||
|
||||
mddev->reshape_position = MaxSector;
|
||||
|
||||
|
@ -5197,7 +5401,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
|
|||
int state = 0;
|
||||
|
||||
/* calculate expected state,ignoring low bits */
|
||||
if (mddev->bitmap && mddev->bitmap_offset)
|
||||
if (mddev->bitmap && mddev->bitmap_info.offset)
|
||||
state |= (1 << MD_SB_BITMAP_PRESENT);
|
||||
|
||||
if (mddev->major_version != info->major_version ||
|
||||
|
@ -5256,9 +5460,10 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
|
|||
/* add the bitmap */
|
||||
if (mddev->bitmap)
|
||||
return -EEXIST;
|
||||
if (mddev->default_bitmap_offset == 0)
|
||||
if (mddev->bitmap_info.default_offset == 0)
|
||||
return -EINVAL;
|
||||
mddev->bitmap_offset = mddev->default_bitmap_offset;
|
||||
mddev->bitmap_info.offset =
|
||||
mddev->bitmap_info.default_offset;
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
rv = bitmap_create(mddev);
|
||||
if (rv)
|
||||
|
@ -5273,7 +5478,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
|
|||
mddev->pers->quiesce(mddev, 1);
|
||||
bitmap_destroy(mddev);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
mddev->bitmap_offset = 0;
|
||||
mddev->bitmap_info.offset = 0;
|
||||
}
|
||||
}
|
||||
md_update_sb(mddev, 1);
|
||||
|
@ -5524,6 +5729,25 @@ done:
|
|||
abort:
|
||||
return err;
|
||||
}
|
||||
#ifdef CONFIG_COMPAT
|
||||
/*
 * compat ioctl entry point: adjust @arg as needed and forward to md_ioctl().
 *
 * HOT_REMOVE_DISK, HOT_ADD_DISK, SET_DISK_FAULTY and SET_BITMAP_FILE take
 * an integer argument, which is passed through untouched.  For every other
 * command @arg is converted with compat_ptr() first.
 */
static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	int takes_integer = cmd == HOT_REMOVE_DISK ||
			    cmd == HOT_ADD_DISK ||
			    cmd == SET_DISK_FAULTY ||
			    cmd == SET_BITMAP_FILE;

	if (!takes_integer)
		arg = (unsigned long)compat_ptr(arg);

	return md_ioctl(bdev, mode, cmd, arg);
}
|
||||
#endif /* CONFIG_COMPAT */
|
||||
|
||||
static int md_open(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
|
@ -5589,6 +5813,9 @@ static const struct block_device_operations md_fops =
|
|||
.open = md_open,
|
||||
.release = md_release,
|
||||
.ioctl = md_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = md_compat_ioctl,
|
||||
#endif
|
||||
.getgeo = md_getgeo,
|
||||
.media_changed = md_media_changed,
|
||||
.revalidate_disk= md_revalidate,
|
||||
|
@ -5982,14 +6209,14 @@ static int md_seq_show(struct seq_file *seq, void *v)
|
|||
unsigned long chunk_kb;
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&bitmap->lock, flags);
|
||||
chunk_kb = bitmap->chunksize >> 10;
|
||||
chunk_kb = mddev->bitmap_info.chunksize >> 10;
|
||||
seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
|
||||
"%lu%s chunk",
|
||||
bitmap->pages - bitmap->missing_pages,
|
||||
bitmap->pages,
|
||||
(bitmap->pages - bitmap->missing_pages)
|
||||
<< (PAGE_SHIFT - 10),
|
||||
chunk_kb ? chunk_kb : bitmap->chunksize,
|
||||
chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
|
||||
chunk_kb ? "KB" : "B");
|
||||
if (bitmap->file) {
|
||||
seq_printf(seq, ", file: ");
|
||||
|
@ -6338,12 +6565,14 @@ void md_do_sync(mddev_t *mddev)
|
|||
/* recovery follows the physical size of devices */
|
||||
max_sectors = mddev->dev_sectors;
|
||||
j = MaxSector;
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set)
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(Faulty, &rdev->flags) &&
|
||||
!test_bit(In_sync, &rdev->flags) &&
|
||||
rdev->recovery_offset < j)
|
||||
j = rdev->recovery_offset;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
|
||||
|
@ -6380,6 +6609,7 @@ void md_do_sync(mddev_t *mddev)
|
|||
desc, mdname(mddev));
|
||||
mddev->curr_resync = j;
|
||||
}
|
||||
mddev->curr_resync_completed = mddev->curr_resync;
|
||||
|
||||
while (j < max_sectors) {
|
||||
sector_t sectors;
|
||||
|
@ -6512,22 +6742,29 @@ void md_do_sync(mddev_t *mddev)
|
|||
} else {
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||
mddev->curr_resync = MaxSector;
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set)
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(Faulty, &rdev->flags) &&
|
||||
!test_bit(In_sync, &rdev->flags) &&
|
||||
rdev->recovery_offset < mddev->curr_resync)
|
||||
rdev->recovery_offset = mddev->curr_resync;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
|
||||
skip:
|
||||
mddev->curr_resync = 0;
|
||||
mddev->curr_resync_completed = 0;
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||
/* We completed so max setting can be forgotten. */
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
/* We completed so min/max setting can be forgotten if used. */
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
mddev->resync_min = 0;
|
||||
mddev->resync_max = MaxSector;
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
mddev->resync_min = mddev->curr_resync_completed;
|
||||
mddev->curr_resync = 0;
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||
mddev->curr_resync_completed = 0;
|
||||
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
|
||||
wake_up(&resync_wait);
|
||||
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
||||
|
@ -6590,6 +6827,7 @@ static int remove_and_add_spares(mddev_t *mddev)
|
|||
nm, mdname(mddev));
|
||||
spares++;
|
||||
md_new_event(mddev);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
} else
|
||||
break;
|
||||
}
|
||||
|
@ -6625,7 +6863,7 @@ void md_check_recovery(mddev_t *mddev)
|
|||
|
||||
|
||||
if (mddev->bitmap)
|
||||
bitmap_daemon_work(mddev->bitmap);
|
||||
bitmap_daemon_work(mddev);
|
||||
|
||||
if (mddev->ro)
|
||||
return;
|
||||
|
@ -6995,5 +7233,6 @@ EXPORT_SYMBOL(md_unregister_thread);
|
|||
EXPORT_SYMBOL(md_wakeup_thread);
|
||||
EXPORT_SYMBOL(md_check_recovery);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("MD RAID framework");
|
||||
MODULE_ALIAS("md");
|
||||
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
|
||||
|
|
|
@ -97,6 +97,9 @@ struct mdk_rdev_s
|
|||
atomic_t read_errors; /* number of consecutive read errors that
|
||||
* we have tried to ignore.
|
||||
*/
|
||||
struct timespec last_read_error; /* monotonic time since our
|
||||
* last read error
|
||||
*/
|
||||
atomic_t corrected_errors; /* number of corrected read errors,
|
||||
* for reporting to userspace and storing
|
||||
* in superblock.
|
||||
|
@ -280,17 +283,38 @@ struct mddev_s
|
|||
unsigned int max_write_behind; /* 0 = sync */
|
||||
|
||||
struct bitmap *bitmap; /* the bitmap for the device */
|
||||
struct file *bitmap_file; /* the bitmap file */
|
||||
long bitmap_offset; /* offset from superblock of
|
||||
* start of bitmap. May be
|
||||
* negative, but not '0'
|
||||
*/
|
||||
long default_bitmap_offset; /* this is the offset to use when
|
||||
* hot-adding a bitmap. It should
|
||||
* eventually be settable by sysfs.
|
||||
*/
|
||||
/* Per-array bitmap configuration, kept together in one sub-structure. */
struct {
|
||||
struct file *file; /* the bitmap file */
|
||||
loff_t offset; /* offset from superblock of
|
||||
* start of bitmap. May be
|
||||
* negative, but not '0'
|
||||
* For external metadata, offset
|
||||
* from start of device.
|
||||
*/
|
||||
loff_t default_offset; /* this is the offset to use when
|
||||
* hot-adding a bitmap. It should
|
||||
* eventually be settable by sysfs.
|
||||
*/
|
||||
struct mutex mutex;
|
||||
unsigned long chunksize; /* bitmap chunk size, in bytes */
|
||||
unsigned long daemon_sleep; /* how many jiffies between updates? */
|
||||
unsigned long max_write_behind; /* write-behind mode */
|
||||
/* NOTE(review): presumably non-zero when the bitmap metadata is
 * managed externally rather than by md -- confirm against md.c.
 */
int external;
|
||||
} bitmap_info;
|
||||
|
||||
atomic_t max_corr_read_errors; /* max read retries */
|
||||
struct list_head all_mddevs;
|
||||
|
||||
/* Generic barrier handling.
|
||||
* If there is a pending barrier request, all other
|
||||
* writes are blocked while the devices are flushed.
|
||||
* The last to finish a flush schedules a worker to
|
||||
* submit the barrier request (without the barrier flag),
|
||||
* then submit more flush requests.
|
||||
*/
|
||||
struct bio *barrier;
|
||||
atomic_t flush_pending;
|
||||
struct work_struct barrier_work;
|
||||
};
|
||||
|
||||
|
||||
|
@ -353,7 +377,7 @@ struct md_sysfs_entry {
|
|||
ssize_t (*show)(mddev_t *, char *);
|
||||
ssize_t (*store)(mddev_t *, const char *, size_t);
|
||||
};
|
||||
|
||||
extern struct attribute_group md_bitmap_group;
|
||||
|
||||
static inline char * mdname (mddev_t * mddev)
|
||||
{
|
||||
|
@ -431,6 +455,7 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
|
|||
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
|
||||
|
||||
extern int mddev_congested(mddev_t *mddev, int bits);
|
||||
extern void md_barrier_request(mddev_t *mddev, struct bio *bio);
|
||||
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
|
||||
sector_t sector, int size, struct page *page);
|
||||
extern void md_super_wait(mddev_t *mddev);
|
||||
|
@ -443,6 +468,8 @@ extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
|
|||
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
|
||||
extern int md_check_no_bitmap(mddev_t *mddev);
|
||||
extern int md_integrity_register(mddev_t *mddev);
|
||||
void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
|
||||
extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
|
||||
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
|
||||
extern void restore_bitmap_write_access(struct file *file);
|
||||
|
||||
#endif /* _MD_MD_H */
|
||||
|
|
|
@ -145,7 +145,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
|
|||
int cpu;
|
||||
|
||||
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
|
||||
bio_endio(bio, -EOPNOTSUPP);
|
||||
md_barrier_request(mddev, bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -581,6 +581,7 @@ static void __exit multipath_exit (void)
|
|||
module_init(multipath_init);
|
||||
module_exit(multipath_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("simple multi-path personality for MD");
|
||||
MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
|
||||
MODULE_ALIAS("md-multipath");
|
||||
MODULE_ALIAS("md-level--4");
|
||||
|
|
|
@ -453,7 +453,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio)
|
|||
int cpu;
|
||||
|
||||
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
|
||||
bio_endio(bio, -EOPNOTSUPP);
|
||||
md_barrier_request(mddev, bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -567,6 +567,7 @@ static void raid0_exit (void)
|
|||
module_init(raid0_init);
|
||||
module_exit(raid0_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
|
||||
MODULE_ALIAS("md-personality-2"); /* RAID0 */
|
||||
MODULE_ALIAS("md-raid0");
|
||||
MODULE_ALIAS("md-level-0");
|
||||
|
|
|
@ -677,6 +677,7 @@ static void raise_barrier(conf_t *conf)
|
|||
static void lower_barrier(conf_t *conf)
|
||||
{
|
||||
unsigned long flags;
|
||||
BUG_ON(conf->barrier <= 0);
|
||||
spin_lock_irqsave(&conf->resync_lock, flags);
|
||||
conf->barrier--;
|
||||
spin_unlock_irqrestore(&conf->resync_lock, flags);
|
||||
|
@ -801,6 +802,25 @@ static int make_request(struct request_queue *q, struct bio * bio)
|
|||
|
||||
md_write_start(mddev, bio); /* wait on superblock update early */
|
||||
|
||||
if (bio_data_dir(bio) == WRITE &&
|
||||
bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
|
||||
bio->bi_sector < mddev->suspend_hi) {
|
||||
/* As the suspend_* range is controlled by
|
||||
* userspace, we want an interruptible
|
||||
* wait.
|
||||
*/
|
||||
DEFINE_WAIT(w);
|
||||
for (;;) {
|
||||
flush_signals(current);
|
||||
prepare_to_wait(&conf->wait_barrier,
|
||||
&w, TASK_INTERRUPTIBLE);
|
||||
if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
|
||||
bio->bi_sector >= mddev->suspend_hi)
|
||||
break;
|
||||
schedule();
|
||||
}
|
||||
finish_wait(&conf->wait_barrier, &w);
|
||||
}
|
||||
if (unlikely(!mddev->barriers_work &&
|
||||
bio_rw_flagged(bio, BIO_RW_BARRIER))) {
|
||||
if (rw == WRITE)
|
||||
|
@ -923,7 +943,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
|
|||
|
||||
/* do behind I/O ? */
|
||||
if (bitmap &&
|
||||
atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind &&
|
||||
(atomic_read(&bitmap->behind_writes)
|
||||
< mddev->bitmap_info.max_write_behind) &&
|
||||
(behind_pages = alloc_behind_pages(bio)) != NULL)
|
||||
set_bit(R1BIO_BehindIO, &r1_bio->state);
|
||||
|
||||
|
@ -1941,74 +1962,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
|
|||
return mddev->dev_sectors;
|
||||
}
|
||||
|
||||
static int run(mddev_t *mddev)
|
||||
static conf_t *setup_conf(mddev_t *mddev)
|
||||
{
|
||||
conf_t *conf;
|
||||
int i, j, disk_idx;
|
||||
int i;
|
||||
mirror_info_t *disk;
|
||||
mdk_rdev_t *rdev;
|
||||
int err = -ENOMEM;
|
||||
|
||||
if (mddev->level != 1) {
|
||||
printk("raid1: %s: raid level not set to mirroring (%d)\n",
|
||||
mdname(mddev), mddev->level);
|
||||
goto out;
|
||||
}
|
||||
if (mddev->reshape_position != MaxSector) {
|
||||
printk("raid1: %s: reshape_position set but not supported\n",
|
||||
mdname(mddev));
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* copy the already verified devices into our private RAID1
|
||||
* bookkeeping area. [whatever we allocate in run(),
|
||||
* should be freed in stop()]
|
||||
*/
|
||||
conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
|
||||
mddev->private = conf;
|
||||
if (!conf)
|
||||
goto out_no_mem;
|
||||
goto abort;
|
||||
|
||||
conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
|
||||
GFP_KERNEL);
|
||||
if (!conf->mirrors)
|
||||
goto out_no_mem;
|
||||
goto abort;
|
||||
|
||||
conf->tmppage = alloc_page(GFP_KERNEL);
|
||||
if (!conf->tmppage)
|
||||
goto out_no_mem;
|
||||
goto abort;
|
||||
|
||||
conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
|
||||
conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
|
||||
if (!conf->poolinfo)
|
||||
goto out_no_mem;
|
||||
conf->poolinfo->mddev = NULL;
|
||||
goto abort;
|
||||
conf->poolinfo->raid_disks = mddev->raid_disks;
|
||||
conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
|
||||
r1bio_pool_free,
|
||||
conf->poolinfo);
|
||||
if (!conf->r1bio_pool)
|
||||
goto out_no_mem;
|
||||
goto abort;
|
||||
|
||||
conf->poolinfo->mddev = mddev;
|
||||
|
||||
spin_lock_init(&conf->device_lock);
|
||||
mddev->queue->queue_lock = &conf->device_lock;
|
||||
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||
disk_idx = rdev->raid_disk;
|
||||
int disk_idx = rdev->raid_disk;
|
||||
if (disk_idx >= mddev->raid_disks
|
||||
|| disk_idx < 0)
|
||||
continue;
|
||||
disk = conf->mirrors + disk_idx;
|
||||
|
||||
disk->rdev = rdev;
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
/* as we don't honour merge_bvec_fn, we must never risk
|
||||
* violating it, so limit ->max_sector to one PAGE, as
|
||||
* a one page request is never in violation.
|
||||
*/
|
||||
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
|
||||
queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
|
||||
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
|
||||
|
||||
disk->head_position = 0;
|
||||
}
|
||||
|
@ -2022,8 +2017,7 @@ static int run(mddev_t *mddev)
|
|||
bio_list_init(&conf->pending_bio_list);
|
||||
bio_list_init(&conf->flushing_bio_list);
|
||||
|
||||
|
||||
mddev->degraded = 0;
|
||||
conf->last_used = -1;
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
|
||||
disk = conf->mirrors + i;
|
||||
|
@ -2031,38 +2025,97 @@ static int run(mddev_t *mddev)
|
|||
if (!disk->rdev ||
|
||||
!test_bit(In_sync, &disk->rdev->flags)) {
|
||||
disk->head_position = 0;
|
||||
mddev->degraded++;
|
||||
if (disk->rdev)
|
||||
conf->fullsync = 1;
|
||||
}
|
||||
} else if (conf->last_used < 0)
|
||||
/*
|
||||
* The first working device is used as a
|
||||
* starting point to read balancing.
|
||||
*/
|
||||
conf->last_used = i;
|
||||
}
|
||||
if (mddev->degraded == conf->raid_disks) {
|
||||
|
||||
err = -EIO;
|
||||
if (conf->last_used < 0) {
|
||||
printk(KERN_ERR "raid1: no operational mirrors for %s\n",
|
||||
mdname(mddev));
|
||||
goto out_free_conf;
|
||||
mdname(mddev));
|
||||
goto abort;
|
||||
}
|
||||
if (conf->raid_disks - mddev->degraded == 1)
|
||||
mddev->recovery_cp = MaxSector;
|
||||
|
||||
/*
|
||||
* find the first working one and use it as a starting point
|
||||
* to read balancing.
|
||||
*/
|
||||
for (j = 0; j < conf->raid_disks &&
|
||||
(!conf->mirrors[j].rdev ||
|
||||
!test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++)
|
||||
/* nothing */;
|
||||
conf->last_used = j;
|
||||
|
||||
|
||||
mddev->thread = md_register_thread(raid1d, mddev, NULL);
|
||||
if (!mddev->thread) {
|
||||
err = -ENOMEM;
|
||||
conf->thread = md_register_thread(raid1d, mddev, NULL);
|
||||
if (!conf->thread) {
|
||||
printk(KERN_ERR
|
||||
"raid1: couldn't allocate thread for %s\n",
|
||||
mdname(mddev));
|
||||
goto out_free_conf;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
return conf;
|
||||
|
||||
abort:
|
||||
if (conf) {
|
||||
if (conf->r1bio_pool)
|
||||
mempool_destroy(conf->r1bio_pool);
|
||||
kfree(conf->mirrors);
|
||||
safe_put_page(conf->tmppage);
|
||||
kfree(conf->poolinfo);
|
||||
kfree(conf);
|
||||
}
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static int run(mddev_t *mddev)
|
||||
{
|
||||
conf_t *conf;
|
||||
int i;
|
||||
mdk_rdev_t *rdev;
|
||||
|
||||
if (mddev->level != 1) {
|
||||
printk("raid1: %s: raid level not set to mirroring (%d)\n",
|
||||
mdname(mddev), mddev->level);
|
||||
return -EIO;
|
||||
}
|
||||
if (mddev->reshape_position != MaxSector) {
|
||||
printk("raid1: %s: reshape_position set but not supported\n",
|
||||
mdname(mddev));
|
||||
return -EIO;
|
||||
}
|
||||
/*
|
||||
* copy the already verified devices into our private RAID1
|
||||
* bookkeeping area. [whatever we allocate in run(),
|
||||
* should be freed in stop()]
|
||||
*/
|
||||
if (mddev->private == NULL)
|
||||
conf = setup_conf(mddev);
|
||||
else
|
||||
conf = mddev->private;
|
||||
|
||||
if (IS_ERR(conf))
|
||||
return PTR_ERR(conf);
|
||||
|
||||
mddev->queue->queue_lock = &conf->device_lock;
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||
disk_stack_limits(mddev->gendisk, rdev->bdev,
|
||||
rdev->data_offset << 9);
|
||||
/* as we don't honour merge_bvec_fn, we must never risk
|
||||
* violating it, so limit ->max_sector to one PAGE, as
|
||||
* a one page request is never in violation.
|
||||
*/
|
||||
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
|
||||
queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
|
||||
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
|
||||
}
|
||||
|
||||
mddev->degraded = 0;
|
||||
for (i=0; i < conf->raid_disks; i++)
|
||||
if (conf->mirrors[i].rdev == NULL ||
|
||||
!test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
|
||||
test_bit(Faulty, &conf->mirrors[i].rdev->flags))
|
||||
mddev->degraded++;
|
||||
|
||||
if (conf->raid_disks - mddev->degraded == 1)
|
||||
mddev->recovery_cp = MaxSector;
|
||||
|
||||
if (mddev->recovery_cp != MaxSector)
|
||||
printk(KERN_NOTICE "raid1: %s is not clean"
|
||||
" -- starting background reconstruction\n",
|
||||
|
@ -2071,9 +2124,14 @@ static int run(mddev_t *mddev)
|
|||
"raid1: raid set %s active with %d out of %d mirrors\n",
|
||||
mdname(mddev), mddev->raid_disks - mddev->degraded,
|
||||
mddev->raid_disks);
|
||||
|
||||
/*
|
||||
* Ok, everything is just fine now
|
||||
*/
|
||||
mddev->thread = conf->thread;
|
||||
conf->thread = NULL;
|
||||
mddev->private = conf;
|
||||
|
||||
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
|
||||
|
||||
mddev->queue->unplug_fn = raid1_unplug;
|
||||
|
@ -2081,23 +2139,6 @@ static int run(mddev_t *mddev)
|
|||
mddev->queue->backing_dev_info.congested_data = mddev;
|
||||
md_integrity_register(mddev);
|
||||
return 0;
|
||||
|
||||
out_no_mem:
|
||||
printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
|
||||
mdname(mddev));
|
||||
|
||||
out_free_conf:
|
||||
if (conf) {
|
||||
if (conf->r1bio_pool)
|
||||
mempool_destroy(conf->r1bio_pool);
|
||||
kfree(conf->mirrors);
|
||||
safe_put_page(conf->tmppage);
|
||||
kfree(conf->poolinfo);
|
||||
kfree(conf);
|
||||
mddev->private = NULL;
|
||||
}
|
||||
out:
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int stop(mddev_t *mddev)
|
||||
|
@ -2271,6 +2312,9 @@ static void raid1_quiesce(mddev_t *mddev, int state)
|
|||
conf_t *conf = mddev->private;
|
||||
|
||||
switch(state) {
|
||||
case 2: /* wake for suspend */
|
||||
wake_up(&conf->wait_barrier);
|
||||
break;
|
||||
case 1:
|
||||
raise_barrier(conf);
|
||||
break;
|
||||
|
@ -2280,6 +2324,23 @@ static void raid1_quiesce(mddev_t *mddev, int state)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Takeover hook for the raid1 personality (installed as .takeover).
 *
 * Accepts only an array that is currently level 5 with exactly two
 * devices.  For such an array the pending geometry is rewritten to raid1
 * (new_level = 1, layout and chunk size cleared) and a raid1 conf is
 * built with setup_conf().
 *
 * Returns whatever setup_conf() returned (a conf_t pointer or an
 * ERR_PTR), or ERR_PTR(-EINVAL) when the array is not a 2-device raid5.
 */
static void *raid1_takeover(mddev_t *mddev)
|
||||
{
|
||||
/* raid1 can take over:
|
||||
* raid5 with 2 devices, any layout or chunk size
|
||||
*/
|
||||
if (mddev->level == 5 && mddev->raid_disks == 2) {
|
||||
conf_t *conf;
|
||||
mddev->new_level = 1;
|
||||
mddev->new_layout = 0;
|
||||
mddev->new_chunk_sectors = 0;
|
||||
conf = setup_conf(mddev);
|
||||
/* Only touch conf when setup_conf() succeeded; an ERR_PTR is
 * passed straight back to the caller.
 * NOTE(review): barrier is presumably pre-raised so that a later
 * lower_barrier() (which BUG()s on barrier <= 0) is balanced --
 * confirm against the md core takeover/quiesce sequence.
 */
if (!IS_ERR(conf))
|
||||
conf->barrier = 1;
|
||||
return conf;
|
||||
}
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
static struct mdk_personality raid1_personality =
|
||||
{
|
||||
|
@ -2299,6 +2360,7 @@ static struct mdk_personality raid1_personality =
|
|||
.size = raid1_size,
|
||||
.check_reshape = raid1_reshape,
|
||||
.quiesce = raid1_quiesce,
|
||||
.takeover = raid1_takeover,
|
||||
};
|
||||
|
||||
static int __init raid_init(void)
|
||||
|
@ -2314,6 +2376,7 @@ static void raid_exit(void)
|
|||
module_init(raid_init);
|
||||
module_exit(raid_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
|
||||
MODULE_ALIAS("md-personality-3"); /* RAID1 */
|
||||
MODULE_ALIAS("md-raid1");
|
||||
MODULE_ALIAS("md-level-1");
|
||||
|
|
|
@ -59,6 +59,11 @@ struct r1_private_data_s {
|
|||
|
||||
mempool_t *r1bio_pool;
|
||||
mempool_t *r1buf_pool;
|
||||
|
||||
/* When taking over an array from a different personality, we store
|
||||
* the new thread here until we fully activate the array.
|
||||
*/
|
||||
struct mdk_thread_s *thread;
|
||||
};
|
||||
|
||||
typedef struct r1_private_data_s conf_t;
|
||||
|
|
|
@ -804,7 +804,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
|
|||
mdk_rdev_t *blocked_rdev;
|
||||
|
||||
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
|
||||
bio_endio(bio, -EOPNOTSUPP);
|
||||
md_barrier_request(mddev, bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1431,6 +1431,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* Used by fix_read_error() to decay the per rdev read_errors.
|
||||
* We halve the read error count for every hour that has elapsed
|
||||
* since the last recorded read error.
|
||||
*
* The first call for an rdev (last_read_error still all-zero) only
* records the current time and applies no decay.  Every later call
* refreshes last_read_error to the current time before decaying.
*
* mddev: not referenced by this function.
* rdev:  device whose read_errors and last_read_error are updated.
|
||||
*/
|
||||
static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||
{
|
||||
struct timespec cur_time_mon;
|
||||
unsigned long hours_since_last;
|
||||
/* Snapshot of the counter; the decayed value is written back below. */
unsigned int read_errors = atomic_read(&rdev->read_errors);
|
||||
|
||||
ktime_get_ts(&cur_time_mon);
|
||||
|
||||
if (rdev->last_read_error.tv_sec == 0 &&
|
||||
rdev->last_read_error.tv_nsec == 0) {
|
||||
/* first time we've seen a read error */
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Whole hours only: the integer division drops any partial hour. */
hours_since_last = (cur_time_mon.tv_sec -
|
||||
rdev->last_read_error.tv_sec) / 3600;
|
||||
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
|
||||
/*
|
||||
* if hours_since_last is >= the number of bits in read_errors
|
||||
* just set read errors to 0. We do this to avoid
|
||||
* overflowing the shift of read_errors by hours_since_last.
|
||||
*
* NOTE(review): the count is read once above and stored back with
* atomic_set(), so this is not an atomic read-modify-write; confirm
* that callers cannot update read_errors concurrently.
|
||||
*/
|
||||
if (hours_since_last >= 8 * sizeof(read_errors))
|
||||
atomic_set(&rdev->read_errors, 0);
|
||||
else
|
||||
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a kernel thread which:
|
||||
*
|
||||
|
@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
int sect = 0; /* Offset from r10_bio->sector */
|
||||
int sectors = r10_bio->sectors;
|
||||
mdk_rdev_t*rdev;
|
||||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
|
||||
rcu_read_lock();
|
||||
{
|
||||
int d = r10_bio->devs[r10_bio->read_slot].devnum;
|
||||
char b[BDEVNAME_SIZE];
|
||||
int cur_read_error_count = 0;
|
||||
|
||||
rdev = rcu_dereference(conf->mirrors[d].rdev);
|
||||
bdevname(rdev->bdev, b);
|
||||
|
||||
if (test_bit(Faulty, &rdev->flags)) {
|
||||
rcu_read_unlock();
|
||||
/* drive has already been failed, just ignore any
|
||||
more fix_read_error() attempts */
|
||||
return;
|
||||
}
|
||||
|
||||
check_decay_read_errors(mddev, rdev);
|
||||
atomic_inc(&rdev->read_errors);
|
||||
cur_read_error_count = atomic_read(&rdev->read_errors);
|
||||
if (cur_read_error_count > max_read_errors) {
|
||||
rcu_read_unlock();
|
||||
printk(KERN_NOTICE
|
||||
"raid10: %s: Raid device exceeded "
|
||||
"read_error threshold "
|
||||
"[cur %d:max %d]\n",
|
||||
b, cur_read_error_count, max_read_errors);
|
||||
printk(KERN_NOTICE
|
||||
"raid10: %s: Failing raid "
|
||||
"device\n", b);
|
||||
md_error(mddev, conf->mirrors[d].rdev);
|
||||
return;
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
while(sectors) {
|
||||
int s = sectors;
|
||||
int sl = r10_bio->read_slot;
|
||||
|
@ -1488,6 +1562,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
/* write it back and re-read */
|
||||
rcu_read_lock();
|
||||
while (sl != r10_bio->read_slot) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
int d;
|
||||
if (sl==0)
|
||||
sl = conf->copies;
|
||||
|
@ -1503,9 +1578,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
r10_bio->devs[sl].addr +
|
||||
sect + rdev->data_offset,
|
||||
s<<9, conf->tmppage, WRITE)
|
||||
== 0)
|
||||
== 0) {
|
||||
/* Well, this device is dead */
|
||||
printk(KERN_NOTICE
|
||||
"raid10:%s: read correction "
|
||||
"write failed"
|
||||
" (%d sectors at %llu on %s)\n",
|
||||
mdname(mddev), s,
|
||||
(unsigned long long)(sect+
|
||||
rdev->data_offset),
|
||||
bdevname(rdev->bdev, b));
|
||||
printk(KERN_NOTICE "raid10:%s: failing "
|
||||
"drive\n",
|
||||
bdevname(rdev->bdev, b));
|
||||
md_error(mddev, rdev);
|
||||
}
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
rcu_read_lock();
|
||||
}
|
||||
|
@ -1526,10 +1613,22 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
if (sync_page_io(rdev->bdev,
|
||||
r10_bio->devs[sl].addr +
|
||||
sect + rdev->data_offset,
|
||||
s<<9, conf->tmppage, READ) == 0)
|
||||
s<<9, conf->tmppage,
|
||||
READ) == 0) {
|
||||
/* Well, this device is dead */
|
||||
printk(KERN_NOTICE
|
||||
"raid10:%s: unable to read back "
|
||||
"corrected sectors"
|
||||
" (%d sectors at %llu on %s)\n",
|
||||
mdname(mddev), s,
|
||||
(unsigned long long)(sect+
|
||||
rdev->data_offset),
|
||||
bdevname(rdev->bdev, b));
|
||||
printk(KERN_NOTICE "raid10:%s: failing drive\n",
|
||||
bdevname(rdev->bdev, b));
|
||||
|
||||
md_error(mddev, rdev);
|
||||
else
|
||||
} else {
|
||||
printk(KERN_INFO
|
||||
"raid10:%s: read error corrected"
|
||||
" (%d sectors at %llu on %s)\n",
|
||||
|
@ -1537,6 +1636,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
|
|||
(unsigned long long)(sect+
|
||||
rdev->data_offset),
|
||||
bdevname(rdev->bdev, b));
|
||||
}
|
||||
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
rcu_read_lock();
|
||||
|
@ -2275,13 +2375,6 @@ static void raid10_quiesce(mddev_t *mddev, int state)
|
|||
lower_barrier(conf);
|
||||
break;
|
||||
}
|
||||
if (mddev->thread) {
|
||||
if (mddev->bitmap)
|
||||
mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
|
||||
else
|
||||
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
}
|
||||
|
||||
static struct mdk_personality raid10_personality =
|
||||
|
@ -2315,6 +2408,7 @@ static void raid_exit(void)
|
|||
module_init(raid_init);
|
||||
module_exit(raid_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
|
||||
MODULE_ALIAS("md-personality-9"); /* RAID10 */
|
||||
MODULE_ALIAS("md-raid10");
|
||||
MODULE_ALIAS("md-level-10");
|
||||
|
|
|
@ -2947,6 +2947,7 @@ static void handle_stripe5(struct stripe_head *sh)
|
|||
struct r5dev *dev;
|
||||
mdk_rdev_t *blocked_rdev = NULL;
|
||||
int prexor;
|
||||
int dec_preread_active = 0;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
|
||||
|
@ -3096,12 +3097,8 @@ static void handle_stripe5(struct stripe_head *sh)
|
|||
set_bit(STRIPE_INSYNC, &sh->state);
|
||||
}
|
||||
}
|
||||
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
|
||||
atomic_dec(&conf->preread_active_stripes);
|
||||
if (atomic_read(&conf->preread_active_stripes) <
|
||||
IO_THRESHOLD)
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
}
|
||||
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
||||
dec_preread_active = 1;
|
||||
}
|
||||
|
||||
/* Now to consider new write requests and what else, if anything
|
||||
|
@ -3208,6 +3205,16 @@ static void handle_stripe5(struct stripe_head *sh)
|
|||
|
||||
ops_run_io(sh, &s);
|
||||
|
||||
if (dec_preread_active) {
|
||||
/* We delay this until after ops_run_io so that if make_request
|
||||
* is waiting on a barrier, it won't continue until the writes
|
||||
* have actually been submitted.
|
||||
*/
|
||||
atomic_dec(&conf->preread_active_stripes);
|
||||
if (atomic_read(&conf->preread_active_stripes) <
|
||||
IO_THRESHOLD)
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
}
|
||||
return_io(return_bi);
|
||||
}
|
||||
|
||||
|
@ -3221,6 +3228,7 @@ static void handle_stripe6(struct stripe_head *sh)
|
|||
struct r6_state r6s;
|
||||
struct r5dev *dev, *pdev, *qdev;
|
||||
mdk_rdev_t *blocked_rdev = NULL;
|
||||
int dec_preread_active = 0;
|
||||
|
||||
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
|
||||
"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
|
||||
|
@ -3358,7 +3366,6 @@ static void handle_stripe6(struct stripe_head *sh)
|
|||
* completed
|
||||
*/
|
||||
if (sh->reconstruct_state == reconstruct_state_drain_result) {
|
||||
int qd_idx = sh->qd_idx;
|
||||
|
||||
sh->reconstruct_state = reconstruct_state_idle;
|
||||
/* All the 'written' buffers and the parity blocks are ready to
|
||||
|
@ -3380,12 +3387,8 @@ static void handle_stripe6(struct stripe_head *sh)
|
|||
set_bit(STRIPE_INSYNC, &sh->state);
|
||||
}
|
||||
}
|
||||
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
|
||||
atomic_dec(&conf->preread_active_stripes);
|
||||
if (atomic_read(&conf->preread_active_stripes) <
|
||||
IO_THRESHOLD)
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
}
|
||||
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
||||
dec_preread_active = 1;
|
||||
}
|
||||
|
||||
/* Now to consider new write requests and what else, if anything
|
||||
|
@ -3494,6 +3497,18 @@ static void handle_stripe6(struct stripe_head *sh)
|
|||
|
||||
ops_run_io(sh, &s);
|
||||
|
||||
|
||||
if (dec_preread_active) {
|
||||
/* We delay this until after ops_run_io so that if make_request
|
||||
* is waiting on a barrier, it won't continue until the writes
|
||||
* have actually been submitted.
|
||||
*/
|
||||
atomic_dec(&conf->preread_active_stripes);
|
||||
if (atomic_read(&conf->preread_active_stripes) <
|
||||
IO_THRESHOLD)
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
}
|
||||
|
||||
return_io(return_bi);
|
||||
}
|
||||
|
||||
|
@ -3741,7 +3756,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
|
|||
{
|
||||
mddev_t *mddev = q->queuedata;
|
||||
raid5_conf_t *conf = mddev->private;
|
||||
unsigned int dd_idx;
|
||||
int dd_idx;
|
||||
struct bio* align_bi;
|
||||
mdk_rdev_t *rdev;
|
||||
|
||||
|
@ -3866,7 +3881,13 @@ static int make_request(struct request_queue *q, struct bio * bi)
|
|||
int cpu, remaining;
|
||||
|
||||
if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
|
||||
bio_endio(bi, -EOPNOTSUPP);
|
||||
/* Drain all pending writes. We only really need
|
||||
* to ensure they have been submitted, but this is
|
||||
* easier.
|
||||
*/
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
md_barrier_request(mddev, bi);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3990,6 +4011,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
|
|||
finish_wait(&conf->wait_for_overlap, &w);
|
||||
set_bit(STRIPE_HANDLE, &sh->state);
|
||||
clear_bit(STRIPE_DELAYED, &sh->state);
|
||||
if (mddev->barrier &&
|
||||
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
||||
atomic_inc(&conf->preread_active_stripes);
|
||||
release_stripe(sh);
|
||||
} else {
|
||||
/* cannot get stripe for read-ahead, just give-up */
|
||||
|
@ -4009,6 +4033,14 @@ static int make_request(struct request_queue *q, struct bio * bi)
|
|||
|
||||
bio_endio(bi, 0);
|
||||
}
|
||||
|
||||
if (mddev->barrier) {
|
||||
/* We need to wait for the stripes to all be handled.
|
||||
* So: wait for preread_active_stripes to drop to 0.
|
||||
*/
|
||||
wait_event(mddev->thread->wqueue,
|
||||
atomic_read(&conf->preread_active_stripes) == 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -5860,6 +5892,7 @@ static void raid5_exit(void)
|
|||
module_init(raid5_init);
|
||||
module_exit(raid5_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD");
|
||||
MODULE_ALIAS("md-personality-4"); /* RAID5 */
|
||||
MODULE_ALIAS("md-raid5");
|
||||
MODULE_ALIAS("md-raid4");
|
||||
|
|
|
@ -31,25 +31,6 @@ EXPORT_SYMBOL(raid6_empty_zero_page);
|
|||
struct raid6_calls raid6_call;
|
||||
EXPORT_SYMBOL_GPL(raid6_call);
|
||||
|
||||
/* Various routine sets */
|
||||
extern const struct raid6_calls raid6_intx1;
|
||||
extern const struct raid6_calls raid6_intx2;
|
||||
extern const struct raid6_calls raid6_intx4;
|
||||
extern const struct raid6_calls raid6_intx8;
|
||||
extern const struct raid6_calls raid6_intx16;
|
||||
extern const struct raid6_calls raid6_intx32;
|
||||
extern const struct raid6_calls raid6_mmxx1;
|
||||
extern const struct raid6_calls raid6_mmxx2;
|
||||
extern const struct raid6_calls raid6_sse1x1;
|
||||
extern const struct raid6_calls raid6_sse1x2;
|
||||
extern const struct raid6_calls raid6_sse2x1;
|
||||
extern const struct raid6_calls raid6_sse2x2;
|
||||
extern const struct raid6_calls raid6_sse2x4;
|
||||
extern const struct raid6_calls raid6_altivec1;
|
||||
extern const struct raid6_calls raid6_altivec2;
|
||||
extern const struct raid6_calls raid6_altivec4;
|
||||
extern const struct raid6_calls raid6_altivec8;
|
||||
|
||||
const struct raid6_calls * const raid6_algos[] = {
|
||||
&raid6_intx1,
|
||||
&raid6_intx2,
|
||||
|
@ -169,3 +150,4 @@ static void raid6_exit(void)
|
|||
subsys_initcall(raid6_select_algo);
|
||||
module_exit(raid6_exit);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
|
||||
|
|
|
@ -979,24 +979,6 @@ COMPATIBLE_IOCTL(FIGETBSZ)
|
|||
/* 'X' - originally XFS but some now in the VFS */
|
||||
COMPATIBLE_IOCTL(FIFREEZE)
|
||||
COMPATIBLE_IOCTL(FITHAW)
|
||||
/* RAID */
|
||||
COMPATIBLE_IOCTL(RAID_VERSION)
|
||||
COMPATIBLE_IOCTL(GET_ARRAY_INFO)
|
||||
COMPATIBLE_IOCTL(GET_DISK_INFO)
|
||||
COMPATIBLE_IOCTL(PRINT_RAID_DEBUG)
|
||||
COMPATIBLE_IOCTL(RAID_AUTORUN)
|
||||
COMPATIBLE_IOCTL(CLEAR_ARRAY)
|
||||
COMPATIBLE_IOCTL(ADD_NEW_DISK)
|
||||
COMPATIBLE_IOCTL(SET_ARRAY_INFO)
|
||||
COMPATIBLE_IOCTL(SET_DISK_INFO)
|
||||
COMPATIBLE_IOCTL(WRITE_RAID_INFO)
|
||||
COMPATIBLE_IOCTL(UNPROTECT_ARRAY)
|
||||
COMPATIBLE_IOCTL(PROTECT_ARRAY)
|
||||
COMPATIBLE_IOCTL(RUN_ARRAY)
|
||||
COMPATIBLE_IOCTL(STOP_ARRAY)
|
||||
COMPATIBLE_IOCTL(STOP_ARRAY_RO)
|
||||
COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
|
||||
COMPATIBLE_IOCTL(GET_BITMAP_FILE)
|
||||
COMPATIBLE_IOCTL(KDGETKEYCODE)
|
||||
COMPATIBLE_IOCTL(KDSETKEYCODE)
|
||||
COMPATIBLE_IOCTL(KDGKBTYPE)
|
||||
|
|
|
@ -78,6 +78,25 @@ struct raid6_calls {
|
|||
/* Selected algorithm */
|
||||
extern struct raid6_calls raid6_call;
|
||||
|
||||
/* Various routine sets */
|
||||
extern const struct raid6_calls raid6_intx1;
|
||||
extern const struct raid6_calls raid6_intx2;
|
||||
extern const struct raid6_calls raid6_intx4;
|
||||
extern const struct raid6_calls raid6_intx8;
|
||||
extern const struct raid6_calls raid6_intx16;
|
||||
extern const struct raid6_calls raid6_intx32;
|
||||
extern const struct raid6_calls raid6_mmxx1;
|
||||
extern const struct raid6_calls raid6_mmxx2;
|
||||
extern const struct raid6_calls raid6_sse1x1;
|
||||
extern const struct raid6_calls raid6_sse1x2;
|
||||
extern const struct raid6_calls raid6_sse2x1;
|
||||
extern const struct raid6_calls raid6_sse2x2;
|
||||
extern const struct raid6_calls raid6_sse2x4;
|
||||
extern const struct raid6_calls raid6_altivec1;
|
||||
extern const struct raid6_calls raid6_altivec2;
|
||||
extern const struct raid6_calls raid6_altivec4;
|
||||
extern const struct raid6_calls raid6_altivec8;
|
||||
|
||||
/* Algorithm list */
|
||||
extern const struct raid6_calls * const raid6_algos[];
|
||||
int raid6_select_algo(void);
|
||||
|
|
Loading…
Reference in a new issue