rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
/*
|
|
|
|
* Ram backed block device driver.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2007 Nick Piggin
|
|
|
|
* Copyright (C) 2007 Novell Inc.
|
|
|
|
*
|
|
|
|
* Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright
|
|
|
|
* of their respective owners.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/moduleparam.h>
|
|
|
|
#include <linux/major.h>
|
|
|
|
#include <linux/blkdev.h>
|
|
|
|
#include <linux/bio.h>
|
|
|
|
#include <linux/highmem.h>
|
|
|
|
#include <linux/gfp.h>
|
|
|
|
#include <linux/radix-tree.h>
|
|
|
|
#include <linux/buffer_head.h> /* invalidate_bh_lrus() */
|
|
|
|
|
|
|
|
#include <asm/uaccess.h>
|
|
|
|
|
|
|
|
#define SECTOR_SHIFT 9
|
|
|
|
#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
|
|
|
|
#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Each block ramdisk device has a radix_tree brd_pages of pages that stores
|
|
|
|
* the pages containing the block device's contents. A brd page's ->index is
|
|
|
|
* its offset in PAGE_SIZE units. This is similar to, but in no way connected
|
|
|
|
* with, the kernel's pagecache or buffer cache (which sit above our block
|
|
|
|
* device).
|
|
|
|
*/
|
|
|
|
struct brd_device {
|
|
|
|
int brd_number;
|
|
|
|
int brd_refcnt;
|
|
|
|
loff_t brd_offset;
|
|
|
|
loff_t brd_sizelimit;
|
|
|
|
unsigned brd_blocksize;
|
|
|
|
|
|
|
|
struct request_queue *brd_queue;
|
|
|
|
struct gendisk *brd_disk;
|
|
|
|
struct list_head brd_list;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Backing store of pages and lock to protect it. This is the contents
|
|
|
|
* of the block device.
|
|
|
|
*/
|
|
|
|
spinlock_t brd_lock;
|
|
|
|
struct radix_tree_root brd_pages;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look up and return a brd's page for a given sector.
|
|
|
|
*/
|
|
|
|
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
|
|
|
|
{
|
|
|
|
pgoff_t idx;
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The page lifetime is protected by the fact that we have opened the
|
|
|
|
* device node -- brd pages will never be deleted under us, so we
|
|
|
|
* don't need any further locking or refcounting.
|
|
|
|
*
|
|
|
|
* This is strictly true for the radix-tree nodes as well (ie. we
|
|
|
|
* don't actually need the rcu_read_lock()), however that is not a
|
|
|
|
* documented feature of the radix-tree API so it is better to be
|
|
|
|
* safe here (we don't have total exclusion from radix tree updates
|
|
|
|
* here, only deletes).
|
|
|
|
*/
|
|
|
|
rcu_read_lock();
|
|
|
|
idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
|
|
|
|
page = radix_tree_lookup(&brd->brd_pages, idx);
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
BUG_ON(page && page->index != idx);
|
|
|
|
|
|
|
|
return page;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look up and return a brd's page for a given sector.
|
|
|
|
* If one does not exist, allocate an empty page, and insert that. Then
|
|
|
|
* return it.
|
|
|
|
*/
|
|
|
|
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
|
|
|
|
{
|
|
|
|
pgoff_t idx;
|
|
|
|
struct page *page;
|
2008-02-08 13:19:50 +01:00
|
|
|
gfp_t gfp_flags;
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently lead to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. It's backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handlinag.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
|
|
|
|
page = brd_lookup_page(brd, sector);
|
|
|
|
if (page)
|
|
|
|
return page;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Must use NOIO because we don't want to recurse back into the
|
|
|
|
* block or filesystem layers from page reclaim.
|
2008-02-08 13:19:50 +01:00
|
|
|
*
|
|
|
|
* Cannot support XIP and highmem, because our ->direct_access
|
|
|
|
* routine for XIP must return memory that is always addressable.
|
|
|
|
* If XIP was reworked to use pfns and kmap throughout, this
|
|
|
|
* restriction might be able to be lifted.
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently lead to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. It's backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handlinag.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
*/
|
2008-02-08 13:19:50 +01:00
|
|
|
gfp_flags = GFP_NOIO | __GFP_ZERO;
|
|
|
|
#ifndef CONFIG_BLK_DEV_XIP
|
|
|
|
gfp_flags |= __GFP_HIGHMEM;
|
|
|
|
#endif
|
2008-04-22 05:36:52 +02:00
|
|
|
page = alloc_page(gfp_flags);
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently lead to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. It's backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handlinag.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
if (!page)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (radix_tree_preload(GFP_NOIO)) {
|
|
|
|
__free_page(page);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_lock(&brd->brd_lock);
|
|
|
|
idx = sector >> PAGE_SECTORS_SHIFT;
|
|
|
|
if (radix_tree_insert(&brd->brd_pages, idx, page)) {
|
|
|
|
__free_page(page);
|
|
|
|
page = radix_tree_lookup(&brd->brd_pages, idx);
|
|
|
|
BUG_ON(!page);
|
|
|
|
BUG_ON(page->index != idx);
|
|
|
|
} else
|
|
|
|
page->index = idx;
|
|
|
|
spin_unlock(&brd->brd_lock);
|
|
|
|
|
|
|
|
radix_tree_preload_end();
|
|
|
|
|
|
|
|
return page;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free all backing store pages and radix tree. This must only be called when
|
|
|
|
* there are no other users of the device.
|
|
|
|
*/
|
|
|
|
#define FREE_BATCH 16
|
|
|
|
static void brd_free_pages(struct brd_device *brd)
|
|
|
|
{
|
|
|
|
unsigned long pos = 0;
|
|
|
|
struct page *pages[FREE_BATCH];
|
|
|
|
int nr_pages;
|
|
|
|
|
|
|
|
do {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
|
|
|
|
(void **)pages, pos, FREE_BATCH);
|
|
|
|
|
|
|
|
for (i = 0; i < nr_pages; i++) {
|
|
|
|
void *ret;
|
|
|
|
|
|
|
|
BUG_ON(pages[i]->index < pos);
|
|
|
|
pos = pages[i]->index;
|
|
|
|
ret = radix_tree_delete(&brd->brd_pages, pos);
|
|
|
|
BUG_ON(!ret || ret != pages[i]);
|
|
|
|
__free_page(pages[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
pos++;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This assumes radix_tree_gang_lookup always returns as
|
|
|
|
* many pages as possible. If the radix-tree code changes,
|
|
|
|
* so will this have to.
|
|
|
|
*/
|
|
|
|
} while (nr_pages == FREE_BATCH);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
|
|
|
|
*/
|
|
|
|
static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
|
|
|
|
{
|
|
|
|
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
|
|
|
|
size_t copy;
|
|
|
|
|
|
|
|
copy = min_t(size_t, n, PAGE_SIZE - offset);
|
|
|
|
if (!brd_insert_page(brd, sector))
|
|
|
|
return -ENOMEM;
|
|
|
|
if (copy < n) {
|
|
|
|
sector += copy >> SECTOR_SHIFT;
|
|
|
|
if (!brd_insert_page(brd, sector))
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copy n bytes from src to the brd starting at sector. Does not sleep.
|
|
|
|
*/
|
|
|
|
static void copy_to_brd(struct brd_device *brd, const void *src,
|
|
|
|
sector_t sector, size_t n)
|
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
void *dst;
|
|
|
|
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
|
|
|
|
size_t copy;
|
|
|
|
|
|
|
|
copy = min_t(size_t, n, PAGE_SIZE - offset);
|
|
|
|
page = brd_lookup_page(brd, sector);
|
|
|
|
BUG_ON(!page);
|
|
|
|
|
|
|
|
dst = kmap_atomic(page, KM_USER1);
|
|
|
|
memcpy(dst + offset, src, copy);
|
|
|
|
kunmap_atomic(dst, KM_USER1);
|
|
|
|
|
|
|
|
if (copy < n) {
|
|
|
|
src += copy;
|
|
|
|
sector += copy >> SECTOR_SHIFT;
|
|
|
|
copy = n - copy;
|
|
|
|
page = brd_lookup_page(brd, sector);
|
|
|
|
BUG_ON(!page);
|
|
|
|
|
|
|
|
dst = kmap_atomic(page, KM_USER1);
|
|
|
|
memcpy(dst, src, copy);
|
|
|
|
kunmap_atomic(dst, KM_USER1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copy n bytes to dst from the brd starting at sector. Does not sleep.
|
|
|
|
*/
|
|
|
|
static void copy_from_brd(void *dst, struct brd_device *brd,
|
|
|
|
sector_t sector, size_t n)
|
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
void *src;
|
|
|
|
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
|
|
|
|
size_t copy;
|
|
|
|
|
|
|
|
copy = min_t(size_t, n, PAGE_SIZE - offset);
|
|
|
|
page = brd_lookup_page(brd, sector);
|
|
|
|
if (page) {
|
|
|
|
src = kmap_atomic(page, KM_USER1);
|
|
|
|
memcpy(dst, src + offset, copy);
|
|
|
|
kunmap_atomic(src, KM_USER1);
|
|
|
|
} else
|
|
|
|
memset(dst, 0, copy);
|
|
|
|
|
|
|
|
if (copy < n) {
|
|
|
|
dst += copy;
|
|
|
|
sector += copy >> SECTOR_SHIFT;
|
|
|
|
copy = n - copy;
|
|
|
|
page = brd_lookup_page(brd, sector);
|
|
|
|
if (page) {
|
|
|
|
src = kmap_atomic(page, KM_USER1);
|
|
|
|
memcpy(dst, src, copy);
|
|
|
|
kunmap_atomic(src, KM_USER1);
|
|
|
|
} else
|
|
|
|
memset(dst, 0, copy);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Process a single bvec of a bio.
|
|
|
|
*/
|
|
|
|
static int brd_do_bvec(struct brd_device *brd, struct page *page,
|
|
|
|
unsigned int len, unsigned int off, int rw,
|
|
|
|
sector_t sector)
|
|
|
|
{
|
|
|
|
void *mem;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
if (rw != READ) {
|
|
|
|
err = copy_to_brd_setup(brd, sector, len);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
mem = kmap_atomic(page, KM_USER0);
|
|
|
|
if (rw == READ) {
|
|
|
|
copy_from_brd(mem + off, brd, sector, len);
|
|
|
|
flush_dcache_page(page);
|
|
|
|
} else
|
|
|
|
copy_to_brd(brd, mem + off, sector, len);
|
|
|
|
kunmap_atomic(mem, KM_USER0);
|
|
|
|
|
|
|
|
out:
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int brd_make_request(struct request_queue *q, struct bio *bio)
|
|
|
|
{
|
|
|
|
struct block_device *bdev = bio->bi_bdev;
|
|
|
|
struct brd_device *brd = bdev->bd_disk->private_data;
|
|
|
|
int rw;
|
|
|
|
struct bio_vec *bvec;
|
|
|
|
sector_t sector;
|
|
|
|
int i;
|
|
|
|
int err = -EIO;
|
|
|
|
|
|
|
|
sector = bio->bi_sector;
|
|
|
|
if (sector + (bio->bi_size >> SECTOR_SHIFT) >
|
|
|
|
get_capacity(bdev->bd_disk))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
rw = bio_rw(bio);
|
|
|
|
if (rw == READA)
|
|
|
|
rw = READ;
|
|
|
|
|
|
|
|
bio_for_each_segment(bvec, bio, i) {
|
|
|
|
unsigned int len = bvec->bv_len;
|
|
|
|
err = brd_do_bvec(brd, bvec->bv_page, len,
|
|
|
|
bvec->bv_offset, rw, sector);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
sector += len >> SECTOR_SHIFT;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
bio_endio(bio, err);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-02-08 13:19:50 +01:00
|
|
|
#ifdef CONFIG_BLK_DEV_XIP
/*
 * ->direct_access: hand out the kernel address and pfn of the backing page
 * for @sector, instantiating the page if it does not exist yet.
 *
 * Returns 0 on success, or:
 *   -ENODEV  the disk has no brd_device attached,
 *   -EINVAL  @sector is not page aligned,
 *   -ERANGE  the page would extend past the end of the device,
 *   -ENOMEM  the backing page could not be instantiated.
 */
static int brd_direct_access(struct block_device *bdev, sector_t sector,
			void **kaddr, unsigned long *pfn)
{
	struct brd_device *brd = bdev->bd_disk->private_data;
	struct page *backing;

	if (!brd)
		return -ENODEV;

	/* Only whole, page-aligned pages can be handed out. */
	if (sector & (PAGE_SECTORS-1))
		return -EINVAL;

	if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk))
		return -ERANGE;

	backing = brd_insert_page(brd, sector);
	if (!backing)
		return -ENOMEM;

	*kaddr = page_address(backing);
	*pfn = page_to_pfn(backing);

	return 0;
}
#endif
|
|
|
|
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
static int brd_ioctl(struct inode *inode, struct file *file,
|
|
|
|
unsigned int cmd, unsigned long arg)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct block_device *bdev = inode->i_bdev;
|
|
|
|
struct brd_device *brd = bdev->bd_disk->private_data;
|
|
|
|
|
|
|
|
if (cmd != BLKFLSBUF)
|
|
|
|
return -ENOTTY;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ram device BLKFLSBUF has special semantics, we want to actually
|
|
|
|
* release and destroy the ramdisk data.
|
|
|
|
*/
|
|
|
|
mutex_lock(&bdev->bd_mutex);
|
|
|
|
error = -EBUSY;
|
|
|
|
if (bdev->bd_openers <= 1) {
|
|
|
|
/*
|
|
|
|
* Invalidate the cache first, so it isn't written
|
|
|
|
* back to the device.
|
|
|
|
*
|
|
|
|
* Another thread might instantiate more buffercache here,
|
|
|
|
* but there is not much we can do to close that race.
|
|
|
|
*/
|
|
|
|
invalidate_bh_lrus();
|
|
|
|
truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
|
|
|
|
brd_free_pages(brd);
|
|
|
|
error = 0;
|
|
|
|
}
|
|
|
|
mutex_unlock(&bdev->bd_mutex);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct block_device_operations brd_fops = {
|
2008-02-08 13:19:50 +01:00
|
|
|
.owner = THIS_MODULE,
|
|
|
|
.ioctl = brd_ioctl,
|
|
|
|
#ifdef CONFIG_BLK_DEV_XIP
|
|
|
|
.direct_access = brd_direct_access,
|
|
|
|
#endif
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently lead to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. It's backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handlinag.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* And now the modules code and kernel interface.
|
|
|
|
*/
|
|
|
|
/* NOTE(review): presumably the number of ramdisks to set up; its users are outside this view -- confirm. */
static int rd_nr;
/*
 * Ramdisk size, defaulting to the Kconfig value CONFIG_BLK_DEV_RAM_SIZE.
 * Intentionally not static, so it is visible outside this file.
 * NOTE(review): units are not established by the code visible here -- confirm.
 */
int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
|
brd: modify ramdisk device to be able to manage partitions
This patch adds partition management for Block RAM Device (BRD).
This patch is done to keep in sync BRD and loop device drivers.
This patch adds a parameter to the module, max_part, to specify
the maximum number of partitions per RAM device.
Example:
# modprobe brd max_part=63
# ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:39 /dev/ram0
brw-rw---- 1 root disk 1, 64 2008-04-03 13:39 /dev/ram1
brw-rw---- 1 root disk 1, 640 2008-04-03 13:39 /dev/ram10
brw-rw---- 1 root disk 1, 704 2008-04-03 13:39 /dev/ram11
brw-rw---- 1 root disk 1, 768 2008-04-03 13:39 /dev/ram12
brw-rw---- 1 root disk 1, 832 2008-04-03 13:39 /dev/ram13
brw-rw---- 1 root disk 1, 896 2008-04-03 13:39 /dev/ram14
brw-rw---- 1 root disk 1, 960 2008-04-03 13:39 /dev/ram15
brw-rw---- 1 root disk 1, 128 2008-04-03 13:39 /dev/ram2
brw-rw---- 1 root disk 1, 192 2008-04-03 13:39 /dev/ram3
brw-rw---- 1 root disk 1, 256 2008-04-03 13:39 /dev/ram4
brw-rw---- 1 root disk 1, 320 2008-04-03 13:39 /dev/ram5
brw-rw---- 1 root disk 1, 384 2008-04-03 13:39 /dev/ram6
brw-rw---- 1 root disk 1, 448 2008-04-03 13:39 /dev/ram7
brw-rw---- 1 root disk 1, 512 2008-04-03 13:39 /dev/ram8
brw-rw---- 1 root disk 1, 576 2008-04-03 13:39 /dev/ram9
# fdisk /dev/ram0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): o
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2, default 1): 1
Last cylinder or +size or +sizeM or +sizeK (1-2, default 2): 2
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
# ls -l /dev/ram0*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:40 /dev/ram0
brw-rw---- 1 root disk 1, 1 2008-04-03 13:40 /dev/ram0p1
# mkfs /dev/ram0p1
mke2fs 1.40-WIP (14-Nov-2006)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
4016 inodes, 16032 blocks
801 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=16515072
2 block groups
8192 blocks per group, 8192 fragments per group
2008 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
# mount /dev/ram0p1 /mnt
df /mnt
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/ram0p1 15521 138 14582 1% /mnt
# ls -l /mnt
total 12
drwx------ 2 root root 12288 2008-04-03 13:41 lost+found
# umount /mnt
# rmmod brd
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 09:55:06 +02:00
|
|
|
/* Maximum number of partitions per RAM device (the "max_part" module parameter). */
static int max_part;
/* NOTE(review): presumably the minor-number shift derived from max_part; it is computed outside this view -- confirm. */
static int part_shift;
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
module_param(rd_nr, int, 0);
|
|
|
|
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
|
|
|
|
module_param(rd_size, int, 0);
|
|
|
|
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
|
brd: modify ramdisk device to be able to manage partitions
This patch adds partition management for Block RAM Device (BRD).
This patch is done to keep in sync BRD and loop device drivers.
This patch adds a parameter to the module, max_part, to specify
the maximum number of partitions per RAM device.
Example:
# modprobe brd max_part=63
# ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:39 /dev/ram0
brw-rw---- 1 root disk 1, 64 2008-04-03 13:39 /dev/ram1
brw-rw---- 1 root disk 1, 640 2008-04-03 13:39 /dev/ram10
brw-rw---- 1 root disk 1, 704 2008-04-03 13:39 /dev/ram11
brw-rw---- 1 root disk 1, 768 2008-04-03 13:39 /dev/ram12
brw-rw---- 1 root disk 1, 832 2008-04-03 13:39 /dev/ram13
brw-rw---- 1 root disk 1, 896 2008-04-03 13:39 /dev/ram14
brw-rw---- 1 root disk 1, 960 2008-04-03 13:39 /dev/ram15
brw-rw---- 1 root disk 1, 128 2008-04-03 13:39 /dev/ram2
brw-rw---- 1 root disk 1, 192 2008-04-03 13:39 /dev/ram3
brw-rw---- 1 root disk 1, 256 2008-04-03 13:39 /dev/ram4
brw-rw---- 1 root disk 1, 320 2008-04-03 13:39 /dev/ram5
brw-rw---- 1 root disk 1, 384 2008-04-03 13:39 /dev/ram6
brw-rw---- 1 root disk 1, 448 2008-04-03 13:39 /dev/ram7
brw-rw---- 1 root disk 1, 512 2008-04-03 13:39 /dev/ram8
brw-rw---- 1 root disk 1, 576 2008-04-03 13:39 /dev/ram9
# fdisk /dev/ram0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): o
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2, default 1): 1
Last cylinder or +size or +sizeM or +sizeK (1-2, default 2): 2
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
# ls -l /dev/ram0*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:40 /dev/ram0
brw-rw---- 1 root disk 1, 1 2008-04-03 13:40 /dev/ram0p1
# mkfs /dev/ram0p1
mke2fs 1.40-WIP (14-Nov-2006)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
4016 inodes, 16032 blocks
801 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=16515072
2 block groups
8192 blocks per group, 8192 fragments per group
2008 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
# mount /dev/ram0p1 /mnt
df /mnt
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/ram0p1 15521 138 14582 1% /mnt
# ls -l /mnt
total 12
drwx------ 2 root root 12288 2008-04-03 13:41 lost+found
# umount /mnt
# rmmod brd
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 09:55:06 +02:00
|
|
|
module_param(max_part, int, 0);
|
|
|
|
MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
MODULE_LICENSE("GPL");
|
|
|
|
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
|
|
|
|
|
|
|
|
#ifndef MODULE
|
|
|
|
/* Legacy boot options - nonmodular */
|
|
|
|
/*
 * Legacy boot-option parser: store the numeric value of @str in rd_size
 * (the per-ramdisk size, in kbytes).  Base 0 lets simple_strtol() accept
 * decimal, octal and hex spellings.  Always returns 1.
 */
static int __init ramdisk_size(char *str)
{
	long kbytes = simple_strtol(str, NULL, 0);

	rd_size = kbytes;
	return 1;
}
|
|
|
|
/*
 * Second entry point for the same legacy option parser; it simply forwards
 * @str to ramdisk_size() and hands back its result.
 */
static int __init ramdisk_size2(char *str)
{
	int handled = ramdisk_size(str);

	return handled;
}
|
|
|
|
/* "ramdisk=" and "ramdisk_size=" both end up in ramdisk_size(), which sets rd_size. */
__setup("ramdisk=", ramdisk_size);
|
|
|
|
__setup("ramdisk_size=", ramdisk_size2);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The device scheme is derived from loop.c. Keep them in synch where possible
|
|
|
|
* (should share code eventually).
|
|
|
|
*/
|
|
|
|
/* List of brd_device structures, linked through their brd_list member. */
static LIST_HEAD(brd_devices);
|
|
|
|
/* Taken by brd_probe() around its brd_init_one() lookup-or-create call. */
static DEFINE_MUTEX(brd_devices_mutex);
|
|
|
|
|
|
|
|
static struct brd_device *brd_alloc(int i)
|
|
|
|
{
|
|
|
|
struct brd_device *brd;
|
|
|
|
struct gendisk *disk;
|
|
|
|
|
|
|
|
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
|
|
|
|
if (!brd)
|
|
|
|
goto out;
|
|
|
|
brd->brd_number = i;
|
|
|
|
spin_lock_init(&brd->brd_lock);
|
|
|
|
INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
|
|
|
|
|
|
|
|
brd->brd_queue = blk_alloc_queue(GFP_KERNEL);
|
|
|
|
if (!brd->brd_queue)
|
|
|
|
goto out_free_dev;
|
|
|
|
blk_queue_make_request(brd->brd_queue, brd_make_request);
|
|
|
|
blk_queue_max_sectors(brd->brd_queue, 1024);
|
|
|
|
blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
|
|
|
|
|
brd: modify ramdisk device to be able to manage partitions
This patch adds partition management for Block RAM Device (BRD).
This patch is done to keep in sync BRD and loop device drivers.
This patch adds a parameter to the module, max_part, to specify
the maximum number of partitions per RAM device.
Example:
# modprobe brd max_part=63
# ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:39 /dev/ram0
brw-rw---- 1 root disk 1, 64 2008-04-03 13:39 /dev/ram1
brw-rw---- 1 root disk 1, 640 2008-04-03 13:39 /dev/ram10
brw-rw---- 1 root disk 1, 704 2008-04-03 13:39 /dev/ram11
brw-rw---- 1 root disk 1, 768 2008-04-03 13:39 /dev/ram12
brw-rw---- 1 root disk 1, 832 2008-04-03 13:39 /dev/ram13
brw-rw---- 1 root disk 1, 896 2008-04-03 13:39 /dev/ram14
brw-rw---- 1 root disk 1, 960 2008-04-03 13:39 /dev/ram15
brw-rw---- 1 root disk 1, 128 2008-04-03 13:39 /dev/ram2
brw-rw---- 1 root disk 1, 192 2008-04-03 13:39 /dev/ram3
brw-rw---- 1 root disk 1, 256 2008-04-03 13:39 /dev/ram4
brw-rw---- 1 root disk 1, 320 2008-04-03 13:39 /dev/ram5
brw-rw---- 1 root disk 1, 384 2008-04-03 13:39 /dev/ram6
brw-rw---- 1 root disk 1, 448 2008-04-03 13:39 /dev/ram7
brw-rw---- 1 root disk 1, 512 2008-04-03 13:39 /dev/ram8
brw-rw---- 1 root disk 1, 576 2008-04-03 13:39 /dev/ram9
# fdisk /dev/ram0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): o
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2, default 1): 1
Last cylinder or +size or +sizeM or +sizeK (1-2, default 2): 2
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
# ls -l /dev/ram0*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:40 /dev/ram0
brw-rw---- 1 root disk 1, 1 2008-04-03 13:40 /dev/ram0p1
# mkfs /dev/ram0p1
mke2fs 1.40-WIP (14-Nov-2006)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
4016 inodes, 16032 blocks
801 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=16515072
2 block groups
8192 blocks per group, 8192 fragments per group
2008 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
# mount /dev/ram0p1 /mnt
df /mnt
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/ram0p1 15521 138 14582 1% /mnt
# ls -l /mnt
total 12
drwx------ 2 root root 12288 2008-04-03 13:41 lost+found
# umount /mnt
# rmmod brd
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 09:55:06 +02:00
|
|
|
disk = brd->brd_disk = alloc_disk(1 << part_shift);
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
if (!disk)
|
|
|
|
goto out_free_queue;
|
|
|
|
disk->major = RAMDISK_MAJOR;
|
brd: modify ramdisk device to be able to manage partitions
This patch adds partition management for Block RAM Device (BRD).
This patch is done to keep in sync BRD and loop device drivers.
This patch adds a parameter to the module, max_part, to specify
the maximum number of partitions per RAM device.
Example:
# modprobe brd max_part=63
# ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:39 /dev/ram0
brw-rw---- 1 root disk 1, 64 2008-04-03 13:39 /dev/ram1
brw-rw---- 1 root disk 1, 640 2008-04-03 13:39 /dev/ram10
brw-rw---- 1 root disk 1, 704 2008-04-03 13:39 /dev/ram11
brw-rw---- 1 root disk 1, 768 2008-04-03 13:39 /dev/ram12
brw-rw---- 1 root disk 1, 832 2008-04-03 13:39 /dev/ram13
brw-rw---- 1 root disk 1, 896 2008-04-03 13:39 /dev/ram14
brw-rw---- 1 root disk 1, 960 2008-04-03 13:39 /dev/ram15
brw-rw---- 1 root disk 1, 128 2008-04-03 13:39 /dev/ram2
brw-rw---- 1 root disk 1, 192 2008-04-03 13:39 /dev/ram3
brw-rw---- 1 root disk 1, 256 2008-04-03 13:39 /dev/ram4
brw-rw---- 1 root disk 1, 320 2008-04-03 13:39 /dev/ram5
brw-rw---- 1 root disk 1, 384 2008-04-03 13:39 /dev/ram6
brw-rw---- 1 root disk 1, 448 2008-04-03 13:39 /dev/ram7
brw-rw---- 1 root disk 1, 512 2008-04-03 13:39 /dev/ram8
brw-rw---- 1 root disk 1, 576 2008-04-03 13:39 /dev/ram9
# fdisk /dev/ram0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): o
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2, default 1): 1
Last cylinder or +size or +sizeM or +sizeK (1-2, default 2): 2
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
# ls -l /dev/ram0*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:40 /dev/ram0
brw-rw---- 1 root disk 1, 1 2008-04-03 13:40 /dev/ram0p1
# mkfs /dev/ram0p1
mke2fs 1.40-WIP (14-Nov-2006)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
4016 inodes, 16032 blocks
801 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=16515072
2 block groups
8192 blocks per group, 8192 fragments per group
2008 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
# mount /dev/ram0p1 /mnt
df /mnt
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/ram0p1 15521 138 14582 1% /mnt
# ls -l /mnt
total 12
drwx------ 2 root root 12288 2008-04-03 13:41 lost+found
# umount /mnt
# rmmod brd
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 09:55:06 +02:00
|
|
|
disk->first_minor = i << part_shift;
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
disk->fops = &brd_fops;
|
|
|
|
disk->private_data = brd;
|
|
|
|
disk->queue = brd->brd_queue;
|
|
|
|
sprintf(disk->disk_name, "ram%d", i);
|
|
|
|
set_capacity(disk, rd_size * 2);
|
|
|
|
|
|
|
|
return brd;
|
|
|
|
|
|
|
|
out_free_queue:
|
|
|
|
blk_cleanup_queue(brd->brd_queue);
|
|
|
|
out_free_dev:
|
|
|
|
kfree(brd);
|
|
|
|
out:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void brd_free(struct brd_device *brd)
|
|
|
|
{
|
|
|
|
put_disk(brd->brd_disk);
|
|
|
|
blk_cleanup_queue(brd->brd_queue);
|
|
|
|
brd_free_pages(brd);
|
|
|
|
kfree(brd);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct brd_device *brd_init_one(int i)
|
|
|
|
{
|
|
|
|
struct brd_device *brd;
|
|
|
|
|
|
|
|
list_for_each_entry(brd, &brd_devices, brd_list) {
|
|
|
|
if (brd->brd_number == i)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
brd = brd_alloc(i);
|
|
|
|
if (brd) {
|
|
|
|
add_disk(brd->brd_disk);
|
|
|
|
list_add_tail(&brd->brd_list, &brd_devices);
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
return brd;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void brd_del_one(struct brd_device *brd)
|
|
|
|
{
|
|
|
|
list_del(&brd->brd_list);
|
|
|
|
del_gendisk(brd->brd_disk);
|
|
|
|
brd_free(brd);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Probe callback used to instantiate a ramdisk on demand.
 *
 * @dev:  device number being looked up.
 * @part: set to 0 on return.
 * @data: unused.
 *
 * Finds or creates the brd_device that owns @dev's minor and returns the
 * kobject obtained from get_disk() on its gendisk, or ERR_PTR(-ENOMEM)
 * when the device could not be created.
 */
static struct kobject *brd_probe(dev_t dev, int *part, void *data)
{
	struct brd_device *brd;
	struct kobject *kobj;

	mutex_lock(&brd_devices_mutex);
	/*
	 * brd_alloc() gives device i the minors starting at i << part_shift,
	 * so the device index is the minor with the partition bits shifted
	 * out.  Passing the raw minor would select (or create) the wrong
	 * ramdisk whenever max_part is non-zero.
	 */
	brd = brd_init_one((dev & MINORMASK) >> part_shift);
	kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM);
	mutex_unlock(&brd_devices_mutex);

	*part = 0;
	return kobj;
}
|
|
|
|
|
|
|
|
static int __init brd_init(void)
|
|
|
|
{
|
|
|
|
int i, nr;
|
|
|
|
unsigned long range;
|
|
|
|
struct brd_device *brd, *next;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* brd module now has a feature to instantiate underlying device
|
|
|
|
* structure on-demand, provided that there is an access dev node.
|
|
|
|
* However, this will not work well with user space tool that doesn't
|
|
|
|
* know about such "feature". In order to not break any existing
|
|
|
|
* tool, we do the following:
|
|
|
|
*
|
|
|
|
* (1) if rd_nr is specified, create that many upfront, and this
|
|
|
|
* also becomes a hard limit.
|
|
|
|
* (2) if rd_nr is not specified, create 1 rd device on module
|
|
|
|
* load, user can further extend brd device by create dev node
|
|
|
|
* themselves and have kernel automatically instantiate actual
|
|
|
|
* device on-demand.
|
|
|
|
*/
|
brd: modify ramdisk device to be able to manage partitions
This patch adds partition management for Block RAM Device (BRD).
This patch is done to keep in sync BRD and loop device drivers.
This patch adds a parameter to the module, max_part, to specify
the maximum number of partitions per RAM device.
Example:
# modprobe brd max_part=63
# ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:39 /dev/ram0
brw-rw---- 1 root disk 1, 64 2008-04-03 13:39 /dev/ram1
brw-rw---- 1 root disk 1, 640 2008-04-03 13:39 /dev/ram10
brw-rw---- 1 root disk 1, 704 2008-04-03 13:39 /dev/ram11
brw-rw---- 1 root disk 1, 768 2008-04-03 13:39 /dev/ram12
brw-rw---- 1 root disk 1, 832 2008-04-03 13:39 /dev/ram13
brw-rw---- 1 root disk 1, 896 2008-04-03 13:39 /dev/ram14
brw-rw---- 1 root disk 1, 960 2008-04-03 13:39 /dev/ram15
brw-rw---- 1 root disk 1, 128 2008-04-03 13:39 /dev/ram2
brw-rw---- 1 root disk 1, 192 2008-04-03 13:39 /dev/ram3
brw-rw---- 1 root disk 1, 256 2008-04-03 13:39 /dev/ram4
brw-rw---- 1 root disk 1, 320 2008-04-03 13:39 /dev/ram5
brw-rw---- 1 root disk 1, 384 2008-04-03 13:39 /dev/ram6
brw-rw---- 1 root disk 1, 448 2008-04-03 13:39 /dev/ram7
brw-rw---- 1 root disk 1, 512 2008-04-03 13:39 /dev/ram8
brw-rw---- 1 root disk 1, 576 2008-04-03 13:39 /dev/ram9
# fdisk /dev/ram0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): o
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2, default 1): 1
Last cylinder or +size or +sizeM or +sizeK (1-2, default 2): 2
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
# ls -l /dev/ram0*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:40 /dev/ram0
brw-rw---- 1 root disk 1, 1 2008-04-03 13:40 /dev/ram0p1
# mkfs /dev/ram0p1
mke2fs 1.40-WIP (14-Nov-2006)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
4016 inodes, 16032 blocks
801 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=16515072
2 block groups
8192 blocks per group, 8192 fragments per group
2008 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
# mount /dev/ram0p1 /mnt
df /mnt
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/ram0p1 15521 138 14582 1% /mnt
# ls -l /mnt
total 12
drwx------ 2 root root 12288 2008-04-03 13:41 lost+found
# umount /mnt
# rmmod brd
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 09:55:06 +02:00
|
|
|
|
|
|
|
part_shift = 0;
|
|
|
|
if (max_part > 0)
|
|
|
|
part_shift = fls(max_part);
|
|
|
|
|
|
|
|
if (rd_nr > 1UL << (MINORBITS - part_shift))
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (rd_nr) {
|
|
|
|
nr = rd_nr;
|
|
|
|
range = rd_nr;
|
|
|
|
} else {
|
|
|
|
nr = CONFIG_BLK_DEV_RAM_COUNT;
|
brd: modify ramdisk device to be able to manage partitions
This patch adds partition management for Block RAM Device (BRD).
This patch is done to keep in sync BRD and loop device drivers.
This patch adds a parameter to the module, max_part, to specify
the maximum number of partitions per RAM device.
Example:
# modprobe brd max_part=63
# ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:39 /dev/ram0
brw-rw---- 1 root disk 1, 64 2008-04-03 13:39 /dev/ram1
brw-rw---- 1 root disk 1, 640 2008-04-03 13:39 /dev/ram10
brw-rw---- 1 root disk 1, 704 2008-04-03 13:39 /dev/ram11
brw-rw---- 1 root disk 1, 768 2008-04-03 13:39 /dev/ram12
brw-rw---- 1 root disk 1, 832 2008-04-03 13:39 /dev/ram13
brw-rw---- 1 root disk 1, 896 2008-04-03 13:39 /dev/ram14
brw-rw---- 1 root disk 1, 960 2008-04-03 13:39 /dev/ram15
brw-rw---- 1 root disk 1, 128 2008-04-03 13:39 /dev/ram2
brw-rw---- 1 root disk 1, 192 2008-04-03 13:39 /dev/ram3
brw-rw---- 1 root disk 1, 256 2008-04-03 13:39 /dev/ram4
brw-rw---- 1 root disk 1, 320 2008-04-03 13:39 /dev/ram5
brw-rw---- 1 root disk 1, 384 2008-04-03 13:39 /dev/ram6
brw-rw---- 1 root disk 1, 448 2008-04-03 13:39 /dev/ram7
brw-rw---- 1 root disk 1, 512 2008-04-03 13:39 /dev/ram8
brw-rw---- 1 root disk 1, 576 2008-04-03 13:39 /dev/ram9
# fdisk /dev/ram0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): o
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2, default 1): 1
Last cylinder or +size or +sizeM or +sizeK (1-2, default 2): 2
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
# ls -l /dev/ram0*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:40 /dev/ram0
brw-rw---- 1 root disk 1, 1 2008-04-03 13:40 /dev/ram0p1
# mkfs /dev/ram0p1
mke2fs 1.40-WIP (14-Nov-2006)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
4016 inodes, 16032 blocks
801 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=16515072
2 block groups
8192 blocks per group, 8192 fragments per group
2008 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
# mount /dev/ram0p1 /mnt
# df /mnt
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/ram0p1 15521 138 14582 1% /mnt
# ls -l /mnt
total 12
drwx------ 2 root root 12288 2008-04-03 13:41 lost+found
# umount /mnt
# rmmod brd
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 09:55:06 +02:00
|
|
|
range = 1UL << (MINORBITS - part_shift);
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently led to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. Its backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handling.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
for (i = 0; i < nr; i++) {
|
|
|
|
brd = brd_alloc(i);
|
|
|
|
if (!brd)
|
|
|
|
goto out_free;
|
|
|
|
list_add_tail(&brd->brd_list, &brd_devices);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* point of no return */
|
|
|
|
|
|
|
|
list_for_each_entry(brd, &brd_devices, brd_list)
|
|
|
|
add_disk(brd->brd_disk);
|
|
|
|
|
|
|
|
blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range,
|
|
|
|
THIS_MODULE, brd_probe, NULL, NULL);
|
|
|
|
|
|
|
|
printk(KERN_INFO "brd: module loaded\n");
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_free:
|
|
|
|
list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
|
|
|
|
list_del(&brd->brd_list);
|
|
|
|
brd_free(brd);
|
|
|
|
}
|
|
|
|
|
|
|
|
unregister_blkdev(RAMDISK_MAJOR, "brd");
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * brd_exit - module unload handler.
 *
 * Computes the `range` value handed to blk_unregister_region(): rd_nr when
 * it is non-zero, otherwise 1UL << (MINORBITS - part_shift).  It then calls
 * brd_del_one() on every entry of the brd_devices list, unregisters the
 * region starting at MKDEV(RAMDISK_MAJOR, 0), and finally releases the
 * RAMDISK_MAJOR block major under the name "ramdisk".
 *
 * The list is walked with the _safe iterator; presumably brd_del_one()
 * unlinks the entry it is given -- confirm against its definition.
 */
static void __exit brd_exit(void)
|
|
|
|
{
|
|
|
|
unsigned long range;
|
|
|
|
struct brd_device *brd, *next;
|
|
|
|
|
brd: modify ramdisk device to be able to manage partitions
This patch adds partition management for Block RAM Device (BRD).
This patch is done to keep in sync BRD and loop device drivers.
This patch adds a parameter to the module, max_part, to specify
the maximum number of partitions per RAM device.
Example:
# modprobe brd max_part=63
# ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:39 /dev/ram0
brw-rw---- 1 root disk 1, 64 2008-04-03 13:39 /dev/ram1
brw-rw---- 1 root disk 1, 640 2008-04-03 13:39 /dev/ram10
brw-rw---- 1 root disk 1, 704 2008-04-03 13:39 /dev/ram11
brw-rw---- 1 root disk 1, 768 2008-04-03 13:39 /dev/ram12
brw-rw---- 1 root disk 1, 832 2008-04-03 13:39 /dev/ram13
brw-rw---- 1 root disk 1, 896 2008-04-03 13:39 /dev/ram14
brw-rw---- 1 root disk 1, 960 2008-04-03 13:39 /dev/ram15
brw-rw---- 1 root disk 1, 128 2008-04-03 13:39 /dev/ram2
brw-rw---- 1 root disk 1, 192 2008-04-03 13:39 /dev/ram3
brw-rw---- 1 root disk 1, 256 2008-04-03 13:39 /dev/ram4
brw-rw---- 1 root disk 1, 320 2008-04-03 13:39 /dev/ram5
brw-rw---- 1 root disk 1, 384 2008-04-03 13:39 /dev/ram6
brw-rw---- 1 root disk 1, 448 2008-04-03 13:39 /dev/ram7
brw-rw---- 1 root disk 1, 512 2008-04-03 13:39 /dev/ram8
brw-rw---- 1 root disk 1, 576 2008-04-03 13:39 /dev/ram9
# fdisk /dev/ram0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): o
Building a new DOS disklabel. Changes will remain in memory only,
until you decide to write them. After that, of course, the previous
content won't be recoverable.
Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-2, default 1): 1
Last cylinder or +size or +sizeM or +sizeK (1-2, default 2): 2
Command (m for help): w
The partition table has been altered!
Calling ioctl() to re-read partition table.
Syncing disks.
# ls -l /dev/ram0*
brw-rw---- 1 root disk 1, 0 2008-04-03 13:40 /dev/ram0
brw-rw---- 1 root disk 1, 1 2008-04-03 13:40 /dev/ram0p1
# mkfs /dev/ram0p1
mke2fs 1.40-WIP (14-Nov-2006)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
4016 inodes, 16032 blocks
801 blocks (5.00%) reserved for the super user
First data block=1
Maximum filesystem blocks=16515072
2 block groups
8192 blocks per group, 8192 fragments per group
2008 inodes per group
Superblock backups stored on blocks:
8193
Writing inode tables: done
Writing superblocks and filesystem accounting information: done
This filesystem will be automatically checked every 26 mounts or
180 days, whichever comes first. Use tune2fs -c or -i to override.
# mount /dev/ram0p1 /mnt
df /mnt
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/ram0p1 15521 138 14582 1% /mnt
# ls -l /mnt
total 12
drwx------ 2 root root 12288 2008-04-03 13:41 lost+found
# umount /mnt
# rmmod brd
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-30 09:55:06 +02:00
|
|
|
range = rd_nr ? rd_nr : 1UL << (MINORBITS - part_shift);
|
rewrite rd
This is a rewrite of the ramdisk block device driver.
The old one is really difficult because it effectively implements a block
device which serves data out of its own buffer cache. It relies on the dirty
bit being set, to pin its backing store in cache, however there are non
trivial paths which can clear the dirty bit (eg. try_to_free_buffers()),
which had recently lead to data corruption. And in general it is completely
wrong for a block device driver to do this.
The new one is more like a regular block device driver. It has no idea about
vm/vfs stuff. It's backing store is similar to the buffer cache (a simple
radix-tree of pages), but it doesn't know anything about page cache (the pages
in the radix tree are not pagecache pages).
There is one slight downside -- direct block device access and filesystem
metadata access goes through an extra copy and gets stored in RAM twice.
However, this downside is only slight, because the real buffercache of the
device is now reclaimable (because we're not playing crazy games with it), so
under memory intensive situations, footprint should effectively be the same --
maybe even a slight advantage to the new driver because it can also reclaim
buffer heads.
The fact that it now goes through all the regular vm/fs paths makes it
much more useful for testing, too.
text data bss dec hex filename
2837 849 384 4070 fe6 drivers/block/rd.o
3528 371 12 3911 f47 drivers/block/brd.o
Text is larger, but data and bss are smaller, making total size smaller.
A few other nice things about it:
- Similar structure and layout to the new loop device handlinag.
- Dynamic ramdisk creation.
- Runtime flexible buffer head size (because it is no longer part of the
ramdisk code).
- Boot / load time flexible ramdisk size, which could easily be extended
to a per-ramdisk runtime changeable size (eg. with an ioctl).
- Can use highmem for the backing store.
[akpm@linux-foundation.org: fix build]
[byron.bbradley@gmail.com: make rd_size non-static]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Byron Bradley <byron.bbradley@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 13:19:49 +01:00
|
|
|
|
|
|
|
list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
|
|
|
|
brd_del_one(brd);
|
|
|
|
|
|
|
|
|
blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range);
|
|
|
|
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Hook brd_init and brd_exit up as this module's init and exit handlers. */
module_init(brd_init);
|
|
|
|
module_exit(brd_exit);
|
|
|
|
|