Btrfs: patch queue: page_mkwrite

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason 2007-06-15 13:50:00 -04:00 committed by David Woodhouse
parent f1ace244c8
commit 9ebefb180b
5 changed files with 328 additions and 159 deletions

View file

@ -1142,6 +1142,8 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
u64 isize);
/* inode.c */
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_delete_inode(struct inode *inode);
void btrfs_read_locked_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, int wait);

View file

@ -155,9 +155,9 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
desc.tfm = tfm;
desc.flags = 0;
sg_init_one(&sg, data, len);
spin_lock(&root->fs_info->hash_lock);
spin_lock_irq(&root->fs_info->hash_lock);
ret = crypto_hash_digest(&desc, &sg, 1, result);
spin_unlock(&root->fs_info->hash_lock);
spin_unlock_irq(&root->fs_info->hash_lock);
if (ret) {
printk("digest failed\n");
}

View file

@ -228,6 +228,7 @@ found:
path->nodes[0]->b_data,
root->fs_info->sb->s_blocksize);
ret = btrfs_csum_data(root, data, len, &item->csum);
// printk("file %lu offset %llu csum %X\n", objectid, (unsigned long long)offset, *(int *)(&item->csum));
btrfs_mark_buffer_dirty(path->nodes[0]);
fail:
btrfs_release_path(root, path);
@ -298,4 +299,3 @@ fail:
mutex_unlock(&root->fs_info->fs_mutex);
return ret;
}

View file

@ -103,10 +103,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
/* FIXME, one block at a time */
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
bh = page_buffers(pages[i]);
if (buffer_mapped(bh) && bh->b_blocknr == 0) {
@ -115,6 +111,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
char *ptr, *kaddr;
u32 datasize;
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
/* create an inline extent, and copy the data in */
path = btrfs_alloc_path();
BUG_ON(!path);
@ -135,24 +135,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_type(ei,
BTRFS_FILE_EXTENT_INLINE);
ptr = btrfs_file_extent_inline_start(ei);
kaddr = kmap_atomic(bh->b_page, KM_USER0);
btrfs_memcpy(root, path->nodes[0]->b_data,
ptr, kaddr + bh_offset(bh),
offset + write_bytes);
kunmap_atomic(kaddr, KM_USER0);
mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
} else if (buffer_mapped(bh)) {
/* csum the file data */
btrfs_csum_file_block(trans, root, inode->i_ino,
pages[i]->index << PAGE_CACHE_SHIFT,
kmap(pages[i]), PAGE_CACHE_SIZE);
kunmap(pages[i]);
ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
mutex_unlock(&root->fs_info->fs_mutex);
}
SetPageChecked(pages[i]);
ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
mutex_unlock(&root->fs_info->fs_mutex);
ret = btrfs_commit_write(file, pages[i], offset,
offset + this_write);
@ -503,7 +498,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
if ((pos & (PAGE_CACHE_SIZE - 1))) {
pinned[0] = grab_cache_page(inode->i_mapping, first_index);
if (!PageUptodate(pinned[0])) {
ret = mpage_readpage(pinned[0], btrfs_get_block);
ret = btrfs_readpage(NULL, pinned[0]);
BUG_ON(ret);
wait_on_page_locked(pinned[0]);
} else {
@ -513,7 +508,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
pinned[1] = grab_cache_page(inode->i_mapping, last_index);
if (!PageUptodate(pinned[1])) {
ret = mpage_readpage(pinned[1], btrfs_get_block);
ret = btrfs_readpage(NULL, pinned[1]);
BUG_ON(ret);
wait_on_page_locked(pinned[1]);
} else {
@ -633,138 +628,6 @@ out:
return num_written ? num_written : err;
}
/*
* FIXME, do this by stuffing the csum we want in the info hanging off
* page->private. For now, verify file csums on read
*/
static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
unsigned long offset, unsigned long size)
{
char *kaddr;
unsigned long left, count = desc->count;
struct inode *inode = page->mapping->host;
if (size > count)
size = count;
if (!PageChecked(page)) {
/* FIXME, do it per block */
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
struct buffer_head *bh;
if (page_has_buffers(page)) {
bh = page_buffers(page);
if (!buffer_mapped(bh)) {
SetPageChecked(page);
goto checked;
}
}
ret = btrfs_csum_verify_file_block(root,
page->mapping->host->i_ino,
page->index << PAGE_CACHE_SHIFT,
kmap(page), PAGE_CACHE_SIZE);
if (ret) {
if (ret != -ENOENT) {
printk("failed to verify ino %lu page %lu ret %d\n",
page->mapping->host->i_ino,
page->index, ret);
memset(page_address(page), 1, PAGE_CACHE_SIZE);
flush_dcache_page(page);
}
}
SetPageChecked(page);
kunmap(page);
}
checked:
/*
* Faults on the destination of a read are common, so do it before
* taking the kmap.
*/
if (!fault_in_pages_writeable(desc->arg.buf, size)) {
kaddr = kmap_atomic(page, KM_USER0);
left = __copy_to_user_inatomic(desc->arg.buf,
kaddr + offset, size);
kunmap_atomic(kaddr, KM_USER0);
if (left == 0)
goto success;
}
/* Do it the slow way */
kaddr = kmap(page);
left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
kunmap(page);
if (left) {
size -= left;
desc->error = -EFAULT;
}
success:
desc->count = count - size;
desc->written += size;
desc->arg.buf += size;
return size;
}
/**
* btrfs_file_aio_read - filesystem read routine, with a mod to csum verify
* @iocb: kernel I/O control block
* @iov: io vector request
* @nr_segs: number of segments in the iovec
* @pos: current file position
*/
static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *filp = iocb->ki_filp;
ssize_t retval;
unsigned long seg;
size_t count;
loff_t *ppos = &iocb->ki_pos;
count = 0;
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *iv = &iov[seg];
/*
* If any segment has a negative length, or the cumulative
* length ever wraps negative then return -EINVAL.
*/
count += iv->iov_len;
if (unlikely((ssize_t)(count|iv->iov_len) < 0))
return -EINVAL;
if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
continue;
if (seg == 0)
return -EFAULT;
nr_segs = seg;
count -= iv->iov_len; /* This segment is no good */
break;
}
retval = 0;
if (count) {
for (seg = 0; seg < nr_segs; seg++) {
read_descriptor_t desc;
desc.written = 0;
desc.arg.buf = iov[seg].iov_base;
desc.count = iov[seg].iov_len;
if (desc.count == 0)
continue;
desc.error = 0;
do_generic_file_read(filp, ppos, &desc,
btrfs_read_actor);
retval += desc.written;
if (desc.error) {
retval = retval ?: desc.error;
break;
}
}
}
return retval;
}
static int btrfs_sync_file(struct file *file,
struct dentry *dentry, int datasync)
{
@ -789,12 +652,25 @@ out:
return ret > 0 ? EIO : ret;
}
static struct vm_operations_struct btrfs_file_vm_ops = {
.nopage = filemap_nopage,
.populate = filemap_populate,
.page_mkwrite = btrfs_page_mkwrite,
};
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
vma->vm_ops = &btrfs_file_vm_ops;
file_accessed(filp);
return 0;
}
struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.aio_read = btrfs_file_aio_read,
.aio_read = generic_file_aio_read,
.write = btrfs_file_write,
.mmap = generic_file_mmap,
.mmap = btrfs_file_mmap,
.open = generic_file_open,
.ioctl = btrfs_ioctl,
.fsync = btrfs_sync_file,

View file

@ -31,6 +31,7 @@
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/bit_spinlock.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@ -561,7 +562,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
goto out;
if (!PageUptodate(page)) {
ret = mpage_readpage(page, btrfs_get_block);
ret = btrfs_readpage(NULL, page);
lock_page(page);
if (!PageUptodate(page)) {
ret = -EIO;
@ -1257,8 +1258,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
path = btrfs_alloc_path();
BUG_ON(!path);
if (create & BTRFS_GET_BLOCK_CREATE) {
WARN_ON(1);
/* this almost but not quite works */
/*
* danger!, this only works if the page is properly up
* to date somehow
*/
trans = btrfs_start_transaction(root, 1);
if (!trans) {
err = -ENOMEM;
@ -1353,7 +1356,6 @@ not_found:
ins.objectid, ins.offset,
ins.offset);
BUG_ON(ret);
SetPageChecked(result->b_page);
btrfs_map_bh_to_logical(root, result, ins.objectid);
}
out:
@ -1374,6 +1376,40 @@ int btrfs_get_block(struct inode *inode, sector_t iblock,
return err;
}
int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
struct buffer_head *result, int create)
{
int ret;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page *page = result->b_page;
u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result);
struct btrfs_csum_item *item;
struct btrfs_path *path = NULL;
mutex_lock(&root->fs_info->fs_mutex);
ret = btrfs_get_block_lock(inode, iblock, result, create);
if (ret)
goto out;
path = btrfs_alloc_path();
item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
if (IS_ERR(item)) {
ret = PTR_ERR(item);
/* a csum that isn't present is a preallocated region. */
if (ret == -ENOENT || ret == -EFBIG)
ret = 0;
result->b_private = 0;
goto out;
}
memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum));
out:
if (path)
btrfs_free_path(path);
mutex_unlock(&root->fs_info->fs_mutex);
return ret;
}
static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *result, int create)
{
@ -1395,9 +1431,198 @@ static int btrfs_prepare_write(struct file *file, struct page *page,
return block_prepare_write(page, from, to, btrfs_get_block);
}
static int btrfs_readpage(struct file *file, struct page *page)
static void buffer_io_error(struct buffer_head *bh)
{
return mpage_readpage(page, btrfs_get_block);
char b[BDEVNAME_SIZE];
printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
bdevname(bh->b_bdev, b),
(unsigned long long)bh->b_blocknr);
}
/*
* I/O completion handler for block_read_full_page() - pages
* which come unlocked at the end of I/O.
*/
static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
unsigned long flags;
struct buffer_head *first;
struct buffer_head *tmp;
struct page *page;
int page_uptodate = 1;
struct inode *inode;
int ret;
BUG_ON(!buffer_async_read(bh));
page = bh->b_page;
inode = page->mapping->host;
if (uptodate) {
void *kaddr;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
if (bh->b_private) {
char csum[BTRFS_CRC32_SIZE];
kaddr = kmap_atomic(page, KM_IRQ0);
ret = btrfs_csum_data(root, kaddr + bh_offset(bh),
bh->b_size, csum);
BUG_ON(ret);
if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) {
u64 offset;
offset = (page->index << PAGE_CACHE_SHIFT) +
bh_offset(bh);
printk("btrfs csum failed ino %lu off %llu\n",
page->mapping->host->i_ino,
(unsigned long long)offset);
memset(kaddr + bh_offset(bh), 1, bh->b_size);
flush_dcache_page(page);
printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum);
}
kunmap_atomic(kaddr, KM_IRQ0);
}
set_buffer_uptodate(bh);
} else {
clear_buffer_uptodate(bh);
if (printk_ratelimit())
buffer_io_error(bh);
SetPageError(page);
}
/*
* Be _very_ careful from here on. Bad things can happen if
* two buffer heads end IO at almost the same time and both
* decide that the page is now completely done.
*/
first = page_buffers(page);
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
do {
if (!buffer_uptodate(tmp))
page_uptodate = 0;
if (buffer_async_read(tmp)) {
BUG_ON(!buffer_locked(tmp));
goto still_busy;
}
tmp = tmp->b_this_page;
} while (tmp != bh);
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
/*
* If none of the buffers had errors and they are all
* uptodate then we can set the page uptodate.
*/
if (page_uptodate && !PageError(page))
SetPageUptodate(page);
unlock_page(page);
return;
still_busy:
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
return;
}
/*
* Generic "read page" function for block devices that have the normal
* get_block functionality. This is most of the block device filesystems.
* Reads the page asynchronously --- the unlock_buffer() and
* set/clear_buffer_uptodate() functions propagate buffer state into the
* page struct once IO has completed.
*/
int btrfs_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
sector_t iblock, lblock;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize;
int nr, i;
int fully_mapped = 1;
BUG_ON(!PageLocked(page));
blocksize = 1 << inode->i_blkbits;
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
head = page_buffers(page);
iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
bh = head;
nr = 0;
i = 0;
do {
if (buffer_uptodate(bh))
continue;
if (!buffer_mapped(bh)) {
int err = 0;
fully_mapped = 0;
if (iblock < lblock) {
WARN_ON(bh->b_size != blocksize);
err = btrfs_get_block_csum(inode, iblock,
bh, 0);
if (err)
SetPageError(page);
}
if (!buffer_mapped(bh)) {
void *kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + i * blocksize, 0, blocksize);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
if (!err)
set_buffer_uptodate(bh);
continue;
}
/*
* get_block() might have updated the buffer
* synchronously
*/
if (buffer_uptodate(bh))
continue;
}
arr[nr++] = bh;
} while (i++, iblock++, (bh = bh->b_this_page) != head);
if (fully_mapped)
SetPageMappedToDisk(page);
if (!nr) {
/*
* All buffers are uptodate - we can set the page uptodate
* as well. But not if get_block() returned an error.
*/
if (!PageError(page))
SetPageUptodate(page);
unlock_page(page);
return 0;
}
/* Stage two: lock the buffers */
for (i = 0; i < nr; i++) {
bh = arr[i];
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_async_read;
set_buffer_async_read(bh);
}
/*
* Stage 3: start the IO. Check for uptodateness
* inside the buffer lock in case another process reading
* the underlying blockdev brought it uptodate (the sct fix).
*/
for (i = 0; i < nr; i++) {
bh = arr[i];
if (buffer_uptodate(bh))
btrfs_end_buffer_async_read(bh, 1);
else
submit_bh(READ, bh);
}
return 0;
}
/*
@ -1424,6 +1649,7 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
struct buffer_head *bh, *head;
const unsigned blocksize = 1 << inode->i_blkbits;
int nr_underway = 0;
struct btrfs_root *root = BTRFS_I(inode)->root;
BUG_ON(!PageLocked(page));
@ -1496,6 +1722,24 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
continue;
}
if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
struct btrfs_trans_handle *trans;
int ret;
u64 off = page->index << PAGE_CACHE_SHIFT;
char *kaddr;
off += bh_offset(bh);
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
kaddr = kmap(page);
ret = btrfs_csum_file_block(trans, root, inode->i_ino,
off, kaddr + bh_offset(bh),
bh->b_size);
kunmap(page);
BUG_ON(ret);
ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
mutex_unlock(&root->fs_info->fs_mutex);
mark_buffer_async_write(bh);
} else {
unlock_buffer(bh);
@ -1617,6 +1861,53 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
return __btrfs_write_full_page(inode, page, wbc);
}
/*
* btrfs_page_mkwrite() is not allowed to change the file size as it gets
* called from a page fault handler when a page is first dirtied. Hence we must
* be careful to check for EOF conditions here. We set the page up correctly
* for a written page which means we get ENOSPC checking when writing into
* holes and correct delalloc and unwritten extent mapping on filesystems that
* support these features.
*
* We are not allowed to take the i_mutex here so we have to play games to
* protect against truncate races as the page could now be beyond EOF. Because
* vmtruncate() writes the inode size before removing pages, once we have the
* page lock we can determine safely if the page is beyond EOF. If it is not
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*/
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
unsigned long end;
loff_t size;
int ret = -EINVAL;
lock_page(page);
wait_on_page_writeback(page);
printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index);
size = i_size_read(inode);
if ((page->mapping != inode->i_mapping) ||
((page->index << PAGE_CACHE_SHIFT) > size)) {
/* page got truncated out from underneath us */
goto out_unlock;
}
/* page is wholly or partially inside EOF */
if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
end = size & ~PAGE_CACHE_MASK;
else
end = PAGE_CACHE_SIZE;
ret = btrfs_prepare_write(NULL, page, 0, end);
if (!ret)
ret = btrfs_commit_write(NULL, page, 0, end);
out_unlock:
unlock_page(page);
return ret;
}
static void btrfs_truncate(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;